In [None]:
# GloVe & Word2Vec embeddings with an ANN classifier

import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Flatten, Embedding, Dropout
from keras.optimizers import Adam
import gensim.downloader as api

# Pre-trained word vectors, fetched by name through gensim's downloader API.
glove_model = api.load("glove-wiki-gigaword-100")
word2vec_model = api.load("word2vec-google-news-300")

# Toy corpus for binary classification: each sentence is paired with its label,
# then unzipped into the parallel `sentences` / `labels` lists used below.
labelled_examples = [
    ("the cat sat on the mat", 0),
    ("the dog ate my homework", 1),
    ("the sun is shining", 0),
]
sentences = [text for text, _label in labelled_examples]
labels = [label for _text, label in labelled_examples]

# Function to preprocess sentences using embeddings
def preprocess_sentences(sentences, model):
    """Convert sentences into a zero-padded tensor of per-word embeddings.

    Each sentence is split on whitespace and every word is looked up in
    ``model``. Words missing from ``model`` and the padding positions after
    the end of shorter sentences are left as all-zero vectors.

    Args:
        sentences: Iterable of strings. It is materialised once, so a
            generator is accepted.
        model: Embedding lookup supporting ``word in model``, ``model[word]``
            and a ``vector_size`` attribute.

    Returns:
        A tuple ``(embeddings, max_length)`` where ``embeddings`` is a
        float32 array of shape ``(n_sentences, max_length, model.vector_size)``
        and ``max_length`` is the word count of the longest sentence
        (0 when ``sentences`` is empty).
    """
    tokenized = [sentence.split() for sentence in sentences]
    # default=0 keeps an empty corpus from raising inside max().
    max_length = max((len(words) for words in tokenized), default=0)

    # Allocate the rectangular result up front. Building a NumPy array from
    # per-sentence lists of different lengths is ragged, which recent NumPy
    # versions reject with a ValueError.
    embeddings = np.zeros(
        (len(tokenized), max_length, model.vector_size), dtype=np.float32
    )
    for row, words in enumerate(tokenized):
        for col, word in enumerate(words):
            if word in model:
                embeddings[row, col] = model[word]
            # Out-of-vocabulary words keep their zero vector.
    return embeddings, max_length

# Preprocess sentences using GloVe embeddings
glove_embeddings, max_length_glove = preprocess_sentences(sentences, glove_model)

# Preprocess sentences using Word2Vec embeddings
word2vec_embeddings, max_length_word2vec = preprocess_sentences(sentences, word2vec_model)

# Pad sequences to ensure uniform length.
# dtype='float32' is required: pad_sequences defaults to dtype='int32', which
# would truncate the real-valued embedding components to integers.
from keras.layers import Input
from keras.preprocessing.sequence import pad_sequences
glove_embeddings = pad_sequences(glove_embeddings, maxlen=max_length_glove, padding='post', dtype='float32')
word2vec_embeddings = pad_sequences(word2vec_embeddings, maxlen=max_length_word2vec, padding='post', dtype='float32')

# Convert the label list to an array once so Keras receives array targets.
label_array = np.asarray(labels)

# Split data into train and test sets. Both calls use the same random_state and
# the same number of rows, so y_train / y_test from the first split are reused
# for the Word2Vec features.
X_train_glove, X_test_glove, y_train, y_test = train_test_split(glove_embeddings, label_array, test_size=0.2, random_state=42)
X_train_word2vec, X_test_word2vec, _, _ = train_test_split(word2vec_embeddings, label_array, test_size=0.2, random_state=42)


def build_ann(input_shape):
    """Build and compile the dense binary classifier used for both embeddings.

    The inputs are already dense word vectors, so no Embedding layer is used
    (an Embedding layer expects integer token indices, not float vectors).

    Args:
        input_shape: Per-sample shape, ``(max_length, embedding_dim)``.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output.
    """
    model = Sequential([
        Input(shape=tuple(input_shape)),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model


# Build and train ANN model with GloVe embeddings
model_glove = build_ann(glove_embeddings.shape[1:])
model_glove.fit(X_train_glove, y_train, validation_data=(X_test_glove, y_test), epochs=10, batch_size=32)

# Build and train ANN model with Word2Vec embeddings
model_word2vec = build_ann(word2vec_embeddings.shape[1:])
model_word2vec.fit(X_train_word2vec, y_train, validation_data=(X_test_word2vec, y_test), epochs=10, batch_size=32)


In [None]:
# Simple GloVe and Word2Vec sentence embeddings with PCA

import gensim.downloader as api
from sklearn.decomposition import PCA
import numpy as np

# Pre-trained vectors, loaded by name through gensim's downloader API
# (the same two models the ANN cell above loads).
glove_model = api.load("glove-wiki-gigaword-100")
word2vec_model = api.load("word2vec-google-news-300")

# Three short word groups used as example "sentences".
sentences = ["dog cat pet", "blue sky", "banana apple fruit"]

# Sentence embedding = sum of the sentence's in-vocabulary word vectors
def preprocess_glove(sentences, model):
    """Embed each sentence as the sum of its known word vectors.

    Args:
        sentences: Iterable of whitespace-separated strings.
        model: Embedding lookup supporting ``word in model``, ``model[word]``
            and a ``vector_size`` attribute.

    Returns:
        ``np.array`` with one summed vector per sentence. A sentence with no
        in-vocabulary words contributes an all-zero vector.
    """
    sentence_vectors = []
    for text in sentences:
        total = np.zeros(model.vector_size)
        # Collect the vectors of known words first, then accumulate in order.
        known_vectors = [model[token] for token in text.split() if token in model]
        for vec in known_vectors:
            total += vec
        sentence_vectors.append(total)
    return np.array(sentence_vectors)

# Sum-of-vectors sentence embeddings from the GloVe model, shown under a heading.
glove_embeddings = preprocess_glove(sentences, glove_model)
print("GloVe Embeddings:", glove_embeddings, sep="\n")

# Sentence embedding = sum of the sentence's in-vocabulary word vectors
def preprocess_word2vec(sentences, model):
    """Embed each sentence as the sum of its known word vectors.

    Args:
        sentences: Iterable of whitespace-separated strings.
        model: Embedding lookup supporting ``word in model``, ``model[word]``
            and a ``vector_size`` attribute.

    Returns:
        ``np.array`` with one summed vector per sentence. Words that are not
        in ``model`` are skipped.
    """
    def _sum_known(text):
        # Start from zeros so a sentence with no known words stays all-zero.
        acc = np.zeros(model.vector_size)
        for token in (tok for tok in text.split() if tok in model):
            acc += model[token]
        return acc

    return np.array([_sum_known(text) for text in sentences])

# Sum-of-vectors sentence embeddings from the Word2Vec model, shown under a heading.
word2vec_embeddings = preprocess_word2vec(sentences, word2vec_model)
print("\nWord2Vec Embeddings:", word2vec_embeddings, sep="\n")

# Dimensionality reduction with PCA
def apply_pca(embeddings, n_components=2):
    """Fit a scikit-learn PCA on ``embeddings`` and return the transformed data.

    Args:
        embeddings: Data passed straight to ``PCA.fit_transform``.
        n_components: Forwarded to the ``PCA`` constructor (default 2).

    Returns:
        The result of ``PCA(n_components=n_components).fit_transform(embeddings)``.
    """
    reducer = PCA(n_components=n_components)
    return reducer.fit_transform(embeddings)

# Reduce the GloVe sentence embeddings with PCA and show the result.
glove_pca_result = apply_pca(glove_embeddings)
print("\nPCA Result for GloVe Embeddings:", glove_pca_result, sep="\n")

# Same reduction for the Word2Vec sentence embeddings.
word2vec_pca_result = apply_pca(word2vec_embeddings)
print("\nPCA Result for Word2Vec Embeddings:", word2vec_pca_result, sep="\n")
