# 1. Dot product between two words using Word2Vec embeddings


In [14]:
from gensim.models import KeyedVectors
import numpy as np

# Load Pre-trained Word2Vec Model
def load_word2vec_model(path):
    """Load pre-trained word vectors stored in the binary word2vec format.

    Args:
        path: Location of the binary vectors file.

    Returns:
        The object produced by ``KeyedVectors.load_word2vec_format``.
    """
    keyed_vectors = KeyedVectors.load_word2vec_format(path, binary=True)
    return keyed_vectors

# Get Word Vector
def get_word_vector(word, model):
    """Look up the embedding vector of ``word`` in ``model``.

    Args:
        word: Token to look up; it is matched against the vocabulary as-is.
        model: Object exposing ``key_to_index`` for membership tests and
            item access by word.

    Returns:
        The value of ``model[word]``.

    Raises:
        ValueError: If ``word`` is not present in ``model.key_to_index``.
    """
    # Guard clause: reject out-of-vocabulary words before indexing the model.
    if word not in model.key_to_index:
        raise ValueError(f"Word '{word}' not found in the vocabulary!")
    return model[word]

# Compute Dot Product
def dot_product(vec1, vec2):
    """Return the dot product of ``vec1`` and ``vec2`` as computed by ``np.dot``."""
    product = np.dot(vec1, vec2)
    return product

# Display Embeddings
def display_embeddings(word, vector):
    """Print ``word`` in a header line followed by its ``vector`` on the next line."""
    report = f"Embedding for '{word}':\n{vector}"
    print(report)

# Main script: load the vectors, read two words, and report their dot product.
model_path = "/kaggle/input/word2vec/pytorch/default/1/GoogleNews-vectors-negative300.bin"  # Update with actual path
model = load_word2vec_model(model_path)

# Surrounding whitespace typed by the user is discarded.
word1 = input("Enter the first word: ").strip()
word2 = input("Enter the second word: ").strip()

try:
    # get_word_vector raises ValueError for a word missing from the vocabulary.
    vec1 = get_word_vector(word1, model)
    vec2 = get_word_vector(word2, model)
    result = dot_product(vec1, vec2)
    print(f"Dot product between '{word1}' and '{word2}': {result:.4f}\n")
    # Uncomment to inspect the raw vectors:
    # display_embeddings(word1, vec1)
    # display_embeddings(word2, vec2)
except ValueError as error:
    # Show the "not found" message instead of a traceback.
    print(error)


Enter the first word:  king
Enter the second word:  queen


Dot product between 'king' and 'queen': 5.7224



# 2. Cosine similarity between two words using Word2Vec embeddings


In [15]:
from gensim.models import KeyedVectors

# Load the pre-trained Word2Vec vectors (Google News, binary word2vec format).
model = KeyedVectors.load_word2vec_format(
    '/kaggle/input/word2vec/pytorch/default/1/GoogleNews-vectors-negative300.bin',
    binary=True,
)
from numpy import dot
from numpy.linalg import norm

# Read the two words to compare; surrounding whitespace is discarded.
word1 = input("Enter the first word: ").strip()
word2 = input("Enter the second word: ").strip()

# Indexing the model with an unknown word raises KeyError, so report
# out-of-vocabulary words explicitly instead of ending in a traceback.
# dict.fromkeys removes a duplicate when both inputs are the same word.
missing_words = [
    word for word in dict.fromkeys((word1, word2))
    if word not in model.key_to_index
]

if missing_words:
    for missing_word in missing_words:
        print(f"Word '{missing_word}' not found in the vocabulary!")
else:
    vector1 = model[word1]
    vector2 = model[word2]

    # Cosine similarity: dot product divided by the product of the L2 norms.
    cosine_similarity = dot(vector1, vector2) / (norm(vector1) * norm(vector2))
    print(f"Cosine Similarity between '{word1}' and '{word2}': {cosine_similarity}")


Enter the first word:  king
Enter the second word:  queen


Cosine Similarity between 'king' and 'queen': 0.6510956883430481


# 3. Sentiment analysis on sample comments (positive/negative) using Word2Vec embeddings and a simple neural network with two hidden layers

In [22]:
from gensim.models import KeyedVectors
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load Pre-trained Word2Vec Model
def load_word2vec_model(path):
    """Read a binary word2vec file and return the loaded vectors.

    Args:
        path: Location of the binary vectors file.

    Returns:
        The object produced by ``KeyedVectors.load_word2vec_format``.
    """
    loaded_vectors = KeyedVectors.load_word2vec_format(path, binary=True)
    return loaded_vectors

# Get Sentence Vector
def get_sentence_vector(sentence, model):
    """Average the embeddings of the in-vocabulary words of ``sentence``.

    The sentence is split on whitespace and each token is looked up as-is.

    Args:
        sentence: Text to embed.
        model: Object exposing ``key_to_index``, ``vector_size`` and item
            access by word.

    Returns:
        The element-wise mean of the vectors of the known words, or a zero
        vector of length ``model.vector_size`` when no word is known.
    """
    vocabulary = model.key_to_index
    found_vectors = []
    for token in sentence.split():
        if token in vocabulary:
            found_vectors.append(model[token])

    # Nothing to average: fall back to a zero vector of the embedding size.
    if not found_vectors:
        return np.zeros(model.vector_size)
    return np.mean(found_vectors, axis=0)

# Perform Sentiment Analysis using Neural Network
def sentiment_analysis_nn(texts, labels, model, epochs=5, batch_size=32,
                          test_size=0.2, random_state=42):
    """Train a small feed-forward binary classifier on sentence embeddings.

    Each text is turned into one vector with ``get_sentence_vector``; the
    labels are integer-encoded with ``LabelEncoder`` (classes in sorted
    order, so "negative" -> 0 and "positive" -> 1). The network has two
    hidden ReLU layers (128 and 64 units) and a single sigmoid output, so an
    output above 0.5 corresponds to the class encoded as 1.

    Args:
        texts: Iterable of sentences to train on.
        labels: Iterable of class labels aligned with ``texts``; the sigmoid
            output and binary cross-entropy loss assume two classes.
        model: Word-vector model passed through to ``get_sentence_vector``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.
        test_size: Fraction of the samples held out as validation data.
        random_state: Seed for the train/validation split.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Imported locally so this function does not rely on edits to the
    # notebook's import cell.
    from tensorflow.keras.layers import Input

    X = np.array([get_sentence_vector(text, model) for text in texts])
    le = LabelEncoder()
    y = le.fit_transform(labels)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Declare the input shape with an explicit Input layer; passing
    # `input_shape=` to the first Dense layer makes Keras emit a warning.
    nn_model = Sequential([
        Input(shape=(X.shape[1],)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    # The held-out split is only used to report validation metrics per epoch.
    nn_model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
    )

    return nn_model

# Display Word Embeddings
def display_embeddings(text, model):
    """Print the embedding of every in-vocabulary word of ``text``.

    The text is split on whitespace; words missing from
    ``model.key_to_index`` are skipped silently.
    """
    vocabulary = model.key_to_index
    for token in text.split():
        if token not in vocabulary:
            continue
        print(f"Embedding for '{token}':\n{model[token]}\n")

# Main script: load the embeddings and train the classifier on toy data.
model_path = "/kaggle/input/word2vec/pytorch/default/1/GoogleNews-vectors-negative300.bin"  # Update with actual path
word2vec_model = load_word2vec_model(model_path)

# Hand-written comments; sample_labels[i] is the label of sample_texts[i].
sample_texts = [
    "I love this movie",
    "This product is terrible",
    "Amazing experience",
    "Not good at all",
]
sample_labels = [
    "positive",
    "negative",
    "positive",
    "negative",
]

# Fit the neural network on the labelled samples.
nn_model = sentiment_analysis_nn(sample_texts, sample_labels, word2vec_model)

# Comment out the lines printing embeddings
# for text in sample_texts:
#     print(f"Text: {text}")
#     display_embeddings(text, word2vec_model)
#     print("--------------------------------------------------")

# Take a user sentence for prediction
user_sentence = input("Enter a sentence to analyze sentiment: ").strip()

# get_sentence_vector falls back to an all-zero vector when none of the words
# is in the vocabulary; a prediction on that vector says nothing about the
# sentence, so report the situation instead of printing a misleading label.
known_words = [
    word for word in user_sentence.split()
    if word in word2vec_model.key_to_index
]

if not known_words:
    print(f"Cannot predict sentiment for '{user_sentence}': no words found in the vocabulary.")
else:
    # The network expects a batch, so reshape the single vector to (1, n_features).
    user_vector = get_sentence_vector(user_sentence, word2vec_model).reshape(1, -1)
    prediction = nn_model.predict(user_vector)
    # The sigmoid output is read as "positive" above 0.5, "negative" otherwise.
    sentiment = "positive" if prediction[0][0] > 0.5 else "negative"

    print(f"Predicted sentiment for '{user_sentence}': {sentiment}")


Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.3333 - loss: 0.6898 - val_accuracy: 0.0000e+00 - val_loss: 0.7231
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 1.0000 - loss: 0.6468 - val_accuracy: 0.0000e+00 - val_loss: 0.7380
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 1.0000 - loss: 0.6146 - val_accuracy: 0.0000e+00 - val_loss: 0.7527
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 1.0000 - loss: 0.5885 - val_accuracy: 0.0000e+00 - val_loss: 0.7669
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 1.0000 - loss: 0.5625 - val_accuracy: 0.0000e+00 - val_loss: 0.7773


Enter a sentence to analyze sentiment:  That movie is so good


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
Predicted sentiment for 'That movie is so good': positive
