##Find Similar Words Using Word Embeddings

###1. Task: Load pre-trained word embeddings and take any input word from the user.
###Return top 5 similar words.

In [9]:
# Install Gensim
!pip install gensim -q

# Imports
from gensim.models import KeyedVectors
from gensim.downloader import load

# Load pre-trained word vectors
model = load("glove-wiki-gigaword-50")
print("Model Loaded Successfully!")

# Function to find similar words
def find_similar_words(word, topn=5):
    try:
        similar = model.most_similar(word, topn=topn)
        return similar
    except KeyError:
        return f"'{word}' not found in vocabulary."

# Input word
word = input("Enter a word: ")
result = find_similar_words(word)

# Display results
print("\nTop similar words:")
if isinstance(result, str):
    print(result)
else:
    for w, score in result:
        print(f"{w} --> similarity: {score:.4f}")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.9/27.9 MB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
Model Loaded Successfully!
Enter a word: hello

Top similar words:
goodbye --> similarity: 0.8538
hey --> similarity: 0.8074
! --> similarity: 0.7951
kiss --> similarity: 0.7892
wow --> similarity: 0.7641


###2. Task:
###Build a small word embedding model using FastText and use it to find similar words for any given input word, even if the word was not present in the training dataset.

In [10]:
# Install gensim
!pip install gensim -q

# Import FastText
from gensim.models import FastText

# Example sentences
sentences = [
    ["i", "love", "machine", "learning"],
    ["word", "embeddings", "are", "useful"],
    ["fasttext", "handles", "unknown", "words"]
]

# Train FastText model
model = FastText(sentences, vector_size=50, min_count=1)

# Find words similar to "learning"
similar_words = model.wv.most_similar("learning")
print(similar_words)


[('embeddings', 0.23739147186279297), ('word', 0.15370918810367584), ('fasttext', 0.07506413757801056), ('handles', 0.010171581991016865), ('i', -0.004249247722327709), ('are', -0.0609765499830246), ('love', -0.16465850174427032), ('machine', -0.19357778131961823), ('words', -0.23307032883167267), ('useful', -0.2368934452533722)]


##1. Using the same GloVe word embedding model, create a program that answers word analogies like:
###“Man is to Woman as King is to ___ ?”

In [11]:
# Install gensim if not already installed
!pip install gensim -q

# Imports
from gensim.downloader import load

# Load pretrained embeddings
model = load("glove-wiki-gigaword-50")
print("Model Loaded Successfully!")

# Function to solve analogies: A is to B as C is to ?
def solve_analogy(a, b, c):
    try:
        result = model.most_similar(positive=[b, c], negative=[a], topn=1)
        return result[0]
    except KeyError:
        return "One or more words not in vocabulary."

# User input
a = input("Enter word A (e.g., man): ")
b = input("Enter word B (e.g., woman): ")
c = input("Enter word C (e.g., king): ")

# Solve analogy
res = solve_analogy(a, b, c)
print("\nAnalogy Result:")
print(res)


Model Loaded Successfully!
Enter word A (e.g., man): man
Enter word B (e.g., woman): woman
Enter word C (e.g., king): king

Analogy Result:
('queen', 0.8523604273796082)


##2. Given 3–5 words, find the one that does NOT match the others using word embeddings.

In [12]:
# Install gensim if not already installed
!pip install gensim -q

# Imports
from gensim.downloader import load

# Load pretrained embeddings
model = load("glove-wiki-gigaword-50")
print("Model Loaded Successfully!")

# Function to find the odd one out
def find_odd_one(words):
    try:
        return model.doesnt_match(words)
    except KeyError:
        return "Some words not found."

# Input words
words = input("Enter comma-separated words: ").split(",")
words = [w.strip() for w in words]

# Find odd one
result = find_odd_one(words)
print("\nOdd one out:", result)


Model Loaded Successfully!
Enter comma-separated words: hello,world

Odd one out: hello


##3. Using GloVe embeddings, compute the similarity between two sentences.

In [13]:
# Install gensim if not already installed
!pip install gensim -q

# Imports
from gensim.downloader import load
import numpy as np

# Load pretrained embeddings
model = load("glove-wiki-gigaword-50")
print("Model Loaded Successfully!")

# Function to compute sentence vector
def sentence_vector(sentence):
    words = sentence.lower().split()
    word_vecs = [model[w] for w in words if w in model]

    if not word_vecs:
        return np.zeros(50)
    return np.mean(word_vecs, axis=0)

# Function to compute cosine similarity between two sentences
def similarity(s1, s2):
    v1, v2 = sentence_vector(s1), sentence_vector(s2)
    sim = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    return sim

# Input sentences
s1 = input("Sentence 1: ")
s2 = input("Sentence 2: ")

# Compute similarity
print("\nSentence Similarity:", similarity(s1, s2))


Model Loaded Successfully!
Sentence 1: hello man
Sentence 2: how are you

Sentence Similarity: 0.63890004


##Word Similarity Using Different Library (spaCy)
###1. Use spaCy instead of Gensim to compute similarity between two words.

In [14]:
# Install SpaCy and download model
!pip install -q spacy
!python -m spacy download en_core_web_md

# Import and load model
import spacy
nlp = spacy.load("en_core_web_md")

# Input words
w1 = input("Enter first word: ")
w2 = input("Enter second word: ")

# Convert words to SpaCy tokens
word1 = nlp(w1)
word2 = nlp(w2)

# Compute similarity
print("\nSimilarity Score:", word1.similarity(word2))


Collecting en-core-web-md==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl (33.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.5/33.5 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: Operation cancelled by user[0m[31m
[0m

OSError: [E050] Can't find model 'en_core_web_md'. It doesn't seem to be a Python package or a valid path to a data directory.