<a href="https://colab.research.google.com/github/Harika-Mullaguri/NLP/blob/main/WORD_EMBEDDINGS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install spacy



In [2]:
!pip install gensim

Collecting gensim
  Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (27.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m27.9/27.9 MB[0m [31m41.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gensim
Successfully installed gensim-4.4.0


In [3]:
from gensim.models import Word2Vec
import spacy
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import gradio as gr

In [4]:
# Toy training corpus for Word2Vec: four pre-tokenised "sentences", each one a
# group of related lowercase words (royalty/people, fruit, places, transport).
sentences = [
    group.split()
    for group in (
        "king queen man woman",
        "apple banana mango fruit",
        "paris france tokyo japan",
        "car bus train transport",
    )
]

In [5]:
# Train a skip-gram (sg=1) Word2Vec model on the toy corpus: 50-dimensional
# vectors, context window of 3, and min_count=1 so every word is kept.
# seed + workers=1 make the training repeatable across "Restart & Run All";
# without them the vectors (and every result shown in the UI) change per run.
# NOTE(review): gensim's docs also mention PYTHONHASHSEED for full determinism
# across interpreter launches — confirm if exact reproducibility matters.
model = Word2Vec(
    sentences,
    vector_size=50,
    window=3,
    min_count=1,
    sg=1,
    seed=42,
    workers=1,
)

In [6]:
# Load spaCy's "en_core_web_sm" pipeline; similarity() calls it to score word pairs.
# NOTE(review): this assumes the en_core_web_sm package is already installed in the
# runtime (no download step is visible in this notebook) — confirm on a fresh kernel.
nlp = spacy.load("en_core_web_sm")

In [7]:
def word_vector(word):
    """Look up the trained embedding for ``word``.

    Args:
        word: Key to look up in the module-level ``model``'s ``wv`` table.

    Returns:
        The embedding converted with ``.tolist()``, or the message
        ``"'<word>' not in vocabulary!"`` when the lookup raises ``KeyError``.
    """
    try:
        embedding = model.wv[word]
        return embedding.tolist()
    except KeyError:
        return "'{}' not in vocabulary!".format(word)

In [8]:
def most_similar(word):
    """Return the five nearest neighbours of ``word`` in the trained model.

    Args:
        word: Key passed to ``model.wv.most_similar``.

    Returns:
        Whatever ``model.wv.most_similar(word, topn=5)`` returns (presumably
        ``(word, score)`` pairs), or the message
        ``"'<word>' not in vocabulary!"`` when the lookup raises ``KeyError``.
    """
    try:
        neighbours = model.wv.most_similar(word, topn=5)
    except KeyError:
        neighbours = "'{}' not in vocabulary!".format(word)
    return neighbours

In [9]:
def analogy(word1, word2, word3):
    """Solve the analogy ``word1 - word2 + word3`` with the trained model.

    Args:
        word1: First positive term.
        word2: Term to subtract (passed as the only negative term).
        word3: Second positive term.

    Returns:
        A string showing the expression and the single best match reported by
        ``model.wv.most_similar``, or ``"Error: <message>"`` when that call
        raises ``KeyError``.
    """
    try:
        matches = model.wv.most_similar(
            positive=[word1, word3],
            negative=[word2],
            topn=1,
        )
        top_hit = matches[0]
        # First element of the top hit is used as the answer word.
        return "{} - {} + {} ‚âà {}".format(word1, word2, word3, top_hit[0])
    except KeyError as e:
        return "Error: " + str(e)

def similarity(word1, word2):
    """Score how similar two words are using the spaCy pipeline ``nlp``.

    Note that this uses the module-level spaCy pipeline, not the Word2Vec
    ``model`` the other helpers query.

    Args:
        word1: First text to compare.
        word2: Second text to compare.

    Returns:
        ``"Similarity(<word1>, <word2>) = <score>"`` with the score formatted
        to three decimals, or ``"Error calculating similarity."`` if the
        computation raises an ordinary exception.
    """
    try:
        score = nlp(word1).similarity(nlp(word2))
        return f"Similarity({word1}, {word2}) = {score:.3f}"
    except Exception:
        # Deliberately best-effort for the UI, but catch Exception rather than
        # using a bare except so KeyboardInterrupt/SystemExit still propagate.
        return "Error calculating similarity."

In [10]:
def visualize():
    """Plot a 2-D PCA projection of a fixed set of word embeddings.

    Looks up eight hard-coded words in the module-level ``model``, reduces
    their vectors to two principal components, draws a labelled scatter plot,
    and writes it to ``embedding.png`` in the current working directory
    (overwriting any previous file of that name).

    Returns:
        The path of the saved image, ``"embedding.png"``.
    """
    words = ["king", "queen", "man", "woman", "apple", "banana", "car", "bus"]
    word_vectors = [model.wv[w] for w in words]
    result = PCA(n_components=2).fit_transform(word_vectors)

    # Use an explicit figure and always close it: this function runs on every
    # button click, and figures opened through pyplot stay alive until closed.
    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        ax.scatter(result[:, 0], result[:, 1])
        for word, point in zip(words, result):
            ax.annotate(word, xy=(point[0], point[1]))
        ax.set_title("Word Embedding Visualization")
        fig.savefig("embedding.png")
    finally:
        plt.close(fig)
    return "embedding.png"

In [11]:
import gradio as gr

In [12]:
# Build the Gradio interface: one tab per helper function defined earlier in
# the notebook. Components are created in display order inside each tab.
with gr.Blocks() as demo:
    gr.Markdown("## üß† Word Embedding Playground")

    # Tab: raw embedding lookup. The textbox's submit event calls word_vector
    # and shows the result in a JSON component.
    with gr.Tab("Word Vector"):
        word_in = gr.Textbox(label="Enter a word")
        word_out = gr.JSON(label="Vector")
        word_in.submit(word_vector, word_in, word_out)

    # Tab: nearest neighbours. The textbox's submit event calls most_similar.
    with gr.Tab("Most Similar"):
        sim_in = gr.Textbox(label="Enter a word")
        sim_out = gr.JSON(label="Top 5 similar words")
        sim_in.submit(most_similar, sim_in, sim_out)

    # Tab: analogy. Inputs are passed as [w1, w2, w3], matching
    # analogy(word1, word2, word3), where word2 is the subtracted term.
    with gr.Tab("Analogy"):
        w1 = gr.Textbox(label="Word 1")
        w2 = gr.Textbox(label="Word 2 (to subtract)")
        w3 = gr.Textbox(label="Word 3")
        ana_out = gr.Textbox(label="Result")
        btn1 = gr.Button("Compute Analogy")
        btn1.click(analogy, [w1, w2, w3], ana_out)

    # Tab: pairwise similarity. The button click calls similarity(s1, s2).
    with gr.Tab("Similarity"):
        s1 = gr.Textbox(label="Word 1")
        s2 = gr.Textbox(label="Word 2")
        simscore = gr.Textbox(label="Similarity Score")
        btn2 = gr.Button("Check Similarity")
        btn2.click(similarity, [s1, s2], simscore)

    # Tab: PCA plot. visualize takes no inputs (None) and returns an image
    # path, which is routed to the Image component.
    with gr.Tab("Visualization"):
        viz_btn = gr.Button("Show Embedding Plot")
        viz_img = gr.Image(label="2D PCA Projection")
        viz_btn.click(visualize, None, viz_img)

In [13]:
# Start the Gradio app built above. The recorded output of this cell reports that,
# on a hosted notebook, Gradio switched to share=True and published a public URL.
demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://69f04a533f0978ea42.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


