In [None]:
from gensim.models import Word2Vec
import pandas as pd
import nltk
import numpy as np
from utils import data_science_utils
from utils.data_manager import DataManager

In [None]:
# Load the model registered under Models.W2V through the project's DataManager.
# NOTE(review): `model` is reassigned a couple of cells further down
# (from `full_model`), so this load looks unused on a top-to-bottom run — confirm.
model = DataManager.getModel(data_science_utils.Models.W2V)

In [None]:
# Fetch the Models.NEWS_VECT entry through DataManager and keep only its
# 'title' item.
full_model = DataManager.getModel(data_science_utils.Models.NEWS_VECT)['title']

In [None]:
# Pick the 'news_vector' item nested under 'title'; this replaces whatever
# `model` was bound to before.
# NOTE(review): `full_model` was already indexed with ['title'] when it was
# assigned, so 'title' is applied a second time here — confirm the nesting
# really is title -> title -> news_vector.
model = full_model['title']['news_vector']

In [None]:
# Quick look at the vocabulary: materialise the words and print the first 100.
words = list(model.wv.vocab)
print(words[:100])

In [None]:
# Look up the vector of every vocabulary word and collect them in one matrix.
# The lookup goes through `model.wv`, the same accessor used for the
# vocabulary; indexing the model object directly is the deprecated gensim 3.x
# path and only forwards to `model.wv` anyway.
X = model.wv[model.wv.vocab]
df = pd.DataFrame(X)

# Only the last expression of a cell is rendered, so the shape has to be
# printed explicitly or it is silently discarded.
print(df.shape)
df.head()

In [None]:
# Correlation matrix between the columns of df.
X_corr = df.corr()

# Computing eigenvalues and eigenvectors.
# The correlation matrix is symmetric, so eigh is used: it guarantees real
# eigenvalues/eigenvectors, whereas the general-purpose eig may return
# complex output because of round-off.
values, vectors = np.linalg.eigh(X_corr)

# Sorting the eigenvalues, and the eigenvectors (columns) corresponding to
# them, in descending order of eigenvalue.
args = (-values).argsort()
values = values[args]  # was `vectors[args]`, which clobbered the eigenvalues
vectors = vectors[:, args]

# Taking the first 2 components, which explain the most variance, for projecting.
new_vectors = vectors[:, :2]

# Projecting onto the new 2-axis space.
# NOTE(review): the eigenvectors come from the correlation matrix, but X is
# projected without centering/standardising — confirm this is intended.
neww_X = np.dot(X, new_vectors)

In [None]:
import matplotlib.pyplot as plt
# Scatter the 2-D projection and label every point with its word.
plt.figure(figsize=(13, 7))
plt.scatter(neww_X[:, 0], neww_X[:, 1], linewidths=3, color='blue')
plt.xlabel("PC1", size=8)
plt.ylabel("PC2", size=8)
plt.title("Word Embedding Space", size=20)

# Rows of neww_X follow the iteration order of the vocabulary, so the row
# index doubles as the word index.
vocab = list(model.wv.vocab)
for idx, token in enumerate(vocab):
    plt.annotate(token, xy=(neww_X[idx, 0], neww_X[idx, 1]))

In [None]:
def tsne_plot(model):
    """Embed every vocabulary word in 2-D with t-SNE and plot the result.

    Each word in ``model.wv.vocab`` is drawn as one point and annotated with
    the word itself on a 16x16 figure, which is then shown.

    Args:
        model: Object exposing ``wv.vocab`` (iterable of words) and
            ``wv[word]`` (vector lookup) — presumably a gensim 3.x Word2Vec
            model; confirm against the caller.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    # NOTE(review): `TSNE` is not imported anywhere in the visible notebook;
    # it has to be in scope already (e.g. sklearn.manifold.TSNE) or this
    # raises NameError on a fresh kernel.
    labels = list(model.wv.vocab)

    # Look vectors up through `model.wv` (indexing the model object itself is
    # the deprecated gensim 3.x path) and hand t-SNE a real 2-D array: newer
    # scikit-learn reads `X.shape` before validating, which fails on a list.
    tokens = np.array([model.wv[word] for word in labels])

    tsne_model = TSNE(perplexity=40, n_components=2, init='pca', n_iter=2500, random_state=23)
    new_values = tsne_model.fit_transform(tokens)

    x = new_values[:, 0]
    y = new_values[:, 1]

    plt.figure(figsize=(16, 16))
    # One scatter call per point is kept on purpose: it gives every word its
    # own colour from the colour cycle, as before.
    for label, x_pos, y_pos in zip(labels, x, y):
        plt.scatter(x_pos, y_pos)
        plt.annotate(label,
                     xy=(x_pos, y_pos),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')
    plt.show()

In [None]:
# Draw the t-SNE projection for the model currently bound to `model`.
tsne_plot(model)