### Importing the Libraries

In [1]:
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
from matplotlib import pyplot as plt

import gensim.downloader as api
from gensim.models import Word2Vec

from sklearn.decomposition import PCA

### Training the Model

In [3]:
corpus = api.load('text8')
corpus = [word  for word in corpus]

corpora = ''
for corp in corpus:
  corpora += ' '.join(corp)

print('Corpus Size : ' , len(corpora.split(' ')))
print('Vocab Size  : ' , len(set(corpora.split(' '))))

del corpora

model = Word2Vec( corpus, min_count = 2000, vector_size = 300, window = 3, sg = 0, epochs = 5)

Corpus Size :  17003507
Vocab Size  :  255310


### Reducing the Dimensions

In [30]:
X     = [model.wv[word] for word in model.wv.index_to_key]
words = list(model.wv.index_to_key)

print('Dimensions of each Vector | Before : ', len(X[0]))

pca     = PCA(n_components = 2)
result  = pca.fit_transform(X)

print('Dimensions of each Vector | After  : ', len(result[0]))

data = {'x' : result[:,0], 'y' : result[:,1], 'label' : words}
df = pd.DataFrame(data)

Dimensions of each Vector | Before :  300
Dimensions of each Vector | After  :  2


### Visualising Word Vectors

In [40]:
scatter = go.Scatter( x = df['x'], y = df['y'], mode = 'markers+text', text = df['label'], textposition = 'top center',
                     marker = dict( size = 5 , color = 'rgba(157,0,0, .8)'))

layout = go.Layout(title = 'Word Vectors Visualisation', xaxis = dict(title = 'Dimension - 1'), yaxis = dict(title = 'Dimension - 2'), hovermode = 'closest')

fig = go.Figure(data = [scatter], layout = layout)

pio.show(fig)