In [None]:
from gensim.models import word2vec
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import networkx as nx
import csv
from node2vec import Node2Vec

# Creating the Graph (example)

The bipartite graph has already been created, but here is the process used to build it:

In [None]:
# Build a bipartite graph B from all_names.csv.
# Each CSV row is read as: first field = a document, remaining fields = the
# names attached to that document. Documents form one node set (bipartite=0),
# names the other (bipartite=1), and every (document, name) pair is an edge.
B = nx.Graph()
documents = []
names = []
edges = []

# newline='' is what the csv module documentation requires for files handed
# to csv.reader, so that newlines embedded in quoted fields are parsed correctly.
with open('all_names.csv', newline='') as all_names:
    name_reader = csv.reader(all_names)
    for row in name_reader:
        if not row:
            # csv.reader yields [] for a blank line; indexing row[0] on it
            # would raise IndexError, so skip such lines.
            continue
        document, *row_names = row
        documents.append(document)
        names.extend(row_names)
        edges.extend((document, name) for name in row_names)

# Add nodes with the node attribute "bipartite"
B.add_nodes_from(documents, bipartite=0)
B.add_nodes_from(names, bipartite=1)
B.add_edges_from(edges)

# Last expression of the cell: displays whether B is bipartite.
nx.is_bipartite(B)

True

In [None]:
# Output locations for the trained artifacts
EMBEDDING_FILENAME = './bipartite.emb'
EMBEDDING_MODEL_FILENAME = './bipartite.model'

# Precompute transition probabilities and generate the random walks over B
node2vec = Node2Vec(
    B,
    dimensions=64,
    walk_length=30,
    num_walks=200,
    workers=4,
)

# Train the embedding model.
# fit() accepts any keyword that gensim.Word2Vec accepts; `dimensions` and
# `workers` are passed automatically (from the Node2Vec constructor).
model = node2vec.fit(
    window=10,
    min_count=1,
    batch_words=4,
)

# Show the nodes most similar to node '2' (output node names are always strings).
# NOTE(review): assumes a node keyed '2' exists in B — confirm against the CSV.
print(model.wv.most_similar('2'))

# Persist the embeddings and the full model for later use
model.wv.save_word2vec_format(EMBEDDING_FILENAME)
model.save(EMBEDDING_MODEL_FILENAME)

# Loading the word2vec/node2vec model

In [None]:
# Reload the previously saved model from disk, then pull out one embedding
# vector per vocabulary entry into X.
# NOTE(review): `wv.vocab` exists only in gensim releases before 4.0 (later
# releases expose `key_to_index` instead) — confirm the pinned gensim version.
model = word2vec.Word2Vec.load('bipartite.model')
keyed_vectors = model.wv
X = keyed_vectors[keyed_vectors.vocab]

# Visualizing a word2vec model (with words)
[Reference: "Visualizing Word Vectors with t-SNE" (Kaggle notebook)](https://www.kaggle.com/jeffd23/visualizing-word-vectors-with-t-sne)

In [6]:
# Collect every vocabulary entry (the plot labels) with its embedding vector.
# NOTE(review): `wv.vocab` exists only in gensim releases before 4.0 — confirm
# the pinned gensim version.
labels = list(model.wv.vocab)
tokens = [model.wv[label] for label in labels]

# Create the TSNE model and project the embeddings down for plotting.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
tsne_model = TSNE(perplexity=40, init='pca', n_iter=2500, random_state=23)
new_values = tsne_model.fit_transform(tokens)

# Split the projected points into separate coordinate lists.
x = [point[0] for point in new_values]
y = [point[1] for point in new_values]

plt.figure(figsize=(16, 16))

# One scatter call per point, each followed by its text label placed at a
# small offset from the marker.
for label, x_pos, y_pos in zip(labels, x, y):
    plt.scatter(x_pos, y_pos)
    plt.annotate(
        label,
        xy=(x_pos, y_pos),
        xytext=(5, 2),
        textcoords='offset points',
        ha='right',
        va='bottom',
    )
plt.show()