# Phase 1: Create a Word-Embedding layer 

In [12]:
#imports
from itertools import combinations

from flair.embeddings import WordEmbeddings
from flair.data import Sentence
import networkx as nx
import torch

In [13]:
def read_data(datafile):
    """Return every line of ``datafile`` as a list of strings.

    Line endings are kept as-is; callers strip them when needed.
    The file is closed before the function returns.
    """
    with open(datafile) as handle:
        return list(handle)

# flair's 'glove' word embeddings, used below to embed every vocabulary word.
glove_embedding = WordEmbeddings('glove')

# Each line of the file is treated as one word; strip the trailing newline and
# any surrounding whitespace.  The file is read once, through read_data(), which
# closes it (a bare open() inside a comprehension leaves the handle open).
words = [line.strip() for line in read_data('../data/vertomul.txt')]

In [15]:
# Map every vocabulary word to its GloVe embedding tensor.
word_emb = {}
for word in words:
    # Wrap the (already stripped) word in a flair Sentence so it can be embedded.
    word_sentence = Sentence(word)
    glove_embedding.embed(word_sentence)
    # The dict is keyed by the whole word: if flair splits it into several
    # tokens, each assignment overwrites the previous one and the embedding of
    # the last token is the one that is kept.
    for tok in word_sentence:
        word_emb[word] = tok.embedding
    

###  We decided to compute the distance using cosine similarity

In [16]:
# Undirected graph for the word-embedding layer: nodes are words, edges link similar words.
G = nx.Graph()

In [17]:
# Add every word up front so that words which end up with no edge stay in the graph as isolated nodes.
G.add_nodes_from(words)

In [18]:
# Number of words (nodes) in the embedding graph; avoids building a throwaway list.
G.number_of_nodes()

529

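A cosine similarity of 1 means that the two words have the same embedding direction, i.e. they are treated as the same word.
Lowering the cosine-similarity threshold below 0.8 makes the number of edges increase by a lot, so only pairs with a similarity above 0.8 are connected.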

In [19]:
# Connect every pair of distinct words whose embeddings have a cosine similarity
# above the threshold.  G is undirected and cosine similarity is symmetric, so
# each unordered pair is visited exactly once instead of twice; dict keys are
# unique, so no explicit "same word" check is needed.
SIMILARITY_THRESHOLD = 0.8

for (item, x), (word, y) in combinations(word_emb.items(), 2):
    # unsqueeze(0) turns each embedding into a 1-row batch; the result is a
    # one-element tensor, converted to a plain float for the comparison.
    cosine_sim = torch.cosine_similarity(x.unsqueeze(0), y.unsqueeze(0)).item()

    if cosine_sim > SIMILARITY_THRESHOLD:
        G.add_edge(item, word)

In [20]:
G.number_of_nodes()

529

In [21]:
G.number_of_edges()

246

The graph is not connected 

In [22]:
print(nx.is_connected(G))

False


Nodes with higher closeness centrality are better connected with the rest of the nodes. I am assuming that nodes with a closeness centrality equal to zero are not connected to any other node; this matches NetworkX, which returns 0 for a node that has no path to any other node.

In [23]:
# Closeness centrality of every word in the embedding graph (dict: node -> score).
word_emb_cc = nx.closeness_centrality(G)

# Multiplex Network - 4 layers 

In [24]:
# Raw lines of the four layer files (each line is later split into one edge).
# Reading through read_data() closes every file after loading; a bare open()
# inside a comprehension leaves the handle open.
phonological_sim_words = [line.strip() for line in read_data('../data/phcmul.txt')]
feature_sharing_words = [line.strip() for line in read_data('../data/mrmul.txt')]
free_association_words = [line.strip() for line in read_data('../data/famul.txt')]
co_occurrances_words = [line.strip() for line in read_data('../data/cumul.txt')]

In [25]:
def _split_pairs(lines, sep='\t'):
    """Split every line on ``sep`` and return the pieces as a list of tuples.

    ``str.split`` already yields strings, so no further conversion is needed.
    """
    return [tuple(line.split(sep)) for line in lines]


# One list of tab-separated tuples per layer; the first two fields of each
# tuple are used as the edge endpoints when the layer is added to the graph.
phonological_sim_list = _split_pairs(phonological_sim_words)
feature_sharing_list = _split_pairs(feature_sharing_words)
free_association_list = _split_pairs(free_association_words)
co_occurrances_list = _split_pairs(co_occurrances_words)


In [33]:
# A single undirected nx.Graph holds all layers. It stores at most one edge per
# node pair, so an edge that appears in several layers is counted only once.
multiplex = nx.Graph()

In [34]:
# Start from the full vocabulary so that every word is a node even before any layer adds edges.
multiplex.add_nodes_from(words)

In [35]:
# Number of words (nodes) in the multiplex graph; avoids building a throwaway list.
multiplex.number_of_nodes()

529

Adding the phonological connections to the graph

In [39]:
# Phonological layer: the first two fields of every parsed line become an edge.
multiplex.add_edges_from((pair[0], pair[1]) for pair in phonological_sim_list)
    

In [40]:
multiplex.number_of_edges()

349

Adding the feature sharing connections to the graph

In [48]:
# Feature-sharing layer: the first two fields of every parsed line become an edge.
multiplex.add_edges_from((pair[0], pair[1]) for pair in feature_sharing_list)

In [49]:
multiplex.number_of_edges()

2732

Adding the free association connections to the graph

In [50]:
# Free-association layer: the first two fields of every parsed line become an edge.
multiplex.add_edges_from((pair[0], pair[1]) for pair in free_association_list)

In [51]:
multiplex.number_of_edges()

5001

Adding the co-occurrence connections to the graph

In [52]:
# Co-occurrence layer: the first two fields of every parsed line become an edge.
multiplex.add_edges_from((pair[0], pair[1]) for pair in co_occurrances_list)

In [53]:
multiplex.number_of_edges()

6998

## The original multiplex layer has been constructed.

In [56]:
# Closeness centrality of every word in the multiplex as built so far
# (phonological, feature-sharing, free-association and co-occurrence edges).
multiplex_cc = nx.closeness_centrality(multiplex)

In [57]:
print(nx.is_connected(multiplex))

True


# Now, we add the visual layer to the multiplex

In [76]:
# Raw lines of the visual-layer file with surrounding whitespace and all double
# quotes removed.  read_data() closes the file after loading; a bare open()
# inside a comprehension leaves the handle open.
visual_words = [line.strip().replace('"', "") for line in read_data('../data/wac_clip_freechild.txt')]

In [79]:
# Split every visual-layer line on "->" into a tuple of endpoints.
# Each endpoint is stripped so that whitespace around the arrow cannot produce
# node names that differ from the (already stripped) vocabulary words, which
# would silently add new nodes instead of linking existing ones.
visual_list = [
    tuple(endpoint.strip() for endpoint in pair.split('->'))
    for pair in visual_words
]

In [80]:
# Visual layer: the first two fields of every parsed line become an edge.
multiplex.add_edges_from((pair[0], pair[1]) for pair in visual_list)

In [81]:
# Recompute closeness centrality now that the visual edges are in the graph.
# `multiplex` was extended in place, so `multiplex_cc` only reflects the graph
# without the visual layer if the cells are run in order.
multiplex_visual_cc = nx.closeness_centrality(multiplex)