In [1]:
import numpy as np
from math import *
import random

import nltk
nltk.download('wordnet')  # in order to have access to wordnet
from nltk.corpus import wordnet

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/charlesdognin/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
# Pick the source (root) word of the graph: the WordNet noun synset "mammal.n.01".
# As the root, it sits at level 0 of the hierarchy built later by sample_graph.

mammal = wordnet.synset("mammal.n.01")
print(mammal.definition())  # human-readable gloss (definition) of the synset
print('-------------------------')
print(mammal)  # repr of the synset object itself

any warm-blooded vertebrate having the skin more or less covered with hair; young are born alive except for the small subclass of monotremes and nourished with milk
-------------------------
Synset('mammal.n.01')


In [3]:
# Hyponyms of the source word, i.e. its direct children in the graph.
# Left as the last expression of the cell so the list is displayed as the cell output.
mammal.hyponyms()

[Synset('female_mammal.n.01'),
 Synset('fossorial_mammal.n.01'),
 Synset('metatherian.n.01'),
 Synset('placental.n.01'),
 Synset('prototherian.n.01'),
 Synset('tusker.n.01')]

In [4]:
# Word network: intended to map each node to the list of its hyponyms
# ({node: [hyponyms]}); starts out empty.
graph = {}

# Level (depth) of each node in the network ({node: level}, root at level 0);
# starts out empty.
levels = {}

## Sample graph

In [5]:
# Sample graph as a dictionary

def sample_graph(root_node, max_level=4):
    """
    Sample a hierarchical network from a root node by following hyponyms.

    The hierarchy is explored breadth-first, one level at a time, so the level
    recorded for a node reachable through several paths is the shallowest one.
    Nodes located at ``max_level`` are kept as leaves (empty child list) and
    are not expanded any further.

    :param root_node: root node of the network; every node must expose a
        ``hyponyms()`` method returning its children and must be hashable
    :param max_level: (int) maximum level of the network (the root is level 0)
    :return graph: dictionary representing the graph {node: [hyponyms]}
    :return levels: dictionary representing the level of each node {node: level}
    """

    # Build fresh containers on every call so that successive calls (e.g. with
    # different roots or depths) never leak state into each other.
    graph = {}
    levels = {root_node: 0}

    # Nodes of the level currently being processed.
    frontier = [root_node]
    level = 0

    while frontier:
        next_frontier = []

        for node in frontier:
            # Depth limit reached: keep the node as a leaf and carry on with
            # the remaining nodes of this level instead of aborting the walk.
            if level >= max_level:
                graph[node] = []
                continue

            # Children of the node, i.e. its hyponyms.
            children = list(node.hyponyms())
            graph[node] = children

            for child in children:
                # `levels` doubles as the visited set (O(1) membership test):
                # a node already present was discovered at a shallower or
                # equal level and must not be queued twice.
                if child not in levels:
                    levels[child] = level + 1
                    next_frontier.append(child)

        frontier = next_frontier
        level += 1

    return graph, levels

## Sample embedded words

In [6]:
# Initialize embedded words

def sample_embeddings(graph, dimension):
    """
    Draw a small random initial vector for every node of the graph.

    Each vector is sampled uniformly in [-0.001, 0.001) along every axis,
    using numpy's global random state, in the iteration order of ``graph``.

    :param graph: graph containing words (iterated over to get the nodes)
    :param dimension: (int) dimension of the embedding space
    :return embeddings: dictionary of the form {node: vector}
    """
    shape = (dimension,)

    # One draw per node, in graph iteration order.
    return {
        word: np.random.uniform(low=-0.001, high=0.001, size=shape)
        for word in graph
    }

In [7]:
# Vocabulary: the list of embedded nodes, in random order.
# NOTE(review): in the recorded run this cell raised
# "NameError: name 'embeddings' is not defined" -- `embeddings` must be built
# (presumably with sample_embeddings) before this cell runs; TODO confirm where
# that happens and move this cell after it so Restart & Run All succeeds.

data = list(embeddings.keys())
# Shuffles `data` in place; no seed is set in the cells visible here, so the
# order differs between runs.
random.shuffle(data)

NameError: name 'embeddings' is not defined

## Plots

In [8]:
# Plotting function when a 2 dimensional embedding space is used

from matplotlib import pyplot as plt

def plot_graph_2D(embeddings):
    """
    Plot the embedded vectors when the embedding space is 2-dimensional.

    Every node is drawn as a single point whose coordinates are the first two
    components of its vector.

    :param embeddings: dictionary of the form {node: vector}; each vector is
        indexed at positions 0 and 1
    """

    fig, ax = plt.subplots()

    # Draw each node as a marker. A single (x, y) pair plotted with the default
    # line style and no marker is invisible, so the marker is requested
    # explicitly and the connecting line is disabled.
    for vector in embeddings.values():
        ax.plot(vector[0], vector[1], marker="o", linestyle="")

    ax.set_xlabel("dimension 1")
    ax.set_ylabel("dimension 2")
    ax.set_title("Embedded words")

    plt.show()