In [4]:
from graph_of_words import GraphOfWords
# t= "Cecilia Love, 52, a retired police investigator who lives in Massachusetts, said she paid around $370 a ticket with tax for nonstop United Airlines flights to Sacramento from Boston for her niece's high school graduation in June, 2020."
# Sample multi-sentence passage used as the input for the sentence-splitting and graph-building cells below.
text = "Elizabeth, I need to update several of our faculty and teaching staff on the changes in the English department’s process for assigning teaching schedules to Graduate Teaching Assistants. As you know, the old process gave preference to more senior TAs, which led to numerous complaints of unfairness and an overall lack of clarity. Starting next semester, the English department will transition to a new rolling system that divides TAs into four groups alphabetically and ensures each group will be given first preference of teaching times once every two academic years. This change will go into effect starting January 5, 2015. In your message I would like you to outline the new process and explain the reasons why we need to make this change. Please provide contact information for the assistant department head in case anyone needs further information. Oh, and this information will also need to be copied to the appropriate associate dean. I appreciate your help with this."

# graph = GraphOfWords(window_size=2)
# graph.build_graph(
#     text,
#     # OR a sentences list['Roses  are  red.', 'Violets are blue'],
#     remove_stopwords=False,
#     workers=4
# )

# graph.display_graph()
# # graph.write_graph_edges('edges_list.txt')


In [1]:
# SPDX-FileCopyrightText: 2019 Vincent Lequertier <vi.le@autistici.org>
# SPDX-License-Identifier: GPL-3.0-only
import spacy
# spaCy pipeline 'en_core_web_lg'; called as nlp(...) by get_sen_structure and get_sentences.
nlp = spacy.load('en_core_web_lg')
from allennlp.predictors.predictor import Predictor

# Location of the AllenNLP 'coref-spanbert-large' model archive that the predictor is loaded from.
model_url = 'https://storage.googleapis.com/allennlp-public-models/coref-spanbert-large-2020.02.27.tar.gz'
# Predictor built from that archive; get_sentences calls its coref_resolved() method.
predictor = Predictor.from_path(model_url)


def get_sen_structure(doc):
    """Parse ``doc`` with the module-level ``nlp`` pipeline and describe each token.

    Parameters
    ----------
    doc : the value handed to ``nlp(...)`` (the callers in this notebook pass
        a sentence string).

    Returns
    -------
    dict
        Maps each token's index ``token.i`` to a dict with the keys
        ``"text"``, ``"pos"``, ``"dep"``, ``"head"`` (text of ``token.head``),
        ``"children"`` and ``"ancestors"`` (lists of token texts).
    """
    return {
        tok.i: {
            "text": tok.text,
            "pos": tok.pos_,
            "dep": tok.dep_,
            "head": tok.head.text,
            "children": [child.text for child in tok.children],
            "ancestors": [ancestor.text for ancestor in tok.ancestors],
        }
        for tok in nlp(doc)
    }

2021-11-16 12:58:04,284 - INFO - allennlp.common.plugins - Plugin allennlp_models available
2021-11-16 12:58:04,479 - INFO - cached_path - cache of https://storage.googleapis.com/allennlp-public-models/coref-spanbert-large-2020.02.27.tar.gz is up-to-date
2021-11-16 12:58:04,481 - INFO - allennlp.models.archival - loading archive file https://storage.googleapis.com/allennlp-public-models/coref-spanbert-large-2020.02.27.tar.gz from cache at /home/hadeer/.allennlp/cache/0f6b052811b20b13280e609a96efe71ebc636b9c823a5c906ba24459e6e68af9.c1dab61d84cc7c3f7d6751c260040607cb7023a002778ba8f9b9d196b6539174
2021-11-16 12:58:04,483 - INFO - allennlp.models.archival - extracting archive file /home/hadeer/.allennlp/cache/0f6b052811b20b13280e609a96efe71ebc636b9c823a5c906ba24459e6e68af9.c1dab61d84cc7c3f7d6751c260040607cb7023a002778ba8f9b9d196b6539174 to temp dir /tmp/tmpz8z7udvr
2021-11-16 12:58:14,637 - INFO - allennlp.common.params - dataset_reader.type = coref
2021-11-16 12:58:14,638 - INFO - allennl

In [2]:
import networkx as nx

In [7]:
def get_sentences(text):
    """Return the sentences of ``text`` after coreference resolution.

    ``text`` is first rewritten by ``predictor.coref_resolved``, then split
    into sentences via ``nlp(...).sents``. Each sentence is converted to a
    string, stripped of leading whitespace and lower-cased.

    Returns
    -------
    list of str
        One entry per sentence, in document order.
    """
    resolved_text = predictor.coref_resolved(text)
    parsed = nlp(resolved_text)
    return [str(sentence).lstrip().lower() for sentence in parsed.sents]

In [8]:
text

'Elizabeth, I need to update several of our faculty and teaching staff on the changes in the English department’s process for assigning teaching schedules to Graduate Teaching Assistants. As you know, the old process gave preference to more senior TAs, which led to numerous complaints of unfairness and an overall lack of clarity. Starting next semester, the English department will transition to a new rolling system that divides TAs into four groups alphabetically and ensures each group will be given first preference of teaching times once every two academic years. This change will go into effect starting January 5, 2015. In your message I would like you to outline the new process and explain the reasons why we need to make this change. Please provide contact information for the assistant department head in case anyone needs further information. Oh, and this information will also need to be copied to the appropriate associate dean. I appreciate your help with this.'

In [9]:
# Coreference-resolved, lower-cased sentences of the sample text.
Sentences = get_sentences(text)
# Alternative that skips coreference resolution:
# [str(i).lstrip().lower()for i in list(nlp((text)).sents)]
def create_graph(Sentences):
    """Build a directed multigraph of words linked by dependency relations.

    Every sentence is parsed with ``get_sen_structure``. Each token text
    becomes a node carrying a ``pos`` attribute, and each token contributes
    one edge ``head -> token`` whose ``label`` attribute is the token's
    dependency tag. Nodes are keyed by token text, so repeated words share a
    node (and the ``pos`` attribute is overwritten by the latest occurrence).

    Parameters
    ----------
    Sentences : iterable of sentences accepted by ``get_sen_structure``.

    Returns
    -------
    nx.MultiDiGraph
    """
    word_graph = nx.MultiDiGraph()
    for sentence in Sentences:
        token_records = get_sen_structure(sentence).values()
        # Register the sentence's nodes before wiring any of its edges.
        for record in token_records:
            word_graph.add_node(record['text'], pos=record['pos'])
        # NOTE(review): a token whose head is itself (presumably the sentence
        # root) produces a self-loop here — confirm that is intended.
        for record in token_records:
            word_graph.add_edge(record['head'], record['text'], label=record['dep'])
    return word_graph

In [10]:
# Dependency graph built from the sentences of the sample text.
attr_graph = create_graph(Sentences)

In [11]:
from pyvis import network as pvnet

def plot_g_pyviz(G, name='out.html', height='300px', width='500px'):
    """Render ``G`` with pyvis and return the result of ``Network.show``.

    Parameters
    ----------
    G : graph object providing ``copy()``; the copy is what gets passed to
        pyvis' ``from_nx``.
    name : file name handed to ``Network.show``.
    height, width : canvas size strings forwarded to ``pvnet.Network``.

    Returns
    -------
    Whatever ``Network.show(name)`` returns.
    """
    # Hand pyvis a copy rather than the caller's graph (the original note says
    # some attributes get added to nodes).
    graph_copy = G.copy()
    network = pvnet.Network(notebook=True, directed=True, height=height, width=width)
    # Physics settings selecting the forceAtlas2Based solver.
    physics_options = '''
        var options = {
          "physics": {
            "forceAtlas2Based": {
              "gravitationalConstant": -100,
              "centralGravity": 0.11,
              "springLength": 100,
              "springConstant": 0.09,
              "avoidOverlap": 1
            },
            "minVelocity": 0.75,
            "solver": "forceAtlas2Based",
            "timestep": 0.22
          }
          
        }
    '''

    network.set_options(physics_options)
    # To tune the layout interactively, enable one of:
    # network.show_buttons(filter_=['physics'])
    # network.enable_physics(True)
    network.from_nx(graph_copy)
    return network.show(name)

In [12]:
# Render the dependency graph with the default file name ('out.html') and canvas size.
plot_g_pyviz(attr_graph)

In [13]:
import bz2
import gzip
import os
import pickle
def get_graph_dict(graph):
    """Return ``graph`` converted with ``nx.to_dict_of_dicts``."""
    adjacency = nx.to_dict_of_dicts(graph)
    return adjacency
def _open_graph_file(path, mode):
    """Open ``path`` in binary ``mode``, picking gzip/bz2 from the extension.

    ``.gz`` / ``.gzip`` are opened with ``gzip.open`` and ``.bz2`` with
    ``bz2.open``; every other extension is opened as a plain file.
    """
    openers = {".gz": gzip.open, ".gzip": gzip.open, ".bz2": bz2.open}
    extension = os.path.splitext(path)[1]
    return openers.get(extension, open)(path, mode)


def save_graph_pickle(graph, path="word2graph.gpickle"):
    """Pickle ``graph`` to ``path``.

    Uses the standard-library ``pickle`` module instead of
    ``nx.write_gpickle``, which was removed in NetworkX 3.0.

    Parameters
    ----------
    graph : object to serialize (a NetworkX graph in this notebook).
    path : destination file name, path-like object, or a binary file object
        opened for writing. Defaults to ``"word2graph.gpickle"``, the
        location this function previously hard-coded. File names ending in
        ``.gz``/``.gzip``/``.bz2`` are written compressed.

    Returns
    -------
    None
    """
    if hasattr(path, "write"):
        # Already-open file object: write to it and leave closing to the caller.
        pickle.dump(graph, path, pickle.HIGHEST_PROTOCOL)
        return None
    with _open_graph_file(path, "wb") as handle:
        pickle.dump(graph, handle, pickle.HIGHEST_PROTOCOL)
    return None


def load_graph_pickle(path):
    """Load and return an object previously written by ``save_graph_pickle``.

    Uses the standard-library ``pickle`` module instead of
    ``nx.read_gpickle``, which was removed in NetworkX 3.0.

    Parameters
    ----------
    path : file name, path-like object, or a binary file object opened for
        reading. File names ending in ``.gz``/``.gzip``/``.bz2`` are
        decompressed on the fly.

    Returns
    -------
    The unpickled object.
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # come from a trusted source.
    if hasattr(path, "read"):
        return pickle.load(path)
    with _open_graph_file(path, "rb") as handle:
        return pickle.load(handle)