In [1]:
import spacy
from spacy.lang.en import English
import networkx as nx
import matplotlib.pyplot as plt

def getSentences(text):
    """Split ``text`` into a list of sentence strings.

    A blank spaCy ``English`` pipeline is created on every call and given
    only the ``'sentencizer'`` component, which supplies the sentence
    boundaries.

    Args:
        text: The raw text to split.

    Returns:
        A list with the text of each detected sentence, with surrounding
        whitespace stripped.
    """
    splitter = English()
    splitter.add_pipe('sentencizer')
    return [span.text.strip() for span in splitter(text).sents]

def printToken(token):
    """Print a token's text and dependency label as ``<text> -> <dep>``.

    Args:
        token: Any object exposing ``text`` and ``dep_`` attributes.
    """
    # ``!s`` applies str() to each attribute, exactly as print() would.
    print(f"{token.text!s} -> {token.dep_!s}")

def appendChunk(original, chunk):
    """Return ``original`` and ``chunk`` joined by a single space.

    The separator is always inserted, so an empty ``original`` yields a
    result that starts with a space.

    Args:
        original: The string built up so far.
        chunk: The string to add after it.

    Returns:
        The combined string.
    """
    return ' '.join((original, chunk))

def isRelationCandidate(token):
    """Tell whether a token's dependency label marks it as part of a relation.

    The check is a substring match: the token qualifies when ``token.dep_``
    contains any of ``ROOT``, ``adj``, ``attr``, ``agent`` or ``amod``.

    Args:
        token: Any object exposing a ``dep_`` string attribute.

    Returns:
        ``True`` if at least one marker occurs in the label, else ``False``.
    """
    label = token.dep_
    for marker in ("ROOT", "adj", "attr", "agent", "amod"):
        if marker in label:
            return True
    return False

def isConstructionCandidate(token):
    """Tell whether a token's dependency label marks it as a construction part.

    The check is a substring match: the token qualifies when ``token.dep_``
    contains any of ``compound``, ``prep``, ``conj`` or ``mod`` (so labels
    such as ``amod`` and ``advmod`` match through ``mod``).

    Args:
        token: Any object exposing a ``dep_`` string attribute.

    Returns:
        ``True`` if at least one marker occurs in the label, else ``False``.
    """
    label = token.dep_
    for marker in ("compound", "prep", "conj", "mod"):
        if marker in label:
            return True
    return False

def processSubjectObjectPairs(tokens):
    """Collapse a sequence of parsed tokens into one (subject, relation, object) triple.

    Every token is first echoed through ``printToken``. Tokens whose
    dependency label contains ``punct`` are then skipped. For the rest:

    * relation candidates (see ``isRelationCandidate``) contribute their
      lemma to the relation text;
    * tokens whose label contains ``subj`` contribute their text to the
      subject;
    * tokens whose label contains ``obj`` contribute their text to the
      object.

    Args:
        tokens: An iterable of token-like objects exposing ``text``,
            ``lemma_`` and ``dep_``.

    Returns:
        A 3-tuple ``(subject, relation, object)`` of whitespace-stripped
        strings; a part is ``''`` when no token contributed to it.
    """
    subj_text = rel_text = obj_text = ''
    # NOTE(review): both prefixes start empty and are only ever extended when
    # they are already non-empty, so as written they never pick up any
    # construction tokens. Behaviour is kept as-is here; confirm the intent.
    subj_prefix = obj_prefix = ''

    for tok in tokens:
        printToken(tok)  # debug trace of every token, punctuation included
        if "punct" in tok.dep_:
            continue

        if isRelationCandidate(tok):
            rel_text = appendChunk(rel_text, tok.lemma_)

        if isConstructionCandidate(tok):
            if subj_prefix:
                subj_prefix = appendChunk(subj_prefix, tok.text)
            if obj_prefix:
                obj_prefix = appendChunk(obj_prefix, tok.text)

        if "subj" in tok.dep_:
            # Extend the subject, then put the pending prefix in front of it.
            subj_text = appendChunk(subj_prefix, appendChunk(subj_text, tok.text))
            subj_prefix = ''

        if "obj" in tok.dep_:
            # Same treatment for the object side.
            obj_text = appendChunk(obj_prefix, appendChunk(obj_text, tok.text))
            obj_prefix = ''

    # The chained appendChunk calls leave leading spaces; strip them off.
    return tuple(part.strip() for part in (subj_text, rel_text, obj_text))

def processSentence(sentence):
    """Parse one sentence and extract its (subject, relation, object) triple.

    Relies on the module-level ``nlp_model`` callable, which must be
    defined before this function is called.

    Args:
        sentence: The sentence text to parse.

    Returns:
        The triple produced by ``processSubjectObjectPairs`` for the parse.
    """
    return processSubjectObjectPairs(nlp_model(sentence))

def printGraph(triples):
    """Draw the triples as an undirected graph and show the figure.

    For each triple, its first three items become nodes, with one edge
    between items 0 and 1 and another between items 1 and 2. Nodes are
    placed with a spring layout and labelled with their own value.

    Args:
        triples: An iterable of indexable items with at least three
            elements each (subject, relation, object).
    """
    graph = nx.Graph()
    for triple in triples:
        head, link, tail = triple[0], triple[1], triple[2]
        for vertex in (head, link, tail):
            graph.add_node(vertex)
        graph.add_edge(head, link)
        graph.add_edge(link, tail)

    layout = nx.spring_layout(graph)
    plt.figure()
    draw_options = dict(
        edge_color='black',
        width=1,
        linewidths=1,
        node_size=500,
        node_color='seagreen',
        alpha=0.9,
        labels={name: name for name in graph.nodes()},
    )
    nx.draw(graph, layout, **draw_options)
    plt.axis('off')
    plt.show()

ModuleNotFoundError: No module named 'spacy'

In [None]:
# text = "Tesla Cuts Prices on Cheapest Model 3 and Y SUV in U.S. an hour ago. "\
#             "Tesla and Bitcoin Are Better Meme Investments Than Most 5 hours ago"
# lines = open("titles.txt", "r", encoding='UTF-8').readlines()
# text = ''.join(lines)
# print(text)

In [None]:
# Sample input: two headline-style sentences concatenated into one string.
text = (
    "Tesla cuts prices on cheapest model 3 and Y SUV in U.S. an hour ago. "
    "Tesla and Bitcoin are better meme investments than most 5 hours ago"
)

In [None]:
# Load the pipeline that processSentence() looks up as the global `nlp_model`.
nlp_model = spacy.load('en_core_web_sm')

# Split the sample text, then reduce each sentence to one
# (subject, relation, object) triple.
sentences = getSentences(text)
triples = []
for sentence in sentences:
    triples += [processSentence(sentence)]

# Draw the graph first, then list the raw triples one per line.
printGraph(triples)
for t in triples:
    print(t)

In [3]:
# Environment diagnostic: print the interpreter's module search path.
# NOTE(review): presumably added to investigate why `import spacy` fails in
# this kernel — confirm. The printed output contains absolute local paths, so
# consider clearing or removing this cell before sharing the notebook.
import sys
print(sys.path)

['C:\\Users\\Joey\\Desktop\\projects\\bigdata', 'C:\\Users\\Joey\\Anaconda3\\python37.zip', 'C:\\Users\\Joey\\Anaconda3\\DLLs', 'C:\\Users\\Joey\\Anaconda3\\lib', 'C:\\Users\\Joey\\Anaconda3', '', 'C:\\Users\\Joey\\Anaconda3\\lib\\site-packages', 'C:\\Users\\Joey\\Anaconda3\\lib\\site-packages\\win32', 'C:\\Users\\Joey\\Anaconda3\\lib\\site-packages\\win32\\lib', 'C:\\Users\\Joey\\Anaconda3\\lib\\site-packages\\Pythonwin', 'C:\\Users\\Joey\\Anaconda3\\lib\\site-packages\\IPython\\extensions', 'C:\\Users\\Joey\\.ipython']
