# 1. Triplet Extraction

In [1]:
!pip install spacy
!python3 -m spacy download en_core_web_sm

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')


In [2]:

import spacy
from spacy.lang.en import English
import networkx as nx
import matplotlib.pyplot as plt

In [3]:
def getSentences(text):
    """Split ``text`` into sentences with spaCy's rule-based sentencizer.

    A blank English pipeline is created on every call and given a single
    ``sentencizer`` component, so no trained model is needed for this step.

    Args:
        text: The passage to segment.

    Returns:
        A list with one string per detected sentence, each stripped of
        leading and trailing whitespace.
    """
    nlp = English()
    # spaCy 3 changed ``add_pipe`` to take the registered component name,
    # while spaCy 2 expects the component object built by ``create_pipe``.
    if int(spacy.__version__.split('.')[0]) >= 3:
        nlp.add_pipe('sentencizer')
    else:
        nlp.add_pipe(nlp.create_pipe('sentencizer'))
    document = nlp(text)
    # ``Span.text`` is available in both major versions (``Span.string`` was
    # dropped in spaCy 3); after ``strip()`` the result is the same.
    return [sent.text.strip() for sent in document.sents]

def printToken(token):
    """Debug hook invoked once per token; currently silent.

    To trace a sentence while developing, have this function emit
    ``print(token.text, "->", token.dep_)``.
    """
    return None

def appendChunk(original, chunk):
    """Return ``original`` and ``chunk`` joined by exactly one space.

    No trimming is done: an empty ``original`` yields a leading space and
    an empty ``chunk`` yields a trailing one.
    """
    return ' '.join((original, chunk))

def isRelationCandidate(token):
    """Tell whether the token's dependency label marks it as relation material.

    The check is a case-sensitive substring match: the result is True when
    ``token.dep_`` contains any of "ROOT", "adj", "attr", "agent" or "amod".
    """
    label = token.dep_
    for fragment in ("ROOT", "adj", "attr", "agent", "amod"):
        if fragment in label:
            return True
    return False

def isConstructionCandidate(token):
    """Tell whether the token's dependency label marks it as a modifier/linker.

    The check is a case-sensitive substring match: the result is True when
    ``token.dep_`` contains any of "compound", "prep", "conj" or "mod"
    (so labels such as "amod" or "advmod" qualify through "mod").
    """
    label = token.dep_
    for fragment in ("compound", "prep", "conj", "mod"):
        if fragment in label:
            return True
    return False

In [4]:
def processSubjectObjectPairs(tokens):
    """Collapse a parsed sentence into one (subject, relation, object) triple.

    Tokens are visited in order and classified by substring matches on
    their dependency label (``token.dep_``):

    * labels containing ``"punct"`` are skipped entirely;
    * relation candidates (per ``isRelationCandidate``) add their lemma to
      the relation;
    * labels containing ``"subj"`` add the token text to the subject;
    * labels containing ``"obj"`` add the token text to the object.

    Args:
        tokens: Iterable of token objects exposing ``text``, ``lemma_`` and
            ``dep_`` attributes.

    Returns:
        A tuple ``(subject, relation, object)``. Each element is the
        space-joined sequence of matching words with surrounding whitespace
        stripped, or ``''`` when no token matched.
    """
    subjectWords = []
    relationWords = []
    objectWords = []  # deliberately not named ``object``: that would shadow the builtin

    for token in tokens:
        printToken(token)
        label = token.dep_
        if "punct" in label:
            continue
        if isRelationCandidate(token):
            relationWords.append(token.lemma_)
        # NOTE(review): construction candidates (compound/prep/conj/mod) are
        # not merged into the subject or object. The earlier buffering logic
        # only appended to a buffer that was already non-empty, and the
        # buffers started empty, so it never contributed a word. Attaching
        # modifiers would change the emitted triples and should be a
        # deliberate follow-up.
        if "subj" in label:
            subjectWords.append(token.text)
        if "obj" in label:
            objectWords.append(token.text)

    return (
        ' '.join(subjectWords).strip(),
        ' '.join(relationWords).strip(),
        ' '.join(objectWords).strip(),
    )

In [5]:
def processSentence(sentence, model=None):
    """Parse one sentence and extract its (subject, relation, object) triple.

    Args:
        sentence: The sentence text to analyse.
        model: Callable that turns the sentence into an iterable of tokens,
            such as a loaded spaCy pipeline. When omitted, the module-level
            ``nlp_model`` is used; it must have been assigned before the
            call, otherwise a ``NameError`` is raised.

    Returns:
        The tuple produced by ``processSubjectObjectPairs`` for the parsed
        tokens.
    """
    if model is None:
        # Backward-compatible default: rely on the notebook-wide pipeline.
        model = nlp_model
    tokens = model(sentence)
    return processSubjectObjectPairs(tokens)

In [6]:
if __name__ == "__main__":

    # Demo passage. Adjacent string literals are joined with nothing in
    # between, so every fragment except the last ends with its own space.
    text = (
        "I won everything but Georgia. And I won Georgia, I know that. By a lot. "
        "And the people know it. And something happened there. Something bad happened. "
        "Trump reportedly told Watson during the phone call. "
        "Results of the audit found no evidence of fraudulent mail-in ballots "
        "and Biden was declared winner of Georgia in the election"
    )
    sentences = getSentences(text)
    # processSentence reads this module-level pipeline, so it has to be
    # loaded before the sentences are processed.
    nlp_model = spacy.load('en_core_web_sm')

    print (text)
    # One (subject, relation, object) triple per sentence, in order.
    triples = [processSentence(sentence) for sentence in sentences]

    print(triples)

I won everything but Georgia. And I won Georgia, I know that. By a lot. And the people know it. And something happened there. Something bad happened.Trump reportedly told Watson during the phone call. Results of the audit found no evidence of fraudulent mail-in ballots and Biden was declared winner of Georgia in the election
[('I', 'win', 'everything Georgia'), ('I I', 'know', 'Georgia that'), ('', 'by', 'lot'), ('people', 'know', 'it'), ('something', 'happen', ''), ('Something', 'bad happen', ''), ('Trump', 'tell', 'Watson call'), ('Results', 'find fraudulent', 'audit evidence mail ballots Georgia election')]


In [7]:
if __name__ == "__main__":

    # Demo passage. A backslash continuation inside a string literal would
    # pull the next line's indentation into the text, so the passage is
    # built from adjacent literals, each ending with its own single space.
    text = (
        "The agency is still urging unvaccinated Americans to socially distance "
        "from people who don't live in their home, wear masks and avoid crowds, "
        "measures that have been critical to slowing the spread of the deadly "
        "virus over the last year. "
        "Fauci also warned that the US could see a situation similar to that in "
        "Italy, where a surge in cases due to new variants and other issues "
        "caused the government to announce a new lockdown starting Monday."
    )
    sentences = getSentences(text)
    # processSentence reads this module-level pipeline, so it has to be
    # loaded before the sentences are processed.
    nlp_model = spacy.load('en_core_web_sm')

    print (text)
    # One (subject, relation, object) triple per sentence, in order.
    triples = [processSentence(sentence) for sentence in sentences]

    print(triples)

The agency is still urging unvaccinated Americans to socially distance from people who don't live in their home, wear masks and avoid crowds, measures that have been critical to slowing the spread of the deadly virus over the last year.          Fauci also warned that the US could see a situation similar to that in Italy, where a surge in cases due to new variants and other issues caused the government to announce a new lockdown starting Monday.
[('agency who that', 'urge unvaccinated deadly last', 'Americans distance people home masks crowds measures spread virus year'), ('Fauci US surge government', 'warn similar due new other new', 'situation that Italy cases variants lockdown')]


In [8]:
if __name__ == "__main__":

    # Demo passage. A backslash continuation inside a string literal would
    # pull the next line's indentation into the text, so the passage is
    # built from adjacent literals, each ending with its own single space.
    text = (
        "The average birth rate in South Korea dropped to a fresh low of 0.84 "
        "last year, the world’s lowest, which population experts said will "
        "unlikely bounce back without an increase in decent jobs and affordable "
        "housing for young adults. "
        "The 2020 number compared with 0.92 recorded a year earlier, meaning "
        "since 2018 the average South Korean woman gives birth to fewer than one "
        "child during her lifetime, Statistics Korea said on Feb. 24. "
        "The tally marked the lowest since the country began compiling the data."
    )
    sentences = getSentences(text)
    # processSentence reads this module-level pipeline, so it has to be
    # loaded before the sentences are processed.
    nlp_model = spacy.load('en_core_web_sm')

    print (text)
    # One (subject, relation, object) triple per sentence, in order.
    triples = [processSentence(sentence) for sentence in sentences]

    print(triples)

The average birth rate in South Korea dropped to a fresh low of 0.84 last year, the world’s lowest, which population experts said will unlikely bounce back without an increase in decent jobs and affordable housing for young adults.        The 2020 number compared with 0.92 recorded a year earlier, meaning since 2018 the average South Korean woman gives birth to fewer than one child during her lifetime, Statistics Korea said on Feb. 24. The tally marked the lowest since the country began compiling the data.
[('rate which experts', 'average drop fresh last decent affordable young', 'Korea low year increase jobs adults'), ('number woman Korea', 'average south korean few say', 'recorded 2018 birth child lifetime Feb.'), ('tally country', 'mark', 'lowest data')]
