#1. 	IE: Triplet Extraction

In [1]:
!pip install spacy
!python3 -m spacy download en_core_web_sm


[38;5;2mâœ” Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')


In [2]:
import spacy
from spacy.lang.en import English
import networkx as nx
import matplotlib.pyplot as plt

In [3]:
def getSentences(text):
    nlp = English()
    nlp.add_pipe(nlp.create_pipe('sentencizer'))
    document = nlp(text)
    return [sent.string.strip() for sent in document.sents]

def printToken(token):
    print(token.text, "->", token.dep_)

def appendChunk(original, chunk):
    return original + ' ' + chunk

def isRelationCandidate(token):
    deps = ["ROOT", "adj", "attr", "agent", "amod"]
    return any(subs in token.dep_ for subs in deps)

def isConstructionCandidate(token):
    deps = ["compound", "prep", "conj", "mod"]
    return any(subs in token.dep_ for subs in deps)

In [4]:
def processSubjectObjectPairs(tokens):
    subject = ''
    object = ''
    relation = ''
    subjectConstruction = ''
    objectConstruction = ''
    for token in tokens:
        printToken(token)
        if "punct" in token.dep_:
            continue
        if isRelationCandidate(token):
            relation = appendChunk(relation, token.lemma_)
        if isConstructionCandidate(token):
            if subjectConstruction:
                subjectConstruction = appendChunk(subjectConstruction, token.text)
            if objectConstruction:
                objectConstruction = appendChunk(objectConstruction, token.text)
        if "subj" in token.dep_:
            subject = appendChunk(subject, token.text)
            subject = appendChunk(subjectConstruction, subject)
            subjectConstruction = ''
        if "obj" in token.dep_:
            object = appendChunk(object, token.text)
            object = appendChunk(objectConstruction, object)
            objectConstruction = ''

  # printing triplets of given sentence
    print ("\nThe triplet of the given sentence is: \nSubject: ", subject.strip(),
            ",\nRelation: ", relation.strip(),
            ",\nObject: ", object.strip())
    return (subject.strip(), relation.strip(), object.strip())


In [5]:
def processSentence(sentence):
    tokens = nlp_model(sentence)
    return processSubjectObjectPairs(tokens)

In [6]:
if __name__ == "__main__":

    text = "The designer confirmed that all fittings were conducted via Zoom due to the Covid-19 pandemic." \
            "Miley Cyrus appeared in a football jersey-inspired top and denim shorts by Gucci. Tom Brady was spotted before kick off in a casual" 
    sentences = getSentences(text)
    nlp_model = spacy.load('en_core_web_sm')

    
    triples = []
    print (text)
    for sentence in sentences:
        triples.append(processSentence(sentence))

    print(triples)


The designer confirmed that all fittings were conducted via Zoom due to the Covid-19 pandemic.Miley Cyrus appeared in a football jersey-inspired top and denim shorts by Gucci. Tom Brady was spotted before kick off in a casual
The -> det
designer -> nsubj
confirmed -> ROOT
that -> mark
all -> det
fittings -> nsubjpass
were -> auxpass
conducted -> ccomp
via -> prep
Zoom -> pobj
due -> prep
to -> pcomp
the -> det
Covid-19 -> compound
pandemic -> pobj
. -> punct

The triplet of the given sentence is: 
Subject:  designer fittings ,
Relation:  confirm ,
Object:  Zoom pandemic
Miley -> compound
Cyrus -> nsubj
appeared -> ROOT
in -> prep
a -> det
football -> compound
jersey -> pobj
- -> punct
inspired -> amod
top -> amod
and -> cc
denim -> conj
shorts -> appos
by -> prep
Gucci -> pobj
. -> punct

The triplet of the given sentence is: 
Subject:  Cyrus ,
Relation:  appear inspire top ,
Object:  jersey Gucci
Tom -> compound
Brady -> nsubjpass
was -> auxpass
spotted -> ROOT
before -> prep
kick -> 

#2. WordNet Task: (use the nltk wordnet library provided in the source code) 

In [7]:
import nltk
nltk.download('wordnet')

from nltk.corpus import wordnet as wn

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


##1.	Hyponym (a more specific concept)

In [8]:
vehicle_synset = wn.synset('play.n.01')
# getting sorted list of HYPONYMS from vehicle_synset
vehicle_synset.hyponyms()
# somethiing like child words

[Synset('grand_guignol.n.01'),
 Synset('miracle_play.n.01'),
 Synset('morality_play.n.01'),
 Synset('mystery_play.n.01'),
 Synset('passion_play.n.01'),
 Synset('playlet.n.01'),
 Synset('satyr_play.n.01'),
 Synset('theater_of_the_absurd.n.01')]

##2.	Hypernym (a more general concept)

In [9]:

vehicle_synset = wn.synset('party.n.02')
# getting sorted list of HYPONYMS from vehicle_synset
sorted([lemma.name() for synset in vehicle_synset.hypernyms() for lemma in synset.lemmas()])

['social_affair', 'social_gathering']

##3.	Meronym (denotes a part of something)

In [10]:

animal_synset = wn.synset('bird.n.01')
# getting sorted list of HYPONYMS from vehicle_synset
print(animal_synset.substance_meronyms())
print(animal_synset.part_meronyms())

[]
[Synset('air_sac.n.03'), Synset('beak.n.02'), Synset('bird's_foot.n.01'), Synset('bird.n.02'), Synset('feather.n.01'), Synset('furcula.n.01'), Synset('hindquarters.n.02'), Synset('pennon.n.02'), Synset('syrinx.n.02'), Synset('uropygial_gland.n.01'), Synset('uropygium.n.01'), Synset('wing.n.01')]


##4.	Holonym (denotes a membership to something)

In [11]:

human_synset = wn.synset('human.n.01')
# hyponyms for human
human_synset.hyponyms()

[Synset('homo_erectus.n.01'),
 Synset('homo_habilis.n.01'),
 Synset('homo_sapiens.n.01'),
 Synset('homo_soloensis.n.01'),
 Synset('neandertal_man.n.01'),
 Synset('rhodesian_man.n.01'),
 Synset('world.n.08')]

##5.	Entailment (denotes how verbs are involved)

In [12]:
stand_synset = wn.synset('sit.v.01')
# entailment for verb stand
stand_synset.entailments()

[Synset('sit_down.v.01')]