In [20]:
import spacy
from spacy import displacy
from afinn import Afinn

In [2]:
nlp = spacy.load("en_core_web_sm")

In [3]:
document_string="""U.S. intelligence
agencies concluded in January 2017 that Russia mounted a far-ranging influence campaign aimed at helping
Trump beat Clinton. And the bipartisan Senate Intelligence Committee, after three years of investigation, affirmed
those conclusions, saying intelligence officials had specific information that Russia preferred Trump and that
Russian President Vladimir Putin had “approved and directed aspects” of the Kremlin’s influence campaign."""
# with open("APonTrump") as document:
#    document_string = document.read()

In [4]:
document = nlp(document_string)

In [5]:
sentence_tokens =list(document.sents)

In [6]:
print(len(sentence_tokens))

2


In [7]:
for sent in sentence_tokens:
    print(sent)

U.S. intelligence
agencies concluded in January 2017 that Russia mounted a far-ranging influence campaign aimed at helping
Trump beat Clinton.
And the bipartisan Senate Intelligence Committee, after three years of investigation, affirmed
those conclusions, saying intelligence officials had specific information that Russia preferred Trump and that
Russian President Vladimir Putin had “approved and directed aspects” of the Kremlin’s influence campaign.


In [8]:
displacy.serve(sentence_tokens, style="dep")




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [47]:
tokens_list = []
for sent in sentence_tokens:
    for token in sent:
        tokens_list.append(token)

In [48]:
print(len(tokens_list))

74


In [49]:
print(tokens_list)

[U.S., intelligence, 
, agencies, concluded, in, January, 2017, that, Russia, mounted, a, far, -, ranging, influence, campaign, aimed, at, helping, 
, Trump, beat, Clinton, ., And, the, bipartisan, Senate, Intelligence, Committee, ,, after, three, years, of, investigation, ,, affirmed, 
, those, conclusions, ,, saying, intelligence, officials, had, specific, information, that, Russia, preferred, Trump, and, that, 
, Russian, President, Vladimir, Putin, had, “, approved, and, directed, aspects, ”, of, the, Kremlin, ’s, influence, campaign, .]


In [9]:
entity_tokens = list(document.ents)

In [10]:
print(len(entity_tokens))

12


In [61]:
ent_dict={}
for ent in entity_tokens:
    ent_dict[ent.text] = ent.label_
print(ent_dict)

{'U.S.': 'GPE', 'January 2017': 'DATE', 'Russia': 'GPE', 'Trump': 'ORG', 'Clinton': 'PERSON', 'Senate Intelligence Committee': 'ORG', 'three years': 'DATE', 'Russian': 'NORP', 'Vladimir Putin': 'PERSON', 'Kremlin': 'ORG'}


In [11]:
displacy.serve(entity_tokens, style="dep")


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [23]:
afinn = Afinn()
afinn_scores_sentences = []
for sent in sentence_tokens:
    afinn_scores_sentences.append(afinn.score(sent.text))

In [24]:
print(afinn_scores_sentences)

[2.0, 2.0]


In [58]:
afinn_scores_tokens = []
for token in tokens_list:
    score = (afinn.score(token.text))
#     print(token.text,score)
    afinn_scores_tokens.append(score)

In [59]:
print(afinn_scores_tokens)

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [80]:
NE_binary=[]
for token in tokens_list:
    if token.text in ent_dict:
        NE_binary.append(1.0)
    else:
        NE_binary.append(0.0)

In [81]:
print(NE_binary)

[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]


In [82]:
NE_type=[]
for token in tokens_list:
    if token.text in ent_dict:
        NE_type.append(ent_dict[token.text])
    else:
        NE_type.append("")

In [83]:
print(NE_type)

['GPE', '', '', '', '', '', '', '', '', 'GPE', '', '', '', '', '', '', '', '', '', '', '', 'ORG', '', 'PERSON', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'GPE', '', 'ORG', '', '', '', 'NORP', '', '', '', '', '', '', '', '', '', '', '', '', 'ORG', '', '', '', '']


In [84]:
token_governor=[]
for token in tokens_list:
    token_governor.append(token.head)

In [85]:
print(token_governor)

[agencies, agencies, intelligence, concluded, concluded, concluded, in, January, mounted, mounted, concluded, campaign, ranging, ranging, campaign, campaign, mounted, campaign, aimed, at, helping, helping, concluded, beat, concluded, affirmed, Committee, Committee, Committee, Committee, affirmed, affirmed, affirmed, years, after, years, of, affirmed, affirmed, affirmed, conclusions, affirmed, affirmed, affirmed, officials, had, saying, information, had, preferred, preferred, information, preferred, preferred, approved, that, President, Putin, Putin, approved, approved, approved, aspects, approved, approved, preferred, aspects, aspects, Kremlin, campaign, Kremlin, campaign, of, affirmed]
