In [1]:
import spacy
#!python -m spacy download en_core_web_sm

## spaCy NLP exploration

In [2]:
# Load the English "en_core_web_sm" pipeline once; the cells below all reuse this `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Tokenize a single sentence and show every token's surface text, one per line.
doc = nlp(u'Human ambition is the key to staying ahead of automation.')
print(*(tok.text for tok in doc), sep="\n")

Human
ambition
is
the
key
to
staying
ahead
of
automation
.


In [4]:
# Parse a sentence and print one tab-separated row per token:
# text, character offset, lemma, coarse POS, fine-grained tag, dependency label,
# word shape, is_alpha flag, is_stop flag.
doc = nlp(u'John bought a car and Mary a motorcycle.')

for tok in doc:
    row = (
        tok.text, tok.idx, tok.lemma_, tok.pos_, tok.tag_, tok.dep_,
        tok.shape_, tok.is_alpha, tok.is_stop,
    )
    # str() is a no-op on the fields that are already strings.
    print("\t".join(str(field) for field in row))

John	0	John	PROPN	NNP	nsubj	Xxxx	True	False
bought	5	buy	VERB	VBD	ROOT	xxxx	True	False
a	12	a	DET	DT	det	x	True	True
car	14	car	NOUN	NN	dobj	xxx	True	False
and	18	and	CCONJ	CC	cc	xxx	True	True
Mary	22	Mary	PROPN	NNP	conj	Xxxx	True	False
a	27	a	DET	DT	det	x	True	True
motorcycle	29	motorcycle	NOUN	NN	appos	xxxx	True	False
.	39	.	PUNCT	.	punct	.	False	False


In [5]:
# Walk the dependency parse of the current `doc`: for each token show its
# relation label, its head (text and POS) and the tokens attached directly to it.
for tok in doc:
    head = tok.head
    dependents = list(tok.children)
    print(tok.text, tok.dep_, head.text, head.pos_, dependents)

John nsubj bought VERB []
bought ROOT bought VERB [John, car, .]
a det car NOUN []
car dobj bought VERB [a, and, Mary]
and cc car NOUN []
Mary conj car NOUN [motorcycle]
a det motorcycle NOUN []
motorcycle appos Mary PROPN [a]
. punct bought VERB []


In [6]:
# List each named entity in the current `doc` with its character span and label.
for entity in doc.ents:
    span_info = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*span_info)

John 0 4 PERSON
Mary 22 26 PERSON


In [7]:
# Sentence containing a multi-word person name, two company names and a date;
# its entities are printed by the next cell.
doc = nlp(u'Ali Hassan Kuban said that Apple Inc. will buy Google in May 2018.')

In [8]:
# Show text, start/end character offsets and entity label for every entity in `doc`.
for span in doc.ents:
    start, end = span.start_char, span.end_char
    print(span.text, start, end, span.label_)

Ali Hassan Kuban 0 16 PERSON
Apple Inc. 27 37 ORG
Google 47 53 ORG
May 2018 57 65 DATE


In [9]:
from spacy import displacy

In [10]:
# Parse a two-clause sentence; this is the `doc` the (disabled) displacy.serve call below refers to.
doc = nlp(u"John met Peter and Susan called Paul.")

In [11]:
# Disabled: would visualise the dependency parse of `doc` with displaCy's `serve` entry point.
# NOTE(review): presumably commented out because `serve` starts a web server and blocks the cell — confirm.
#displacy.serve(doc, style='dep')

In [12]:
# Multi-sentence sample mentioning several people, companies and places.
# The line breaks inside the literal are part of the text passed to the pipeline.
text = """Apple decided to fire Tim Cook and hire somebody called John Doe as the new CEO.
They also discussed a merger with Google. On the long run it seems more likely that Apple
will merge with Amazon and Microsoft with Google. The companies will all relocate to
Austin in Texas before the end of the century. John Doe bought a Porsche."""

doc = nlp(text)
# Render the recognised entities with displaCy's 'ent' style, inline in the notebook (jupyter=True).
displacy.render(doc, style='ent', jupyter=True)

In [14]:
# A single Doc whose tokens are compared pairwise for similarity in the following cell.
tokens = nlp(u'apples and dogs are bananas and cats person')

In [15]:
# Print the similarity score of every ordered token pair (a token paired with itself included).
# NOTE(review): `nlp` is the "sm" pipeline; per the spaCy docs the small models ship without
# static word vectors, so these scores may be unreliable — consider a md/lg model. Confirm.
for left in tokens:
    for right in tokens:
        score = left.similarity(right)
        print(left, right, score)

apples apples 1.0
apples and 0.044306833
apples dogs 0.4178702
apples are 0.019538091
apples bananas 0.6533332
apples and 0.017371425
apples cats 0.27148277
apples person 0.13093166
and apples 0.044306833
and and 1.0
and dogs 0.043407787
and are 0.2009429
and bananas 0.11184211
and and 1.0
and cats 0.015565561
and person -0.11508114
dogs apples 0.4178702
dogs and 0.043407787
dogs dogs 1.0
dogs are 0.013911209
dogs bananas 0.44729885
dogs and 0.013348826
dogs cats 0.52501935
dogs person 0.15286715
are apples 0.019538091
are and 0.2009429
are dogs 0.013911209
are are 1.0
are bananas 0.2086141
are and 0.14163685
are cats -0.01583488
are person 0.1131013
bananas apples 0.6533332
bananas and 0.11184211
bananas dogs 0.44729885
bananas are 0.2086141
bananas bananas 1.0
bananas and 0.17506078
bananas cats 0.31777847
bananas person 0.09854238
and apples 0.017371425
and and 1.0
and dogs 0.013348826
and are 0.14163685
and bananas 0.17506078
and and 1.0
and cats 0.09988121
and person -0.10388825
c

  This is separate from the ipykernel package so we can avoid doing imports until


In [16]:
# The same word ("labrador") in three different contexts.
doc1 = nlp(u"The labrador barked.")
doc2 = nlp(u"The labrador swam.")
doc3 = nlp(u"the labrador people live in canada.")

dog = nlp(u"dog")

# Compare the second token of each sentence (index 1, "labrador") against "dog",
# numbering the results from 1.
count = 0
for count, doc in enumerate([doc1, doc2, doc3], start=1):
    lab = doc[1]
    print(f"{count}:", lab.similarity(dog))

1: 0.1705783234813302
2: 0.2751764529693607
3: 0.1957795219474742


  # This is added back by InteractiveShellApp.init_path()


In [17]:
# Three short documents; print the full similarity matrix as "row col score" lines.
docs = (
    nlp(u"Paris is the largest city in France."),
    nlp(u"Vilnius is the capital of Lithuania."),
    nlp(u"An emu is a large bird."),
)

for x, first in enumerate(docs):
    for y, second in enumerate(docs):
        print(x, y, first.similarity(second))

0 0 1.0
0 1 0.8045532009754652
0 2 0.6841753801186126
1 0 0.8045532009754652
1 1 1.0
1 2 0.5060953510737396
2 0 0.6841753801186126
2 1 0.5060953510737396
2 2 1.0


  import sys


In [18]:
# Same three words in different orders, plus one unrelated phrase, compared pairwise.
docs = [
    nlp(phrase)
    for phrase in (u"dog bites man", u"man bites dog", u"man dog bites", u"cat eats mouse")
]

for doc in docs:
    for other_doc in docs:
        score = doc.similarity(other_doc)
        # Each text is wrapped in double quotes so the phrase boundaries are visible.
        print(f'"{doc.text}"', f'"{other_doc.text}"', score)

"dog bites man" "dog bites man" 1.0
"dog bites man" "man bites dog" 0.8422244882281177
"dog bites man" "man dog bites" 0.8688251513379205
"dog bites man" "cat eats mouse" 0.33363362907991617
"man bites dog" "dog bites man" 0.8422244882281177
"man bites dog" "man bites dog" 1.0
"man bites dog" "man dog bites" 0.8189262546757138
"man bites dog" "cat eats mouse" 0.3792663613754763
"man dog bites" "dog bites man" 0.8688251513379205
"man dog bites" "man bites dog" 0.8189262546757138
"man dog bites" "man dog bites" 1.0
"man dog bites" "cat eats mouse" 0.25610488284650623
"cat eats mouse" "dog bites man" 0.33363362907991617
"cat eats mouse" "man bites dog" 0.3792663613754763
"cat eats mouse" "man dog bites" 0.25610488284650623
"cat eats mouse" "cat eats mouse" 1.0


  


## spaCy example 2: aspect-based sentiment analysis

In [None]:
# Reference for this section:
# https://towardsdatascience.com/aspect-based-sentiment-analysis-using-spacy-textblob-4c8de3e0d2b9

In [37]:
# Example review sentences used by the aspect/description extraction cells below.
sentences = [
    'The food we had yesterday was delicious',
    'My time in Italy was very enjoyable',
    'I found the meal to be tasty',
    'The internet was slow.',
    'Our experience was suboptimal',
]

In [40]:
# For every token of every sentence print: text, dependency label, head text,
# head POS, the token's own POS, and its direct children.
for sentence in sentences:
    doc = nlp(sentence)
    for tok in doc:
        head = tok.head
        print(tok.text, tok.dep_, head.text, head.pos_, tok.pos_, list(tok.children))

The det food NOUN DET []
food nsubj was AUX NOUN [The, had]
we nsubj had VERB PRON []
had relcl food NOUN VERB [we, yesterday]
yesterday npadvmod had VERB NOUN []
was ROOT was AUX AUX [food, delicious]
delicious acomp was AUX ADJ []
My poss time NOUN PRON []
time nsubj was AUX NOUN [My, in]
in prep time NOUN ADP [Italy]
Italy pobj in ADP PROPN []
was ROOT was AUX AUX [time, enjoyable]
very advmod enjoyable ADJ ADV []
enjoyable acomp was AUX ADJ [very]
I nsubj found VERB PRON []
found ROOT found VERB VERB [I, be]
the det meal NOUN DET []
meal nsubj be VERB NOUN [the]
to aux be VERB PART []
be ccomp found VERB VERB [meal, to, tasty]
tasty acomp be VERB ADJ []
The det internet NOUN DET []
internet nsubj was AUX NOUN [The]
was ROOT was AUX AUX [internet, slow, .]
slow acomp was AUX ADJ []
. punct was AUX PUNCT []
Our poss experience NOUN PRON []
experience nsubj was AUX NOUN [Our]
was ROOT was AUX AUX [experience, suboptimal]
suboptimal acomp was AUX ADJ []


In [41]:
# For each sentence, report the last token tagged ADJ as its "descriptive term"
# (an empty string when the sentence contains no adjective).
for sentence in sentences:
    doc = nlp(sentence)
    descriptive_term = ''
    for token in doc:
        if token.pos_ == 'ADJ':
            # Store the adjective's text, not the Token object, so descriptive_term
            # is always a str (it starts as '' and the later cells build it from
            # token.text as well). The printed output is unchanged.
            descriptive_term = token.text
    print(sentence)
    print(descriptive_term)

The food we had yesterday was delicious
delicious
My time in Italy was very enjoyable
enjoyable
I found the meal to be tasty
tasty
The internet was slow.
slow
Our experience was suboptimal
suboptimal


In [46]:
# Like the plain adjective extraction, but any adverb attached directly to the
# adjective (e.g. "very") is placed in front of it. The last adjective in the
# sentence wins; sentences without one yield an empty string.
for sentence in sentences:
    doc = nlp(sentence)
    descriptive_term = ''
    for tok in doc:
        if tok.pos_ != 'ADJ':
            continue
        adverbs = [child.text for child in tok.children if child.pos_ == 'ADV']
        descriptive_term = ' '.join([*adverbs, tok.text])
    print(sentence, descriptive_term, sep="\n")

The food we had yesterday was delicious
delicious
My time in Italy was very enjoyable
very enjoyable
I found the meal to be tasty
tasty
The internet was slow.
slow
Our experience was suboptimal
suboptimal


In [47]:
# Build one {'aspect', 'description'} record per sentence:
#   aspect      - text of the last token that is both a nominal subject (nsubj) and a NOUN
#   description - last adjective, prefixed by any adverbs attached directly to it
# Either field stays '' when nothing in the sentence matches.
aspects = []
for sentence in sentences:
    doc = nlp(sentence)
    target = ''
    descriptive_term = ''
    for tok in doc:
        if tok.dep_ == 'nsubj' and tok.pos_ == 'NOUN':
            target = tok.text
        if tok.pos_ == 'ADJ':
            modifiers = [child.text for child in tok.children if child.pos_ == 'ADV']
            descriptive_term = ' '.join(modifiers + [tok.text])
    record = {'aspect': target, 'description': descriptive_term}
    aspects.append(record)
print(aspects)

[{'aspect': 'food', 'description': 'delicious'}, {'aspect': 'time', 'description': 'very enjoyable'}, {'aspect': 'meal', 'description': 'tasty'}, {'aspect': 'internet', 'description': 'slow'}, {'aspect': 'experience', 'description': 'suboptimal'}]


In [49]:
from textblob import TextBlob

# Attach TextBlob's sentiment for each description to its record, in place,
# under the 'sentiment' key; the final expression displays the updated list.
for entry in aspects:
    description_blob = TextBlob(entry['description'])
    entry['sentiment'] = description_blob.sentiment
aspects

[{'aspect': 'food',
  'description': 'delicious',
  'sentiment': Sentiment(polarity=1.0, subjectivity=1.0)},
 {'aspect': 'time',
  'description': 'very enjoyable',
  'sentiment': Sentiment(polarity=0.65, subjectivity=0.78)},
 {'aspect': 'meal',
  'description': 'tasty',
  'sentiment': Sentiment(polarity=0.0, subjectivity=0.0)},
 {'aspect': 'internet',
  'description': 'slow',
  'sentiment': Sentiment(polarity=-0.30000000000000004, subjectivity=0.39999999999999997)},
 {'aspect': 'experience',
  'description': 'suboptimal',
  'sentiment': Sentiment(polarity=0.0, subjectivity=0.0)}]

In [52]:
from textblob.classifiers import NaiveBayesClassifier

# Tiny hand-labelled training set for the Naive Bayes classifier.
# Every label must be exactly 'positive' or 'negative'.
train = [
  ('Slow internet.', 'negative'),
  ('Delicious food', 'positive'),
  ('Suboptimal experience', 'negative'),
  ('Very enjoyable time', 'positive'),
  # Previously labelled 'neg': that string is not one of the two classes, so it
  # introduced a spurious third class and contradicted the 'Delicious food'
  # sample above. The phrase is positive, so it is labelled accordingly.
  ('delicious food.', 'positive')
]
cl = NaiveBayesClassifier(train)

# Classify a few sample sentences: the blob is given the trained classifier and
# each of its sentences is classified separately.
blob = TextBlob("Delicious food. Very Slow internet. Suboptimal experience. Enjoyable food.", classifier=cl)
for s in blob.sentences:
    print(s)
    print(s.classify())
    # print("\n") emits two line breaks, leaving a visible gap between results.
    print("\n")

Delicious food.
positive


Very Slow internet.
negative


Suboptimal experience.
negative


Enjoyable food.
positive


