In [4]:
import spacy
from spacy import displacy
# Load the 'en_core_web_sm' pipeline once; the later cells call `nlp` on their text.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Parse a sample sentence, then list every token with its coarse
# part-of-speech tag and its dependency label, separated by '|'.
sentence = "Company Y is planning to acquire stake in X company for $23 billion"
doc = nlp(sentence)

for token in doc:
    row = (token.text, '|', token.pos_, '|', token.dep_)
    print(*row)

Company | PROPN | compound
Y | PROPN | nsubj
is | AUX | aux
planning | VERB | ROOT
to | PART | aux
acquire | VERB | xcomp
stake | NOUN | dobj
in | ADP | prep
X | PROPN | compound
company | NOUN | pobj
for | ADP | prep
$ | SYM | quantmod
23 | NUM | compound
billion | NUM | pobj


In [3]:
# Tokenize the headline and print the text of each token on its own line.
doc = nlp("Reliance is looking at buying U.K. based analytics startup for $7 billion")
token_texts = [token.text for token in doc]
for token_text in token_texts:
    print(token_text)

Reliance
is
looking
at
buying
U.K.
based
analytics
startup
for
$
7
billion


In [5]:
# Draw the dependency parse of the headline inline in the notebook.
headline = "Reliance is looking at buying U.K. based analytics startup for $7 billion"
doc = nlp(headline)
displacy.render(doc, style="dep", jupyter=True)

In [6]:
# Run the loaded pipeline on the sentence to get a parsed document
doc = nlp("Reliance is looking at buying U.K. based analytics startup for $7 billion")
 
# Iterate over the tokens
for token in doc:
    # Print the token and its syntactic dependency label (token.dep_)
    print(token.text, "-->", token.dep_)

Reliance --> nsubj
is --> aux
looking --> ROOT
at --> prep
buying --> pcomp
U.K. --> npadvmod
based --> amod
analytics --> dobj
startup --> advcl
for --> prep
$ --> quantmod
7 --> compound
billion --> pobj


In [7]:
# Look up the human-readable description of several dependency labels;
# the cell displays the descriptions as a tuple, in the order listed.
dep_labels = ("nsubj", "ROOT", "aux", "advcl", "dobj")
tuple(spacy.explain(dep_label) for dep_label in dep_labels)

('nominal subject',
 None,
 'auxiliary',
 'adverbial clause modifier',
 'direct object')

In [8]:
# Run the loaded pipeline on the sentence to get a parsed document
doc = nlp("Reliance is looking at buying U.K. based analytics startup for $7 billion")
 
# Iterate over the tokens
for token in doc:
    # Print the token and its lemma (token.lemma_), e.g. "looking" --> "look"
    print(token.text, "-->", token.lemma_)

Reliance --> reliance
is --> be
looking --> look
at --> at
buying --> buy
U.K. --> U.K.
based --> base
analytics --> analytic
startup --> startup
for --> for
$ --> $
7 --> 7
billion --> billion


In [9]:
# Parse a text whose sentences run together with no space after the periods
doc = nlp("Reliance is looking at buying U.K. based analytics startup for $7 billion.This is India.India is great")

# Materialize the sentence spans (reused by the next cell) and show how many were found
sentences = [sentence_span for sentence_span in doc.sents]
len(sentences)

3

In [10]:
# Print each detected sentence on its own line.
for sentence in sentences:
    print(sentence)

Reliance is looking at buying U.K. based analytics startup for $7 billion.
This is India.
India is great


In [11]:
# Run named-entity recognition on the headline and print, for each entity,
# its text, start/end character offsets, and label.
headline = "Reliance is looking at buying U.K. based analytics startup for $7 billion"
doc = nlp(headline)

for ent in doc.ents:
    entity_fields = (ent.text, ent.start_char, ent.end_char, ent.label_)
    print(*entity_fields)

Reliance 0 8 ORG
U.K. 30 34 GPE
$7 billion 63 73 MONEY


In [12]:
# Run the pipeline over a multi-paragraph excerpt about the Amazon rainforest.
# The text is passed verbatim, including bracketed footnote markers such as
# [a] and [2] and the bare "Etymology" / "History" heading lines.
doc= nlp(u"""The Amazon rainforest,[a] alternatively, the Amazon Jungle, also known in English as Amazonia, is a moist broadleaf tropical rainforest in the Amazon biome that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 km2 (2,700,000 sq mi), of which 5,500,000 km2 (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations.

The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Bolivia, Ecuador, French Guiana, Guyana, Suriname, and Venezuela. Four nations have "Amazonas" as the name of one of their first-level administrative regions and France uses the name "Guiana Amazonian Park" for its rainforest protected area. The Amazon represents over half of the planet's remaining rainforests,[2] and comprises the largest and most biodiverse tract of tropical rainforest in the world, with an estimated 390 billion individual trees divided into 16,000 species.[3]

Etymology
The name Amazon is said to arise from a war Francisco de Orellana fought with the Tapuyas and other tribes. The women of the tribe fought alongside the men, as was their custom.[4] Orellana derived the name Amazonas from the Amazons of Greek mythology, described by Herodotus and Diodorus.[4]

History
See also: History of South America § Amazon, and Amazon River § History
Tribal societies are well capable of escalation to all-out wars between tribes. Thus, in the Amazonas, there was perpetual animosity between the neighboring tribes of the Jivaro. Several tribes of the Jivaroan group, including the Shuar, practised headhunting for trophies and headshrinking.[5] The accounts of missionaries to the area in the borderlands between Brazil and Venezuela have recounted constant infighting in the Yanomami tribes. More than a third of the Yanomamo males, on average, died from warfare.[6]""")

# Gather every recognized entity as (span, string label, integer label ID)
# and display the resulting list.
entities = []
for ent in doc.ents:
    entities.append((ent, ent.label_, ent.label))
entities

[(Amazon, 'ORG', 383),
 (Amazon Jungle, 'PERSON', 380),
 (English, 'LANGUAGE', 389),
 (Amazonia, 'GPE', 384),
 (Amazon, 'ORG', 383),
 (Amazon, 'ORG', 383),
 (South America, 'LOC', 385),
 (7,000,000, 'CARDINAL', 397),
 (2,700,000, 'CARDINAL', 397),
 (5,500,000, 'CARDINAL', 397),
 (2,100,000, 'CARDINAL', 397),
 (nine, 'CARDINAL', 397),
 (Brazil, 'GPE', 384),
 (60%, 'PERCENT', 393),
 (Peru, 'GPE', 384),
 (13%, 'PERCENT', 393),
 (Colombia, 'GPE', 384),
 (10%, 'PERCENT', 393),
 (Bolivia, 'GPE', 384),
 (Ecuador, 'GPE', 384),
 (French, 'NORP', 381),
 (Guiana, 'PERSON', 380),
 (Guyana, 'GPE', 384),
 (Venezuela, 'GPE', 384),
 (Four, 'CARDINAL', 397),
 (Amazonas, 'PERSON', 380),
 (one, 'CARDINAL', 397),
 (first, 'ORDINAL', 396),
 (France, 'GPE', 384),
 (Guiana Amazonian Park, 'WORK_OF_ART', 388),
 (Amazon, 'ORG', 383),
 (over half, 'CARDINAL', 397),
 (an estimated 390 billion, 'CARDINAL', 397),
 (16,000, 'CARDINAL', 397),
 (Amazon, 'ORG', 383),
 (Francisco de Orellana, 'LOC', 385),
 (Tapuyas, 'F

In [13]:
# Highlight the recognized entities of the current `doc` inline in the notebook.
displacy.render(
    doc,
    style="ent",
    jupyter=True,
)

In [14]:
# Inspect vector-related attributes for three real words and one nonsense string.
# NOTE(review): the recorded output reports is_oov=True even for "dog" and "cat",
# which suggests the loaded pipeline has no static word-vector table; confirm
# before relying on these vectors.
tokens = nlp("dog cat banana afskfsd")

for token in tokens:
    vector_info = (token.text, token.has_vector, token.vector_norm, token.is_oov)
    print(*vector_info)

dog True 4.8634477 True
cat True 5.2525845 True
banana True 5.3935223 True
afskfsd True 5.5341754 True


In [15]:
# Print the similarity score for every ordered pair of tokens (self-pairs included).
token_pairs = [(left, right) for left in tokens for right in tokens]
for token1, token2 in token_pairs:
    print(token1.text, token2.text, token1.similarity(token2))

dog dog 1.0
dog cat 0.585781
dog banana 0.28486
dog afskfsd 0.15531388
cat dog 0.585781
cat cat 1.0
cat banana 0.45521784
cat afskfsd 0.3430463
banana dog 0.28486
banana cat 0.45521784
banana banana 1.0
banana afskfsd 0.33351398
afskfsd dog 0.15531388
afskfsd cat 0.3430463
afskfsd banana 0.33351398
afskfsd afskfsd 1.0


  This is separate from the ipykernel package so we can avoid doing imports until


In [16]:
import pandas as pd

In [17]:
# Read the tab-separated file 'amazon_alexa.tsv' into a DataFrame.
reviews_path = 'amazon_alexa.tsv'
df_amazon = pd.read_csv(reviews_path, sep="\t")

In [18]:
# Show the loaded frame as this cell's output.
df_amazon

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1
...,...,...,...,...,...
3145,5,30-Jul-18,Black Dot,"Perfect for kids, adults and everyone in betwe...",1
3146,5,30-Jul-18,Black Dot,"Listening to music, searching locations, check...",1
3147,5,30-Jul-18,Black Dot,"I do love these things, i have them running my...",1
3148,5,30-Jul-18,White Dot,Only complaint I have is that the sound qualit...,1
