In [1]:
!pip install -U spacy==3.*
!python -m spacy download en_core_web_sm
!python -m spacy info

Collecting spacy==3.*
  Downloading spacy-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
Collecting weasel<0.4.0,>=0.1.0 (from spacy==3.*)
  Downloading weasel-0.3.4-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.1/50.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting cloudpathlib<0.17.0,>=0.7.0 (from weasel<0.4.0,>=0.1.0->spacy==3.*)
  Downloading cloudpathlib-0.16.0-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.0/45.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: cloudpathlib, weasel, spacy
  Attempting uninstall: spacy
    Found existing installation: spacy 3.6.1
    Uninstalling spacy-3.6.1:
      Successfully uninstalled spacy-3.6.1
[31mERROR: pip's dependency resolver does not currently take into account all

In [2]:
import spacy
nlp = spacy.load('en_core_web_sm')
s = "John watched an old cinema called 'IND VS SA' at the cinema"
doc = nlp(s)

In [3]:
doc

John watched an old cinema called 'IND VS SA' at the cinema

**Part_Of_Speech Tagging**

In [5]:
[(t.text, t.pos_) for t in doc] #prints the token with parts of seech

[('John', 'PROPN'),
 ('watched', 'VERB'),
 ('an', 'DET'),
 ('old', 'ADJ'),
 ('cinema', 'NOUN'),
 ('called', 'VERB'),
 ("'", 'PUNCT'),
 ('IND', 'PROPN'),
 ('VS', 'PROPN'),
 ('SA', 'PROPN'),
 ("'", 'PUNCT'),
 ('at', 'ADP'),
 ('the', 'DET'),
 ('cinema', 'NOUN')]

In [6]:
spacy.explain('DET')

'determiner'

In [7]:
print(spacy.explain('NNP'))

noun, proper singular


In [8]:
print(spacy.explain('PRP'))

pronoun, personal


***NAMED ENTITY RECOGNITION***

method 1 is through the *ent_type_* attribute

In [9]:
s = "Volkswagen is developping an elctric vehicle sedan which could potentially come to USA in the mid bleak winter of NEXT Fall"

In [10]:
doc = nlp(s)

In [11]:
[(t.text, t.ent_type_) for t in doc]

[('Volkswagen', 'ORG'),
 ('is', ''),
 ('developping', ''),
 ('an', ''),
 ('elctric', ''),
 ('vehicle', ''),
 ('sedan', ''),
 ('which', ''),
 ('could', ''),
 ('potentially', ''),
 ('come', ''),
 ('to', ''),
 ('USA', 'GPE'),
 ('in', ''),
 ('the', ''),
 ('mid', ''),
 ('bleak', ''),
 ('winter', ''),
 ('of', ''),
 ('NEXT', 'DATE'),
 ('Fall', 'DATE')]

In [12]:
spacy.explain('GPE')

'Countries, cities, states'

In [13]:
print([(t.text, t.ent_type_) for t in doc if t.ent_type_ !=0])

[('Volkswagen', 'ORG'), ('is', ''), ('developping', ''), ('an', ''), ('elctric', ''), ('vehicle', ''), ('sedan', ''), ('which', ''), ('could', ''), ('potentially', ''), ('come', ''), ('to', ''), ('USA', 'GPE'), ('in', ''), ('the', ''), ('mid', ''), ('bleak', ''), ('winter', ''), ('of', ''), ('NEXT', 'DATE'), ('Fall', 'DATE')]


# 2nd Method is using *ents* property of the Doc object

In [14]:
print([(ent.text,ent.label_) for ent in doc.ents])

[('Volkswagen', 'ORG'), ('USA', 'GPE'), ('NEXT Fall', 'DATE')]


Position of entities

In [15]:
print([(ent.text, ent.label_, ent.start_char, ent.end_char) for ent in doc.ents])

[('Volkswagen', 'ORG', 0, 10), ('USA', 'GPE', 83, 86), ('NEXT Fall', 'DATE', 114, 123)]


Visualising NER

In [18]:
from spacy import displacy

In [19]:
displacy.render(doc, style='ent', jupyter=True)

In [20]:
s = 'Ridley Scott directed the Mr and Mrs Perfect'
doc = nlp(s)
displacy.render(doc,style='ent',jupyter=True)

**Parsing**

In [21]:
displacy.render(doc, style='dep', jupyter=True)

In [22]:
[(t.text, t.dep_) for t in doc]

[('Ridley', 'compound'),
 ('Scott', 'nsubj'),
 ('directed', 'ROOT'),
 ('the', 'det'),
 ('Mr', 'dobj'),
 ('and', 'cc'),
 ('Mrs', 'compound'),
 ('Perfect', 'conj')]

In [23]:
[( t.text, t.dep_, t.head.text) for t in doc]

[('Ridley', 'compound', 'Scott'),
 ('Scott', 'nsubj', 'directed'),
 ('directed', 'ROOT', 'directed'),
 ('the', 'det', 'Mr'),
 ('Mr', 'dobj', 'directed'),
 ('and', 'cc', 'Mr'),
 ('Mrs', 'compound', 'Perfect'),
 ('Perfect', 'conj', 'Mr')]

In [24]:
from spacy.matcher import Matcher

In [25]:
matcher = Matcher(nlp.vocab)

In [26]:
s = 'I want to book a hotel room'
doc = nlp(s)

In [31]:
pattern = [
    {'TEXT': 'book'},
    {'POS': 'DET', 'OP': '?'},
    {'POS': 'NOUN', 'OP': '+'}
]

In [32]:
matcher.add('USER_INTENT',[pattern])
matches = matcher(doc)

print('Matches: ',[doc[start:end].text for match_id, start, end in matches])

Matches:  ['book', 'book a', 'book a flight']


In [30]:
doc = nlp('I want to book a flight and hotel room in india.')
for noun_phrase in doc.noun_chunks :
  print('phrase: {}, root head: {}'.format(noun_phrase,noun_phrase.root.head))


phrase: I, root head: want
phrase: a flight and hotel room, root head: book
phrase: india, root head: in
