In [2]:
import spacy
from spacy.lang.en import English

In [3]:
# Instantiate the English language class imported from spacy.lang.en
# (no spacy.load call here, so no packaged model is involved in this cell).
nlp = English()

In [24]:
# Run the sentence through the pipeline to obtain a Doc of tokens.
doc = nlp('Hello world, and 33 other planets!')

# Emit each token's text on its own line.
print(*(token.text for token in doc), sep='\n')

# Not rendered: the notebook only echoes the value of the cell's final expression.
doc[0].text
# Final expression of the cell: the span made of tokens 2 and 3 is what gets displayed.
doc[2:4]

Hello
world
,
and
33
other
planets
!


, and

In [25]:
# Collect one summary line per token: its index plus a few lexical flags.
stats = []
for token in doc:
    stats.append(f'Index: {token.i}, Is alpha: {token.is_alpha}, Is punct: {token.is_punct}, like_num: {token.like_num}, Text: {token.text}')

# One summary per output line.
print(*stats, sep='\n')


Index: 0, Is alpha: True, Is punct: False, like_num: False, Text: Hello
Index: 1, Is alpha: True, Is punct: False, like_num: False, Text: world
Index: 2, Is alpha: False, Is punct: True, like_num: False, Text: ,
Index: 3, Is alpha: True, Is punct: False, like_num: False, Text: and
Index: 4, Is alpha: False, Is punct: False, like_num: True, Text: 33
Index: 5, Is alpha: True, Is punct: False, like_num: False, Text: other
Index: 6, Is alpha: True, Is punct: False, like_num: False, Text: planets
Index: 7, Is alpha: False, Is punct: True, like_num: False, Text: !


In [31]:
from spacy.lang.en import English
from spacy.lang.de import German
from spacy.lang.es import Spanish

In [32]:
# Build one language object per language up front.
nlp = English()
nlp_g = German()
nlp_s = Spanish()

# English
doc = nlp('The test sentence goes here this time at least so let\'s just let it be.')
print(doc.text)

# German
doc = nlp_g("Liebe Grüße!")
print(doc.text)

# Spanish (this is the Doc that `doc` refers to once the cell finishes)
doc = nlp_s("¿Cómo estás?")
print(doc.text)


The test sentence goes here this time at least so let's just let it be.
Liebe Grüße!
¿Cómo estás?


In [35]:
from spacy.lang.en import English

nlp = English()
doc = nlp('I like tree kangaroos and narwhals')

# Indexing a Doc gives a single token; slicing gives a span of tokens.
first_token = doc[0]
tree_kangaroos = doc[2:4]
tree_kangaroos_and_narwhals = doc[2:6]

# Show the text of each piece, one per line.
print(
    first_token.text,
    tree_kangaroos.text,
    tree_kangaroos_and_narwhals.text,
    sep='\n',
)

I
tree kangaroos
tree kangaroos and narwhals


In [40]:
doc = nlp(
    "In 1990, more than 60% of people in East Asia were in extreme poverty. "
    "Now less than 4% are."
)

# Report every number-like token that is immediately followed by a '%' token.
for token in doc:
    # Guard the look-ahead: a number-like token at the very end of the doc has
    # no successor, and doc[len(doc)] would raise IndexError.
    if token.like_num and token.i + 1 < len(doc):
        next_token = doc[token.i + 1]

        if next_token.text == '%':
            print('Percentage found:', token.text)

Percentage found: 60
Percentage found: 4


In [47]:
nlp = spacy.load('en_core_web_sm')

# Per token: text, part-of-speech tag, dependency label, and the text of its head token.
doc = nlp('She ate the prize')
for token in doc:
    print(' '.join((token.text, token.pos_, token.dep_, token.head.text)))

print('\n ---- \n')

# Per entity: its text and label, followed by spaCy's description of that label.
doc = nlp('Apple is looking at buying U.K. startup for $1 billion')
for ent in doc.ents:
    print(ent.text, ent.label_)
    print(ent.label_, '=', spacy.explain(ent.label_))

# Final expression of the cell, so its value is what the notebook displays.
spacy.explain('nsubj')

She PRON nsubj ate
ate VERB ROOT ate
the DET det prize
prize NOUN dobj ate

 ---- 

Apple ORG
ORG = Companies, agencies, institutions, etc.
U.K. GPE
GPE = Countries, cities, states
$1 billion MONEY
MONEY = Monetary values, including unit


'nominal subject'

In [62]:
nlp = spacy.load('en_core_web_sm')

text = "It's official: Apple is the first U.S. public company to reach $1 trillion market value."
doc = nlp(text)

# Echo the input followed by an empty line.
print(doc.text + '\n')

# Left-aligned fixed-width columns: token text (12), POS tag (10), dependency label (10).
for token in doc:
    print("{:<12}{:<10}{:<10}".format(token.text, token.pos_, token.dep_))

print('\n----\n')

# Entities found in the same doc: text and label.
for ent in doc.ents:
    print(ent.text, ent.label_)
    

It's official: Apple is the first U.S. public company to reach $1 trillion market value.

It          PRON      nsubj     
's          VERB      ccomp     
official    ADJ       acomp     
:           PUNCT     punct     
Apple       PROPN     nsubj     
is          VERB      ROOT      
the         DET       det       
first       ADJ       amod      
U.S.        PROPN     nmod      
public      ADJ       amod      
company     NOUN      attr      
to          PART      aux       
reach       VERB      relcl     
$           SYM       quantmod  
1           NUM       compound  
trillion    NUM       nummod    
market      NOUN      compound  
value       NOUN      dobj      
.           PUNCT     punct     

----

Apple ORG
first ORDINAL
U.S. GPE
$1 trillion MONEY


In [92]:
text = 'New iPhone X release date leaked as Apple reveals pre-orders by mistake. Having a 10 hour battery life in a new laptop is something truly marvelous to behold in the eyes of a beholder. This is something the MacBook Pro offers. I have an iPhone X on the other hand can\'t reach such a feat. These are both made by Apple in California.'
doc = nlp(text)

# List each entity in the doc with its label.
for ent in doc.ents:
    print(f'{ent.text} {ent.label_}')

# Slice tokens 1-2 out by hand and print them under a 'Missing:' heading.
iphone_x = doc[1:3]
print('Missing:', iphone_x.text)


Apple ORG
10 hour TIME
MacBook Pro ORG
Apple ORG
California GPE
Missing: iPhone X


In [115]:
from spacy.matcher import Matcher

nlp = spacy.load('en_core_web_sm')

text = 'New iPhone X release date leaked as Apple reveals pre-orders by mistake. Having a 10 hour battery life in a new laptop is something truly marvelous to behold in the eyes of a beholder. This is something the MacBook Pro offers. I have an iPhone X on the other hand can\'t reach such a feat. These are both made by Apple in California.'

doc = nlp(text)
matcher = Matcher(nlp.vocab)

# Pattern 1: the exact two-token sequence "iPhone" "X".
pattern = [{'TEXT': 'iPhone'}, {'TEXT': 'X'}]
matcher.add('IPHONE_PATTERN', None, pattern)

# Pattern 2: a token whose lemma is "battery" followed by any noun.
# The key names what the pattern matches; it has nothing to do with "buy".
matcher.add('BATTERY_NOUN_PATTERN', None, [{'LEMMA': 'battery'}, {'POS': 'NOUN'}])

matches = matcher(doc)

# Each match is (match_id, start, end); start/end are token offsets into doc.
for match_id, start, end in matches:
    match_span = doc[start:end]
    print(match_span.text)
    

iPhone X
battery life
iPhone X


In [129]:
nlp = spacy.load('en_core_web_sm')

text = 'Love is blind. I love cats but their love is insignificant compared to my love.'
doc = nlp(text)

# Single-token pattern: lemma "love" AND part-of-speech NOUN on the same token.
matcher = Matcher(nlp.vocab)
matcher.add('LOVE', None, [{'LEMMA': 'love', 'POS': 'NOUN'}])

matches = matcher(doc)

# Print the matched text for every (match_id, start, end) triple.
for match_id, start, end in matches:
    print(doc[start:end].text)

Love
love
love


In [132]:
from spacy.matcher import Matcher

nlp = spacy.load('en_core_web_sm')
doc = nlp('New iPhone X release date leaked as Apple reveals pre-orders by mistake.')

# The matcher is built on the doc's own vocab.
matcher = Matcher(doc.vocab)

# Exact two-token sequence "iPhone" "X".
pattern = [{'TEXT': 'iPhone'}, {'TEXT': 'X'}]
matcher.add('IPHONE', None, pattern)

matches = matcher(doc)

# Gather the text of every matched span, then print the whole list at once.
matched_texts = []
for match_id, start, end in matches:
    matched_texts.append(doc[start:end].text)
print("Matches:", matched_texts)

Matches: ['iPhone X']


In [135]:
nlp = spacy.load("en_core_web_sm")
doc = nlp(
    "After making the iOS update you won't notice a radical system-wide "
    "redesign: nothing like the aesthetic upheaval we got with iOS 7. Most of "
    "iOS 11's furniture remains the same as in iOS 10. But you will discover "
    "some tweaks once you delve a little deeper."
)

# Token "iOS" directly followed by a token made only of digits.
pattern = [{'TEXT': 'iOS'}, {'IS_DIGIT': True}]

matcher = Matcher(doc.vocab)
matcher.add('IOS_PATTERN', None, pattern)

matches = matcher(doc)

# Report each matched span; start/end are token offsets into doc.
for match_id, start, end in matches:
    matched_span = doc[start:end]
    print('MATCH FOUND: ', matched_span.text)

MATCH FOUND:  iOS 7
MATCH FOUND:  iOS 11
MATCH FOUND:  iOS 10


In [137]:
nlp = spacy.load("en_core_web_sm")

doc = nlp(
    "i downloaded Fortnite on my laptop and can't open the game at all. Help? "
    "so when I was downloading Minecraft, I got the Windows version where it "
    "is the '.zip' folder and I used the default program to unpack it... do "
    "I also need to download Winzip?"
)

# Any form of "download" (matched by lemma) followed by a proper noun.
pattern = [{'LEMMA': 'download'}, {'POS': 'PROPN'}]

matcher = Matcher(nlp.vocab)
matcher.add('DOWNLOAD_THINGS_PATTERN', None, pattern)

matches = matcher(doc)

# Print the text of every matched span.
for mid, start, end in matches:
    matched_span = doc[start:end]
    print('Match found:', matched_span.text)


Match found: downloaded Fortnite
Match found: downloading Minecraft
Match found: download Winzip


In [None]:
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

doc = nlp(
    "Features of the app include a beautiful design, smart search, automatic "
    "labels and optional voice responses."
)

# Adjective + noun, optionally followed by a second noun.
# The quantifier must be exactly '?' (match 0 or 1 times); the previous value
# '?}' contained a stray brace and is not a valid operator.
pattern = [{'POS': 'ADJ'}, {'POS': 'NOUN'}, {'POS': 'NOUN', 'OP': '?'}]