# Loading Library

In [1]:
import spacy

# Load English tokenizer, tagger, parser and NER (the small model ships without word vectors)

In [2]:
# Name of the installed spaCy pipeline package to load.
MODEL_NAME = 'en_core_web_sm'
nlp = spacy.load(MODEL_NAME)

# Sample Text

In [3]:
# Sample passage fed to the pipeline below. Adjacent string literals are
# concatenated by Python, so this is one single-line string with no newlines.
text = (
    "Google was initially funded by an August 1998 contribution of $100,000 "
    "from Andy Bechtolsheim, co-founder of Sun Microsystems; the money was "
    "given before Google was incorporated.[30] Google received money from "
    "three other angel investors in 1998: Amazon.com founder Jeff Bezos, "
    "Stanford University computer science professor David Cheriton, and "
    "entrepreneur Ram Shriram.[31] Between these initial investors, friends, "
    "and family Google raised around 1 million dollars, which is what allowed "
    "them to open up their original shop in Menlo Park, California"
)

# NLP at Work

In [4]:
# Run the loaded pipeline over the sample text and keep the result as `doc`;
# the cells below iterate over it and read its `.ents`.
doc = nlp(text)
# A bare trailing expression makes the notebook display the value.
doc

Google was initially funded by an August 1998 contribution of $100,000 from Andy Bechtolsheim, co-founder of Sun Microsystems; the money was given before Google was incorporated.[30] Google received money from three other angel investors in 1998: Amazon.com founder Jeff Bezos, Stanford University computer science professor David Cheriton, and entrepreneur Ram Shriram.[31] Between these initial investors, friends, and family Google raised around 1 million dollars, which is what allowed them to open up their original shop in Menlo Park, California

# Tokenization

In [5]:
# Walk the processed document token by token and show each token's text.
for tok in doc:
    print(tok.text)

Google
was
initially
funded
by
an
August
1998
contribution
of
$
100,000
from
Andy
Bechtolsheim
,
co
-
founder
of
Sun
Microsystems
;
the
money
was
given
before
Google
was
incorporated.[30
]
Google
received
money
from
three
other
angel
investors
in
1998
:
Amazon.com
founder
Jeff
Bezos
,
Stanford
University
computer
science
professor
David
Cheriton
,
and
entrepreneur
Ram
Shriram.[31
]
Between
these
initial
investors
,
friends
,
and
family
Google
raised
around
1
million
dollars
,
which
is
what
allowed
them
to
open
up
their
original
shop
in
Menlo
Park
,
California


# Only Nouns

In [9]:
# Keep only the tokens whose coarse part-of-speech tag is NOUN, then print them.
nouns = [tok for tok in doc if tok.pos_ == "NOUN"]
for noun in nouns:
    print(noun)

contribution
co
-
founder
money
money
angel
investors
Amazon.com
founder
computer
science
professor
entrepreneur
Shriram.[31
investors
friends
family
dollars
shop


In [7]:
# Named entities: print each entity span's text followed by its label.
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

August 1998 DATE
100,000 MONEY
Andy Bechtolsheim PERSON
Sun Microsystems ORG
three CARDINAL
1998 DATE
Amazon.com ORG
Jeff Bezos PERSON
Stanford University ORG
David Cheriton PERSON
around 1 million dollars MONEY
Menlo Park GPE
California GPE


# Only Verbs

In [8]:
# Lazily select the tokens tagged VERB and print them in document order.
verbs = (tok for tok in doc if tok.pos_ == "VERB")
for verb in verbs:
    print(verb)

funded
given
received
raised
allowed
open


# Only Adjectives

In [10]:
# Print every token tagged ADJ; anything else is skipped up front.
for tok in doc:
    if tok.pos_ != "ADJ":
        continue
    print(tok)

other
initial
original


# Rule-Based Matching

# Match adjectives followed by nouns

In [12]:
from spacy.matcher import Matcher

# A single token-level pattern: an adjective immediately followed by a noun.
patterns = [[{'POS': 'ADJ'}, {'POS': 'NOUN'}]]

# Register the pattern list under the rule name "demo".
matcher = Matcher(nlp.vocab)
matcher.add("demo", patterns)

# Each match unpacks into (match_id, start, end); start/end slice into `doc`.
matches = matcher(doc)
for match_id, start, end in matches:
    print(
        match_id,
        nlp.vocab.strings[match_id],  # string form of the numeric match id
        start,
        end,
        doc[start:end].text,  # text of the matched span
    )

2193290520773312886 demo 37 39 other angel
2193290520773312886 demo 63 65 initial investors
2193290520773312886 demo 86 88 original shop
