# NLP using spaCy

In [1]:
import spacy

# Load the English pipeline by its package name. The bare 'en' shortcut only
# resolves where a shortcut link was created (spaCy v2) and is rejected by
# spaCy v3+, whereas the full package name works on both.
nlp = spacy.load('en_core_web_sm')

# Parse one example sentence; every spaCy cell below inspects this `doc`.
doc = nlp('Apple is looking at buying U.K. startup for $1 billion')

## Tokens

In [2]:
# Print the surface text of every token in the parsed sentence, one per line.
for tok in doc:
    surface = tok.text
    print(surface)

Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion


## Lemma, POS, Tag, Dependency Tag, Is Alphabetic, Is Stop Word

In [3]:
# Attributes shown for each token, in column order: text, lemma, POS, tag,
# dependency tag, is_alpha flag, is_stop flag.
token_fields = ('text', 'lemma_', 'pos_', 'tag_', 'dep_', 'is_alpha', 'is_stop')

for tok in doc:
    # Collect the values left to right, then print them space-separated.
    row = [getattr(tok, field) for field in token_fields]
    print(*row)

Apple apple PROPN NNP nsubj True False
is be VERB VBZ aux True True
looking look VERB VBG ROOT True False
at at ADP IN prep True True
buying buy VERB VBG pcomp True False
U.K. u.k. PROPN NNP compound False False
startup startup NOUN NN dobj True False
for for ADP IN prep True True
$ $ SYM $ quantmod False False
1 1 NUM CD compound False False
billion billion NUM CD pobj True False


## Noun phrases (chunks) and dependencies

In [8]:
# For every noun chunk, show its text next to the dependency label of the
# chunk's root token.
for noun_chunk in doc.noun_chunks:
    chunk_text = noun_chunk.text
    root_dep = noun_chunk.root.dep_
    print(chunk_text, '|', root_dep)

Apple | nsubj
U.K. startup | dobj


## Named Entities

In [9]:
# List each named entity with its start/end character offsets and its label.
for entity in doc.ents:
    fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
    print(*fields)

Apple 0 5 ORG
U.K. 27 31 GPE
$1 billion 44 54 MONEY


# NLP using TextBlob

In [10]:
from textblob import TextBlob
# Two-sentence sample passage (one sentence per line, separated by a single
# newline) used as input for the TextBlob examples.
text = (
    "Natural language processing (NLP) deals with the application of computational models to text or speech data.\n"
    "Application areas within NLP include automatic (machine) translation between languages; dialogue systems, which allow a human to interact with a machine using natural language; and information extraction, where the goal is to transform unstructured text into structured (database) representations that can be searched and browsed in flexible ways."
)
# Wrap the sample passage in a TextBlob; the cells below read its tags,
# noun phrases, sentences and translation from this one object.
blob = TextBlob(text)

## Tokens and POS Tags

In [12]:
# Show the blob's (word, POS tag) pairs.
pos_pairs = blob.tags
print(pos_pairs)

[('Natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('NLP', 'NNP'), ('deals', 'VBZ'), ('with', 'IN'), ('the', 'DT'), ('application', 'NN'), ('of', 'IN'), ('computational', 'JJ'), ('models', 'NNS'), ('to', 'TO'), ('text', 'VB'), ('or', 'CC'), ('speech', 'VB'), ('data', 'NNS'), ('Application', 'NNP'), ('areas', 'NNS'), ('within', 'IN'), ('NLP', 'NNP'), ('include', 'VBP'), ('automatic', 'JJ'), ('machine', 'NN'), ('translation', 'NN'), ('between', 'IN'), ('languages', 'NNS'), ('dialogue', 'NN'), ('systems', 'NNS'), ('which', 'WDT'), ('allow', 'VBP'), ('a', 'DT'), ('human', 'JJ'), ('to', 'TO'), ('interact', 'VB'), ('with', 'IN'), ('a', 'DT'), ('machine', 'NN'), ('using', 'VBG'), ('natural', 'JJ'), ('language', 'NN'), ('and', 'CC'), ('information', 'NN'), ('extraction', 'NN'), ('where', 'WRB'), ('the', 'DT'), ('goal', 'NN'), ('is', 'VBZ'), ('to', 'TO'), ('transform', 'VB'), ('unstructured', 'JJ'), ('text', 'NN'), ('into', 'IN'), ('structured', 'VBN'), ('database', 'NN'), ('represen

## Noun Phrases

In [14]:
# Show the noun phrases TextBlob extracted from the passage.
extracted_phrases = blob.noun_phrases
print(extracted_phrases)

['natural language processing', 'nlp', 'computational models', 'speech data', 'application', 'nlp', 'dialogue systems', 'natural language', 'information extraction', 'flexible ways']


## Sentences

In [15]:
# Show the passage split into Sentence objects.
sentence_list = blob.sentences
print(sentence_list)

[Sentence("Natural language processing (NLP) deals with the application of computational models to text or speech data."), Sentence("Application areas within NLP include automatic (machine) translation between languages; dialogue systems, which allow a human to interact with a machine using natural language; and information extraction, where the goal is to transform unstructured text into structured (database) representations that can be searched and browsed in flexible ways.")]


## Translation!

In [18]:
# Translate the whole passage to French and print the result.
# NOTE(review): translate() presumably calls an online translation service and
# may be deprecated/removed in newer TextBlob releases — confirm it is
# available in the installed version before relying on this cell.
french_blob = blob.translate(to="fr")
print(french_blob)

Le traitement du langage naturel (NLP) traite de l’application de modèles informatiques aux données textuelles ou vocales.
Les domaines d’application de la PNL incluent la traduction automatique (machine) entre les langues; les systèmes de dialogue, qui permettent à un humain d'interagir avec une machine en utilisant le langage naturel; et l'extraction d'informations, où l'objectif est de transformer le texte non structuré en représentations structurées (base de données) pouvant être recherchées et parcourues de manière flexible.
