**Text Analysis Using spaCy and Gensim**

In [5]:
import gensim
import numpy as np
import spacy
from spacy import displacy
from gensim.corpora import Dictionary
from gensim.models import LdaModel
import matplotlib.pyplot as plt
import sklearn
import keras

Using TensorFlow backend.


In [0]:
import warnings
import os
# Suppress every warning for the rest of the session (no category or message
# filter is given, so this also hides deprecation notices).
warnings.filterwarnings('ignore')  
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [0]:
# Locate the 'lee_background.cor' file inside the installed gensim package's
# test/test_data directory and read it into memory as a single string.
test_data_dir = os.path.join(gensim.__path__[0], 'test', 'test_data')
lee_train_file = os.path.join(test_data_dir, 'lee_background.cor')
# Use a context manager so the file handle is closed as soon as the read
# finishes instead of being left open for the garbage collector.
with open(lee_train_file) as corpus_file:
    text = corpus_file.read()

In [0]:
# Load the small English pipeline by its full package name. The "en" shortcut
# link only exists on spaCy 2.x installs (shortcut links were removed in v3),
# so it is kept purely as a fallback for older environments.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:  # spacy.load raises OSError when the model cannot be found
    nlp = spacy.load("en")

In [0]:
# Additional stop words for this corpus; each one's entry in the pipeline's
# vocabulary is flagged as a stop word.
my_stop_words = [
    u'say', u'\'s', u'mr', u'be',
    u'said', u'says', u'saying', 'today',
]
for stopword in my_stop_words:
    nlp.vocab[stopword].is_stop = True

In [0]:
# Lower-case the whole corpus text and run it through the spaCy pipeline.
# The custom stop words registered on nlp.vocab are all lower-case as well.
doc = nlp(text.lower())

**Computational Linguistics**

In [0]:
# Short sample sentence, processed with the same pipeline, that the cells
# below inspect token by token.
sample_word = nlp(u'The Clever Fox Jump over the long wall')

In [12]:
## Parts of Speech Tagging

# One line per token: surface text, then its pos_ and tag_ attributes.
for token in sample_word:
    print(f"{token.text} {token.pos_} {token.tag_}")

The DET DT
Clever PROPN NNP
Fox PROPN NNP
Jump PROPN NNP
over ADP IN
the DET DT
long ADJ JJ
wall NOUN NN


In [13]:
## NER Tagging

# One line per token: surface text followed by its ent_type_ (an empty string
# when the token is not part of any entity).
for token in sample_word:
    print(f"{token.text} {token.ent_type_}")

The 
Clever 
Fox 
Jump 
over 
the 
long 
wall 


In [0]:
# List each entity span found in the sample sentence with its label.
# A Span exposes its string label as `label_`; the previous `ent.labels`
# attribute does not exist and would raise AttributeError as soon as the
# document contained at least one entity.
for ent in sample_word.ents:
    print(ent.text, ent.label_)

In [17]:
## Named-entity visualisation (style='ent' draws entity highlights, not the dependency tree)

displacy.render(sample_word, style='ent', jupyter=True)


In [19]:
# For every noun chunk print: the chunk text, its root token, the root's
# dependency label, and the text of the root's head token.
for chunk in sample_word.noun_chunks:
    print(f"{chunk.text} {chunk.root.text} {chunk.root.dep_} {chunk.root.head.text}")

The Clever Fox Jump Jump ROOT Jump
the long wall wall pobj over


In [20]:
# For every token print: its text, dependency label, head token text, the
# head's part of speech, and the list of its children.
for token in sample_word:
    print(
        token.text,
        token.dep_,
        token.head.text,
        token.head.pos_,
        list(token.children),
    )

The det Jump PROPN []
Clever compound Fox PROPN []
Fox compound Jump PROPN [Clever]
Jump ROOT Jump PROPN [The, Fox, over]
over prep Jump PROPN [wall]
the det wall NOUN []
long amod wall NOUN []
wall pobj over ADP [the, long]


In [22]:
# Draw the dependency-parse view of the sample sentence inline in the notebook.
displacy.render(sample_word, style='dep', jupyter=True)