In [1]:
# for Importing the Dataset
import pandas as pd

In [2]:
# lets read the dataset
data = pd.read_csv('amazon_alexa.tsv', delimiter = '\t')

# lets check the shape of the dataset
data.shape

(3150, 5)

In [3]:
# lets check the head of the dataset
data.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1


In [4]:
# lets create a Part of speech Dictionary
pos_dic = {
    'noun' : ['NN','NNS','NNP','NNPS'],
    'pron' : ['PRP','PRP$','WP','WP$'],
    'verb' : ['VB','VBD','VBG','VBN','VBP','VBZ'],
    'adj' :  ['JJ','JJR','JJS'],
    'adv' : ['RB','RBR','RBS','WRB']
}

In [5]:
# function to check and get the part of speech tag count of a words in a given sentence
def pos_check(x, flag):
    cnt = 0
    try:
        wiki = TextBlob(x)
        for tup in wiki.tags:
            ppo = list(tup)[1]
            if ppo in pos_dic[flag]:
                cnt += 1
    except:
        pass
    return cnt

### Nouns

A noun is a word that functions as the name of a specific object or set of objects, such as living creatures, places, actions, qualities, states of existence, or ideas. However, noun is not a semantic category, so that it cannot be characterized in terms of its meaning.

In [6]:
# lets calculate the count of Nouns in the Text
data['noun_count'] = data['verified_reviews'].apply(lambda x: pos_check(x, 'noun'))

### Verbs

A verb, from the Latin verbum meaning word, is a word that in syntax conveys an action, an occurrence, or a state of being. In the usual description of English, the basic form, with or without the particle to, is the infinitive. In many languages, verbs are inflected to encode tense, aspect, mood, and voice.

In [7]:
# lets calculate the count of Verbs in the Text
data['verb_count'] = data['verified_reviews'].apply(lambda x: pos_check(x, 'verb'))

In [8]:
#3 lets summarize the Newly Created Features
data[['noun_count','verb_count']].describe()

Unnamed: 0,noun_count,verb_count
count,3150.0,3150.0
mean,0.0,0.0
std,0.0,0.0
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,0.0
max,0.0,0.0


In [9]:
# for Importing the Dataset
import pandas as pd

# for NLP
import spacy 
  
# python -m spacy download en_core_web_sm 
nlp = spacy.load("en_core_web_sm")

In [10]:
# lets read the dataset
data = pd.read_csv('amazon_alexa.tsv', delimiter = '\t')

# lets check the shape of the dataset
data.shape

(3150, 5)

In [11]:
# lets check the head of the dataset
data.head()

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1


In [12]:
# Process whole documents 
text = ("""My name is Shaurya Uppal.  
I enjoy writing articles on GeeksforGeeks checkout 
my other article by going to my profile section.""") 
  
doc = nlp(text) 
  
# Token and Tag 
for token in doc: 
    print(token, token.pos_) 
    
# You want list of Verb tokens 
print("Verbs:", [token.text for token in doc if token.pos_ == "VERB"]) 

My PRON
name NOUN
is AUX
Shaurya PROPN
Uppal PROPN
. PUNCT
 
 SPACE
I PRON
enjoy VERB
writing VERB
articles NOUN
on ADP
GeeksforGeeks PROPN
checkout NOUN

 SPACE
my PRON
other ADJ
article NOUN
by ADP
going VERB
to ADP
my PRON
profile ADJ
section NOUN
. PUNCT
Verbs: ['enjoy', 'writing', 'going']


In [13]:
# Named entity Recognition

doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

Apple 0 5 ORG
U.K. 27 31 GPE
$1 billion 44 54 MONEY


In [None]:
# using displacy

from spacy import displacy
import warnings 
warnings.filterwarnings("ignore")

doc = nlp(data['verified_reviews'][0])
displacy.serve(doc, style="dep")


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

