### POS Tagging Library

In [31]:
!pip install nltk
!pip install pandas
import nltk
nltk.download('punkt')



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ranjit09\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ranjit09\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

### Using tokenizer

In [7]:
from nltk import word_tokenize
sentence = "The greatest glory in living lies not in never falling, but in rising every time we fall."
word_tokens = word_tokenize(sentence)
print(word_tokens)

['The', 'greatest', 'glory', 'in', 'living', 'lies', 'not', 'in', 'never', 'falling', ',', 'but', 'in', 'rising', 'every', 'time', 'we', 'fall', '.']


### Using POS Tagger

In [9]:
from nltk import pos_tag 
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\ranjit09\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     C:\Users\ranjit09\AppData\Roaming\nltk_data...
[nltk_data]   Package tagsets is already up-to-date!


True

In [10]:
tagged_sentence = pos_tag(word_tokenize(sentence))
print(tagged_sentence)

[('The', 'DT'), ('greatest', 'JJS'), ('glory', 'NN'), ('in', 'IN'), ('living', 'VBG'), ('lies', 'NNS'), ('not', 'RB'), ('in', 'IN'), ('never', 'RB'), ('falling', 'VBG'), (',', ','), ('but', 'CC'), ('in', 'IN'), ('rising', 'VBG'), ('every', 'DT'), ('time', 'NN'), ('we', 'PRP'), ('fall', 'VBP'), ('.', '.')]


### Get Tagset help 


In [11]:
nltk.help.upenn_tagset('VBD')  #give more details on the parts of speech displayed as part of the pos_tag abbrevation

VBD: verb, past tense
    dipped pleaded swiped regummed soaked tidied convened halted registered
    cushioned exacted snubbed strode aimed adopted belied figgered
    speculated wore appreciated contemplated ...


### Using a text file perform lab work from nlp project

In [27]:
with open('files/LabE_5.txt','r') as file:
    content = file.read()

print(content)

Great buy, always go with white dark colors melt on the Florida sun!! Experience over $$. Cordless is cool too.
Awesome product...love it
Love it, easy to put up as well!!!!
These go up very easily and I love the cordless feature. Color is perfect too.
When using the rod to open the blind, the rod gets stuck and snaps back to the original position. It requires frequent adjustments but ultimately works.
one of the top brackets was broken.  how do I get a replacement?
I love the sting free pushing and pulling to adjust the blind height. No tangles. Very hands free. Nothing getting stuck. And fast!
Very agitating to order a 25 x 72 inch blind (a replacement) and it comes in the box that says 25x72 but it's actually measuring at 24.5 x 72 and that little bit throws it all off.  Contacted walmart and they contacted seller but never heard anything from seller so got a refund.  Just agitating .
I was so happy to be able to order these blinds for my narrow windows with out special ordering. Th

In [34]:
#For each word in the sentence, tag/ markthe corresponding POS using any POS tagger
import nltk
from nltk.corpus import stopwords
from string import punctuation
import pandas as pd
nltk.download('stopwords')


tokens = nltk.word_tokenize(content)

stop_words = stopwords.words('english')

filtered_tokens = [token for token in tokens if token not in punctuation]

pos_tags = nltk.pos_tag(filtered_tokens)

#df  = pd.DataFrame(pos_tags,columns=['word','tags'])
#pivot_table = pd.crosstab(index=df['word'],columns=df['tags'])

print(pos_tags)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ranjit09\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!




### Using lemmatization to the above token

In [47]:

nltk.download('wordnet')
nltk.download('universal_tagset')
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
wn_pos_tags = {'N':'n','V':'v','J':'a','R':'r'}

lemmas = []
for token, pos in pos_tags:   
    pos = nltk.map_tag('en-ptb', 'universal',pos) 
    wn_pos = wn_pos_tags.get(pos[0], None)
    if wn_pos is None:
        lemmas.append(token)
    else:
        lemma = lemmatizer.lemmatize(token, pos=wn_pos)
        lemmas.append(lemma)

print(lemmas)



['Great', 'buy', 'always', 'go', 'with', 'white', 'dark', 'color', 'melt', 'on', 'the', 'Florida', 'sun', 'Experience', 'over', 'Cordless', 'be', 'cool', 'too', 'Awesome', 'product', '...', 'love', 'it', 'Love', 'it', 'easy', 'to', 'put', 'up', 'as', 'well', 'These', 'go', 'up', 'very', 'easily', 'and', 'I', 'love', 'the', 'cordless', 'feature', 'Color', 'be', 'perfect', 'too', 'When', 'use', 'the', 'rod', 'to', 'open', 'the', 'blind', 'the', 'rod', 'get', 'stick', 'and', 'snap', 'back', 'to', 'the', 'original', 'position', 'It', 'require', 'frequent', 'adjustment', 'but', 'ultimately', 'work', 'one', 'of', 'the', 'top', 'bracket', 'be', 'break', 'how', 'do', 'I', 'get', 'a', 'replacement', 'I', 'love', 'the', 'sting', 'free', 'push', 'and', 'pull', 'to', 'adjust', 'the', 'blind', 'height', 'No', 'tangle', 'Very', 'hand', 'free', 'Nothing', 'get', 'stick', 'And', 'fast', 'Very', 'agitate', 'to', 'order', 'a', '25', 'x', '72', 'inch', 'blind', 'a', 'replacement', 'and', 'it', 'come', 'i

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ranjit09\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     C:\Users\ranjit09\AppData\Roaming\nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!


### Extract Product descriptors and generate sentiments

#### Sentiment Score return polarity and subjectivity
#### Polarity will return from 0 to 1 and represent sentiment, if it is close to 0, it means a negative sentiment and close to 1, it means a positive sentiment, we got 0.30 below, which means it is more of negative sentiment
#### Subjectivity tell us, if it is a subjective statement or objective statement, subjective express a personnel opinion or belief and objective reprsents facts or information,
#### in this a 0 represent an objective statement and 1 represents a subjective statement, in our case it is more subjective statement as it is 0.56


In [69]:
#!pip install textblob
from textblob import TextBlob

tokens = nltk.word_tokenize(content)
tagged_tokens = pos_tag(tokens)

#extract all the adjectives
adjectives = [token for token,pos in tagged_tokens if pos == 'JJ']

#generate a summary
text = TextBlob(content)

print("All Adjectives : ", adjectives)
print("------------------------------")
print("Sentiment Score : ", text.sentiment)



All Adjectives :  ['white', 'cool', 'Awesome', 'easy', 'perfect', 'original', 'frequent', 'top', 'free', 'free', 'x', 'little', 'happy', 'able', 'narrow', 'special', 'first', 'bottom', 'suppose', 'full', 'last', 'high', 'nice', 'timely', 'easy', 'old', 'french', 'several', 'great', 'cheap', 'Good', 'arrived', 'quick', 'easy', 'good', 'blind', 'good', 'comfortable', 'fit', 'only', 'actual', 'burnt', 'super', 'soft', 'weird', 'silky', 'soft', 'quilt', 'soft', 'inch', 'last', 'Other', 'good', 'good', 'different', 'other', 'soft', 'definite', 'such', 'sensitive', 'soft', 'different', 'pillow', 'hot', 'workable', 'elastic', 'small', 'poor', 'pillow', 'salmon', 'dirty', 'correct', 'perfect', 'soft', 'deep', 'fitting', 'Several', 'perfect', 'many', 'Overall', 'difficult', 'soft', 'comfortable', 'orange', 'bright', 'holy', 'closed', 'last', 'different', 'purchased', 'soft', 'comfortable', 'multiple', 'new', 'confident', 'new', 'orange', 'single', 'separate', 'existing', 'helper', 'clear', 'lig