In [None]:
!pip install nltk tqdm seaborn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('ggplot')

import nltk

# Data packages used by the tokenizer, POS tagger, NE chunker and VADER analyser
# in the cells below. NLTK 3.9+ loads the `*_tab` / `*_eng` variants instead of
# the legacy pickled models, so both generations are requested; this keeps the
# notebook working whichever NLTK version the unpinned install pulled in.
NLTK_RESOURCES = (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker',
    'maxent_ne_chunker_tab',
    'words',
    'vader_lexicon',
)

# nltk.download() signals failure by returning False instead of raising, so the
# results are collected and reported here rather than silently ignored.
failed_downloads = [name for name in NLTK_RESOURCES if not nltk.download(name)]
if failed_downloads:
    print(
        f'WARNING: NLTK could not download {failed_downloads} '
        '(harmless if the data is already installed locally or not needed by '
        'the installed NLTK version).'
    )

In [None]:


import io

# Read the review data, report its full size, then keep only the first 500 rows
# for the rest of the notebook.
csv_path = 'cleaned_data (1).csv'
df = pd.read_csv(csv_path)
print(df.shape)

df = df.iloc[:500]
print(df.shape)
df.head()


In [None]:
# Bar chart of the number of reviews per star rating, ordered by rating value.
star_counts = df['Stars'].value_counts().sort_index()
ax = star_counts.plot.bar(title='Count of Reviews by Stars', figsize=(10, 5))
ax.set_xlabel('Review Stars')
plt.show()


In [None]:
# Walk one review title through NLTK's pipeline: tokenise -> POS-tag -> NE-chunk.
#
# The previous version looped over every row, printing and tokenising each title,
# but only the values left over from the final iteration were used afterwards.
# The last title is now selected directly, so `example`, `tokens`, `tagged` and
# `entities` end up with the same values without the wasted work and output.
# `example` is also reused by the sentiment demo cell further down.
example = df['Title'].iloc[-1]
print(example)

tokens = nltk.word_tokenize(example)
# A bare expression is only displayed when it is the last line of a cell,
# so the previews are printed explicitly.
print(tokens[:10])

tagged = nltk.pos_tag(tokens)
print(tagged[:10])

entities = nltk.chunk.ne_chunk(tagged)
entities.pprint()

In [None]:
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

# Sentiment analyser shared by the cells below.
sia = SentimentIntensityAnalyzer()

# Quick sanity check: two hand-written sentences, then the review title stored
# in `example` by the earlier cell.
for sample in ('I am so happy!', 'This is the worst thing ever.', example):
    print(sia.polarity_scores(sample))

# Score every review title; the dict is keyed by the DataFrame index label.
res = {
    idx: sia.polarity_scores(record['Title'])
    for idx, record in tqdm(df.iterrows(), total=len(df))
}

# One row per review holding the score columns, with the original index exposed
# as 'Id' and the review columns from `df` joined back on via that index.
vaders = (
    pd.DataFrame(res).T
    .reset_index()
    .rename(columns={'index': 'Id'})
    .merge(df, how='left', left_on='Id', right_index=True)
)
vaders.head()

In [None]:
# Compound sentiment score grouped by star rating.
ax = sns.barplot(x='Stars', y='compound', data=vaders)
ax.set(title='Compound Score by Amazon Star Review')
plt.show()


In [None]:
# Side-by-side panels: positive, neutral and negative scores by star rating.
fig, axs = plt.subplots(1, 3, figsize=(12, 3))
panels = (('pos', 'Positive'), ('neu', 'Neutral'), ('neg', 'Negative'))
for panel_ax, (score_col, panel_title) in zip(axs, panels):
    sns.barplot(data=vaders, x='Stars', y=score_col, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()