# **Reading data**

In [14]:
import pandas as pd


df = pd.read_json("C:\\Users\\keert\\Downloads\\Musical_Instruments_5.json", lines=True)


print(df.sample(10))




          reviewerID        asin                        reviewerName helpful  \
8439  A206T0W1VE8K1Q  B003YVRGN6                             C3p-BRO  [2, 3]   
1976   AJH2W783HOXZV  B0002F58TG                    Daniel Berry Jr.  [0, 0]   
7687  A2XVE2J5WGCSGX  B0037ME50Y                   William J. Stasse  [2, 2]   
9486  A3FW4THIMBIN6V  B00646MZHK  Jeffrey A. Hawkins "HumanJHawkins"  [0, 0]   
3674  A1B9TIERY43Q8H  B0006NDF8A                          GeekParrot  [0, 0]   
9428   AF1I90O6U8VCQ  B005VLR6XC                       Rolf J Craven  [0, 0]   
2823  A2XSWF2AJSXQZB  B0002H0A3S                          bassestkkm  [0, 0]   
1573  A15TYOEWBQYF0X  B0002E2GMY                      David W "Dave"  [0, 0]   
214   A319B090A2POEB  B0000AQRSU                                 Lee  [0, 0]   
9059  A3W2E6S24BTXXK  B005CX4GLE                    airchamp "ariel"  [0, 0]   

                                             reviewText  overall  \
8439  This is an amazing guitar. I couldn't put it 

# **Analyzing Sentiment Lexicon in NLTK**

In [15]:
from sklearn import preprocessing
import nltk
nltk.download('opinion_lexicon')
from nltk.corpus import opinion_lexicon
from nltk.tokenize import word_tokenize
print('Total number of words in opinion lexicon', len(opinion_lexicon.words()))
print('Examples of positive words in opinion lexicon',
opinion_lexicon.positive()[:10])
print('Examples of negative words in opinion lexicon',
opinion_lexicon.negative()[:10])

Total number of words in opinion lexicon 6789
Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\keert\AppData\Roaming\nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


# **Creating Sentiment Score Dictionary**


In [16]:
nltk.download('punkt')
df.rename(columns={"reviewText": "text"}, inplace=True)
pos_score = 1
neg_score = -1
word_dict = {}
# Adding the positive words to the dictionary
for word in opinion_lexicon.positive():
 word_dict[word] = pos_score
# Adding the negative words to the dictionary
for word in opinion_lexicon.negative():
 word_dict[word] = neg_score

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\keert\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# **Calculating Sentiment Score Using Bing Liu Lexicon**

In [17]:
def bing_liu_score(text):
 sentiment_score = 0
 bag_of_words = word_tokenize(text.lower())
 for word in bag_of_words:
  if word in word_dict:
    sentiment_score += word_dict[word]
 return sentiment_score

# **Calculating Bing Liu Sentiment Scores for DataFrame Text Column**

In [18]:
df['text'].fillna('no review', inplace=True)
df['Bing_Liu_Score'] = df['text'].apply(bing_liu_score)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['text'].fillna('no review', inplace=True)


# **Displaying Specific Columns in DataFrame**

In [19]:
df[['overall',"text", 'Bing_Liu_Score']].head(10)

Unnamed: 0,overall,text,Bing_Liu_Score
0,5,"Not much to write about here, but it does exac...",3
1,5,The product does exactly as it should and is q...,3
2,5,The primary job of this device is to block the...,2
3,5,Nice windscreen protects my MXL mic and preven...,-1
4,5,This pop filter is great. It looks and perform...,2
5,5,So good that I bought another one. Love the h...,5
6,5,"I have used monster cables for years, and with...",0
7,3,I now use this cable to run from the output of...,-2
8,5,Perfect for my Epiphone Sheraton II. Monster ...,0
9,5,Monster makes the best cables and a lifetime w...,0


# **Mean Bing Liu Sentiment Score Grouped by Overall Rating**

In [20]:
df.groupby('overall').agg({'Bing_Liu_Score':'mean'})

Unnamed: 0_level_0,Bing_Liu_Score
overall,Unnamed: 1_level_1
1,0.023041
2,1.684
3,2.46114
4,3.355566
5,3.715624
