## Open the dataset:

In [7]:
import pandas as pd

df = pd.read_csv('all_kindle_review .csv')

df.sample(10)


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,asin,helpful,rating,reviewText,reviewTime,reviewerID,reviewerName,summary,unixReviewTime
4178,4178,1603,B001892DJS,"[0, 0]",5,"The story begins with our heroine, Becca, putt...","03 26, 2013",A28VGN6IJA6ZUO,"Teresa Tio ""sometimes fluff is important""",Loved This Pirate Tale!,1364256000
11940,11940,11470,B0033AGU12,"[7, 7]",1,There is something strange about the ratings f...,"03 31, 2012",A12DCRMQOH93YK,Halvena,The story never takes off,1333152000
4532,4532,1200,B00149M19E,"[3, 3]",5,Brett Andrews has been living with and loving ...,"05 16, 2008",A320TMDV6KCFU,MNix,On the Edge,1210896000
2764,2764,6082,B002HWSQS2,"[0, 0]",4,Now i'm hoping the next set of anthologies wil...,"06 8, 2013",AHTX4TWG3AMZA,Melissa Brown,nice,1370649600
5375,5375,62,B000FC2MB8,"[0, 0]",5,L'Among classic. Great read. Great book . Gre...,"04 10, 2013",A1P3Q598K6TNFC,Michael,Classic,1365552000
8268,8268,3225,B001ULBOTO,"[1, 1]",5,This is book was just about a perfect read! Ye...,"05 26, 2013",A310KT1UQC5UNU,Reading Fanatic (CMP),"Wonderful, Wonderful Story!",1369526400
2083,2083,4203,B0028OLE2E,"[1, 1]",5,"This was an excellent book, it was a book that...","05 17, 2014",A3T1L4UAJ4JICA,louise,Excellent book,1400284800
765,765,19915,B003SX0Y70,"[0, 1]",2,pot line again mess up and why does the man ha...,"10 13, 2013",A1MFUAAGOS2KH3,Brittney Bower,Ghost Sexs,1381622400
6582,6582,2539,B001HQHCBQ,"[0, 0]",5,As with all the Black Dagger Brotherhood novel...,"05 18, 2014",A3T53XMRR7JKUR,Penny Young,Great Story,1400371200
5464,5464,2579,B001IP0UII,"[0, 0]",5,Great book~ it's about lost loves that reconne...,"08 23, 2012",A3KM84RAZA7XXC,Tracie Lampe,Sweet hotness!,1345680000


## Get the Bing Liu lexicon

In [10]:
from sklearn import preprocessing

import nltk

nltk.download('opinion_lexicon')

from nltk.corpus import opinion_lexicon

from nltk.tokenize import word_tokenize

print('Total number of words in opinion lexicon', len(opinion_lexicon.words()))

print('Examples of positive words in opinion lexicon',

     opinion_lexicon.positive()[:10])

print('Examples of negative words in opinion lexicon',

     opinion_lexicon.negative()[:10])


[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\ragha\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\opinion_lexicon.zip.


Total number of words in opinion lexicon 6789
Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


## Create the “Dictionary”

In [11]:
#Let's create a dictionary which we can use for scoring our review text

nltk.download('punkt')

df.rename(columns={"reviewText": "text"}, inplace=True)

pos_score = 1

neg_score = -1

word_dict = {}


# Adding the positive words to the dictionary

for word in opinion_lexicon.positive():

     word_dict[word] = pos_score


# Adding the negative words to the dictionary

for word in opinion_lexicon.negative():

     word_dict[word] = neg_score


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ragha\AppData\Roaming\nltk_data...


[nltk_data]   Unzipping tokenizers\punkt.zip.


## Function to do the math

In [12]:
def bing_liu_score(text):

    sentiment_score = 0

    bag_of_words = word_tokenize(text.lower())

    for word in bag_of_words:

         if word in word_dict:

               sentiment_score += word_dict[word]

    return sentiment_score


## Make sure all texts are non-empty and are type text then call the function

In [13]:
df['text'].fillna('no review', inplace=True)

df['Bing_Liu_Score'] = df['text'].apply(bing_liu_score)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['text'].fillna('no review', inplace=True)


## Let’s see the output

In [18]:
df[['rating', 'text', 'Bing_Liu_Score']].head(10)

Unnamed: 0,rating,text,Bing_Liu_Score
0,3,"Jace Rankin may be short, but he's nothing to ...",-9
1,5,Great short read. I didn't want to put it dow...,6
2,3,I'll start by saying this is the first of four...,1
3,3,Aggie is Angela Lansbury who carries pocketboo...,0
4,4,I did not expect this type of book to be in li...,2
5,5,Aislinn is a little girl with big dreams. Afte...,11
6,2,This has the makings of a good story... unfort...,-3
7,4,I got this because I like collaborated short s...,8
8,5,"Loved this book, I am hooked on this series an...",2
9,4,"And that's a good thing. Short, sweet tease th...",0


In [20]:
df.groupby('rating').agg({'Bing_Liu_Score': 'mean'})

Unnamed: 0_level_0,Bing_Liu_Score
rating,Unnamed: 1_level_1
1,-0.339
2,0.41
3,1.947
4,3.294
5,3.370333
