# Sentiment Analysis

## Reading Datafile

In [7]:
import pandas as pd

# Load the review CSV into a DataFrame.
df = pd.read_csv('Arts_Crafts_and_Sewing_5.csv')

# Preview ten randomly drawn rows; no random_state is set, so the rows shown
# change from run to run.
df.sample(n=10)

Unnamed: 0,overall,verified,reviewerID,asin,reviewText,summary
265092,5,True,A1QJDPQJ0K70A2,B008QSJDGK,Finally exactly what I was looking for. Easy o...,Love these stitch markers!
46872,5,True,A3W328RA5TIRDD,B000XANNVA,Great,Great
461694,5,True,A3H19BNX8EQ2AU,B00CMSK828,Used to make bracelets. Am happy with it.,Happy
58625,5,True,AR8RRQURPG8N4,B000YZ7QLC,Very good storage solution for keeping extra p...,Great little box - perfect for storing extra b...
50024,5,True,A39BP303UQARE8,B000Y00DDK,Good color,Five Stars
195968,4,True,A14BK0ISXOPPR3,B003IXFPC4,very decorative,Four Stars
488237,5,True,AJ98H5CPDYMOV,B015MA6DRQ,Love the look of this pattern. Ready to cut ou...,Will let you know Great service?
355693,5,True,A21A82RP1TJDH5,B00OLZWOEU,The Bead Smith Tarnish Resistant wire is just ...,Wired
113141,4,True,AJ1QCT650MVSH,B001283FMO,These are great tools to have when drawing and...,Four Stars
174904,5,True,A3HVA6BTVB1UYP,B00281K4X2,i haven't made any thing yet with this yarn bu...,gorgeous colors


## Installing Scikit-learn Package

In [8]:
!pip install scikit-learn



## Exploring Opinion Lexicon in NLTK Library

In [9]:
from sklearn import preprocessing  # scikit-learn preprocessing module
import nltk  # Natural Language Toolkit
nltk.download('opinion_lexicon')  # fetch the opinion lexicon corpus
from nltk.corpus import opinion_lexicon  # corpus with positive and negative opinion words
from nltk.tokenize import word_tokenize  # word-level tokenizer

# Size of the complete lexicon.
lexicon_size = len(opinion_lexicon.words())
print('Total number of words in opinion lexicon', lexicon_size)

# First ten entries of each polarity list, as a quick look at the contents.
positive_sample = opinion_lexicon.positive()[:10]
negative_sample = opinion_lexicon.negative()[:10]
print('Examples of positive words in opinion lexicon', positive_sample)
print('Examples of negative words in opinion lexicon', negative_sample)


[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\Alex\AppData\Roaming\nltk_data...


Total number of words in opinion lexicon 6789
Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


[nltk_data]   Unzipping corpora\opinion_lexicon.zip.


## Creation of Dictionary for Sentiment Analysis 

In [11]:
# Build the word -> polarity lookup that is used to score the review text.

# Punkt tokenizer data from NLTK.
# NOTE(review): newer NLTK releases may look for 'punkt_tab' instead - confirm
# against the installed version.
nltk.download('punkt')

# From here on the review body column is called 'text'.
df.rename(columns={"reviewText": "text"}, inplace=True)

# Polarity weights for positive and negative words.
pos_score = 1
neg_score = -1

# Positive words go in first; negative words are applied afterwards, so a word
# that appears in both lists ends up with the negative score.
word_dict = dict.fromkeys(opinion_lexicon.positive(), pos_score)
word_dict.update(dict.fromkeys(opinion_lexicon.negative(), neg_score))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Alex\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Calculating Sentiment Score with Bing Liu Lexicon

In [13]:
def bing_liu_score(text):
    """Score a piece of text with the Bing Liu opinion lexicon.

    The text is lower-cased and tokenized; every token found in ``word_dict``
    adds its polarity (+1 for positive words, -1 for negative words) to the
    total.

    Args:
        text: Review text. Non-string values (for example ``None`` or the
            float NaN pandas uses for missing cells) and blank strings are
            scored as 0.

    Returns:
        int: Sum of the polarities of all tokens present in ``word_dict``;
        0 when no token matches.
    """
    # Missing or blank reviews carry no sentiment. Returning early also avoids
    # calling .lower() on a non-string, which would raise AttributeError.
    if not isinstance(text, str) or not text.strip():
        return 0
    # Lower-case before tokenizing so tokens can match the lexicon entries.
    bag_of_words = word_tokenize(text.lower())
    # Tokens that are not in the lexicon contribute nothing to the score.
    return sum(word_dict.get(word, 0) for word in bag_of_words)

## Data Preprocessing and Applying Sentiment Analysis with Bing Liu Lexicon

In [15]:
# Replace missing reviews with a placeholder string. The result is assigned
# back to the column: calling fillna(inplace=True) on df['text'] acts on an
# intermediate object, which recent pandas warns about and which no longer
# updates df once Copy-on-Write is enabled.
df['text'] = df['text'].fillna('no review')
# Score every review and store the result in a new 'Bing_Liu_Score' column.
df['Bing_Liu_Score'] = df['text'].apply(bing_liu_score)

## Displaying the First 10 Rows

In [16]:
# Show the rating, review text and lexicon score side by side for the first ten rows.
preview_columns = ['overall', "text", 'Bing_Liu_Score']
df[preview_columns].head(10)

Unnamed: 0,overall,text,Bing_Liu_Score
0,4,Contains some interesting stitches.,1
1,5,I'm a fairly experienced knitter of the one-co...,22
2,4,Great book but the index is terrible. Had to w...,0
3,5,I purchased the Kindle edition which is incred...,4
4,5,Very well laid out and very easy to read.\n\nT...,5
5,5,"Beginning her career as a freelance knitter, M...",15
6,5,This is a terrific stitch handbook (and I have...,9
7,4,The book needs to be coil bound. The content i...,1
8,5,I really am enjoying this book! I like the siz...,12
9,5,Just received this book and looked over it cov...,6


## Calculating Mean Sentiment Score

In [17]:
# Average Bing Liu lexicon score per star rating: one row per 'overall' value.
df.groupby('overall')[['Bing_Liu_Score']].mean()

Unnamed: 0_level_0,Bing_Liu_Score
overall,Unnamed: 1_level_1
1,-0.255049
2,0.566098
3,1.158796
4,2.027999
5,2.129986
