# Sentiment Analysis

## Reading the Data File

In [1]:
import pandas as pd  #Importing pandas library 
# Load the Singapore Airlines reviews CSV into a DataFrame.
df = pd.read_csv('singapore_airlines_reviews.csv')
# Preview 10 randomly chosen rows. The fixed random_state keeps the preview
# identical across "Restart Kernel -> Run All" executions.
df.sample(10, random_state=42)

Unnamed: 0,published_date,published_platform,rating,type,text,title,helpful_votes
7914,2018-12-16T08:23:58-05:00,Mobile,5,review,I took the new long haul flight on Singapore A...,Best flight experience ever!,3
3274,2019-10-09T03:02:26-04:00,Desktop,5,review,Execlent service from booking in at the airpo...,Holiday to Australia,1
7635,2018-12-18T00:39:29-05:00,Desktop,5,review,They run their airline like their country. Qui...,Their warmth is world class,1
4472,2019-07-11T21:27:21-04:00,Desktop,4,review,The service was really good. But sadly in my n...,The chair's reclaining was broken,1
7700,2018-12-17T18:32:55-05:00,Desktop,5,review,The extra legroom compared to some carriers ga...,"Quality, legroom and baggage allowance",0
2157,2020-01-03T16:45:45-05:00,Desktop,3,review,My husband has a medical problem which means h...,Disappointment at request,2
3131,2019-10-22T17:46:57-04:00,Desktop,3,review,Well it didn't start well on our flight when i...,Good things and bad things,2
8809,2018-10-04T15:00:41-04:00,Desktop,4,review,Singapore always delivers a very professional ...,Solid professionalism,0
5199,2019-06-07T00:09:43-04:00,Desktop,5,review,As usual you get the service you expect from a...,Lovely flight,1
4799,2019-07-09T08:36:24-04:00,Mobile,1,review,"I will never ever fly Singapore Airline again,...",Nightmare Flight,3


## Exploring Opinion Lexicon in NLTK Library

In [2]:
from sklearn import preprocessing  # scikit-learn preprocessing module (not referenced in the cells shown here)
import nltk  # Natural Language Toolkit
nltk.download('opinion_lexicon')  # Download the opinion lexicon corpus explored below
from nltk.corpus import opinion_lexicon  # Corpus reader exposing the positive() and negative() opinion word lists
from nltk.tokenize import word_tokenize  # Function that splits a text into word tokens

# Gather the figures to report: overall lexicon size and the first ten
# entries of the positive and negative word lists.
lexicon_size = len(opinion_lexicon.words())
positive_preview = opinion_lexicon.positive()[:10]
negative_preview = opinion_lexicon.negative()[:10]

# Report them, one line per figure.
print('Total number of words in opinion lexicon', lexicon_size)
print('Examples of positive words in opinion lexicon', positive_preview)
print('Examples of negative words in opinion lexicon', negative_preview)

Total number of words in opinion lexicon 6789
Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\Alex\AppData\Roaming\nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


## Creation of Dictionary for Sentiment Analysis

In [3]:
# Build the word -> score lookup that is used to score the review text.

# Download NLTK's 'punkt' package (presumably the tokenizer data that
# word_tokenize relies on -- confirm against the installed NLTK version).
nltk.download('punkt')

# Rename the 'text' column to 'Modules' in the DataFrame.
df.rename(columns={"text": "Modules"}, inplace=True)

# Score contributed by a single positive / negative lexicon word.
pos_score = 1
neg_score = -1

# Positive words are inserted first and negative words are applied afterwards,
# so a word that appears in both lists ends up with the negative score.
word_dict = dict.fromkeys(opinion_lexicon.positive(), pos_score)
word_dict.update(dict.fromkeys(opinion_lexicon.negative(), neg_score))

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Alex\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Calculating Sentiment Score with Bing Liu Lexicon

In [4]:
# Function that scores a text against the opinion-lexicon dictionary
def bing_liu_score(Modules):
    """Return the net lexicon sentiment score of ``Modules``.

    The text is lower-cased and tokenized with ``word_tokenize``. Every token
    found in ``word_dict`` contributes its stored score; tokens that are not
    in the dictionary contribute nothing.

    Args:
        Modules: The text to score. It must support ``.lower()``, i.e. be a
            string.

    Returns:
        The sum of the dictionary scores of all matching tokens, or 0 when
        no token matches.
    """
    tokens = word_tokenize(Modules.lower())
    # Unknown tokens fall back to 0 so they leave the total unchanged.
    return sum(word_dict.get(token, 0) for token in tokens)

In [5]:
# Replace missing review text with a placeholder so every row can be scored.
# The filled column is assigned back explicitly: calling
# fillna(..., inplace=True) on a selected column is a chained in-place
# operation, which is deprecated in pandas 2.x and leaves df unchanged under
# Copy-on-Write, so NaN values would reach bing_liu_score and fail on .lower().
df['Modules'] = df['Modules'].fillna('no review')
# Score every review and store the result in a new 'Bing_Liu_Score' column.
df['Bing_Liu_Score'] = df['Modules'].apply(bing_liu_score)

In [7]:
# Show the first 5 rows, restricted to the columns relevant to the scores.
preview_columns = ['type', 'rating', 'title', 'Modules', 'Bing_Liu_Score']
df[preview_columns].head(5)

Unnamed: 0,type,rating,title,Modules,Bing_Liu_Score
0,review,3,Ok,We used this airline to go from Singapore to L...,5
1,review,5,The service in Suites Class makes one feel lik...,The service on Singapore Airlines Suites Class...,20
2,review,1,Don’t give them your money,"Booked, paid and received email confirmation f...",-1
3,review,5,Best Airline in the World,"Best airline in the world, seats, food, servic...",4
4,review,2,Premium Economy Seating on Singapore Airlines ...,Premium Economy Seating on Singapore Airlines ...,-1


## Calculating Mean Sentiment Score

In [8]:
# Average Bing Liu score for each star rating (one row per rating value).
df.groupby('rating')[['Bing_Liu_Score']].mean()

Unnamed: 0_level_0,Bing_Liu_Score
rating,Unnamed: 1_level_1
1,-1.854305
2,-0.440147
3,1.272547
4,3.770208
5,4.912058
