# Initial setup

In [1]:
import pandas as pd
import numpy as np

from textblob import TextBlob

In [2]:
# Column names found in the raw reviews file.
REVIEW, LIKED = "Review", "Liked"

# Not referenced by the cells visible in this part of the notebook.
SENTENCE = "sentence"

# Column names added to the frame by the sentiment-analysis helpers.
POLARITY, SENTIMENT_TYPE, SENTIMENT_BINARY = (
    "polarity",
    "sentiment_type",
    "sentiment_binary",
)

In [3]:
# Tab-separated source file; the preview below shows a "Review" text column and a "Liked" label.
df_raw_comments = pd.read_csv(
    "../resources/Restaurant_Reviews.tsv",
    sep="\t",
)
df_raw_comments.head(5)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


# Sentiment analysis

### Prepare dataset

In [4]:
def _compute_polarity(text: str) -> float:
    """Return the TextBlob sentiment polarity of a piece of text.

    Parameters
    ----------
    text : str
        The text to score (in this notebook, the content of one review).

    Returns
    -------
    float
        The ``polarity`` component of TextBlob's sentiment for ``text``.
        TextBlob documents it as a float within ``[-1.0, 1.0]``.
    """
    return TextBlob(text).sentiment.polarity

In [5]:
def create_polarity_column(df: pd.DataFrame) -> pd.DataFrame:
    """Score every review and store the result in the ``POLARITY`` column.

    The column is written onto ``df`` itself, so the frame passed in is
    modified; that same object is returned.
    """
    polarity_scores = df[REVIEW].apply(_compute_polarity)
    df[POLARITY] = polarity_scores
    return df

In [6]:
def _sentiment_function(df: pd.Series) -> str:
    """Label one row as ``"Positive"`` or ``"Negative"`` from its polarity.

    Despite its name, ``df`` is a single row: ``determine_sentiment`` applies
    this function with ``DataFrame.apply(..., axis=1)``, which passes each row
    as a Series.

    Parameters
    ----------
    df : pd.Series
        One row of the frame; must contain the ``POLARITY`` entry.

    Returns
    -------
    str
        ``"Positive"`` when the polarity is strictly greater than 0,
        ``"Negative"`` otherwise.
    """
    # Strict comparison: a polarity of exactly 0 is labelled "Negative".
    return "Positive" if df[POLARITY] > 0 else "Negative"

In [7]:
def determine_sentiment(df: pd.DataFrame) -> pd.DataFrame:
    """Add the ``SENTIMENT_TYPE`` column by labelling each row.

    Every row is passed to ``_sentiment_function``. The new column is written
    onto ``df`` itself, and that same object is returned.
    """
    row_labels = df.apply(_sentiment_function, axis="columns")
    df[SENTIMENT_TYPE] = row_labels
    return df

In [8]:
def _compute_sentiment_binary(df: pd.Series) -> int:
    """Encode one row's sentiment label as 1 or 0.

    Despite its name, ``df`` is a single row: ``determine_sentiment_binary``
    applies this function with ``DataFrame.apply(..., axis=1)``, which passes
    each row as a Series.

    Parameters
    ----------
    df : pd.Series
        One row of the frame; must contain the ``SENTIMENT_TYPE`` entry.

    Returns
    -------
    int
        1 when the label equals ``"Positive"``, 0 for any other value.
    """
    return 1 if df[SENTIMENT_TYPE] == "Positive" else 0

In [9]:
def determine_sentiment_binary(df: pd.DataFrame) -> pd.DataFrame:
    """Add the ``SENTIMENT_BINARY`` column by encoding each row's label.

    Every row is passed to ``_compute_sentiment_binary``. The new column is
    written onto ``df`` itself, and that same object is returned.
    """
    binary_flags = df.apply(_compute_sentiment_binary, axis="columns")
    df[SENTIMENT_BINARY] = binary_flags
    return df

In [10]:
def prepare_dataset(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` enriched with the three sentiment columns.

    The steps run in order: polarity score, textual sentiment label, then the
    binary encoding of that label. Each later step reads the column produced
    by the one before it.
    """
    # Work on a copy: every step below writes its column onto the frame it receives.
    return (
        df.copy()
        .pipe(create_polarity_column)
        .pipe(determine_sentiment)
        .pipe(determine_sentiment_binary)
    )

In [11]:
# prepare_dataset works on a copy, so df_raw_comments keeps its original columns.
df_polarity = prepare_dataset(df=df_raw_comments)
df_polarity.head(5)

Unnamed: 0,Review,Liked,polarity,sentiment_type,sentiment_binary
0,Wow... Loved this place.,1,0.4,Positive,1
1,Crust is not good.,0,-0.35,Negative,0
2,Not tasty and the texture was just nasty.,0,-1.0,Negative,0
3,Stopped by during the late May bank holiday of...,1,0.2,Positive,1
4,The selection on the menu was great and so wer...,1,0.8,Positive,1


In [16]:
# Optional export of the scored reviews; uncomment the next line to write the CSV.
# df_polarity.to_csv("../resources/df_sentiment.csv", index=False, sep=",", encoding="utf-8")

### Analysis of ambivalent reviews

In [13]:
# Count the reviews per ground-truth label (1 is reported as positive, 0 as negative).
# The column is addressed through the LIKED constant, like the rest of the notebook,
# and counted with a boolean sum instead of building two filtered frames.
positive_comments = int((df_raw_comments[LIKED] == 1).sum())
negative_comments = int((df_raw_comments[LIKED] == 0).sum())

print(f"The dataset contains {positive_comments} comments labeled as positive")
print(f"                 and {negative_comments} comments labeled as negative ")

The dataset contains 500 comments labeled as positive
                 and 500 comments labeled as negative 


In [14]:
# Count the reviews per predicted sentiment (1 is reported as positive, 0 as negative).
# The column is addressed through the SENTIMENT_BINARY constant, like the rest of the
# notebook, and counted with a boolean sum instead of building two filtered frames.
positive_sentiment = int((df_polarity[SENTIMENT_BINARY] == 1).sum())
negative_sentiment = int((df_polarity[SENTIMENT_BINARY] == 0).sum())

print(f"The sentiment analysis states that we have {positive_sentiment} comments considered as positive")
print(f"                                       and {negative_sentiment} comments considered as negative ")

The sentiment analysis states that we have 514 comments considered as positive
                                       and 486 comments considered as negative 


###### Ambivalent comments

In [15]:
# Rows where the ground-truth label and the computed sentiment disagree.
liked_but_scored_negative = (df_polarity[LIKED] == 1) & (df_polarity[SENTIMENT_BINARY] == 0)
disliked_but_scored_positive = (df_polarity[LIKED] == 0) & (df_polarity[SENTIMENT_BINARY] == 1)

ambivalent_review = df_polarity.loc[liked_but_scored_negative | disliked_but_scored_positive]
ambivalent_review

Unnamed: 0,Review,Liked,polarity,sentiment_type,sentiment_binary
6,Honeslty it didn't taste THAT fresh.),0,0.300000,Positive,1
13,"I tried the Cape Cod ravoli, chicken, with cra...",1,-0.750000,Negative,0
24,So they performed.,1,0.000000,Negative,0
28,Took an hour to get our food only 4 tables in ...,0,0.200000,Positive,1
32,I found this place by accident and I could not...,1,0.000000,Negative,0
...,...,...,...,...,...
981,We started with the tuna sashimi which was bro...,0,0.150000,Positive,1
983,It sure does beat the nachos at the movies but...,0,0.270833,Positive,1
985,The problem I have is that they charge $11.99 ...,0,0.275000,Positive,1
988,It really is impressive that the place hasn't ...,0,0.248148,Positive,1
