In [334]:
import pandas as pd
import numpy as np
import math
from sklearn.metrics import accuracy_score

In [343]:
# Load the review data, parse the Date column into datetimes and force every
# Review into a plain Python string.
df = pd.read_csv("data_sentiment.csv").assign(
    Date=lambda frame: pd.to_datetime(frame["Date"]),
    Review=lambda frame: frame["Review"].apply(str),
)

In [232]:
# Inspect the dtype of every column (Date should be datetime64 after parsing).
df.dtypes

Restaurant            object
Review                object
Date          datetime64[ns]
Label                  int64
Stars                  int64
dtype: object

In [234]:
# Spot check: do the rows with index labels 1 and 3 carry the same Label value?
df["Label"][1] == df["Label"][3]

True

In [102]:
# Preview the first rows of the loaded data.
df.head()

Unnamed: 0,Restaurant,Review,Date,Label,Stars
0,Happy Tummy,fresh ingredient friendly peep and so much che...,2016-04-06,1,5
1,Cibo Italiano,a small selection of italian wine by the glass...,2015-12-24,1,4
2,Cibo Italiano,cultural relevant singaporean cuisine in very ...,2018-08-19,4,5
3,Cibo Italiano,generous with the clam,2016-11-28,1,5
4,Yan kee Noodle House,the plus point is that the price remains the s...,2018-12-28,1,4


# Sentiment Analysis

In [103]:
from textblob import TextBlob

In [113]:
# Iterating a DataFrame walks its column labels (not its rows); print only
# the first label.
for column_label in df.columns:
    print(column_label)
    break

Restaurant


In [344]:
# TextBlob polarity score for every review text.
# The Review column is selected by name: indexing a row with the integer 1
# relies on column order and on positional fallback that pandas has deprecated.
df["sentiment_polarity"] = df["Review"].apply(
    lambda review_text: TextBlob(review_text).sentiment.polarity
)

In [345]:
# Preview the frame with the new sentiment_polarity column.
df.head()

Unnamed: 0,Restaurant,Review,Date,Label,Stars,sentiment_polarity
0,Happy Tummy,fresh ingredient friendly peep and so much che...,2016-04-06,1,5,0.000714
1,Cibo Italiano,a small selection of italian wine by the glass...,2015-12-24,1,4,0.15
2,Cibo Italiano,cultural relevant singaporean cuisine in very ...,2018-08-19,4,5,0.26375
3,Cibo Italiano,generous with the clam,2016-11-28,1,5,0.0
4,Yan kee Noodle House,the plus point is that the price remains the s...,2018-12-28,1,4,0.0


In [346]:
def adj_sentiment(sentiment):
    """Shift a sentiment score up by one and halve it.

    This maps -1 to 0, 0 to 0.5 and 1 to 1.
    """
    shifted = sentiment + 1
    return shifted / 2

In [347]:
# Rescale each polarity score with adj_sentiment.
# The source column is addressed by name; the previous positional lookup
# (integer 5 on a row) silently depended on the exact column order.
df["adjusted_sentiment"] = df["sentiment_polarity"].apply(adj_sentiment)

In [348]:
# Preview the frame with the new adjusted_sentiment column.
df.head()

Unnamed: 0,Restaurant,Review,Date,Label,Stars,sentiment_polarity,adjusted_sentiment
0,Happy Tummy,fresh ingredient friendly peep and so much che...,2016-04-06,1,5,0.000714,0.500357
1,Cibo Italiano,a small selection of italian wine by the glass...,2015-12-24,1,4,0.15,0.575
2,Cibo Italiano,cultural relevant singaporean cuisine in very ...,2018-08-19,4,5,0.26375,0.631875
3,Cibo Italiano,generous with the clam,2016-11-28,1,5,0.0,0.5
4,Yan kee Noodle House,the plus point is that the price remains the s...,2018-12-28,1,4,0.0,0.5


# Creating Date weights

In [141]:
# Earliest and latest review dates in the data set.
# NOTE(review): `max` shadows the Python builtin from here on; the name is
# kept because the following cells read it.
base, max = df["Date"].min(), df["Date"].max()

In [142]:
# Show the date range covered by the reviews, one bound per line.
print(base, max, sep="\n")

2010-02-05 00:00:00
2020-03-09 00:00:00


In [155]:
# Experiment: exponentiate the full date span measured in days.
# As the recorded output shows, the result is inf (the value is too large for a float).
np.exp(float((max-base).days))

  """Entry point for launching an IPython kernel.


inf

In [349]:
def weight(date, base_date=None):
    """Return a recency weight: whole days elapsed between base_date and date.

    Parameters
    ----------
    date : datetime-like
        The review date to weigh.
    base_date : datetime-like, optional
        Reference date. When omitted, the notebook-level ``base`` is used,
        which keeps the original single-argument call working.

    Returns
    -------
    int
        Number of whole days from ``base_date`` to ``date``; 1 when no whole
        day has elapsed, so the weight is never 0.
    """
    if base_date is None:
        base_date = base
    elapsed_days = (date - base_date).days
    # A weight of 0 would remove the review from every weighted average.
    # This also covers timestamps on the base day that are not exactly equal
    # to base_date, which the plain equality check missed.
    return elapsed_days if elapsed_days != 0 else 1

In [350]:
def grouped_weighted_avg(values, weights, by):
    """Weighted average of ``values`` within each group.

    Parameters
    ----------
    values : pandas.Series
        Values to average.
    weights : pandas.Series
        Weight of each value, aligned with ``values``.
    by : grouping key(s)
        Anything accepted by ``Series.groupby``. A tuple of keys is treated
        as a list of keys.

    Returns
    -------
    pandas.Series
        One weighted average per group: sum(values * weights) / sum(weights).
    """
    # pandas interprets a tuple passed to groupby as ONE key, so a tuple of
    # Series would not group by each of them; convert it to a list first.
    if isinstance(by, tuple):
        by = list(by)
    weighted_totals = (values * weights).groupby(by).sum()
    weight_totals = weights.groupby(by).sum()
    return weighted_totals / weight_totals

In [351]:
# Day-count weight for every review date.
# The Date column is selected by name instead of by the integer position 2
# of each row, which depended on column order.
df["weights"] = df["Date"].apply(weight)

In [352]:
# Order the reviews chronologically.
df = df.sort_values(by="Date")

In [353]:
# Exponentially weighted moving average of the day-count weights, taken in
# the current row order of df, with span=365.
adjusted_weight = df["weights"].ewm(span = 365).mean()

In [354]:
# Store the smoothed weights as a column (aligned on the row index).
df["adjusted_weight"] = adjusted_weight

In [355]:
# Show the last rows of the frame with the raw and smoothed weights.
df.tail()

Unnamed: 0,Restaurant,Review,Date,Label,Stars,sentiment_polarity,adjusted_sentiment,weights,adjusted_weight
509,Beauty in the Pot,i personally think for th price we payed this ...,2020-02-24,1,4,0.020833,0.510417,3671,3349.912036
541,Tai Hwa Pork Noodle,they offer a variety of soup base so we tried 4 1,2020-02-24,5,4,-0.8,0.1,3671,3351.666661
942,Candlenut,what sophistication,2020-02-25,4,1,0.0,0.5,3672,3353.417163
40,Song Fa Bak Kut Teh,a large portion is around dollar 10 singaporea...,2020-03-04,1,5,0.214286,0.607143,3680,3355.201815
1402,Odette,the only thing that wa even sort of exciting w...,2020-03-09,2,3,0.2,0.6,3685,3357.004038


In [448]:
# Drop the rows labelled 5, since 5 is not one of the preassigned classes.
clean_data = df.loc[df["Label"] != 5]

# Star Prediction for each Label

In [449]:
# Weighted mean of the adjusted sentiment for every (Restaurant, Label) pair.
# The grouping keys are passed as a list: pandas treats a tuple as a single
# key, so a tuple of Series does not group by both columns.
label_sentiment = grouped_weighted_avg(
    clean_data["adjusted_sentiment"],
    clean_data["adjusted_weight"],
    [clean_data["Restaurant"], clean_data["Label"]],
)

  


In [450]:
# Scale the weighted sentiment by 5 and round up to a whole number.
# NOTE(review): this overwrites label_sentiment, so re-running the cell
# scales the values again.
label_sentiment = label_sentiment.mul(5).map(math.ceil)

In [451]:
# Preview the predicted value per (Restaurant, Label) group.
label_sentiment.head()

Restaurant        Label
10 at Claymore    1        3
                  2        4
                  3        4
126 Eating House  3        5
2it & Drink       2        5
dtype: int64

In [452]:
# Number of (Restaurant, Label) groups.
label_sentiment.shape

(824,)

In [453]:
# Predicted values as a plain list, in the order of the grouped result.
pred_label = list(label_sentiment)

In [454]:
# Weighted mean of the Stars column for every (Restaurant, Label) pair.
# The grouping keys are passed as a list: pandas treats a tuple as a single
# key, so a tuple of Series does not group by both columns.
label_star = grouped_weighted_avg(
    clean_data["Stars"],
    clean_data["adjusted_weight"],
    [clean_data["Restaurant"], clean_data["Label"]],
)

  


In [455]:
# Round each weighted-average star value up to a whole number.
label_star = label_star.map(math.ceil)

In [456]:
# Reference star values as a plain list, in the order of the grouped result.
label_truth = list(label_star)

In [457]:
# Share of entries where the sentiment-derived value equals the star-derived value exactly.
accuracy_score(label_truth, pred_label)

0.32645631067961167

# Overall Restaurant Star Prediction

In [483]:
# Weighted mean of the adjusted sentiment per restaurant.
overall_sentiment = grouped_weighted_avg(
    values=clean_data["adjusted_sentiment"],
    weights=clean_data["adjusted_weight"],
    by=clean_data["Restaurant"],
)

In [484]:
# Scale the weighted sentiment by 5 and round to the nearest whole number.
# NOTE(review): this overwrites overall_sentiment, so re-running the cell
# scales the values again.
overall_sentiment = overall_sentiment.mul(5).map(round)

In [485]:
# Preview the predicted value per restaurant.
overall_sentiment.head()

Restaurant
10 at Claymore      3
126 Eating House    4
2it & Drink         5
328 Katong Laksa    3
8 Korean BBQ        4
dtype: int64

In [488]:
# Per-restaurant predictions as a plain list, in the order of the grouped result.
pred_overall = list(overall_sentiment)

In [496]:
# Weighted mean of the Stars column per restaurant.
overall_star = grouped_weighted_avg(
    values=clean_data["Stars"],
    weights=clean_data["adjusted_weight"],
    by=clean_data["Restaurant"],
)

In [497]:
# Round each weighted-average star value to the nearest whole number.
overall_star = overall_star.map(round)

In [498]:
# Preview the rounded weighted-average stars per restaurant.
overall_star.head()

Restaurant
10 at Claymore      4
126 Eating House    4
2it & Drink         5
328 Katong Laksa    3
8 Korean BBQ        4
dtype: int64

In [499]:
# Per-restaurant reference star values as a plain list, in the order of the grouped result.
overall_truth = list(overall_star)

In [500]:
# Share of entries where the sentiment-derived value equals the star-derived value exactly.
accuracy_score(overall_truth, pred_overall)

0.26973684210526316