In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer

# This classifier is based on the tutorial at https://www.learnsteps.com/naive-bayesian-text-classifier-using-textblob-python/
# Restaurant review dataset: https://www.kaggle.com/datasets/jurk06/restaurant-dataset

In [2]:

def prRed(skk):
    """Print ``skk`` in red on an ANSI-capable terminal.

    The value is formatted with ``str.format`` between the ``\\033[91m``
    colour code and the ``\\033[00m`` reset code; a single space follows the
    colour code, so the printed text is indented by one character.
    """
    coloured = "\033[91m {}\033[00m".format(skk)
    print(coloured)
 
 
def prGreen(skk):
    """Print ``skk`` in green on an ANSI-capable terminal.

    The value is formatted with ``str.format`` between the ``\\033[92m``
    colour code and the ``\\033[00m`` reset code; a single space follows the
    colour code, so the printed text is indented by one character.
    """
    coloured = "\033[92m {}\033[00m".format(skk)
    print(coloured)

In [3]:
def chartSentiment(text, analyzer=None):
    """Colour-print each sentence of ``text`` by sentiment and bar-chart the scores.

    The text is parsed twice with TextBlob: once with a Naive Bayes analyzer
    (which exposes ``p_pos`` / ``p_neg``) and once with TextBlob's default
    analyzer (which exposes ``polarity``). For every sentence:

    * ``p_neg > p_pos``  -> printed in red, recorded with ``NBSentiment = -p_neg``
    * ``p_pos > p_neg``  -> printed in green, recorded with ``NBSentiment = p_pos``
    * exact tie          -> printed uncoloured and left out of the chart

    Recorded rows keep the sentence's position in the text as their index, so
    the bar chart's x-axis labels are sentence numbers (ties leave gaps).

    Parameters
    ----------
    text : str
        The text to split into sentences and score.
    analyzer : optional
        Sentiment analyzer instance handed to ``TextBlob``; it must produce a
        sentiment with ``p_pos`` and ``p_neg``. Defaults to a new
        ``NaiveBayesAnalyzer()``, as before.
        NOTE(review): a new NaiveBayesAnalyzer presumably trains its model on
        first use, so passing one shared instance across calls should avoid
        repeating that cost -- confirm against the TextBlob docs.

    Returns
    -------
    None
        Output is the printed sentences and, when at least one sentence was
        recorded, a bar chart.
    """
    if analyzer is None:
        analyzer = NaiveBayesAnalyzer()

    nb_blob = TextBlob(text, analyzer=analyzer)
    default_blob = TextBlob(text)

    # Collect rows first and build the frame once: growing a DataFrame with
    # ``df.loc[idx] = ...`` copies on every insert and leaves the column dtypes
    # to enlargement-time inference.
    records = []
    row_labels = []
    sentence_pairs = zip(nb_blob.sentences, default_blob.sentences)
    for idx, (nb_sentence, default_sentence) in enumerate(sentence_pairs):
        nb_sentiment = nb_sentence.sentiment
        if nb_sentiment.p_neg > nb_sentiment.p_pos:
            prRed(nb_sentence)
            nb_score = -nb_sentiment.p_neg  # negative bars point downwards
        elif nb_sentiment.p_pos > nb_sentiment.p_neg:
            prGreen(nb_sentence)
            nb_score = nb_sentiment.p_pos
        else:
            # Exact tie: show the sentence but do not chart it.
            print(nb_sentence)
            continue

        records.append({
            "sentence": str(nb_sentence),
            "NBSentiment": nb_score,
            "polarity": default_sentence.sentiment.polarity,
        })
        row_labels.append(idx)

    df_sent = pd.DataFrame(
        records,
        index=row_labels,
        columns=["sentence", "NBSentiment", "polarity"],
    )

    # Plotting an empty frame raises ("no numeric data to plot"), so skip the
    # chart when the text had no sentences or every sentence was a tie.
    if df_sent.empty:
        return

    df_sent.plot.bar(rot=90)

In [4]:
# Load the tab-separated restaurant reviews into a DataFrame.
df_rest = pd.read_csv('Restaurant_Reviews.tsv', delimiter='\t')

# Quick inspection. A notebook cell renders only its final expression, so the
# first two lines are evaluated without producing visible output; the class
# balance of the 'Liked' column is what gets displayed.
df_rest.head()
df_rest.shape
df_rest.Liked.value_counts()

1    500
0    500
Name: Liked, dtype: int64

In [5]:
import random

# Fixed seed so the shuffle -- and therefore the train/update/test split and
# every accuracy figure derived from it -- is reproducible across kernel
# restarts. Change the value to draw a different split.
SPLIT_SEED = 42

# Slice boundaries in the shuffled list:
#   [0, TRAIN_END)           -> train
#   [TRAIN_END, UPDATE_END)  -> update (later fed to the classifier incrementally)
#   [UPDATE_END, end)        -> test
TRAIN_END = 850
UPDATE_END = 950

# Turn each row into a (review text, label) pair: Liked == 1 is 'pos',
# anything else is 'neg'. Iterating the two columns directly avoids building
# a Series per row as ``iterrows`` does.
allReviews = [
    (str(review), 'pos' if liked == 1 else 'neg')
    for review, liked in zip(df_rest['Review'], df_rest['Liked'])
]
pos_count = sum(1 for _, label in allReviews if label == 'pos')
neg_count = len(allReviews) - pos_count

# A dedicated Random instance keeps the seeding local instead of resetting
# the global random state used elsewhere.
random.Random(SPLIT_SEED).shuffle(allReviews)
train = allReviews[:TRAIN_END]
update = allReviews[TRAIN_END:UPDATE_END]
test = allReviews[UPDATE_END:]

print(len(allReviews))
print(len(train))
print(len(test))
print(pos_count, neg_count)

1000
850
50
500 500


In [6]:
from textblob.classifiers import NaiveBayesClassifier

# Fit a Naive Bayes classifier on the training split.
cl = NaiveBayesClassifier(train)

# Overall accuracy on the held-out test split.
print(cl.accuracy(test))

# Echo every test sample with its predicted label and tally the mistakes:
#   red   -> labelled 'neg' but predicted 'pos' (false positive)
#   green -> labelled 'pos' but predicted 'neg' (false negative)
# Correct predictions are printed without colour.
fp = fn = 0

for sample in test:
    review, label = sample
    result = cl.classify(review)
    line = f"{sample} {result}"
    if (label, result) == ('neg', 'pos'):
        prRed(line)
        fp += 1
    elif (label, result) == ('pos', 'neg'):
        prGreen(line)
        fn += 1
    else:
        print(line)

print("False Positives: ", fp, " False Negatives: ", fn)

0.8
('My friend loved the salmon tartar.', 'pos') pos
('the food is not tasty at all, not to say its "real traditional Hunan style".', 'neg') neg
('The food is good.', 'pos') pos
('The chefs were friendly and did a good job.', 'pos') pos
[92m ("This was my first time and I can't wait until the next.", 'pos') neg[00m
('It is PERFECT for a sit-down family meal or get together with a few friends.', 'pos') pos
('The steaks are all well trimmed and also perfectly cooked.', 'pos') pos
('But the service was beyond bad.', 'neg') neg
("too bad cause I know it's family owned, I really wanted to like this place.", 'neg') neg
('Of all the dishes, the salmon was the best, but all were great.', 'pos') pos
[91m ('I also decided not to send it back because our waitress looked like she was on the verge of having a heart attack.', 'neg') pos[00m
('The portion was huge!', 'pos') pos
('This place is great!!!!!!!!!!!!!!', 'pos') pos
("Perhaps I caught them on an off night judging by the other reviews, 

In [7]:
# Feed the extra labelled reviews in `update` to the existing classifier.
# Note: this mutates `cl`, so re-running the cell feeds the same data again.
cl.update(update)

# Accuracy on the same test split, now measured after the update.
print(cl.accuracy(test))

# Echo every test sample with its predicted label and tally the mistakes:
#   red   -> labelled 'neg' but predicted 'pos' (false positive)
#   green -> labelled 'pos' but predicted 'neg' (false negative)
# Correct predictions are printed without colour.
fp = fn = 0

for sample in test:
    review, label = sample
    result = cl.classify(review)
    line = f"{sample} {result}"
    if (label, result) == ('neg', 'pos'):
        prRed(line)
        fp += 1
    elif (label, result) == ('pos', 'neg'):
        prGreen(line)
        fn += 1
    else:
        print(line)

print("False Positives: ", fp, " False Negatives: ", fn)

0.8
('My friend loved the salmon tartar.', 'pos') pos
('the food is not tasty at all, not to say its "real traditional Hunan style".', 'neg') neg
('The food is good.', 'pos') pos
('The chefs were friendly and did a good job.', 'pos') pos
[92m ("This was my first time and I can't wait until the next.", 'pos') neg[00m
('It is PERFECT for a sit-down family meal or get together with a few friends.', 'pos') pos
('The steaks are all well trimmed and also perfectly cooked.', 'pos') pos
('But the service was beyond bad.', 'neg') neg
("too bad cause I know it's family owned, I really wanted to like this place.", 'neg') neg
('Of all the dishes, the salmon was the best, but all were great.', 'pos') pos
[91m ('I also decided not to send it back because our waitress looked like she was on the verge of having a heart attack.', 'neg') pos[00m
('The portion was huge!', 'pos') pos
('This place is great!!!!!!!!!!!!!!', 'pos') pos
("Perhaps I caught them on an off night judging by the other reviews, 