# import libraries

In [1]:
import pandas as pd

import string

import nltk

from nltk.sentiment.vader import SentimentIntensityAnalyzer

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize

from nltk.stem import WordNetLemmatizer,PorterStemmer

from nltk import pos_tag

from sklearn.feature_extraction.text import CountVectorizer

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,classification_report

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# import the data

In [2]:
data= pd.read_csv("Restaurant_Reviews.csv")
data.head(20)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,Yes
1,Crust is not good.,No
2,Not tasty and the texture was just nasty.,No
3,Stopped by during the late May bank holiday of...,Yes
4,The selection on the menu was great and so wer...,Yes
5,Now I am getting angry and I want my damn pho.,No
6,Honeslty it didn't taste THAT fresh.,No
7,The potatoes were like rubber and you could te...,No
8,The fries were great too.,Yes
9,A great touch.,Yes


In [3]:
data.shape

(2220, 2)

In [4]:
data.describe().transpose()

Unnamed: 0,count,unique,top,freq
Review,2220,1610,The restaurant had clean and well-maintained f...,31
Liked,2220,2,No,1119


In [5]:
count= data.isnull().sum().sort_values(ascending=False)
percentage= ((data.isnull().sum()/len(data)*100)).sort_values(ascending=False)
missing_data=pd.concat([count,percentage],axis=1,keys=['count,percentage'])
missing_data

  missing_data=pd.concat([count,percentage],axis=1,keys=['count,percentage'])


Unnamed: 0,"count,percentage"
Review,0
Liked,0


# Preprocessing

In [6]:
def preprocess_text(text):
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    # original of the words
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    # remove stop words
    tokens = [word for word in tokens if word.lower() not in stop_words]
    # Remove punctuation
    tokens = [word for word in tokens if word not in string.punctuation]

    return ' '.join(tokens)

data['Review'] = data['Review'].apply(preprocess_text)

# Feature Extraction (Bag of Words)

In [7]:
# convert the data to vectors of the word and the number of his appearence
vectorizer = CountVectorizer()

# feature column
x = vectorizer.fit_transform(data['Review'])

# target column
y = data['Liked']

# choose model and train it

In [8]:
# spilt the data to train and test
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

# train the data by using RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=100, criterion='entropy', random_state=0)
classifier.fit(X_train, y_train)

# prediction
y_pred = classifier.predict(X_test)


# evaluate the model

In [9]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, pos_label='Yes')
recall = recall_score(y_test, y_pred, pos_label='Yes')
f1 = f1_score(y_test, y_pred, pos_label='Yes')
classification_rep = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy*100:.2f} %')
print(f'Precision: {precision*100:.2f} %')
print(f'Recall: {recall*100:.2f} %')
print(f'F1 Score: {f1*100:.2f} %')
print(f'Classification Report:\n{classification_rep}')


Accuracy: 88.96 %
Precision: 93.75 %
Recall: 82.95 %
F1 Score: 88.02 %
Classification Report:
              precision    recall  f1-score   support

          No       0.85      0.95      0.90       227
         Yes       0.94      0.83      0.88       217

    accuracy                           0.89       444
   macro avg       0.90      0.89      0.89       444
weighted avg       0.89      0.89      0.89       444



In [10]:
new_record = preprocess_text("i am happy")
numerical_record = vectorizer.transform([new_record])
prediction = classifier.predict(numerical_record)
print("Predicted label:", prediction)

Predicted label: ['Yes']


In [11]:
import gradio as gr

def predict_sentiment(review):
    preprocessed_review = preprocess_text(review)
    numerical_review = vectorizer.transform([preprocessed_review])
    prediction = classifier.predict(numerical_review)
    return prediction[0]
input_text = gr.Textbox(lines=5, label="Enter a review")  # You can use Textbox for multiline input
output_label = gr.Text(label="Predicted Sentiment")
demo= gr.Interface(fn=predict_sentiment, inputs=input_text, outputs=output_label)
demo.launch()

  from .autonotebook import tqdm as notebook_tqdm


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


