In [3]:
import pandas as pd
import numpy as np
import re


import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\bolla\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\bolla\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
df = pd.read_csv("output.csv")
df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [6]:
df.isnull().sum()

 Review    0
Liked      0
dtype: int64

In [7]:
df.rename(columns={' Review':'Review'}, inplace=True)

In [8]:
print(df['Liked'].value_counts())

Liked
1    500
0    500
Name: count, dtype: int64


In [9]:
df['length'] = df['Review'].apply(len)

In [10]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '',text)
    text = re.sub(r'\s+', ' ',text).strip()
    words = text.split()
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return " ".join(words)

df['clean_review'] = df['Review'].apply(clean_text)
print(df[['Review','clean_review']].head())

                                              Review  \
0                           Wow... Loved this place.   
1                                 Crust is not good.   
2          Not tasty and the texture was just nasty.   
3  Stopped by during the late May bank holiday of...   
4  The selection on the menu was great and so wer...   

                                        clean_review  
0                                    wow loved place  
1                                         crust good  
2                                tasty texture nasty  
3  stopped late may bank holiday rick steve recom...  
4                         selection menu great price  


In [11]:
count_vectorizer = CountVectorizer(binary = True)
x_bow = count_vectorizer.fit_transform(df['clean_review'])

In [12]:
tfidf_vectorizer = TfidfVectorizer()
X_tfidf = tfidf_vectorizer.fit_transform(df['clean_review'])

In [13]:
y = df['Liked']

X_train, X_test, y_train, y_test = train_test_split(X_tfidf,y,test_size=0.2,random_state=42)

In [14]:
model = BernoulliNB()
model.fit(X_train,y_train)

In [15]:
y_pred = model.predict(X_test)

In [16]:
print("Accuracy:", accuracy_score(y_test,y_pred))
print("\nconfusion matrix:\n", confusion_matrix(y_test,y_pred))
print("\nclassification Report:\n", classification_report(y_test,y_pred))

Accuracy: 0.75

confusion matrix:
 [[69 27]
 [23 81]]

classification Report:
               precision    recall  f1-score   support

           0       0.75      0.72      0.73        96
           1       0.75      0.78      0.76       104

    accuracy                           0.75       200
   macro avg       0.75      0.75      0.75       200
weighted avg       0.75      0.75      0.75       200



In [17]:
new_reviews = [
    "The food was fanstastic!",
    "Worst service ever."
]

new_reviews_clean = [clean_text(r) for r in new_reviews]

new_reviews_vec = tfidf_vectorizer.transform(new_reviews_clean)

predictions = model.predict(new_reviews_vec)

for review, pred in zip(new_reviews, predictions):
    print(review, "->", "Positive ðŸ˜Š" if pred==1 else "Negative ðŸ˜¡")

The food was fanstastic! -> Positive ðŸ˜Š
Worst service ever. -> Negative ðŸ˜¡


In [18]:
import pickle

In [19]:
with open("Sentiment Analysis.pkl","wb") as file:
    pickle.dump(model,file)

In [20]:
with open("Sentiment Analysis.pkl","rb") as file:
    loaded_model = pickle.load(file)
print("Model saved successfulyy as model.pkl")

Model saved successfulyy as model.pkl


In [21]:
pickle.dump(count_vectorizer, open("vectorizer.pkl","wb"))