In [3]:
import pandas as pd

df = pd.read_csv("../data/reviews_badminton.csv")
df.head()


Unnamed: 0,Reviewer Name,Review Title,Place of Review,Up Votes,Down Votes,Month,Review text,Ratings
0,Kamal Suresh,Nice product,"Certified Buyer, Chirakkal",889.0,64.0,Feb 2021,"Nice product, good quality, but price is now r...",4
1,Flipkart Customer,Don't waste your money,"Certified Buyer, Hyderabad",109.0,6.0,Feb 2021,They didn't supplied Yonex Mavis 350. Outside ...,1
2,A. S. Raja Srinivasan,Did not meet expectations,"Certified Buyer, Dharmapuri",42.0,3.0,Apr 2021,Worst product. Damaged shuttlecocks packed in ...,1
3,Suresh Narayanasamy,Fair,"Certified Buyer, Chennai",25.0,1.0,,"Quite O. K. , but nowadays the quality of the...",3
4,ASHIK P A,Over priced,,147.0,24.0,Apr 2016,Over pricedJust â?¹620 ..from retailer.I didn'...,1


In [4]:
def get_sentiment(rating):
    if rating >= 4:
        return "Positive"
    elif rating <= 2:
        return "Negative"
    else:
        return None  # Neutral

df["Sentiment"] = df["Ratings"].apply(get_sentiment)


In [7]:
#Removing Neutral Reviews
df = df.dropna(subset=["Sentiment"])
df["Sentiment"].value_counts()


Sentiment
Positive    6826
Negative    1077
Name: count, dtype: int64

In [8]:
import re
import nltk
import pandas as pd

nltk.download('stopwords')
nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\asus\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\asus\AppData\Roaming\nltk_data...


In [13]:
def clean_text(text):
    if not isinstance(text, str):
        return ""
    
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    words = text.split()
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    
    return " ".join(words)


In [14]:
df["clean_review"] = df["Review text"].apply(clean_text)


In [16]:
df[["Review text", "clean_review"]].head()


Unnamed: 0,Review text,clean_review
0,"Nice product, good quality, but price is now r...",nice product good quality price rising bad sig...
1,They didn't supplied Yonex Mavis 350. Outside ...,didnt supplied yonex mavis outside cover yonex...
2,Worst product. Damaged shuttlecocks packed in ...,worst product damaged shuttlecock packed new b...
4,Over pricedJust â?¹620 ..from retailer.I didn'...,pricedjust retaileri didnt understand wat adva...
5,Good quality product. Delivered on time.READ MORE,good quality product delivered timeread


In [17]:
X = df["clean_review"]
y = df["Sentiment"]


In [18]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [19]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(
    max_features=5000,
    ngram_range=(1, 2)
)

X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)


In [33]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000, class_weight="balanced")
model.fit(X_train_tfidf, y_train)


In [34]:
y_pred = model.predict(X_test_tfidf)


In [35]:
from sklearn.metrics import f1_score, classification_report

print("F1 Score:", f1_score(y_test, y_pred, pos_label="Positive"))
print(classification_report(y_test, y_pred))


F1 Score: 0.9440715883668904
              precision    recall  f1-score   support

    Negative       0.62      0.77      0.69       215
    Positive       0.96      0.93      0.94      1366

    accuracy                           0.91      1581
   macro avg       0.79      0.85      0.82      1581
weighted avg       0.92      0.91      0.91      1581



In [37]:
import pickle

with open("../sentiment_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("../tfidf_vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf, f)

print("Updated model saved successfully!")


Updated model saved successfully!
