In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [28]:
df = pd.read_csv('McDonald_s_Reviews.csv', encoding='unicode_escape')


In [29]:
df.head()

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars
2,3,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,Made a mobile order got to the speaker and che...,1 star
3,4,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,My mc. Crispy chicken sandwich was ï¿½ï¿½ï¿½ï¿...,5 stars
4,5,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,"I repeat my order 3 times in the drive thru, a...",1 star


In [30]:
df.tail()

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating
33391,33392,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,4 years ago,They treated me very badly.,1 star
33392,33393,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,a year ago,The service is very good,5 stars
33393,33394,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,a year ago,To remove hunger is enough,4 stars
33394,33395,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,5 years ago,"It's good, but lately it has become very expen...",5 stars
33395,33396,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,2 years ago,they took good care of me,5 stars


In [31]:
df.isnull().sum()

reviewer_id        0
store_name         0
category           0
store_address      0
latitude         660
longitude        660
rating_count       0
review_time        0
review             0
rating             0
dtype: int64

In [32]:
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = str(text)
    text = text.lower()  # lowercase
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # remove punctuation/numbers
    text = ' '.join([word for word in text.split() if word not in stop_words])  # remove stopwords
    return text



In [33]:
df['review'] = df['review'].apply(clean_text)

In [35]:
tfidf = TfidfVectorizer(max_features=5000)

In [36]:
# Extract numeric part (handles cases like '1 star', '4 s', etc.)
df['rating_cleaned'] = df['rating'].str.extract(r'(\d+\.?\d*)').astype(float)

In [37]:
df['label'] = (df['rating_cleaned'] > 2.5).astype(int)


In [38]:
df.head()

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,rating_cleaned,label
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,look like someone spit food normal transaction...,1 star,1.0,0
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,itd mcdonalds far food atmosphere go staff mak...,4 stars,4.0,1
2,3,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,made mobile order got speaker checked line mov...,1 star,1.0,0
3,4,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,mc crispy chicken sandwich customer service qu...,5 stars,5.0,1
4,5,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,repeat order times drive thru still manage mes...,1 star,1.0,0


In [49]:
df.tail()

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,rating_cleaned,label
33391,33392,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,4 years ago,treated badly,1 star,1.0,0
33392,33393,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,a year ago,service good,5 stars,5.0,1
33393,33394,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,a year ago,remove hunger enough,4 stars,4.0,1
33394,33395,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,5 years ago,good lately become expensive,5 stars,5.0,1
33395,33396,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2810,2 years ago,took good care,5 stars,5.0,1


In [39]:
X = tfidf.fit_transform(df['review']).toarray()
y = df['label']

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [41]:
model = LogisticRegression()
model.fit(X_train, y_train)

In [42]:
y_pred = model.predict(X_test)

In [43]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')

print(f"Accuracy:  {accuracy_score(y_test, y_pred) * 100:.2f}%")
print(f"Precision: {precision:.2f}")
print(f"Recall:    {recall:.2f}")
print(f"F1 Score:  {f1:.2f}")


Accuracy:  87.93%
Precision: 0.87
Recall:    0.87
F1 Score:  0.87


In [44]:
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Confusion Matrix:
 [[2117  426]
 [ 380 3757]]


In [47]:
def predict_sentiment(review):
    cleaned = clean_text(review)
    vectorized = tfidf.transform([cleaned])
    # Use predict_proba if available
    if hasattr(model, "predict_proba"):
        prob = model.predict_proba(vectorized)[0][1]
        sentiment = 'Positive' if prob >= 0.5 else 'Negative'
    else:
        prediction = model.predict(vectorized)[0]
        sentiment = 'Positive' if prediction == 1 else 'Negative'
    return sentiment



In [48]:
review = ["This is Very Good Service! I Love it!","I hate this food, its terrible"]
for r in review:
    print(f"Review: {r}\nSentiment: {predict_sentiment(r)}\n")

Review: This is Very Good Service! I Love it!
Sentiment: Positive

Review: I hate this food, its terrible
Sentiment: Negative

