In [2]:
import pandas as pd
import numpy as np
import re           # used for working with regular expressions

In [4]:
# Read the European restaurant reviews CSV into a DataFrame.
# NOTE(review): the location is an absolute, machine-specific Windows path.
csv_path = r"C:\Users\shashi\Downloads\European Restaurant Reviews.csv"
data = pd.read_csv(csv_path)

In [8]:
data.head()      # preview the first rows of the loaded reviews DataFrame

Unnamed: 0,Country,Restaurant Name,Sentiment,Review Title,Review Date,Review
0,France,The Frog at Bercy Village,Negative,Rude manager,May 2024 •,The manager became agressive when I said the c...
1,France,The Frog at Bercy Village,Negative,A big disappointment,Feb 2024 •,"I ordered a beef fillet ask to be done medium,..."
2,France,The Frog at Bercy Village,Negative,Pretty Place with Bland Food,Nov 2023 •,"This is an attractive venue with welcoming, al..."
3,France,The Frog at Bercy Village,Negative,Great service and wine but inedible food,Mar 2023 •,Sadly I used the high TripAdvisor rating too ...
4,France,The Frog at Bercy Village,Negative,Avoid- Worst meal in Rome - possibly ever,Nov 2022 •,From the start this meal was bad- especially g...


In [10]:
# Show the column labels available in the loaded dataset
data.columns

Index(['Country', 'Restaurant Name', 'Sentiment', 'Review Title',
       'Review Date', 'Review'],
      dtype='object')

In [12]:
# Rows whose Sentiment label is exactly 'Positive'
positive_sentiments = data.loc[data['Sentiment'].eq('Positive')]

In [14]:
# Preview the first rows of the positive-review subset
positive_sentiments.head()

Unnamed: 0,Country,Restaurant Name,Sentiment,Review Title,Review Date,Review
152,France,The Frog at Bercy Village,Positive,Wonderful,Jun 2024 •,We booked the Frog in advance and pleased we d...
153,France,The Frog at Bercy Village,Positive,Excellent family dinner.,Jun 2024 •,"Great atmosphere, very popular bar + restauran..."
154,France,The Frog at Bercy Village,Positive,Nice lemonade,Jun 2024 •,Nice little place at Bercy Village. Lovely ser...
155,France,The Frog at Bercy Village,Positive,Good hangout,May 2024 •,Great place to hang out with friends for drink...
156,France,The Frog at Bercy Village,Positive,Great fast Service and Delicious food,May 2024 •,I tried Frog for the first time since my hotel...


In [16]:
# Rows whose Sentiment label is exactly 'Negative'
negative_sentiments = data.loc[data['Sentiment'].eq('Negative')]

In [18]:
# Preview the first rows of the negative-review subset
negative_sentiments.head()

Unnamed: 0,Country,Restaurant Name,Sentiment,Review Title,Review Date,Review
0,France,The Frog at Bercy Village,Negative,Rude manager,May 2024 •,The manager became agressive when I said the c...
1,France,The Frog at Bercy Village,Negative,A big disappointment,Feb 2024 •,"I ordered a beef fillet ask to be done medium,..."
2,France,The Frog at Bercy Village,Negative,Pretty Place with Bland Food,Nov 2023 •,"This is an attractive venue with welcoming, al..."
3,France,The Frog at Bercy Village,Negative,Great service and wine but inedible food,Mar 2023 •,Sadly I used the high TripAdvisor rating too ...
4,France,The Frog at Bercy Village,Negative,Avoid- Worst meal in Rome - possibly ever,Nov 2022 •,From the start this meal was bad- especially g...


In [20]:
# (rows, columns) of the full dataset
data.shape

(1502, 6)

In [22]:
# (rows, columns) of the positive-review subset
positive_sentiments.shape

(1237, 6)

In [24]:
# (rows, columns) of the negative-review subset
negative_sentiments.shape

(265, 6)

In [26]:
# Share of positive reviews, derived from the frames themselves so the figure
# stays correct if the dataset changes (previously hard-coded as 1237/1502).
print("Percentage of positive sentiments:",(len(positive_sentiments)/len(data))*100)

Percentage of positive sentiments: 82.35685752330227


In [28]:
# Share of negative reviews, derived from the frames themselves so the figure
# stays correct if the dataset changes (previously hard-coded as 265/1502).
print("Percentage of negative sentiments:",(len(negative_sentiments)/len(data))*100)

Percentage of negative sentiments: 17.643142476697736


In [30]:
# Keep only the review text and its sentiment label for modelling.
# This rebinds `data`; the other columns are no longer available afterwards.
# .copy() makes the result an independent DataFrame, so assigning to its
# columns later does not trigger pandas' SettingWithCopyWarning.
data = data[["Review", "Sentiment"]].copy()
print("Dataset Preview (Review and Sentiment columns only):")
print(data.head())

Dataset Preview (Review and Sentiment columns only):
                                              Review Sentiment
0  The manager became agressive when I said the c...  Negative
1  I ordered a beef fillet ask to be done medium,...  Negative
2  This is an attractive venue with welcoming, al...  Negative
3  Sadly I  used the high TripAdvisor rating too ...  Negative
4  From the start this meal was bad- especially g...  Negative


In [32]:
# Names of the text column and the label column used throughout the analysis
review_column, sentiment_column = 'Review', 'Sentiment'

In [34]:
# Data Preprocessing
_STEMMER = None  # PorterStemmer shared by every clean_text call; built on first use


def clean_text(text):
    """Normalize a review into a lowercase, space-separated string of Porter stems.

    Every character that is not an ASCII letter or whitespace is dropped,
    whitespace runs are collapsed to a single space, and the text is stripped
    and lowercased. The result is tokenized with ``word_tokenize`` and each
    token is stemmed with ``PorterStemmer``.

    Args:
        text: The raw review string. ``None`` and float NaN (e.g. a missing
            cell in a pandas column) are treated as an empty review.

    Returns:
        str: The stemmed tokens joined by single spaces, or ``""`` when the
        input is missing or no letters remain after cleaning.
    """
    global _STEMMER

    # Missing values would otherwise make re.sub raise TypeError.
    # `text != text` is true only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = re.sub(r'[^a-zA-Z\s]', '', text)          # Remove special characters and numbers
    text = re.sub(r'\s+', ' ', text)                 # Collapse runs of whitespace
    text = text.strip().lower()
    if not text:
        return ""                                    # Nothing left to tokenize

    # Build the stemmer once and reuse it instead of re-creating it per review.
    if _STEMMER is None:
        _STEMMER = PorterStemmer()

    # Tokenize and apply stemming
    return ' '.join(_STEMMER.stem(word) for word in word_tokenize(text))

In [38]:
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import nltk

In [40]:
# Download the Punkt tokenizer data needed by word_tokenize.
# Recent NLTK releases load the 'punkt_tab' resource instead of the pickled
# 'punkt' package, so fetch both to keep the notebook working across versions.
nltk.download('punkt_tab')
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shashi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [42]:
# Clean every review and write the result back over the raw text column.
# Re-running this cell cleans the already-cleaned text again.
cleaned_reviews = data[review_column].apply(clean_text)
data[review_column] = cleaned_reviews

In [44]:
from sklearn.preprocessing import LabelEncoder

In [46]:
# Fit an encoder on the sentiment strings, then replace the column with the
# corresponding integer codes (the fitted encoder is kept for decoding later).
label_encoder = LabelEncoder()
label_encoder.fit(data[sentiment_column])
data[sentiment_column] = label_encoder.transform(data[sentiment_column])

In [48]:
from sklearn.model_selection import train_test_split

In [50]:
# Hold out 20% of the reviews for testing; the fixed seed keeps the split repeatable
X, y = data[review_column], data[sentiment_column]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [52]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [54]:
# TF-IDF features over unigrams and bigrams, capped at 8000 terms.
# The vocabulary is learned from the training split only and then reused
# unchanged to transform the test split.
tfidf = TfidfVectorizer(
    ngram_range=(1, 2),
    max_features=8000,
)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [56]:
from sklearn.utils.class_weight import compute_class_weight

In [58]:
# Handle class imbalance by computing 'balanced' class weights on the training labels.
# Weights are keyed by the actual class labels rather than by position, so the
# mapping stays correct even if the labels are not exactly 0..k-1.
train_classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=train_classes, y=y_train)
class_weight_dict = dict(zip(train_classes.tolist(), class_weights))

In [60]:
from sklearn.linear_model import LogisticRegression

In [62]:
# Fit a logistic regression on the TF-IDF features, using the per-class
# weights computed above and allowing up to 1000 solver iterations.
model = LogisticRegression(
    max_iter=1000,
    class_weight=class_weight_dict,
)
model.fit(X_train_tfidf, y_train)

In [64]:
# Predict the encoded sentiment label for every review in the test split
y_pred = model.predict(X_test_tfidf)

In [66]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [68]:
# Evaluation: overall accuracy of the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print("\nModel Accuracy:", accuracy)


Model Accuracy: 0.9435215946843853


In [70]:
# Per-class precision, recall and F1; rows are labelled with the encoder's
# original class names instead of the integer codes.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


Classification Report:
              precision    recall  f1-score   support

    Negative       0.81      0.86      0.83        49
    Positive       0.97      0.96      0.97       252

    accuracy                           0.94       301
   macro avg       0.89      0.91      0.90       301
weighted avg       0.95      0.94      0.94       301



In [72]:
# Confusion matrix: rows are the true classes, columns the predicted classes
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Confusion Matrix:
[[ 42   7]
 [ 10 242]]


In [74]:
import pickle

In [76]:
# Save every fitted artifact needed to score new reviews outside this notebook.
# The label encoder is saved too: without it, a reloaded model's integer
# predictions cannot be mapped back to the original sentiment strings.
with open("tfidf_vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf, f)

with open("logistic_regression_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

In [78]:
# Example Prediction
def predict_sentiment(review):
    """Return the predicted sentiment label for one raw review string.

    The review is cleaned with ``clean_text``, vectorized with the fitted
    ``tfidf``, classified by ``model``, and the integer prediction is decoded
    to its original label with ``label_encoder``. All four are read from the
    notebook's global namespace.
    """
    features = tfidf.transform([clean_text(review)])
    encoded_label = model.predict(features)[0]
    return label_encoder.inverse_transform([encoded_label])[0]

In [88]:
# Example Usage 1: classify a short review and print it with the predicted label
example_review = "The food was absolutely wonderful, from preparation to presentation, very pleasing."
predicted_sentiment = predict_sentiment(example_review)
print("\nExample Review:", example_review)
print("Predicted Sentiment:", predicted_sentiment)


Example Review: The food was absolutely wonderful, from preparation to presentation, very pleasing.
Predicted Sentiment: Positive


In [90]:
# Example Usage 2: classify a second short review and print it with the predicted label
example_review = "The food was terrible, I would never come back. Very disappointing."
predicted_sentiment = predict_sentiment(example_review)
print("\nExample Review:", example_review)
print("Predicted Sentiment:", predicted_sentiment)


Example Review: The food was terrible, I would never come back. Very disappointing.
Predicted Sentiment: Negative


In [92]:
# Example Usage 3: classify a longer, multi-sentence review that also contains digits
example_review = "I booked online as it was our anniversary, the food was fantastic along with the service. The staff had a wealth of knowledge about the food and wines, I will be going back. To my surprise the bill should have been 170 Euro bu because I booked online, the gave me 10percent off. Paying 160 Euro for 7 course fine dining restaurant is very reasonable especially in Rome, I would highly recommended."
predicted_sentiment = predict_sentiment(example_review)
print("\nExample Review:", example_review)
print("Predicted Sentiment:", predicted_sentiment)


Example Review: I booked online as it was our anniversary, the food was fantastic along with the service. The staff had a wealth of knowledge about the food and wines, I will be going back. To my surprise the bill should have been 170 Euro bu because I booked online, the gave me 10percent off. Paying 160 Euro for 7 course fine dining restaurant is very reasonable especially in Rome, I would highly recommended.
Predicted Sentiment: Positive
