Importing Libraries


In [64]:
import pandas as pd
import numpy as np
import tensorflow as tf
import re
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping

In [65]:
# Fix the NumPy and TensorFlow global seeds so repeated runs are comparable.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

Loading Dataset

In [66]:
# Read the reviews CSV (path is relative to the working directory) and
# preview the first five rows.
DATA_PATH = 'swiggy.csv'
data = pd.read_csv(DATA_PATH)
data.head(5)

Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,"Good, but nothing extraordinary."
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,"Good, but nothing extraordinary."
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,Late delivery ruined it.
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,Best meal I've had in a while!
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,Mediocre experience.


In [67]:
# List the column names of the loaded frame.
print(list(data.columns))

['ID', 'Area', 'City', 'Restaurant Price', 'Avg Rating', 'Total Rating', 'Food Item', 'Food Type', 'Delivery Time', 'Review']


Text Cleaning and Sentiment Labeling

In [68]:
# Normalise the review text: lowercase it, then drop every character that is
# not a lowercase letter, a digit or whitespace. Missing or non-string reviews
# are converted first so the tokenizer downstream never receives NaN.
review_text = data['Review'].fillna('').astype(str)
data['Review'] = review_text.str.lower().str.replace(r'[^a-z0-9\s]', '', regex=True)

# Binary label: 1 when the row's 'Avg Rating' is at least 4, otherwise 0
# (a missing rating compares False and therefore maps to 0).
# NOTE(review): the label is derived from 'Avg Rating', not from the review
# text itself -- confirm this is the intended sentiment target.
data['sentiment'] = (data['Avg Rating'] >= 4).astype('int64')

Tokenization and Padding

In [69]:
# Vocabulary cap for the tokenizer and fixed length of every padded sequence.
max_features = 10000
max_length = 200

# Build the word index from the cleaned reviews.
# NOTE(review): the tokenizer is fit on every review, before the train/test
# split performed later in the notebook.
review_texts = data['Review']
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(review_texts)

# Encode each review as a list of word ids, then pad/truncate to max_length.
encoded_reviews = tokenizer.texts_to_sequences(review_texts)
X = pad_sequences(encoded_reviews, maxlen=max_length)

# Target vector taken from the 'sentiment' column; displayed as the cell output.
y = data['sentiment'].to_numpy()
y

array([1, 1, 1, ..., 1, 1, 1], dtype=int64)

Splitting the Data

In [70]:
# Hold out 20% of the rows as the test set, stratified on the label.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Split 10% of the remaining rows off as the validation set (again
# stratified); what is left is the final training set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.1,
    random_state=42,
    stratify=y_train_full,
)

Building the RNN Model



In [71]:
# Bidirectional-LSTM binary classifier over the padded word-id sequences.
model = Sequential([
    # Learns a 128-dimensional vector for each of the max_features word ids.
    # `input_length` is deliberately not passed: the argument is deprecated in
    # current Keras and the sequence length is inferred from the input.
    Embedding(input_dim=max_features, output_dim=128),
    # return_sequences=False: only the final state of each direction is kept.
    Bidirectional(LSTM(128, return_sequences=False)),
    Dropout(0.5),
    # Single sigmoid unit -> one score in [0, 1] per review.
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 2 consecutive epochs and roll the
# model back to the weights of the best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)


Training and Evaluating the Model

We will train the model on training data, validate it during training, then evaluate its performance on test data.


In [72]:
# Train for at most 10 epochs, checking the validation split after each one;
# the early-stopping callback may end training sooner.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1,
)

# evaluate() returns [loss, accuracy] because the model was compiled with a
# single 'accuracy' metric; index 1 is therefore the accuracy.
score = model.evaluate(X_test, y_test, verbose=0)
test_accuracy = score[1]
print(f"Test accuracy: {test_accuracy*100:.2f}%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Test accuracy: 71.56%


In [73]:
def predict_sentiment(review_text):
    """Classify a single review string with the trained model.

    The text is cleaned the same way as the training reviews (lowercased,
    then stripped of everything except a-z, 0-9 and whitespace), encoded with
    the fitted ``tokenizer``, padded to ``max_length`` and passed to ``model``.

    Args:
        review_text: The raw review as a string.

    Returns:
        A string of the form ``"<label> (Probability: <p>)"`` where the label
        is ``Positive`` when the model output is at least 0.5 and ``Negative``
        otherwise, and ``<p>`` is the raw model output with two decimals.
    """
    cleaned = re.sub(r'[^a-z0-9\s]', '', review_text.lower())
    encoded = tokenizer.texts_to_sequences([cleaned])
    model_input = pad_sequences(encoded, maxlen=max_length)

    # predict() yields one row per input with a single sigmoid output.
    probability = model.predict(model_input, verbose=0)[0][0]

    if probability >= 0.5:
        label = 'Positive'
    else:
        label = 'Negative'
    return f"{label} (Probability: {probability:.2f})"

# Quick check: run the classifier on one hand-written review.
sample_review = "The food was great."
sample_sentiment = predict_sentiment(sample_review)
print(f"Review: {sample_review}")
print(f"Sentiment: {sample_sentiment}")

Review: The food was great.
Sentiment: Positive (Probability: 0.71)
