### 1. Import Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
import re  
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

### 2. Load and Preprocess Data

In [2]:

data = pd.read_csv(r'C:\Users\WB\Desktop\swiggy.csv')
data

Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,"Good, but nothing extraordinary."
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,"Good, but nothing extraordinary."
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,Late delivery ruined it.
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,Best meal I've had in a while!
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,Mediocre experience.
...,...,...,...,...,...,...,...,...,...,...
7995,7996,City Center,Mumbai,300,4.0,3303,BBQ Ribs,Vegan,20-30 min,My new favorite dish!
7996,7997,Downtown,Chennai,100,4.7,8742,Butter Chicken,Non-Vegetarian,20-30 min,Amazing taste and quick delivery.
7997,7998,Tech Park,Chennai,900,4.5,4645,Mango Shake,Fast Food,30-40 min,Nothing special but edible.
7998,7999,Old Town,Delhi,500,4.2,3218,Grilled Cheese,Non-Vegetarian,50-60 min,It was okay.


In [3]:
data["Review"] = data["Review"].str.lower().replace(r'[^a-z0-9\s]', '', regex=True)
data['sentiment'] = (data['Avg Rating'] > 3.5).astype(int)
data = data.dropna() 


### 3. Tokenization & Text Processing

In [4]:
max_features, max_length = 5000, 200
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(data["Review"])
X = pad_sequences(tokenizer.texts_to_sequences(data["Review"]), maxlen=max_length)
y = data['sentiment'].values  

### 4. Splitting Data into Train, Test Sets

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42, stratify=y_train)

### 5. Building the Model

In [6]:
model = Sequential([
    Embedding(max_features, 16, input_length=max_length),
    SimpleRNN(64, activation='tanh'),
    Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



### 6. Train the Model

In [7]:
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_val, y_val), verbose=1)

Epoch 1/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 102ms/step - accuracy: 0.7081 - loss: 0.6044 - val_accuracy: 0.7156 - val_loss: 0.5987
Epoch 2/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 79ms/step - accuracy: 0.7090 - loss: 0.6053 - val_accuracy: 0.7156 - val_loss: 0.5971
Epoch 3/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 73ms/step - accuracy: 0.7140 - loss: 0.5990 - val_accuracy: 0.7156 - val_loss: 0.5972
Epoch 4/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 72ms/step - accuracy: 0.7179 - loss: 0.5965 - val_accuracy: 0.7156 - val_loss: 0.5987
Epoch 5/5
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 87ms/step - accuracy: 0.7174 - loss: 0.5957 - val_accuracy: 0.7156 - val_loss: 0.5967


<keras.src.callbacks.history.History at 0x20d13c79eb0>

### 7. Model Evaluation

In [32]:
_, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {test_acc:.2f}")

Test accuracy: 0.72


### 8. Predict Sentiment for New Reviews

In [33]:
def predict_sentiment(text):
    text = re.sub(r'[^a-z0-9\s]', '', text.lower())
    seq = pad_sequences(tokenizer.texts_to_sequences([text]), maxlen=max_length)
    prob = model.predict(seq)[0][0]
    return f"{'Positive' if prob >= 0.5 else 'Negative'} (Probability: {prob:.2f})"

### 9. Prediction

In [34]:
print(predict_sentiment("The food was great."))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Positive (Probability: 0.72)
