<a href="https://colab.research.google.com/github/Redcoder815/Deep_Learning_PyTorch/blob/main/SentimentAnalysisWithRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import pandas as pd
import numpy as np

In [30]:
# Read the Swiggy review dataset from the working directory and list its columns.
data = pd.read_csv('swiggy.csv')
print("Columns in the dataset:", data.columns.tolist(), sep="\n")

Columns in the dataset:
['ID', 'Area', 'City', 'Restaurant Price', 'Avg Rating', 'Total Rating', 'Food Item', 'Food Type', 'Delivery Time', 'Review']


In [31]:
# Preview the first rows of the freshly loaded frame, before any cleaning.
data.head()

Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,"Good, but nothing extraordinary."
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,"Good, but nothing extraordinary."
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,Late delivery ruined it.
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,Best meal I've had in a while!
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,Mediocre experience.


In [32]:
# Normalise the review text: lower-case it, then strip every character that is
# not a lowercase letter, a digit, or whitespace.
data["Review"] = (
    data["Review"]
    .str.lower()
    .str.replace(r'[^a-z0-9\s]', '', regex=True)
)

# Binary target: 1 when the row's 'Avg Rating' is strictly above 3.5, else 0
# (a missing rating compares False and therefore maps to 0 before the dropna).
# NOTE(review): the label is derived from 'Avg Rating', not from the review
# text itself, so a review such as "late delivery ruined it" ends up labelled
# positive when its row carries a high rating. Confirm this is the intended
# target before trusting the classifier's accuracy.
data['sentiment'] = (data['Avg Rating'] > 3.5).astype('int64')

# Discard every row that has a missing value in any column.
data = data.dropna()

In [33]:
# Preview the frame again to check the cleaned 'Review' text and the new 'sentiment' column.
data.head()

Unnamed: 0,ID,Area,City,Restaurant Price,Avg Rating,Total Rating,Food Item,Food Type,Delivery Time,Review,sentiment
0,1,Suburb,Ahmedabad,600,4.2,6198,Sushi,Fast Food,30-40 min,good but nothing extraordinary,1
1,2,Business District,Pune,200,4.7,4865,Pepperoni Pizza,Non-Vegetarian,50-60 min,good but nothing extraordinary,1
2,3,Suburb,Bangalore,600,4.7,2095,Waffles,Fast Food,50-60 min,late delivery ruined it,1
3,4,Business District,Mumbai,900,4.0,6639,Sushi,Vegetarian,50-60 min,best meal ive had in a while,1
4,5,Tech Park,Mumbai,200,4.7,6926,Spring Rolls,Gluten-Free,20-30 min,mediocre experience,1


In [34]:
# Class balance of the derived label (count of rows per sentiment value).
data['sentiment'].value_counts()

Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
1,5727
0,2273


In [35]:
# Vocabulary cap handed to the tokenizer, and the fixed length every encoded
# review is padded/truncated to.
max_features = 5000
max_length = 200

# NOTE(review): `Tokenizer` and `pad_sequences` are not imported in any cell
# visible in this notebook (presumably the Keras text-preprocessing utilities);
# add the import to the top cell so Restart & Run All works.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(data["Review"])

# Encode each review as a list of integer token ids, then bring all of them
# to the same length so they stack into one 2-D array.
review_sequences = tokenizer.texts_to_sequences(data["Review"])
X = pad_sequences(review_sequences, maxlen=max_length)
y = data['sentiment'].to_numpy()

In [36]:
# NOTE(review): `train_test_split` is not imported in any cell visible in this
# notebook (presumably sklearn.model_selection); add the import to the top cell.

# First cut: hold out 20% of all rows as the test set, stratified on the label.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Second cut: carve 10% of the remaining rows out as a validation set,
# again stratified, leaving the rest for training.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.1,
    random_state=42,
    stratify=y_train_full,
)

In [37]:
import torch
from torch.utils.data import Dataset, DataLoader

In [38]:
# Token-id matrices become int64 tensors for the embedding lookup.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(split, dtype=torch.long) for split in (X_train, X_val, X_test)
)

# Labels become float32 column vectors (a trailing axis of size 1 is added) so
# they line up with the model's single-logit output in the loss.
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float)[:, None]
    for split in (y_train, y_val, y_test)
)

In [39]:
class TextDataset(Dataset):
    """Map-style dataset that pairs each input row with its label.

    Args:
        features: Sized, indexable collection of model inputs (one entry per sample).
        labels: Sized, indexable collection of targets, aligned with ``features``.

    Raises:
        ValueError: If ``features`` and ``labels`` do not have the same length.
    """

    def __init__(self, features, labels):
        # Fail at construction time: a length mismatch would otherwise surface
        # as an IndexError in the middle of an epoch, or silently ignore the
        # trailing labels.
        if len(features) != len(labels):
            raise ValueError(
                "features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]


In [40]:
# Mini-batch size shared by all three loaders.
batch_size = 32

# Wrap each (inputs, labels) tensor pair in a dataset.
train_dataset, val_dataset, test_dataset = (
    TextDataset(features, labels)
    for features, labels in (
        (X_train_tensor, y_train_tensor),
        (X_val_tensor, y_val_tensor),
        (X_test_tensor, y_test_tensor),
    )
)

# Only the training loader shuffles; validation and test keep their order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=batch_size)
    for split in (val_dataset, test_dataset)
)


In [41]:
import torch.nn as nn

class RNNModel(nn.Module):
    """Embedding -> RNN -> linear head that emits one raw score vector per sequence.

    Args:
        num_embeddings: Size of the embedding table (token ids must be smaller).
        embedding_dim: Width of each token embedding.
        hidden_size: Width of the RNN hidden state.
        output_size: Number of values produced per sequence.
        num_layers: Number of stacked RNN layers. Defaults to 1.
        padding_idx: Optional token id to treat as padding in the embedding
            (its vector is initialised to zeros and receives no gradient).
            Defaults to ``None``, i.e. no padding token.
    """

    def __init__(self, num_embeddings, embedding_dim, hidden_size, output_size,
                 num_layers=1, padding_idx=None):
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings, embedding_dim, padding_idx=padding_idx
        )
        self.rnn = nn.RNN(
            embedding_dim, hidden_size, num_layers=num_layers, batch_first=True
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of token-id sequences to ``output_size`` raw scores each.

        Args:
            x: Integer tensor of token ids, batch dimension first.

        Returns:
            Tensor of un-activated scores with one row per input sequence.
        """
        embedded = self.embedding(x)
        _, hidden = self.rnn(embedded)
        # `hidden` carries one final state per layer along its first axis.
        # Taking the last entry selects the top layer for any `num_layers`,
        # whereas squeeze(0) only worked when there was exactly one layer.
        return self.fc(hidden[-1])


In [42]:
# Model hyperparameters: embedding width, RNN hidden width, and a single
# output value per review (one logit for the binary label).
embedding_dim = 16
hidden_size = 64
output_size = 1

# The embedding table is sized to the tokenizer's vocabulary cap.
model_pt = RNNModel(
    num_embeddings=max_features,
    embedding_dim=embedding_dim,
    hidden_size=hidden_size,
    output_size=output_size,
)

In [43]:
# Binary cross-entropy computed directly on raw logits (the sigmoid is applied
# inside the loss), optimised with Adam at a learning rate of 1e-3.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model_pt.parameters(), lr=1e-3)

In [44]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Module mapping a batch of inputs to one logit per sample.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss taking ``(logits, labels)`` and returning a batch mean.
        optimizer: When given, the model is put in training mode and updated
            after every batch; when ``None``, the model is put in evaluation
            mode and gradients are disabled.

    Returns:
        Tuple of the sample-weighted mean loss and the fraction of samples
        whose thresholded prediction equals the label.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(is_training):
        for inputs, labels in loader:
            if is_training:
                optimizer.zero_grad()  # Clear gradients from the previous batch

            logits = model(inputs)
            loss = criterion(logits, labels)

            if is_training:
                loss.backward()  # Backpropagation
                optimizer.step()  # Update weights

            # The criterion returns a batch mean; weight it by the batch size
            # so the epoch average is exact even when the last batch is short.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count

            # Accuracy uses the logits computed before the weight update.
            predicted = (torch.sigmoid(logits) >= 0.5).float()
            n_correct += (predicted == labels).sum().item()
            n_seen += batch_count

    if n_seen == 0:
        raise ValueError("loader yielded no samples; cannot average metrics")

    return loss_sum / n_seen, n_correct / n_seen


num_epochs = 5

for epoch in range(num_epochs):
    # Training pass (updates weights), then validation pass (no gradients).
    avg_train_loss, train_accuracy = _run_epoch(
        model_pt, train_loader, criterion, optimizer
    )
    avg_val_loss, val_accuracy = _run_epoch(model_pt, val_loader, criterion)

    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")


Epoch 1/5: Train Loss: 0.6035, Train Accuracy: 0.7137, Val Loss: 0.5978, Val Accuracy: 0.7156
Epoch 2/5: Train Loss: 0.5984, Train Accuracy: 0.7160, Val Loss: 0.5977, Val Accuracy: 0.7156
Epoch 3/5: Train Loss: 0.5981, Train Accuracy: 0.7160, Val Loss: 0.5992, Val Accuracy: 0.7156
Epoch 4/5: Train Loss: 0.5977, Train Accuracy: 0.7160, Val Loss: 0.5974, Val Accuracy: 0.7156
Epoch 5/5: Train Loss: 0.5971, Train Accuracy: 0.7160, Val Loss: 0.5969, Val Accuracy: 0.7156


In [45]:
# Score the held-out test split with the model in evaluation mode and
# gradient tracking switched off.
model_pt.eval()
test_correct_predictions = 0
test_total_samples = 0

with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        probabilities = torch.sigmoid(model_pt(batch_inputs))
        # A sample counts as correct when its thresholded probability equals its label.
        hits = (probabilities >= 0.5).float().eq(batch_labels)
        test_correct_predictions += int(hits.sum())
        test_total_samples += len(batch_labels)

test_accuracy = test_correct_predictions / test_total_samples
print(f"PyTorch Test Accuracy: {test_accuracy:.2f}")

PyTorch Test Accuracy: 0.72


In [46]:
def predict_sentiment_pytorch(review_text):
    """Classify one raw review string with the trained PyTorch model.

    The text receives the same cleaning as the training reviews (lower-casing,
    then removal of everything except ``a-z``, ``0-9`` and whitespace), is
    encoded with the notebook's fitted ``tokenizer``, padded to ``max_length``
    and passed through ``model_pt``.

    Args:
        review_text: The review as a plain string.

    Returns:
        A string of the form ``"Positive (Probability: 0.72)"`` or
        ``"Negative (Probability: 0.31)"``; "Positive" is reported when the
        sigmoid of the model's logit is at least 0.5.
    """
    # Imported here because no visible cell in the notebook imports `re`;
    # without it this function raises NameError on a fresh kernel.
    import re

    text = review_text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_length)
    input_tensor = torch.tensor(padded, dtype=torch.long)

    model_pt.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        output = model_pt(input_tensor)
        prediction = torch.sigmoid(output).item()

    return f"{'Positive' if prediction >= 0.5 else 'Negative'} (Probability: {prediction:.2f})"

# Two hand-written reviews, one favourable and one unfavourable, used as a
# quick sanity check of the prediction helper.
sample_review = "The food was great."
sample_review_negative = "Late delivery ruined it."

for review in (sample_review, sample_review_negative):
    print(f"Review: {review}")
    print(f"Sentiment (PyTorch): {predict_sentiment_pytorch(review)}")

Review: The food was great.
Sentiment (PyTorch): Positive (Probability: 0.72)
Review: Late delivery ruined it.
Sentiment (PyTorch): Positive (Probability: 0.63)
