<a href="https://colab.research.google.com/github/RajeeveTD007/AVIT-july-2025/blob/main/SentimentRNNsol.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Alternate Approach

# === Upload File ===
# Ask the Colab runtime for a file upload. The returned value is kept in
# `uploaded` but is not read again in this section; the CSV is loaded from
# disk by its file name (reviews.csv) further down.
from google.colab import files
uploaded = files.upload()

# === Load and Preprocess ===
import pandas as pd
import numpy as np
import string
from collections import Counter

# Load the uploaded CSV (the file must be named reviews.csv), keep only the
# text and target columns, discard rows missing either one, and store the
# label as an integer.
df = (
    pd.read_csv("reviews.csv")[['review', 'label']]
    .dropna()
    .assign(label=lambda frame: frame['label'].astype(int))
)

# Clean text
def clean_text(text):
    """Return *text* lower-cased with ASCII punctuation removed.

    Every character listed in ``string.punctuation`` is deleted; all other
    characters (letters, digits, whitespace) are kept in place.
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)
    return text.lower().translate(strip_punctuation)

# Normalise every review (coerced to str first) with clean_text.
df['cleaned'] = [clean_text(raw) for raw in df['review'].astype(str)]

# Tokenization
# Collect every whitespace-separated word of the cleaned reviews and rank the
# distinct words from most to least frequent.
all_words = [word for text in df['cleaned'] for word in text.split()]
word_counts = Counter(all_words)
vocab = [word for word, _ in word_counts.most_common()]
# Ids start at 1; 0 is the value pad_features fills unused positions with.
vocab_to_int = {word: index for index, word in enumerate(vocab, start=1)}


def _encode(text):
    """Map each word of a cleaned review to its vocabulary id."""
    return [vocab_to_int[word] for word in text.split()]


df['tokens'] = df['cleaned'].apply(_encode)

# Padding
def pad_features(reviews, seq_length):
    """Left-pad or truncate token-id sequences to a fixed width.

    Args:
        reviews: Iterable (with ``len``) of integer token-id sequences.
        seq_length: Number of columns in the returned matrix.

    Returns:
        An integer array of shape ``(len(reviews), seq_length)``. Sequences
        shorter than ``seq_length`` are right-aligned with zeros in front;
        longer ones keep only their first ``seq_length`` ids. An empty
        sequence yields an all-zero row.
    """
    features = np.zeros((len(reviews), seq_length), dtype=int)
    for i, row in enumerate(reviews):
        kept = row[:seq_length]
        # An empty sequence must be skipped: -len(kept) would be -0 == 0, so
        # the slice would cover the whole row and the empty assignment would
        # fail to broadcast.
        if len(kept) == 0:
            continue
        features[i, -len(kept):] = np.asarray(kept)
    return features

# Every review is padded or truncated to this many token ids.
seq_length = 200

# Integer targets as a standalone NumPy array, and the padded id matrix.
labels = np.array(df['label'], dtype=int)
features = pad_features(df['tokens'], seq_length)

# === PyTorch Dataset ===
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split

# Wrap the padded id matrix and the labels as tensors: integer ids for the
# embedding lookup, float targets for the loss.
X = torch.from_numpy(features).to(torch.long)
y = torch.from_numpy(labels).to(torch.float32)
dataset = TensorDataset(X, y)

# Random 80/20 train/validation split; the validation part takes the
# remainder so both sizes always add up to the dataset length.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_data, val_data = random_split(dataset, [train_size, val_size])

# Only the training batches are shuffled.
batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

# === RNN Model ===
import torch.nn as nn

class SentimentRNN(nn.Module):
    """Sentiment scorer: embedding -> RNN -> linear layer -> sigmoid.

    Args:
        vocab_size: Largest token id in use; the embedding table is given
            ``vocab_size + 1`` rows so that id 0 is addressable as well.
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the RNN hidden state.
        output_dim: Number of values produced per input sequence.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        super().__init__()
        num_embeddings = vocab_size + 1
        self.embedding = nn.Embedding(num_embeddings, embed_dim)
        self.rnn = nn.RNN(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of token-id sequences laid out as (batch, seq).

        Returns a ``(batch, output_dim)`` tensor of sigmoid outputs computed
        from the RNN output at the last time step.
        """
        hidden_states, _ = self.rnn(self.embedding(x))
        final_step = hidden_states[:, -1, :]
        return self.sigmoid(self.fc(final_step))

# === Train ===
# Hyper-parameters. vocab_size is the number of distinct words; the model
# itself adds one extra embedding row for id 0.
vocab_size = len(vocab_to_int)
embed_dim = 128
hidden_dim = 64
output_dim = 1

model = SentimentRNN(vocab_size, embed_dim, hidden_dim, output_dim)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model.train()
epochs = 5

for epoch in range(epochs):
    train_loss = 0.0
    # The batch targets get their own name so the module-level `labels`
    # array is not overwritten by the last batch.
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        # Drop only the trailing feature axis: a bare squeeze() would also
        # remove the batch axis when the final batch holds a single sample,
        # and BCELoss would then reject the shape mismatch with the targets.
        output = model(inputs).squeeze(1)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss / len(train_loader):.4f}")

# === Inference Function ===
def predict_sentiment(model, sentence):
    """Classify one review as ``"Positive"`` or ``"Negative"``.

    Args:
        model: Trained model that maps a ``(1, seq_length)`` tensor of token
            ids to a single score.
        sentence: Review text. Non-string values are converted with ``str``
            first, matching the ``astype(str)`` coercion applied to the
            training reviews.

    Returns:
        ``"Positive"`` when the model score is at least 0.5, otherwise
        ``"Negative"``.
    """
    model.eval()
    sentence = clean_text(str(sentence))
    # Words missing from the vocabulary map to 0, the same id used for padding.
    tokens = [vocab_to_int.get(word, 0) for word in sentence.split()]
    padded = pad_features([tokens], seq_length)
    input_tensor = torch.from_numpy(padded).long()
    with torch.no_grad():
        output = model(input_tensor).item()
    return "Positive" if output >= 0.5 else "Negative"

# === Try Predictions ===
print(predict_sentiment(model, "I loved this movie, it was awesome!"))
print(predict_sentiment(model, "This was boring and terrible."))


In [None]:
# === Predict sentiments for entire dataset ===
# Score each raw review one at a time with the trained model.
predictions = [predict_sentiment(model, review) for review in df['review']]

# Store the predicted labels next to the reviews and write everything to CSV.
df['predicted_sentiment'] = predictions
df.to_csv("reviews_with_predictions.csv", index=False)

# Download
from google.colab import files
files.download("reviews_with_predictions.csv")


In [None]:
# Create a small sample reviews.csv input file.
import pandas as pd

# Sample data: (review text, label) pairs, where 1 marks a positive review
# and 0 a negative one.
_samples = [
    ("I loved this movie, it was awesome!", 1),
    ("This was boring and terrible.", 0),
    ("Absolutely fantastic experience.", 1),
    ("Worst film I have ever seen.", 0),
    ("Mediocre acting but good storyline.", 0),
    ("Really enjoyed the characters.", 1),
    ("Not worth the time.", 0),
    ("Amazing direction and visuals.", 1),
    ("Disappointing and slow.", 0),
    ("Brilliant performance by the cast.", 1),
]

# Split the pairs back into the two column lists the DataFrame is built from.
data = {
    "review": [text for text, _ in _samples],
    "label": [target for _, target in _samples],
}

df = pd.DataFrame(data)

# Save to CSV without the index column.
df.to_csv("reviews.csv", index=False)
