<a href="https://colab.research.google.com/github/PunitRaveendran/ACM/blob/main/Everyday.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from torch.nn.utils.rnn import pad_sequence
from collections import Counter
from tqdm import tqdm

df = pd.read_csv("emotion_dataset_raw.csv")
df['Text'] = df['Text'].astype(str)

# Label encode
le = LabelEncoder()
df['label'] = le.fit_transform(df['Emotion'])

def tokenize(text): return text.lower().split()

# Build vocab from tokens
tokens = [token for text in df['Text'] for token in tokenize(text)]
vocab = {"<PAD>": 0, "<UNK>": 1}
vocab.update({w: i + 2 for i, (w, _) in enumerate(Counter(tokens).most_common(8000))})

# Encoding text
def encode(text): return [vocab.get(w, 1) for w in tokenize(text)]
df['input_ids'] = df['Text'].apply(encode)

# Split
X_train, X_test, y_train, y_test = train_test_split(df['input_ids'], df['label'], test_size=0.2, stratify=df['label'])

class EmotionDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = [torch.tensor(x) for x in texts]
        self.labels = torch.tensor(labels.values)

    def __getitem__(self, idx): return self.texts[idx], self.labels[idx]
    def __len__(self): return len(self.labels)

def pad_collate(batch):
    texts, labels = zip(*batch)
    padded = pad_sequence(texts, batch_first=True)
    return padded, torch.tensor(labels)

train_dl = DataLoader(EmotionDataset(X_train, y_train), batch_size=64, shuffle=True, collate_fn=pad_collate)
test_dl  = DataLoader(EmotionDataset(X_test, y_test), batch_size=64, collate_fn=pad_collate)

# Model
class LSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, out_dim):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, out_dim)

    def forward(self, x):
        x = self.emb(x)
        _, (h, _) = self.lstm(x)
        return self.fc(h[-1])

model = LSTM(len(vocab), 100, 128, len(le.classes_)).to("cpu")
opt = torch.optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()

# Training
for epoch in range(3):
    model.train()
    for X, y in train_dl:
        opt.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        opt.step()
    print(f"Epoch {epoch+1} complete")

# Evaluate
model.eval()
all_preds, all_true = [], []
with torch.no_grad():
    for X, y in test_dl:
        logits = model(X)
        preds = logits.argmax(dim=1)
        all_preds.extend(preds.tolist())
        all_true.extend(y.tolist())

print("Accuracy:", accuracy_score(all_true, all_preds))
print("F1 Score:", f1_score(all_true, all_preds, average='weighted'))

# Sample Predictions
print("\nSample Predictions:")
for i in range(10):
    text_tokens = X_test.iloc[i]
    text_tensor = torch.tensor(text_tokens).unsqueeze(0)
    padded = pad_sequence([text_tensor.squeeze()], batch_first=True)

    model.eval()
    with torch.no_grad():
        output = model(padded)
        pred_label = output.argmax(dim=1).item()

    input_words = [word for word, idx in vocab.items() if idx in text_tokens][:15]
    print(f"Text: {' '.join(input_words)}...")
    print(f"Actual: {le.inverse_transform([y_test.iloc[i]])[0]} | Predicted: {le.inverse_transform([pred_label])[0]}\n")



Epoch 1 complete
Epoch 2 complete
Epoch 3 complete
Accuracy: 0.42621066245150163
F1 Score: 0.33658006756665154

Sample Predictions:
Text: <UNK> i to my in not be so they no were am being over things...
Actual: anger | Predicted: anger

Text: in you when on have your &amp; don't know even every you're scared it! light...
Actual: fear | Predicted: fear

Text: <UNK> i is it have so like no now looks bought nails skin nail practically...
Actual: joy | Predicted: joy

Text: <UNK> the to that you one really want thing person notice obvious...
Actual: sadness | Predicted: anger

Text: <UNK> a and in for two days dinner leaving reading drinking beer cooking...
Actual: joy | Predicted: fear

Text: i to and . , was that me had this time by very or think...
Actual: sadness | Predicted: fear

Text: the to and of that we are one our away twitter 5 follow giving win...
Actual: surprise | Predicted: joy

Text: <UNK> the . not by were miffed frantic stock currency...
Actual: anger | Predicted: anger

