In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.nn.utils.rnn import pad_sequence
import pandas as pd
from sklearn.metrics import mean_absolute_error, accuracy_score, f1_score



In [2]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda', index=0)

In [3]:
train_df = pd.read_csv("/content/trac2_CONVT_train.csv")
train_df = train_df[train_df["EmotionalPolarity"] <= 2]
test_df = pd.read_csv("/content/trac2_CONVT_test.csv", sep=",", escapechar='\\', on_bad_lines="skip")
dev_df = pd.read_csv("/content/trac2_CONVT_dev.csv", sep=",", escapechar='\\', on_bad_lines="skip")
print("Train size:", len(train_df))
print("Test size:", len(test_df))
print("Dev size:", len(dev_df))



Train size: 11089
Test size: 2316
Dev size: 990


In [4]:
def tokenize(text):
  if isinstance(text, str):
    return text.lower().split()
  return []

In [5]:
vocab = {"<pad>": 0, "<unk>": 1}
for text in train_df["text"]:
  for token in tokenize(text):
    if token not in vocab:
      vocab[token] = len(vocab)
print("Vocab Size:", vocab)



In [6]:
def encode(text):
  tokens = tokenize(text)
  ids = [vocab.get(token, 1) for token in tokens]
  return torch.tensor(ids, dtype=torch.long)


In [7]:
train_texts = [encode(text) for text in train_df["text"]]
dev_texts = [encode(text) for text in dev_df["text"]]
test_texts = [encode(text) for text in test_df["text"]]


In [8]:
train_emotion = torch.tensor(train_df["Emotion"].values, dtype=torch.float)
train_polarity = torch.tensor(train_df["EmotionalPolarity"].values, dtype=torch.long)
train_empathy = torch.tensor(train_df["Empathy"].values, dtype=torch.float)
dev_emotion = torch.tensor(dev_df["Emotion"].values, dtype=torch.float)
dev_polarity = torch.tensor(dev_df["EmotionalPolarity"].values, dtype=torch.long)
dev_empathy = torch.tensor(dev_df["Empathy"].values, dtype=torch.float)

In [9]:
train_padded = pad_sequence(train_texts, batch_first=True, padding_value=0)
dev_padded = pad_sequence(dev_texts, batch_first=True, padding_value=0)
test_padded = pad_sequence(test_texts, batch_first=True, padding_value=0)


In [10]:
train_dataset = TensorDataset(train_padded, train_emotion, train_polarity, train_empathy)
dev_dataset = TensorDataset(dev_padded, dev_emotion, dev_polarity, dev_empathy)


In [11]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=32, shuffle=False)


In [12]:
class EmotionRNN(nn.Module):
  def __init__(self, vocab_size, embed_dim=100, hidden_dim=128, num_classes=3, dropout=0.3):
    super(EmotionRNN, self).__init__()
    self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
    self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
    self.dropout = nn.Dropout(dropout)
    self.fc_emotion = nn.Linear(hidden_dim, 1)
    self.fc_polarity = nn.Linear(hidden_dim, num_classes)
    self.fc_empathy = nn.Linear(hidden_dim, 1)
  def forward(self, x):
    embedded = self.embedding(x)
    lstm_out, (h_n, c_n) = self.lstm(embedded)
    hidden = self.dropout(h_n[-1])
    emotion_out = self.fc_emotion(hidden).squeeze(1)
    polarity_out = self.fc_polarity(hidden)
    empathy_out = self.fc_empathy(hidden).squeeze(1)
    return emotion_out, polarity_out, empathy_out

In [13]:
model = EmotionRNN(vocab_size=len(vocab)).to(DEVICE)
criterion_emotion = nn.MSELoss()
criterion_polarity = nn.CrossEntropyLoss()
criterion_empathy = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)



In [14]:
num_epochs = 10

for epoch in range(num_epochs):
  model.train()
  total_loss = 0.0
  for batch in train_loader:
    texts, emotion, polarity, empathy = [b.to(DEVICE) for b in batch]
    optimizer.zero_grad()
    out_emotion, out_polarity, out_empathy = model(texts)
    loss_emotion = criterion_emotion(out_emotion, emotion)
    loss_polarity = criterion_polarity(out_polarity, polarity)
    loss_empathy = criterion_empathy(out_empathy, empathy)
    loss = loss_emotion + loss_polarity + loss_empathy
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
  print(f"Epoch [{epoch+1}/{num_epochs}], Total Loss: {total_loss:.4f}")

Epoch [1/10], Total Loss: 1070.5387
Epoch [2/10], Total Loss: 931.1669
Epoch [3/10], Total Loss: 926.7317
Epoch [4/10], Total Loss: 926.5729
Epoch [5/10], Total Loss: 924.2797
Epoch [6/10], Total Loss: 921.3843
Epoch [7/10], Total Loss: 920.8150
Epoch [8/10], Total Loss: 920.6001
Epoch [9/10], Total Loss: 921.6977
Epoch [10/10], Total Loss: 920.5831


In [15]:
model.eval()
all_true_polarity, all_pred_polarity = [], []
all_true_emotion, all_pred_emotion = [], []
all_true_empathy, all_pred_empathy = [], []
with torch.no_grad():
  for batch in dev_loader:
    texts, emotion, polarity, empathy = [b.to(DEVICE) for b in batch]
    out_emotion, out_polarity, out_empathy = model(texts)
    all_true_emotion.extend(emotion.cpu().numpy())
    all_pred_emotion.extend(out_emotion.cpu().numpy())
    all_true_empathy.extend(empathy.cpu().numpy())
    all_pred_empathy.extend(out_empathy.cpu().numpy()) # Added parentheses here
    preds = torch.argmax(out_polarity, dim=1)
    all_true_polarity.extend(polarity.cpu().numpy())
    all_pred_polarity.extend(preds.cpu().numpy())
mae_emotion = mean_absolute_error(all_true_emotion, all_pred_emotion)
mae_empathy = mean_absolute_error(all_true_empathy, all_pred_empathy)
acc_polarity = accuracy_score(all_true_polarity, all_pred_polarity)
f1_polarity = f1_score(all_true_polarity, all_pred_polarity, average="weighted") # Corrected variable name
print("\n--- DEV SET PERFORMANCE ---")
print(f"Emotion MAE: {mae_emotion:.4f}")
print(f"Empathy MAE: {mae_empathy:.4f}")
print(f"Polarity Accuracy: {acc_polarity:.4f}")
print(f"Polarity F1 (macro): {f1_polarity:.4f}")



--- DEV SET PERFORMANCE ---
Emotion MAE: 0.5970
Empathy MAE: 0.8844
Polarity Accuracy: 0.4657
Polarity F1 (macro): 0.2959


In [16]:
model.eval()
test_loader = DataLoader(test_padded, batch_size=32, shuffle=False)
test_preds = {"id": [], "Emotion": [], "EmotionalPolarity": [], "Empathy": []}
with torch.no_grad():
  for i, batch in enumerate(test_loader):
    batch = batch.to(DEVICE)
    out_emotion, out_polarity, out_empathy = model(batch)
    preds_polarity = torch.argmax(out_polarity, dim=1)
    start_idx = i * 32
    end_idx = start_idx + batch.size(0)
    test_preds["id"].extend(test_df["id"].iloc[start_idx:end_idx].tolist())
    test_preds["Emotion"].extend(out_emotion.cpu().numpy().tolist())
    test_preds["EmotionalPolarity"].extend(preds_polarity.cpu().numpy().tolist())
    test_preds["Empathy"].extend(out_empathy.cpu().numpy().tolist())
submission_df = pd.DataFrame(test_preds)
submission_df.to_csv("out_rnn.csv", index=False)
print(out_polarity[:5])
print(preds_polarity[:5])
print("\n✅ Predictions saved to submission.csv")
print(submission_df["EmotionalPolarity"].value_counts())



tensor([[-0.6353,  0.5732,  0.5466],
        [-0.6353,  0.5732,  0.5466],
        [-0.6353,  0.5732,  0.5466],
        [-0.6353,  0.5732,  0.5466],
        [-0.6353,  0.5732,  0.5466]], device='cuda:0')
tensor([1, 1, 1, 1, 1], device='cuda:0')

✅ Predictions saved to submission.csv
EmotionalPolarity
1    2314
2       2
Name: count, dtype: int64
