In [1]:
import sys
import os
# Put the parent of the current working directory first on the import path
# (presumably so the project-level `utils` module imported later resolves — confirm).
sys.path.insert(0, os.path.abspath(".."))

In [2]:
from dataclasses import dataclass
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import LabelEncoder
from utils import get_loader, EarlyStopper
from typing import Optional
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
class LSTMModel(nn.Module):
    """Three chained single-layer LSTMs, each followed by dropout, then a linear head.

    Args:
        input_size: Number of features per sample (input width of the first LSTM).
        hidden_sizes: Indexable collection whose first three entries give the
            hidden widths of the first, second and third LSTM.
        output_size: Width of the final linear layer's output.
        dropout: Drop probability shared by the three dropout layers.

    ``forward`` inserts a length-1 sequence axis at dimension 1 before the
    recurrent stack. The linear head's input width is ``hidden_sizes[2]``, so
    the flattened recurrent output only fits when that sequence axis has
    length 1, i.e. when ``x`` arrives as a 2-D ``(batch, input_size)`` tensor.
    """

    def __init__(self, input_size, hidden_sizes, output_size, dropout=0.1):
        super().__init__()
        width_1 = hidden_sizes[0]
        width_2 = hidden_sizes[1]
        width_3 = hidden_sizes[2]

        # Submodules are registered in the same order and under the same names
        # as before, so state_dict keys and seeded initialisation are unchanged.
        self.lstm1 = nn.LSTM(input_size, width_1, batch_first=True)
        self.dropout1 = nn.Dropout(dropout)

        self.lstm2 = nn.LSTM(width_1, width_2, batch_first=True)
        self.dropout2 = nn.Dropout(dropout)

        self.lstm3 = nn.LSTM(width_2, width_3, batch_first=True)
        self.dropout3 = nn.Dropout(dropout)

        self.fc = nn.Linear(width_3, output_size)

    def forward(self, x):
        """Return the linear head's output for a batch ``x``."""
        # Add the sequence axis expected by the batch_first LSTMs.
        hidden = torch.unsqueeze(x, 1)

        stages = (
            (self.lstm1, self.dropout1),
            (self.lstm2, self.dropout2),
            (self.lstm3, self.dropout3),
        )
        for recurrent, drop in stages:
            # Each LSTM returns (output, state); only the output is passed on.
            hidden, _ = recurrent(hidden)
            hidden = drop(hidden)

        # Collapse everything after the batch axis before the linear head.
        return self.fc(torch.flatten(hidden, start_dim=1))

In [4]:
def train_model(model, train_loader, valid_loader, optimizer, criterion, num_epochs, device, scheduler, stopper_args: Optional[dict]=None):
    """Train ``model`` for up to ``num_epochs`` epochs, validating after each one.

    Args:
        model: Module to optimise; switched to train mode at the start of every epoch.
        train_loader: Iterable of ``(X_batch, y_batch)`` pairs that also exposes
            ``len()`` and a ``dataset`` attribute supporting ``len()``.
        valid_loader: Loader handed to ``test`` for the per-epoch validation pass.
        optimizer: Optimiser bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(y_pred, y_batch)``.
        num_epochs: Maximum number of epochs to run.
        device: Device every batch is moved to before the forward pass.
        scheduler: Scheduler whose ``step`` accepts the validation loss (for
            example ``ReduceLROnPlateau``). ``None`` disables scheduling.
        stopper_args: Keyword arguments for ``EarlyStopper``. ``None`` or an
            empty dict disables early stopping.

    Returns:
        None. Progress is printed every 10th epoch and when early stopping fires.
    """
    # Always bind `stopper`; previously it was only assigned when stopper_args
    # was truthy, so the default (None) crashed at the early-stop check.
    stopper = EarlyStopper(**stopper_args) if stopper_args else None

    num_batches = len(train_loader)
    num_items = len(train_loader.dataset)

    for epoch in range(num_epochs):
        correct_predictions_train = 0
        total_loss_train = 0
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            # Clip the global gradient norm to 1.0 before the parameter update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            # Predicted class = index of the largest score along dimension 1.
            _, predicted = torch.max(y_pred, 1)
            correct_predictions_train += (predicted == y_batch).sum().item()
            total_loss_train += loss.item()

        # Loss is averaged over batches; accuracy over individual samples.
        train_loss = total_loss_train / num_batches
        train_accuracy = correct_predictions_train / num_items
        valid_loss, valid_accuracy, _ = test(model, valid_loader, criterion, device, verbose=0)
        if scheduler is not None:
            scheduler.step(valid_loss)

        if not (epoch + 1) % 10:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")
        if stopper is not None and stopper.early_stop(valid_loss):
            print("Early stopping triggered. ")
            break

def test(model, test_loader, criterion, device, verbose):
    model.eval()
    num_batches = len(test_loader)
    num_items = len(test_loader.dataset)
    total_loss = 0.0
    total_correct = 0
    
    all_preds = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            total_loss += loss.item()
            
            _, predicted = torch.max(y_pred, 1)
            total_correct += (predicted == y_batch).sum().item()
            
            all_preds.extend(predicted.cpu().numpy())
            
    test_loss = total_loss / num_batches
    test_accuracy = total_correct / num_items
    if verbose:
        print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')
    return test_loss, test_accuracy, all_preds

In [5]:
# Locations of the pre-computed feature arrays (.npy) and label files (.csv)
# for the train / validation / test splits, relative to the working directory.
train_feature = "../features/feature_aug_train.npy"
valid_feature = "../features/feature_aug_validation.npy"
test_feature = "../features/feature_aug_test.npy"
train_label = "../features/label_train.csv"
valid_label = "../features/label_validation.csv"
test_label = "../features/label_test.csv"

batch_size = 256
# valid_size = 0.2

# get_loader is a project helper from `utils`; as unpacked here it yields three
# loaders plus an encoder (presumably a fitted label encoder — confirm in utils).
train_loader, valid_loader, test_loader, encoder = get_loader(train_feature, train_label, valid_feature, valid_label, test_feature, test_label, batch_size)

# Input width = size of the first axis of the first training sample's features.
n_features = train_loader.dataset[0][0].shape[0]
hidden_sizes = [2048, 1024, 512]
# NOTE(review): hard-coded; presumably the number of distinct stance labels —
# confirm it matches the classes known to `encoder`.
n_classes = 4
dropout = 0.1

model = LSTMModel(n_features, hidden_sizes, n_classes, dropout).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.00005, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()
# Multiplies the learning rate by 0.5 once the monitored value (train_model
# passes the validation loss) has failed to improve for more than `patience` epochs.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
# against the installed version.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

# Upper bound on epochs; early stopping may end training sooner.
epochs = 500

# stopper_args is forwarded verbatim to utils.EarlyStopper; the meaning of
# `threshold` and `epsilon` is defined there (not visible in this notebook).
train_model(model, train_loader, valid_loader, optimizer, criterion, epochs, device, scheduler, stopper_args={'threshold': 20, 'epsilon': 1e-4})

# Evaluates the weights as they stand after the last training epoch; train_model
# does not restore an earlier checkpoint.
test_loss, test_accuracy, pred = test(model, test_loader, criterion, device, verbose=1)
# Map the predicted class indices back to their original label values.
pred_labels = encoder.inverse_transform(pred)




Epoch 10/500, Train Loss: 0.1740, Train Accuracy: 0.9383, Val Loss: 0.2185, Valid Accuracy: 0.9233
Epoch 20/500, Train Loss: 0.0732, Train Accuracy: 0.9733, Val Loss: 0.1574, Valid Accuracy: 0.9487
Epoch 30/500, Train Loss: 0.0459, Train Accuracy: 0.9833, Val Loss: 0.1363, Valid Accuracy: 0.9588
Epoch 40/500, Train Loss: 0.0161, Train Accuracy: 0.9942, Val Loss: 0.1105, Valid Accuracy: 0.9704
Epoch 50/500, Train Loss: 0.0069, Train Accuracy: 0.9980, Val Loss: 0.1193, Valid Accuracy: 0.9708
Epoch 60/500, Train Loss: 0.0045, Train Accuracy: 0.9988, Val Loss: 0.1145, Valid Accuracy: 0.9734
Early stopping triggered. 
Test Loss: 1.4160, Test Accuracy: 0.8276


In [8]:
print(pred_labels)
# Create the output directory up front: to_csv does not create missing parent
# directories, and failing here would discard the result of a long training run.
os.makedirs('../output', exist_ok=True)
# Persist the predictions as a single-column CSV named 'Stance'.
pd.DataFrame(pred_labels, columns=['Stance']).to_csv('../output/preds_lstm.csv', index=False)

['unrelated' 'discuss' 'unrelated' ... 'agree' 'agree' 'agree']
