In [1]:
from dataclasses import dataclass
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import LabelEncoder
from utils import get_loader, EarlyStopper
from typing import Optional
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, dropout=0.1):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size, hidden_sizes[0], batch_first=True)
        self.dropout1 = nn.Dropout(dropout)

        self.lstm2 = nn.LSTM(hidden_sizes[0], hidden_sizes[1], batch_first=True)
        self.dropout2 = nn.Dropout(dropout)

        self.lstm3 = nn.LSTM(hidden_sizes[1], hidden_sizes[2], batch_first=True)
        self.dropout3 = nn.Dropout(dropout)

        self.fc = nn.Linear(hidden_sizes[2], output_size)

    def forward(self, x):
        x = x.unsqueeze(1)
        x, _ = self.lstm1(x)
        x = self.dropout1(x)

        x, _ = self.lstm2(x)
        x = self.dropout2(x)

        x, _ = self.lstm3(x)
        x = self.dropout3(x)
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

In [3]:
def train_model(model, train_loader, valid_loader, optimizer, criterion, num_epochs, device, scheduler, stopper_args: Optional[dict]=None):
    if stopper_args:
        stopper = EarlyStopper(**stopper_args)
        
    num_batches = len(train_loader)
    num_items = len(train_loader.dataset)

    for epoch in range(num_epochs):
        correct_predictions_train = 0
        total_loss_train = 0
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            _, predicted = torch.max(y_pred, 1)
            correct_predictions_train += (predicted == y_batch).sum().item()
            total_loss_train += loss.item()
        
        train_loss = total_loss_train / num_batches
        train_accuracy = correct_predictions_train / num_items
        valid_loss, valid_accuracy, _ = test(model, valid_loader, criterion, device, verbose=0)
        scheduler.step(valid_loss)

        if not (epoch + 1) % 10:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")
        if stopper and stopper.early_stop(valid_loss):
            print("Early stopping triggered. ")
            break

def test(model, test_loader, criterion, device, verbose):
    model.eval()
    num_batches = len(test_loader)
    num_items = len(test_loader.dataset)
    total_loss = 0.0
    total_correct = 0
    
    all_preds = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            total_loss += loss.item()
            
            _, predicted = torch.max(y_pred, 1)
            total_correct += (predicted == y_batch).sum().item()
            
            all_preds.extend(predicted.cpu().numpy())
            
    test_loss = total_loss / num_batches
    test_accuracy = total_correct / num_items
    if verbose:
        print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')
    return test_loss, test_accuracy, all_preds

In [5]:
train_feature_path = "./features/feature_aug_train.npy"
test_feature_path = "./features/feature_aug_test.npy"
train_label_path = "./features/label_train.csv"
test_label_path = "./features/label_test.csv"

batch_size = 256
valid_size = 0.2

train_loader, valid_loader, test_loader, encoder = get_loader(train_feature_path, train_label_path, test_feature_path, test_label_path, batch_size, valid_size)

n_features = train_loader.dataset[0][0].shape[0]
hidden_sizes = [2048, 1024, 512]
n_classes = 4
dropout = 0.1

model = LSTMModel(n_features, hidden_sizes, n_classes, dropout).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.00005, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

epochs = 500

train_model(model, train_loader, valid_loader, optimizer, criterion, epochs, device, scheduler, stopper_args={'threshold': 20, 'epsilon': 1e-4})

test_loss, test_accuracy, pred = test(model, test_loader, criterion, device, verbose=1)
pred_labels = encoder.inverse_transform(pred)


Epoch 10/500, Train Loss: 0.1612, Train Accuracy: 0.9416, Val Loss: 0.2513, Valid Accuracy: 0.9087
Epoch 20/500, Train Loss: 0.0682, Train Accuracy: 0.9754, Val Loss: 0.1387, Valid Accuracy: 0.9535
Epoch 30/500, Train Loss: 0.0260, Train Accuracy: 0.9910, Val Loss: 0.1273, Valid Accuracy: 0.9627
Epoch 40/500, Train Loss: 0.0134, Train Accuracy: 0.9954, Val Loss: 0.1190, Valid Accuracy: 0.9694
Epoch 50/500, Train Loss: 0.0084, Train Accuracy: 0.9977, Val Loss: 0.1255, Valid Accuracy: 0.9699
Early stopping triggered. 
Test Loss: 1.2736, Test Accuracy: 0.8257




In [6]:
print(pred_labels)
pd.DataFrame(pred_labels, columns=['Stance']).to_csv('./preds_lstm.csv', index=False)

['unrelated' 'disagree' 'unrelated' ... 'agree' 'agree' 'agree']


In [12]:
class AdvancedTransformerModel(nn.Module):
    def __init__(self, input_size, d_model, nhead, num_layers, output_size, dropout=0.3):
        super(AdvancedTransformerModel, self).__init__()
        self.input_linear = nn.Linear(input_size, d_model)
        self.positional_encoding = nn.Parameter(torch.randn(1, d_model))
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=4 * d_model,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.norm = nn.LayerNorm(d_model)
        self.output_linear = nn.Linear(d_model, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.input_linear(x) + self.positional_encoding
        x = self.norm(x)
        x = self.dropout(x)
        x = self.transformer_encoder(x)
        x = x.flatten(start_dim=1)
        x = self.output_linear(x)
        return x

input_size = train_loader.dataset[0][0].shape[0]
d_model = 128
nhead = 8
num_layers = 3
output_size = 4

model = AdvancedTransformerModel(input_size, d_model, nhead, num_layers, output_size, dropout=0.3).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.00005, weight_decay=0.001)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, steps_per_epoch=len(train_loader), epochs=200)

def train_model_with_early_stopping(model, train_loader, val_loader, num_epochs=200):
    best_val_loss = float('inf')
    correct_predictions_train, correct_predictions_valid = 0, 0
    total_predictions_train, total_predictions_valid = 0, 0
    train_losses, val_losses = [], []
    patience, patience_counter = 20, 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            _, predicted = torch.max(y_pred, 1)
            correct_predictions_train += (predicted == y_batch).sum().item()
            total_predictions_train += y_batch.size(0)
            train_loss += loss.item()

        train_losses.append(train_loss / len(train_loader))
        train_accuracy = correct_predictions_train / total_predictions_train

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                y_pred = model(X_batch)
                loss = criterion(y_pred, y_batch)
                _, predicted = torch.max(y_pred, 1)
                correct_predictions_valid += (predicted == y_batch).sum().item()
                total_predictions_valid += y_batch.size(0)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        valid_accuracy = correct_predictions_valid / total_predictions_valid
        val_losses.append(val_loss)

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_advanced_transformer_model.pth')
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                model.load_state_dict(torch.load('best_advanced_transformer_model.pth'))
                break

        scheduler.step()

    return train_losses, val_losses

train_losses, val_losses = train_model_with_early_stopping(model, train_loader, valid_loader)

Epoch 1/200, Train Loss: 0.8023, Train Accuracy: 0.7295, Val Loss: 0.7460, Valid Accuracy: 0.7332
Epoch 2/200, Train Loss: 0.7427, Train Accuracy: 0.7353, Val Loss: 0.7054, Valid Accuracy: 0.7448
Epoch 3/200, Train Loss: 0.6665, Train Accuracy: 0.7441, Val Loss: 0.5761, Valid Accuracy: 0.7605
Epoch 4/200, Train Loss: 0.5544, Train Accuracy: 0.7573, Val Loss: 0.4350, Valid Accuracy: 0.7809
Epoch 5/200, Train Loss: 0.4484, Train Accuracy: 0.7730, Val Loss: 0.3544, Valid Accuracy: 0.7984
Epoch 6/200, Train Loss: 0.3881, Train Accuracy: 0.7872, Val Loss: 0.3111, Valid Accuracy: 0.8124
Epoch 7/200, Train Loss: 0.3553, Train Accuracy: 0.7988, Val Loss: 0.2799, Valid Accuracy: 0.8241
Epoch 8/200, Train Loss: 0.3282, Train Accuracy: 0.8088, Val Loss: 0.2597, Valid Accuracy: 0.8340
Epoch 9/200, Train Loss: 0.3108, Train Accuracy: 0.8175, Val Loss: 0.2513, Valid Accuracy: 0.8419
Epoch 10/200, Train Loss: 0.2931, Train Accuracy: 0.8250, Val Loss: 0.2498, Valid Accuracy: 0.8485
Epoch 11/200, Train

  model.load_state_dict(torch.load('best_model.pth'))


RuntimeError: Error(s) in loading state_dict for AdvancedTransformerModel:
	Missing key(s) in state_dict: "positional_encoding", "input_linear.weight", "input_linear.bias", "transformer_encoder.layers.0.self_attn.in_proj_weight", "transformer_encoder.layers.0.self_attn.in_proj_bias", "transformer_encoder.layers.0.self_attn.out_proj.weight", "transformer_encoder.layers.0.self_attn.out_proj.bias", "transformer_encoder.layers.0.linear1.weight", "transformer_encoder.layers.0.linear1.bias", "transformer_encoder.layers.0.linear2.weight", "transformer_encoder.layers.0.linear2.bias", "transformer_encoder.layers.0.norm1.weight", "transformer_encoder.layers.0.norm1.bias", "transformer_encoder.layers.0.norm2.weight", "transformer_encoder.layers.0.norm2.bias", "transformer_encoder.layers.1.self_attn.in_proj_weight", "transformer_encoder.layers.1.self_attn.in_proj_bias", "transformer_encoder.layers.1.self_attn.out_proj.weight", "transformer_encoder.layers.1.self_attn.out_proj.bias", "transformer_encoder.layers.1.linear1.weight", "transformer_encoder.layers.1.linear1.bias", "transformer_encoder.layers.1.linear2.weight", "transformer_encoder.layers.1.linear2.bias", "transformer_encoder.layers.1.norm1.weight", "transformer_encoder.layers.1.norm1.bias", "transformer_encoder.layers.1.norm2.weight", "transformer_encoder.layers.1.norm2.bias", "transformer_encoder.layers.2.self_attn.in_proj_weight", "transformer_encoder.layers.2.self_attn.in_proj_bias", "transformer_encoder.layers.2.self_attn.out_proj.weight", "transformer_encoder.layers.2.self_attn.out_proj.bias", "transformer_encoder.layers.2.linear1.weight", "transformer_encoder.layers.2.linear1.bias", "transformer_encoder.layers.2.linear2.weight", "transformer_encoder.layers.2.linear2.bias", "transformer_encoder.layers.2.norm1.weight", "transformer_encoder.layers.2.norm1.bias", "transformer_encoder.layers.2.norm2.weight", "transformer_encoder.layers.2.norm2.bias", "norm.weight", "norm.bias", "output_linear.weight", "output_linear.bias". 
	Unexpected key(s) in state_dict: "lstm1.weight_ih_l0", "lstm1.weight_hh_l0", "lstm1.bias_ih_l0", "lstm1.bias_hh_l0", "lstm2.weight_ih_l0", "lstm2.weight_hh_l0", "lstm2.bias_ih_l0", "lstm2.bias_hh_l0", "lstm3.weight_ih_l0", "lstm3.weight_hh_l0", "lstm3.bias_ih_l0", "lstm3.bias_hh_l0", "fc.weight", "fc.bias". 