Google Colab

In [1]:
# Mount Google Drive inside the Colab runtime (a remount is requested via
# force_remount=True) and record where the essay dataset CSV lives.

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Location of the dataset CSV on the mounted Drive; passed to read_data() by later cells.
dataset_path = '/content/drive/MyDrive/AES/dataset.csv'

Mounted at /content/drive


General Utils

In [2]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score
import numpy as np

# (min, max) score bounds, keyed first by prompt id and then by trait name.
# Prompts that share a rubric reuse one trait tuple; "holistic" is always the
# last key of each per-prompt mapping.
_WRITING_TRAITS = ("sentence_fluency", "word_choice", "conventions", "organization", "content")
_RESPONSE_TRAITS = ("narrativity", "language", "prompt_adherence", "content")
_PROMPT7_TRAITS = ("conventions", "organization", "content")

SCORE_RANGES = {
    1: {**dict.fromkeys(_WRITING_TRAITS, (1, 6)), "holistic": (2, 12)},
    2: {**dict.fromkeys(_WRITING_TRAITS, (1, 6)), "holistic": (1, 6)},
    3: {**dict.fromkeys(_RESPONSE_TRAITS, (0, 3)), "holistic": (0, 3)},
    4: {**dict.fromkeys(_RESPONSE_TRAITS, (0, 3)), "holistic": (0, 3)},
    5: {**dict.fromkeys(_RESPONSE_TRAITS, (0, 4)), "holistic": (0, 4)},
    6: {**dict.fromkeys(_RESPONSE_TRAITS, (0, 4)), "holistic": (0, 4)},
    7: {**dict.fromkeys(_PROMPT7_TRAITS, (0, 6)), "holistic": (0, 30)},
    8: {**dict.fromkeys(_WRITING_TRAITS, (2, 12)), "holistic": (0, 60)},
}


def read_data(path):
    """
    Load the essay CSV and expose its columns as parallel numpy arrays.

    Parameters:
    - path (str): Path (or any source accepted by pandas.read_csv) of the essay CSV.

    Returns: data_dict (dict): Row-aligned arrays under these keys, in this order:
        - 'essay_ids': values of the 'essay_id' column
        - 'prompt_ids': values of the 'prompt_id' column
        - 'essay_text': values of the 'essay_text' column
        - 'features': 2-D array of every column from position 12 onward
          (the extracted feature columns)
        - 'holistic', 'content', 'organization', 'word_choice',
          'sentence_fluency', 'conventions', 'prompt_adherence', 'language',
          'narrativity': values of the score column with the same name
    """
    frame = pd.read_csv(path)

    # Output keys paired with the CSV column each one is read from.
    identity_columns = (
        ("essay_ids", "essay_id"),
        ("prompt_ids", "prompt_id"),
        ("essay_text", "essay_text"),
    )
    # Score columns keep their CSV name as the output key.
    score_columns = (
        "holistic",
        "content",
        "organization",
        "word_choice",
        "sentence_fluency",
        "conventions",
        "prompt_adherence",
        "language",
        "narrativity",
    )

    data_dict = {key: frame[column].values for key, column in identity_columns}
    # Everything after the first 12 columns is treated as the feature matrix.
    data_dict["features"] = frame.iloc[:, 12:].values
    for column in score_columns:
        data_dict[column] = frame[column].values

    return data_dict


def quadratic_weighted_kappa(y_true, y_pred):
    """
    Quadratic Weighted Kappa (QWK) agreement between two sets of ratings.

    Both inputs are rounded with numpy and cast to int before being handed to
    sklearn's cohen_kappa_score with quadratic weights, so continuous
    predictions can be passed directly.

    Parameters:
    - y_true (array-like): Reference scores
    - y_pred (array-like): Predicted scores

    Returns:
    - float: QWK between the rounded y_true and the rounded y_pred.
    """
    # Snap both rating vectors onto the integer grid (reference scores first).
    true_labels, pred_labels = (
        np.round(scores).astype(int) for scores in (y_true, y_pred)
    )
    return cohen_kappa_score(true_labels, pred_labels, weights="quadratic")

# Approach A

In [None]:
import itertools
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, TensorDataset

# Hyperparameters
PROMPTS = 8

# Holistic (min, max) bounds for prompts 1..8, in prompt order.
# NOTE(review): this rebinds the SCORE_RANGES table defined in the General
# Utils cell with a holistic-only mapping.
_HOLISTIC_BOUNDS = [(2, 12), (1, 6), (0, 3), (0, 3), (0, 4), (0, 4), (0, 30), (0, 60)]
SCORE_RANGES = {
    prompt: {"holistic": bounds}
    for prompt, bounds in enumerate(_HOLISTIC_BOUNDS, start=1)
}

def scale_score(value, prompt, score_type="holistic"):
    """Min-max normalise a raw score using the prompt's (min, max) bounds.

    The bounds are looked up in SCORE_RANGES[prompt][score_type]; the minimum
    maps to 0 and the maximum to 1.
    """
    lower, upper = SCORE_RANGES[prompt][score_type]
    span = upper - lower
    return (value - lower) / span

def rescale_score(scaled_value, prompt, score_type="holistic"):
    """Map a normalised score back onto the prompt's raw score range.

    Inverse of the min-max scaling: 0 maps to the minimum and 1 to the maximum
    of SCORE_RANGES[prompt][score_type]. The result is not rounded or clipped.
    """
    lower, upper = SCORE_RANGES[prompt][score_type]
    span = upper - lower
    return scaled_value * span + lower

def build_model(input_size, hidden_layers, hidden_units):
    """Build a fully connected regressor: Linear+ReLU hidden blocks, then a 1-unit output.

    Parameters:
    - input_size (int): Number of input features.
    - hidden_layers (int): Number of hidden Linear+ReLU blocks; at least one
      block is always created, even for values below 1.
    - hidden_units (int): Width of every hidden layer.

    Returns:
    - nn.Sequential: The assembled network (the output layer has no activation).
    """
    hidden_widths = [hidden_units] * max(hidden_layers, 1)

    stack = []
    fan_in = input_size
    for fan_out in hidden_widths:
        stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        fan_in = fan_out

    # Single-unit head producing the score prediction.
    stack.append(nn.Linear(fan_in, 1))
    return nn.Sequential(*stack)

def init_weights(model):
    """Re-initialise, in place, the weight of every nn.Linear inside `model`.

    Weights are drawn with nn.init.kaiming_normal_ using its default
    arguments; biases are left untouched.
    """
    linear_layers = (
        layer for layer in model.modules() if isinstance(layer, nn.Linear)
    )
    for layer in linear_layers:
        nn.init.kaiming_normal_(layer.weight)

# Parsed datasets keyed by CSV path, so repeated load_data() calls reuse a
# single read instead of re-parsing the file for every prompt.
_DATASET_CACHE = {}


def load_data(prompt):
    """Return the feature matrix and scaled holistic targets of one prompt.

    The dataset at `dataset_path` is parsed with read_data() the first time it
    is needed and cached per path; later calls only filter the cached arrays.
    Re-running this cell resets the cache.

    Parameters:
    - prompt (int): Prompt id whose essays are selected.

    Returns:
    - features (np.ndarray): Rows of data_dict["features"] belonging to `prompt`.
    - scaled_targets (np.ndarray): Column vector of shape (n_essays, 1) with the
      holistic scores min-max scaled by scale_score() for that prompt.
    """
    if dataset_path not in _DATASET_CACHE:
        _DATASET_CACHE[dataset_path] = read_data(dataset_path)
    data_dict = _DATASET_CACHE[dataset_path]

    prompt_mask = data_dict["prompt_ids"] == prompt
    features = data_dict["features"][prompt_mask]
    targets = data_dict["holistic"][prompt_mask]

    # scale_score is plain arithmetic, so it is applied to the whole array at once.
    scaled_targets = np.asarray(scale_score(targets, prompt), dtype=float)
    return features, scaled_targets.reshape(-1, 1)

def train_and_evaluate(model, train_loader, val_loader, criterion, optimizer,
                       patience=5, max_epochs=15):
    """Train `model` with early stopping on validation QWK and keep its best weights.

    Each epoch runs one pass over `train_loader` and then scores the model with
    evaluate_qwk() on `val_loader`. Whenever the validation QWK improves, a
    copy of the model weights is taken; before returning, the model is reset to
    that best snapshot so callers that save or reuse the model get the
    best-scoring epoch rather than the last one trained.

    Parameters:
    - model (nn.Module): Network to train; updated in place.
    - train_loader: Iterable of (features, targets) training batches.
    - val_loader: Iterable of (features, targets) batches passed to evaluate_qwk().
    - criterion: Loss called on the flattened outputs and flattened targets.
    - optimizer: Optimizer stepping the model's parameters.
    - patience (int): Consecutive non-improving epochs tolerated before stopping.
    - max_epochs (int): Upper bound on the number of epochs.

    Returns:
    - float: Highest validation QWK seen. The running best starts at 0.0, so
      0.0 is returned (and the weights are left as trained) if no epoch
      scores above zero.
    """
    import copy  # local import: keeps the cell's import block untouched

    best_val_qwk = 0.0
    best_state = None
    epochs_no_improve = 0

    for epoch in range(max_epochs):
        model.train()
        for features, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs.view(-1), targets.view(-1))
            loss.backward()
            optimizer.step()

        model.eval()
        val_qwk = evaluate_qwk(model, val_loader)

        if val_qwk > best_val_qwk:
            best_val_qwk = val_qwk
            # Deep copy: state_dict() returns references to the live tensors.
            best_state = copy.deepcopy(model.state_dict())
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {epoch} epochs with no improvement.")
                break

    # Roll back to the best-scoring epoch instead of keeping the final one.
    if best_state is not None:
        model.load_state_dict(best_state)

    return best_val_qwk

def evaluate_qwk(model, val_loader, prompt=None):
    """Score `model` on `val_loader` with Quadratic Weighted Kappa.

    Model outputs and targets are collected batch by batch without gradient
    tracking, mapped back to the raw holistic range of `prompt` with
    rescale_score(), and compared with quadratic_weighted_kappa().

    The caller is responsible for putting the model in eval mode.

    Parameters:
    - model: Callable mapping a feature batch to a tensor of predictions.
    - val_loader: Iterable of (features, targets) tensor batches; targets are
      expected in the same scaled space as the model outputs.
    - prompt (int, optional): Prompt id whose score range is used to rescale.
      When omitted, the module-level `target_prompt` is used, which is what
      existing two-argument callers rely on.

    Returns:
    - float: QWK between the rescaled targets and the rescaled predictions.
    """
    if prompt is None:
        # Backward-compatible fallback to the driver loop's global variable.
        prompt = target_prompt

    predictions, actuals = [], []
    with torch.no_grad():
        for features, targets in val_loader:
            outputs = model(features)
            # Flatten to 1-D so each essay contributes one prediction/target pair.
            predictions.extend(outputs.view(-1).tolist())
            actuals.extend(targets.view(-1).tolist())

    predictions = [rescale_score(pred, prompt) for pred in predictions]
    actuals = [rescale_score(actual, prompt) for actual in actuals]

    return quadratic_weighted_kappa(actuals, predictions)


# ---- Approach A driver -------------------------------------------------------
# For each held-out target prompt: train on every other prompt, choose the
# architecture / learning rate by k-fold cross-validation, choose the batch
# size the same way, fit a final model and save it as TorchScript.

SEED = 42                    # fixed seed so weight init and batch shuffling are repeatable
N_SPLITS = 7                 # number of cross-validation folds
GRID_SEARCH_BATCH_SIZE = 8   # batch size used while searching the architecture grid
TEST_BATCH_SIZE = 8          # batch size of the held-out prompt loader

param_grid = {
    'hidden_layers': [1, 2, 4, 8],
    'hidden_units': [8, 16, 32],
    'learning_rate': [0.001, 0.01, 0.1]
}
batch_size_grid = [4, 8, 16, 32]


def _make_loader(features, targets, batch_size, shuffle):
    """Wrap numpy feature/target arrays in a DataLoader over float32 tensors."""
    dataset = TensorDataset(torch.tensor(features, dtype=torch.float32),
                            torch.tensor(targets, dtype=torch.float32))
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


def _cross_validate(features, targets, hidden_layers, hidden_units, learning_rate, batch_size):
    """Return the mean per-fold QWK of one configuration over N_SPLITS folds.

    A fresh model and optimizer are created for every fold, so a fold's
    validation rows have never been trained on by the model that scores them.
    """
    # NOTE(review): KFold is used without shuffling while the rows are
    # concatenated prompt by prompt, so folds are contiguous slices of that
    # ordering -- confirm this is the intended split.
    fold_qwks = []
    for train_index, val_index in KFold(n_splits=N_SPLITS).split(features):
        model = build_model(input_size=features.shape[1],
                            hidden_layers=hidden_layers, hidden_units=hidden_units)
        init_weights(model)
        optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

        train_loader = _make_loader(features[train_index], targets[train_index],
                                    batch_size, shuffle=True)
        val_loader = _make_loader(features[val_index], targets[val_index],
                                  batch_size, shuffle=False)

        fold_qwks.append(
            train_and_evaluate(model, train_loader, val_loader, nn.MSELoss(), optimizer))
    return np.mean(fold_qwks)


torch.manual_seed(SEED)
criterion = nn.MSELoss()

# Load every prompt once; the per-target loop below only recombines these arrays.
prompt_data = {prompt: load_data(prompt) for prompt in range(1, PROMPTS + 1)}

# NOTE(review): the loop starts at prompt 2, so prompt 1 is never used as the
# held-out target here -- confirm whether that is a leftover from resuming a run.
for target_prompt in range(2, PROMPTS + 1):
    print(f"\nProcessing Prompt {target_prompt}...\n")

    test_features, test_targets = prompt_data[target_prompt]
    test_loader = _make_loader(test_features, test_targets, TEST_BATCH_SIZE, shuffle=False)

    source_prompts = [prompt for prompt in range(1, PROMPTS + 1) if prompt != target_prompt]
    train_features = np.concatenate([prompt_data[prompt][0] for prompt in source_prompts])
    train_targets = np.concatenate([prompt_data[prompt][1] for prompt in source_prompts])
    input_size = train_features.shape[1]

    # --- Stage 1: architecture and learning-rate search -----------------------
    # Start from -inf so the first configuration is always recorded and
    # best_hyperparameters can never be left as None.
    best_qwk = float("-inf")
    best_hyperparameters = None

    for hidden_layers, hidden_units, learning_rate in itertools.product(
            param_grid['hidden_layers'], param_grid['hidden_units'], param_grid['learning_rate']):

        print(f"Trying hidden_layers={hidden_layers}, hidden_units={hidden_units}, learning_rate={learning_rate}...")

        avg_qwk = _cross_validate(train_features, train_targets,
                                  hidden_layers, hidden_units, learning_rate,
                                  GRID_SEARCH_BATCH_SIZE)
        if avg_qwk > best_qwk:
            best_qwk = avg_qwk
            best_hyperparameters = (hidden_layers, hidden_units, learning_rate)

    print(f"Best Hyperparameters for Prompt {target_prompt}: {best_hyperparameters}")

    # --- Stage 2: batch-size search with the chosen architecture --------------
    best_batch_size_qwk = float("-inf")
    best_batch_size = None

    for batch_size in batch_size_grid:
        avg_qwk = _cross_validate(train_features, train_targets,
                                  best_hyperparameters[0], best_hyperparameters[1],
                                  best_hyperparameters[2], batch_size)
        if avg_qwk > best_batch_size_qwk:
            best_batch_size_qwk = avg_qwk
            best_batch_size = batch_size

    # --- Stage 3: final fit on all source prompts -----------------------------
    final_model = build_model(input_size=input_size,
                              hidden_layers=best_hyperparameters[0],
                              hidden_units=best_hyperparameters[1])
    init_weights(final_model)
    optimizer = optim.AdamW(final_model.parameters(), lr=best_hyperparameters[2])

    train_loader = _make_loader(train_features, train_targets, best_batch_size, shuffle=True)

    print(f"Evaluating final model on test set for Prompt {target_prompt}...")
    # NOTE(review): the held-out prompt doubles as the early-stopping set here,
    # so the reported QWK is selected on the test data and is optimistic.
    test_qwk = train_and_evaluate(final_model, train_loader, test_loader, criterion, optimizer)
    print(f"Test QWK for Prompt {target_prompt}: {test_qwk:.4f}")

    # Save the final model
    print(f"Saving the best model for Prompt {target_prompt}...")
    scripted_model = torch.jit.script(final_model)
    scripted_model.save(f'/content/drive/MyDrive/ApproachA-1/model-A-{target_prompt}.pt')



Processing Prompt 2...

Trying hidden_layers=1, hidden_units=8, learning_rate=0.001...
Early stopping triggered after 5 epochs with no improvement.
Early stopping triggered after 5 epochs with no improvement.
Early stopping triggered after 5 epochs with no improvement.
Early stopping triggered after 6 epochs with no improvement.
Early stopping triggered after 6 epochs with no improvement.
Early stopping triggered after 5 epochs with no improvement.
Early stopping triggered after 8 epochs with no improvement.
Trying hidden_layers=1, hidden_units=8, learning_rate=0.01...
Early stopping triggered after 11 epochs with no improvement.
Early stopping triggered after 14 epochs with no improvement.
Early stopping triggered after 5 epochs with no improvement.
Early stopping triggered after 10 epochs with no improvement.
Early stopping triggered after 5 epochs with no improvement.
Early stopping triggered after 10 epochs with no improvement.
Early stopping triggered after 7 epochs with no impro