In [1]:
import os
import sys
import json
import torch

sys.path.append("../")
from lib.utils import get_device, get_current_date
from lib.utils.constants import Subtask, Track, PreprocessTextLevel, DatasetType
from lib.utils.models import sequential_fully_connected
from lib.data.loading import load_train_dev_test_df
from lib.data.tokenizer import get_tokenizer
from lib.training.optimizer import get_optimizer, get_scheduler
from lib.training.loss import get_loss_fn
from lib.training.metric import get_metric

  warn(


In [2]:
CONFIG_FILE_PATH = os.path.relpath("../config.json")

config = {}
with open(CONFIG_FILE_PATH, "r") as config_file:
    config = json.load(config_file)

DEVICE = get_device()
print(f"Using device: {DEVICE}")

Using device: cuda


In [3]:
# config

In [4]:
task = None
if "task" in config:
    task = Subtask(config["task"])
else:
    raise ValueError("Task not specified in config")

track = None
if "track" in config:
    track = Track(config["track"])
else:
    print(f"Warning: Track not specified in config for subtask: {task}")

dataset_type = DatasetType.TransformerTruncationDataset
if "dataset_type" in config["data"]:
    dataset_type = DatasetType(config["data"]["dataset_type"])

dataset_type_settings = None
if "dataset_type_settings" in config["data"]:
    dataset_type_settings = config["data"]["dataset_type_settings"]

df_train, df_dev, df_test = load_train_dev_test_df(
    task=task,
    track=track,
    data_dir=f"../{config['data']['data_dir']}",
    label_column=config["data"]["label_column"],
    test_size=config["data"]["test_size"],
    preprocess_text_level=PreprocessTextLevel(
        config["data"]["preprocess_text_level"]
    ),
)

print(f"df_train.shape: {df_train.shape}")
print(f"df_dev.shape: {df_dev.shape}")
print(f"df_test.shape: {df_test.shape}")

Loading train data...
Train/dev split... (df_train.shape: (3649, 3))
Loading test data....././data/original_data/SubtaskC/SubtaskC_dev.jsonl
df_train.shape: (2919, 3)
df_dev.shape: (730, 3)
df_test.shape: (505, 3)


In [5]:
results_dir = os.path.relpath(
    f"../runs/{get_current_date()}-{task.value}-{config['model']}"
)
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

print(f"Will save results to: {results_dir}")

with open(results_dir + "/config.json", "w") as f:
    json.dump(config, f, indent=4)

Will save results to: ../runs/29-12-2023_13:52:52-SubtaskC-longformer


# Build the dataset

In [6]:
import numpy as np
from torch.utils.data import Dataset
from transformers import PreTrainedTokenizer


class TokenClassificationDataset(Dataset):
    def __init__(
        self,
        ids: np.ndarray,
        texts: np.ndarray,
        targets: np.ndarray | None,
        tokenizer: PreTrainedTokenizer | None,
        max_len: int,
        debug: bool = False,
    ):
        super().__init__()

        if tokenizer is None:
            raise ValueError("Tokenizer cannot be None")

        self.ids = ids
        self.texts = texts
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.debug = debug

    def __len__(self):
        return len(self.ids)
    
    def __getitem__(self, index):
        item_id = self.ids[index]
        text = self.texts[index]
        target = -1 if self.targets is None else self.targets[index]
        targets_available = False if target == -1 else True

        text = text.replace("\n", " ")
        text = text.replace("\t", " ")
        text = text.replace("\r", " ")

        words = [w for w in text.split(" ") if w != ""]

        if self.debug:
            print(f"Text: {text}")
            print(f"Words: {words}")
            print(f"Machine text start position: {target}")
            print()

        targets = []
        corresponding_word = []
        tokens = []
        input_ids = []
        attention_mask = []

        for idx, word in enumerate(words):
            word_encoded = self.tokenizer.tokenize(word)  # No [CLS] or [SEP]
            sub_words = len(word_encoded)

            if targets_available:
                is_machine_text = 1 if idx >= target else 0
                targets.extend([is_machine_text] * sub_words)

            corresponding_word.extend([idx] * sub_words)
            tokens.extend(word_encoded)
            input_ids.extend(self.tokenizer.convert_tokens_to_ids(word_encoded))
            attention_mask.extend([1] * sub_words)

            if self.debug:
                print(
                    f"word[{idx}]:\n"
                    f"{'':-<5}> tokens: {word_encoded} (no. of subwords: {sub_words})\n"
                    f"{'':-<5}> corresponding_word: {corresponding_word[-sub_words:]}\n"
                    f"{'':-<5}> input_ids: {input_ids[-sub_words:]}\n"
                    f"{'':-<5}> is_machine_text: {is_machine_text}"
                )

        if self.debug:
            print()

            print(f"corresponding_word: {corresponding_word}")
            print(f"tokens: {tokens}")
            print(f"input_ids: {input_ids}")
            print(f"attention_mask: {attention_mask}")

            print()

            print(f"Machine text start word: {words[corresponding_word[targets.index(1)]]}")
            print(f"True machine text start word: {words[target]}")

            print()

        if len(input_ids) < self.max_len - 2:
            if targets_available:
                targets = (
                    [-100]
                    + targets
                    + [-100] * (self.max_len - len(input_ids) - 1)
                )

            corresponding_word = (
                [-100]
                + corresponding_word
                + [-100] * (self.max_len - len(input_ids) - 1)
            )
            tokens = (
                [self.tokenizer.bos_token]
                + tokens
                + [self.tokenizer.eos_token]
                + [self.tokenizer.pad_token] * (self.max_len - len(tokens) - 2)
            )
            input_ids = (
                [self.tokenizer.bos_token_id]
                + input_ids
                + [self.tokenizer.eos_token_id]
                + [self.tokenizer.pad_token_id] * (self.max_len - len(input_ids) - 2)
            )
            attention_mask = (
                [1]
                + attention_mask
                + [1]
                + [0] * (self.max_len - len(attention_mask) - 2)
            )
        else:
            if targets_available:
                targets = [-100] + targets[: self.max_len - 2] + [-100]

            corresponding_word = (
                [-100]
                + corresponding_word[: self.max_len - 2]
                + [-100]
            )
            tokens = (
                [self.tokenizer.bos_token]
                + tokens[: self.max_len - 2]
                + [self.tokenizer.eos_token]
            )
            input_ids = (
                [self.tokenizer.bos_token_id]
                + input_ids[: self.max_len - 2]
                + [self.tokenizer.eos_token_id]
            )
            attention_mask = (
                [1]
                + attention_mask[: self.max_len - 2]
                + [1]
            )

        encoded = {}
        encoded["id"] = item_id
        encoded["text"] = text
        encoded["true_target"] = torch.tensor(target)
        encoded["corresponding_word"] = torch.tensor(corresponding_word)
        encoded["input_ids"] = torch.tensor(input_ids)
        encoded["attention_mask"] = torch.tensor(attention_mask)
        if targets_available:
            encoded["target"] = torch.tensor(targets)

        if self.debug:
            print(f"Tokenized human position: {targets.index(1)}")
            print(f"Original human position: {target}")
            print(f"Full human text: {text}\n\n")
            print(f"Human truncated text: {[w for w in text.split(' ')[:target] if w != '']}\n\n")

            encoded["partial_human_review"] = " ".join(
                [w for w in text.split(' ')[:target] if w != '']
            )

        return encoded

In [7]:
from torch.utils.data import DataLoader

tokenizer = get_tokenizer(**config["tokenizer"])

train_dataset = TokenClassificationDataset(
    ids=df_train["id"].values,
    texts=df_train["text"].values,
    targets=df_train["label"].values,
    tokenizer=tokenizer,
    max_len=config["data"]["max_len"],
    debug=False,
)
dev_dataset = TokenClassificationDataset(
    ids=df_dev["id"].values,
    texts=df_dev["text"].values,
    targets=df_dev["label"].values,
    tokenizer=tokenizer,
    max_len=config["data"]["max_len"],
    debug=False,
)
test_dataset = TokenClassificationDataset(
    ids=df_test["id"].values,
    texts=df_test["text"].values,
    targets=df_test["label"].values,
    tokenizer=tokenizer,
    max_len=config["data"]["max_len"],
    debug=False,
)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=config["data"]["batch_size"],
    shuffle=True,
)
dev_dataloader = DataLoader(
    dev_dataset,
    batch_size=config["data"]["batch_size"],
    shuffle=False,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=config["data"]["batch_size"],
    shuffle=False,
)

In [8]:
# for i, batch in enumerate(train_dataloader):
#     print(f"Batch=[{i + 1}/{len(train_dataloader)}]")
#     # break

# for i, batch in enumerate(dev_dataloader):
#     print(f"Batch=[{i + 1}/{len(dev_dataloader)}]")
#     # break

# Create Longformer model for token classification

In [9]:
import torch.nn as nn
from transformers import LongformerModel


class LongformerForTokenClassification(nn.Module):
    def __init__(
        self,
        pretrained_model_name,
        out_size,
        device,
        dropout_p=0.3,
        fc=[],
        finetune_last_transformer_layers=None,
    ):
        super().__init__()

        self.out_size = out_size
        self.device = device
        self.finetune_last_transformer_layers = finetune_last_transformer_layers

        self.longformer = LongformerModel.from_pretrained(
            pretrained_model_name, return_dict=False
        )
        self.dropout = nn.Dropout(p=dropout_p)
        # self.classifier = nn.Linear(self.longformer.config.hidden_size, out_size)
        self.classifier = sequential_fully_connected(
            self.longformer.config.hidden_size, out_size, fc, dropout_p
        )

        self.freeze_transformer_layer()

    def forward(self, input_ids, attention_mask, labels=None):
        sequence_output, _ = self.longformer(
            input_ids=input_ids, attention_mask=attention_mask
        )

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss().to(self.device)
            loss = loss_fn(logits.view(-1, self.out_size), labels.view(-1))

        return loss, logits

    def freeze_transformer_layer(self):
        for param in self.longformer.parameters():
            param.requires_grad = False

    def unfreeze_transformer_layer(self):
        if self.finetune_last_transformer_layers is not None:
            # Fine-tune only the last selected layer
            for layer in self.longformer.encoder.layer[-self.finetune_last_transformer_layers :]:
                for param in layer.parameters():
                    param.requires_grad = True
        else:
            # No fine-tuning
            pass

    def get_predictions_from_logits(self, logits, labels=None, corresponding_word=None):
        batch_size = logits.shape[0]

        # logits: (batch_size, max_seq_len, out_size)
        # labels: (batch_size, max_seq_len)
        # corresponding_word: (batch_size, max_seq_len)

        # print(f"logits.shape: {logits.shape}")
        # print(f"logits: {logits}")

        # preds: (batch_size, max_seq_len)
        preds = torch.argmax(logits, dim=-1)

        # print(f"preds.shape: {preds.shape}")
        # print(f"preds: {preds}")

        if labels is not None:
            # print(f"labels.shape: {labels.shape}")
            # print(f"labels: {labels}")

            # Keep only predictions where labels are not -100
            # clean_preds = preds[labels != -100].reshape(batch_size, -1)
            # clean_labels = labels[labels != -100].reshape(batch_size, -1)

            # print(f"clean_preds.shape: {clean_preds.shape}")
            # print(f"clean_preds: {clean_preds}")

            # print(f"clean_labels.shape: {clean_labels.shape}")
            # print(f"clean_labels: {clean_labels}")

            # Get the index of the first machine text word
            # predicted_positions = clean_preds.argmax(dim=-1)
            # true_positions = clean_labels.argmax(dim=-1)

            predicted_positions = []
            true_positions = []
            for p, l in zip(preds, labels):
                mask = l != -100

                clean_pred = p[mask]
                clean_label = l[mask]

                # print(f"clean_pred.shape: {clean_pred.shape}")
                # print(f"clean_pred: {clean_pred}")
                # print(f"clean_label.shape: {clean_label.shape}")
                # print(f"clean_label: {clean_label}")

                predicted_position = clean_pred.argmax(dim=-1)
                true_position = clean_label.argmax(dim=-1)

                # print(f"predicted_position: {predicted_position}")
                # print(f"true_position: {true_position}")

                predicted_positions.append(predicted_position.item())
                true_positions.append(true_position.item())

            # print(f"predicted_positions.shape: {predicted_positions.shape}")
            # print(f"predicted_positions: {predicted_positions}")

            # print(f"true_positions.shape: {true_positions.shape}")
            # print(f"true_positions: {true_positions}")

            # print(f"predicted_positions type: {type(predicted_positions)}")
            # print(f"true_positions type: {type(true_positions)}")

            return torch.Tensor(predicted_positions), torch.Tensor(true_positions)
        elif corresponding_word is not None:
            # print(f"corresponding_word.shape: {corresponding_word.shape}")
            # print(f"corresponding_word: {corresponding_word}")

            # Keep only predictions where corresponding_word are not -100
            # clean_preds = preds[corresponding_word != -100].reshape(
            #     batch_size, -1
            # ).detach().cpu().numpy()
            # clean_corresponding_word = corresponding_word[corresponding_word != -100].reshape(
            #     batch_size, -1
            # ).detach().cpu().numpy()

            # print(f"clean_preds.shape: {clean_preds.shape}")
            # print(f"clean_preds: {clean_preds}")

            # print(f"clean_corresponding_word.shape: {clean_corresponding_word.shape}")
            # print(f"clean_corresponding_word: {clean_corresponding_word}")

            predicted_positions = []
            for p, w in zip(preds, corresponding_word):
                mask = w != -100

                clean_pred = p[mask]
                clean_corresponding_word = w[mask]

                # print(f"clean_pred.shape: {clean_pred.shape}")
                # print(f"clean_pred: {clean_pred}")
                # print(f"clean_corresponding_word.shape: {clean_corresponding_word.shape}")
                # print(f"clean_corresponding_word: {clean_corresponding_word}")

                # Get the index of the first machine text word
                index = torch.where(clean_pred == 1)[0]
                value = index[0] if index.size else len(clean_pred) - 1
                position = clean_corresponding_word[value]

                # print(f"index: {index}")
                # print(f"value: {value}")
                # print(f"position: {position}")

                predicted_positions.append(position.item())
            #     # pred = pred.detach().cpu().numpy()

            #     index = np.where(pred == 1)[0]
            #     value = index[0] if index.size else len(pred) - 1
            #     position = clean_corresponding_word[idx][value]

            #     predicted_positions.append(position.item())

            print(f"predicted_positions: {predicted_positions}")

            return predicted_positions, None
        else:
            raise ValueError("Either labels or corresponding_word must be provided")

# Train model

In [10]:
import pandas as pd
from tqdm import tqdm
# from time import time
from collections import defaultdict


def train_epoch(
    model,
    dataloader,
    loss_fn,
    optimizer,
    device,
    scheduler,
    metric_fn,
    print_freq=10,
):
    model.train()

    losses = []

    all_predictions = []
    all_true = []
    all_ids = []

    for i, batch in enumerate(dataloader):
        ids = batch["id"]
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        targets = batch["target"].to(device)
        corresponding_word = batch["corresponding_word"].to(device)

        loss, logits = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=targets,
        )

        predictions, true_predictions = model.get_predictions_from_logits(
            logits=logits,
            labels=targets,
            corresponding_word=corresponding_word
        )

        # print(f"predictions: {predictions}")
        # print(f"true_predictions: {true_predictions}")

        losses.append(loss.item())

        all_predictions.extend(predictions.tolist())
        all_true.extend(true_predictions.tolist())
        all_ids.extend(ids)

        if i % print_freq == 0:
            print(
                f"Batch [{i + 1}/{len(dataloader)}]; "
                f"Loss: {loss.item():.5f}; "
                f"Mean absolute error: {metric_fn(true_predictions, predictions):.5f}"
            )

        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        optimizer.zero_grad()

    return np.mean(losses), (all_ids, all_true, all_predictions)


def validation_epoch(
    model,
    dataloader,
    loss_fn,
    device,
    metric_fn,
):
    model.eval()

    losses = []
    all_predictions = []
    all_true = []
    all_ids = []

    with torch.no_grad():
        for i, batch in enumerate(tqdm(dataloader)):
            ids = batch["id"]
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            targets = batch["target"].to(device)
            corresponding_word = batch["corresponding_word"].to(device)

            loss, logits = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=targets,
            )

            predictions, true_predictions = model.get_predictions_from_logits(
                logits=logits,
                labels=targets,
                corresponding_word=corresponding_word
            )

            losses.append(loss.item())

            all_predictions.extend(predictions.tolist())
            all_true.extend(true_predictions.tolist())
            all_ids.extend(ids)

    return np.mean(losses), (all_ids, all_true, all_predictions)


def training_loop(
    model,
    num_epochs,
    train_dataloader,
    dev_dataloader,
    loss_fn,
    optimizer_config,
    scheduler_config,
    device,
    metric_fn,
    is_better_metric_fn,
    num_epochs_before_finetune,
    results_dir,
):
    history = defaultdict(list)
    best_metric = None
    best_model_state = None

    optimizer = get_optimizer(model, optimizer_config, finetune=False)
    scheduler = None

    for epoch in range(1, num_epochs + 1):
        print(f"Epoch {epoch}/{num_epochs}")
        if epoch <= num_epochs_before_finetune:
            print("Freeze transformer")
        else:
            print("Finetune transformer")
        print("-" * 10)

        if epoch == num_epochs_before_finetune + 1:
            model.unfreeze_transformer_layer()
            optimizer = get_optimizer(model, optimizer_config, finetune=True)
            scheduler = get_scheduler(
                optimizer,
                num_training_steps=len(train_dataloader) * num_epochs,
                **scheduler_config,
            )

        train_loss, (train_ids, train_true, train_predict) = train_epoch(
            model,
            train_dataloader,
            loss_fn,
            optimizer,
            device,
            scheduler,
            metric_fn,
        )

        train_metric = metric_fn(train_true, train_predict)

        print(f"Train Loss: {train_loss:.5f}; Train Metric: {train_metric:.5f}")

        dev_loss, (dev_ids, dev_true, dev_predict) = validation_epoch(
            model,
            dev_dataloader,
            loss_fn,
            device,
            metric_fn,
        )

        dev_metric = metric_fn(dev_true, dev_predict)

        print(
            f"Validation Loss: {dev_loss:.5f}; "
            f"Validation Metric: {dev_metric:.5f}"
        )

        history["train_metric"].append(train_metric)
        history["train_loss"].append(train_loss)
        history["dev_metric"].append(dev_metric)
        history["dev_loss"].append(dev_loss)

        if best_metric is None or is_better_metric_fn(train_metric, best_metric):
            best_metric = train_metric
            best_model_state = model.state_dict()
            
            if results_dir is not None:
                torch.save(
                    best_model_state,
                    os.path.join(results_dir, "best_model.bin"),
                )

                df_train_predictions = pd.DataFrame(
                    {
                        "id": train_ids,
                        "true": train_true,
                        "predict": train_predict,
                    }
                )
                df_train_predictions.to_csv(
                    os.path.join(results_dir, "best_model_train_predict.csv"),
                    index=False
                )

                df_dev_predictions = pd.DataFrame(
                    {
                        "id": dev_ids,
                        "true": dev_true,
                        "predict": dev_predict,
                    }
                )
                df_dev_predictions.to_csv(
                    os.path.join(results_dir, "best_model_dev_predict.csv"),
                    index=False
                )

    df_history = pd.DataFrame(history)
    if results_dir is not None:
        df_history.to_csv(os.path.join(results_dir, "history.csv"), index=False)

        model.load_state_dict(torch.load(os.path.join(results_dir, "best_model.bin")))
    else:
        model.load_state_dict(best_model_state)

    return model, df_history

In [11]:
import torch

torch.cuda.empty_cache()

In [12]:
num_epochs = config["training"]["num_epochs"]
model = LongformerForTokenClassification(
    device=DEVICE, **config["model_config"]
).to(DEVICE)
loss_fn = get_loss_fn(config["training"]["loss"], DEVICE)
optimizer_config = config["training"]["optimizer"]
scheduler_config = config["training"]["scheduler"]
metric_fn, is_better_metric_fn = get_metric(config["training"]["metric"])
num_epochs_before_finetune = config["training"]["num_epochs_before_finetune"]

best_model, df_history = training_loop(
    model,
    num_epochs,
    train_dataloader,
    dev_dataloader,
    loss_fn,
    optimizer_config,
    scheduler_config,
    DEVICE,
    metric_fn,
    is_better_metric_fn,
    num_epochs_before_finetune,
    results_dir=results_dir,  # None
)

Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerModel: ['lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.76302; Mean absolute error: 82.15625
Batch [11/92]; Loss: 0.46320; Mean absolute error: 115.59375
Batch [21/92]; Loss: 0.34986; Mean absolute error: 82.84375
Batch [31/92]; Loss: 0.37145; Mean absolute error: 100.75000
Batch [41/92]; Loss: 0.26759; Mean absolute error: 37.84375
Batch [51/92]; Loss: 0.21389; Mean absolute error: 30.50000
Batch [61/92]; Loss: 0.19438; Mean absolute error: 47.50000
Batch [71/92]; Loss: 0.17723; Mean absolute error: 40.12500
Batch [81/92]; Loss: 0.22333; Mean absolute error: 35.50000
Batch [91/92]; Loss: 0.22262; Mean absolute error: 62.15625
Train Loss: 0.31289; Train Metric: 62.22097


100%|██████████| 23/23 [00:34<00:00,  1.50s/it]


Validation Loss: 0.21862; Validation Metric: 56.70274
Epoch 2/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.17524; Mean absolute error: 49.00000
Batch [11/92]; Loss: 0.13358; Mean absolute error: 44.43750
Batch [21/92]; Loss: 0.16321; Mean absolute error: 48.03125
Batch [31/92]; Loss: 0.15178; Mean absolute error: 13.09375
Batch [41/92]; Loss: 0.17794; Mean absolute error: 49.84375
Batch [51/92]; Loss: 0.14278; Mean absolute error: 30.06250
Batch [61/92]; Loss: 0.13054; Mean absolute error: 35.62500
Batch [71/92]; Loss: 0.16738; Mean absolute error: 34.53125
Batch [81/92]; Loss: 0.14050; Mean absolute error: 20.96875
Batch [91/92]; Loss: 0.12654; Mean absolute error: 27.43750
Train Loss: 0.16130; Train Metric: 36.77629


100%|██████████| 23/23 [00:33<00:00,  1.48s/it]


Validation Loss: 0.19880; Validation Metric: 54.62055
Epoch 3/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.27001; Mean absolute error: 53.65625
Batch [11/92]; Loss: 0.13337; Mean absolute error: 18.46875
Batch [21/92]; Loss: 0.11059; Mean absolute error: 26.68750
Batch [31/92]; Loss: 0.15157; Mean absolute error: 39.84375
Batch [41/92]; Loss: 0.16813; Mean absolute error: 39.56250
Batch [51/92]; Loss: 0.12590; Mean absolute error: 25.06250
Batch [61/92]; Loss: 0.11788; Mean absolute error: 20.12500
Batch [71/92]; Loss: 0.12131; Mean absolute error: 21.43750
Batch [81/92]; Loss: 0.09903; Mean absolute error: 31.59375
Batch [91/92]; Loss: 0.10665; Mean absolute error: 28.93750
Train Loss: 0.12589; Train Metric: 31.53409


100%|██████████| 23/23 [00:34<00:00,  1.49s/it]


Validation Loss: 0.14984; Validation Metric: 36.58356
Epoch 4/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.08819; Mean absolute error: 29.40625
Batch [11/92]; Loss: 0.13180; Mean absolute error: 38.46875
Batch [21/92]; Loss: 0.11297; Mean absolute error: 38.31250
Batch [31/92]; Loss: 0.09610; Mean absolute error: 35.62500
Batch [41/92]; Loss: 0.10150; Mean absolute error: 48.43750
Batch [51/92]; Loss: 0.07691; Mean absolute error: 13.03125
Batch [61/92]; Loss: 0.11810; Mean absolute error: 28.06250
Batch [71/92]; Loss: 0.09224; Mean absolute error: 26.34375
Batch [81/92]; Loss: 0.09174; Mean absolute error: 51.37500
Batch [91/92]; Loss: 0.14927; Mean absolute error: 35.43750
Train Loss: 0.10615; Train Metric: 27.58890


100%|██████████| 23/23 [00:34<00:00,  1.48s/it]


Validation Loss: 0.17924; Validation Metric: 43.50822
Epoch 5/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.08286; Mean absolute error: 22.12500
Batch [11/92]; Loss: 0.10223; Mean absolute error: 34.68750
Batch [21/92]; Loss: 0.07683; Mean absolute error: 30.78125
Batch [31/92]; Loss: 0.09310; Mean absolute error: 24.75000
Batch [41/92]; Loss: 0.06797; Mean absolute error: 19.62500
Batch [51/92]; Loss: 0.08766; Mean absolute error: 15.31250
Batch [61/92]; Loss: 0.09032; Mean absolute error: 15.53125
Batch [71/92]; Loss: 0.08137; Mean absolute error: 18.71875
Batch [81/92]; Loss: 0.07043; Mean absolute error: 20.56250
Batch [91/92]; Loss: 0.09951; Mean absolute error: 37.50000
Train Loss: 0.08908; Train Metric: 24.55807


100%|██████████| 23/23 [00:33<00:00,  1.48s/it]


Validation Loss: 0.15607; Validation Metric: 42.32603
Epoch 6/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.06328; Mean absolute error: 33.40625
Batch [11/92]; Loss: 0.06895; Mean absolute error: 10.87500
Batch [21/92]; Loss: 0.07522; Mean absolute error: 19.28125
Batch [31/92]; Loss: 0.07822; Mean absolute error: 32.53125
Batch [41/92]; Loss: 0.08720; Mean absolute error: 28.96875
Batch [51/92]; Loss: 0.05649; Mean absolute error: 16.84375
Batch [61/92]; Loss: 0.08846; Mean absolute error: 22.43750
Batch [71/92]; Loss: 0.08206; Mean absolute error: 18.96875
Batch [81/92]; Loss: 0.08086; Mean absolute error: 22.12500
Batch [91/92]; Loss: 0.06019; Mean absolute error: 10.78125
Train Loss: 0.08154; Train Metric: 23.18876


100%|██████████| 23/23 [00:33<00:00,  1.48s/it]


Validation Loss: 0.19502; Validation Metric: 48.83014
Epoch 7/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.07055; Mean absolute error: 17.31250
Batch [11/92]; Loss: 0.08775; Mean absolute error: 19.93750
Batch [21/92]; Loss: 0.06973; Mean absolute error: 19.93750
Batch [31/92]; Loss: 0.06682; Mean absolute error: 13.90625
Batch [41/92]; Loss: 0.18191; Mean absolute error: 23.75000
Batch [51/92]; Loss: 0.06371; Mean absolute error: 17.18750
Batch [61/92]; Loss: 0.07259; Mean absolute error: 22.40625
Batch [71/92]; Loss: 0.06994; Mean absolute error: 24.96875
Batch [81/92]; Loss: 0.06209; Mean absolute error: 21.15625
Batch [91/92]; Loss: 0.09621; Mean absolute error: 38.28125
Train Loss: 0.07174; Train Metric: 21.18911


100%|██████████| 23/23 [00:33<00:00,  1.47s/it]


Validation Loss: 0.16522; Validation Metric: 41.18356
Epoch 8/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.04836; Mean absolute error: 22.03125
Batch [11/92]; Loss: 0.07243; Mean absolute error: 30.62500
Batch [21/92]; Loss: 0.06197; Mean absolute error: 15.62500
Batch [31/92]; Loss: 0.06687; Mean absolute error: 26.43750
Batch [41/92]; Loss: 0.06365; Mean absolute error: 18.75000
Batch [51/92]; Loss: 0.07276; Mean absolute error: 19.09375
Batch [61/92]; Loss: 0.07056; Mean absolute error: 41.68750
Batch [71/92]; Loss: 0.07366; Mean absolute error: 25.37500
Batch [81/92]; Loss: 0.07042; Mean absolute error: 58.43750
Batch [91/92]; Loss: 0.06648; Mean absolute error: 12.28125
Train Loss: 0.06498; Train Metric: 20.25180


100%|██████████| 23/23 [00:33<00:00,  1.47s/it]


Validation Loss: 0.20087; Validation Metric: 46.74521
Epoch 9/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.06884; Mean absolute error: 23.93750
Batch [11/92]; Loss: 0.06013; Mean absolute error: 20.25000
Batch [21/92]; Loss: 0.04884; Mean absolute error: 28.21875
Batch [31/92]; Loss: 0.04481; Mean absolute error: 14.75000
Batch [41/92]; Loss: 0.05929; Mean absolute error: 17.59375
Batch [51/92]; Loss: 0.06312; Mean absolute error: 24.87500
Batch [61/92]; Loss: 0.05826; Mean absolute error: 31.71875
Batch [71/92]; Loss: 0.05898; Mean absolute error: 14.50000
Batch [81/92]; Loss: 0.05769; Mean absolute error: 21.00000
Batch [91/92]; Loss: 0.05935; Mean absolute error: 18.56250
Train Loss: 0.05953; Train Metric: 19.67729


100%|██████████| 23/23 [00:33<00:00,  1.48s/it]


Validation Loss: 0.17147; Validation Metric: 41.07260
Epoch 10/10
Finetune transformer
----------
Batch [1/92]; Loss: 0.05266; Mean absolute error: 18.00000
Batch [11/92]; Loss: 0.05244; Mean absolute error: 8.78125
Batch [21/92]; Loss: 0.05830; Mean absolute error: 32.81250
Batch [31/92]; Loss: 0.04644; Mean absolute error: 18.50000
Batch [41/92]; Loss: 0.05483; Mean absolute error: 19.65625
Batch [51/92]; Loss: 0.04915; Mean absolute error: 19.15625
Batch [61/92]; Loss: 0.05381; Mean absolute error: 15.78125
Batch [71/92]; Loss: 0.05651; Mean absolute error: 15.43750
Batch [81/92]; Loss: 0.05259; Mean absolute error: 13.65625
Batch [91/92]; Loss: 0.04883; Mean absolute error: 11.46875
Train Loss: 0.05475; Train Metric: 18.26584


100%|██████████| 23/23 [00:33<00:00,  1.47s/it]


Validation Loss: 0.16555; Validation Metric: 39.06986


# Make predictions

In [13]:
import pandas as pd


def make_predictions(
    model,
    dataloader,
    device,
    results_dir,
    label_column,
    file_format="csv",
):
    model.eval()

    all_predictions = []
    all_true = []
    all_ids = []

    with torch.no_grad():
        for i, batch in enumerate(tqdm(dataloader)):
            ids = batch["id"]
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            targets = batch["target"].to(device)
            corresponding_word = batch["corresponding_word"].to(device)

            _, logits = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=targets,
            )

            predictions, true_predictions = model.get_predictions_from_logits(
                logits=logits,
                labels=targets,
                corresponding_word=corresponding_word
            )

            all_predictions.extend(predictions.tolist())
            all_true.extend(true_predictions.tolist())
            all_ids.extend(ids)

    df_predictions = pd.DataFrame(
        {
            "id": all_ids,
            "true": all_true,
            label_column: all_predictions,
        }
    )

    if results_dir is not None:
        if file_format == "csv":
            df_predictions.to_csv(
                os.path.join(results_dir, "submission.csv"),
                index=False,
            )
        elif file_format == "jsonl":
            df_predictions.to_json(
                os.path.join(results_dir, "submission.jsonl"),
                orient="records",
                lines=True,
            )
        else:
            raise ValueError(f"Unknown file format: {file_format}")
    else:
        print("Missing results_dir, not saving predictions to file!")

    return df_predictions

In [14]:
predictions = make_predictions(
    best_model,
    test_dataloader,
    DEVICE,
    results_dir,
    config["data"]["label_column"],
    file_format="csv",
)

100%|██████████| 16/16 [00:23<00:00,  1.45s/it]


In [16]:
!python ../scores_and_plots.py --results-dir "../runs/29-12-2023_13:52:52-SubtaskC-longformer"

Results on validation
MAE: 39.06986
--------------------
Results on test
MAE: 34.36832
--------------------
