### Main Code Body

You may choose to experiment with different methods in your program. However, you need to embed the training and inference processes here. We will use your predictions on the unlabeled test data for grading, and we will check this part to understand how your method produced those predictions.

In [28]:
# from https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py

import numpy as np
import torch

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience.

    Call the instance once per epoch with the current validation loss. Every
    time the loss improves, the model's ``state_dict`` is written to the
    checkpoint path given in that call. After ``patience`` consecutive calls
    without improvement, ``early_stop`` is set to True.
    """

    def __init__(self, patience=7, verbose=False, delta=0, trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0  # consecutive calls without improvement
        self.best_score = None  # best value of -val_loss seen so far
        self.early_stop = False
        # np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta
        self.trace_func = trace_func

    def __call__(self, val_loss, model, checkpointPath):
        """Record one validation loss and checkpoint the model if it improved.

        Args:
            val_loss: Validation loss of the current epoch (lower is better).
            model: Object exposing ``state_dict()``; saved on improvement.
            checkpointPath: Destination passed to ``torch.save``.
        """
        score = -val_loss

        # NaN is the only value that differs from itself. A NaN loss must never
        # count as an improvement: stored as best_score it would make every
        # later comparison False and early stopping would never trigger.
        is_nan = score != score
        improved = not is_nan and (
            self.best_score is None or not score < self.best_score + self.delta
        )

        if improved:
            self.best_score = score
            self.save_checkpoint(val_loss, model, checkpointPath)
            self.counter = 0
            return

        self.counter += 1
        self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model, savePath):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), savePath)
        self.val_loss_min = val_loss

In [29]:
from collections import namedtuple
from itertools import product
from pathlib import Path
import time
import torch
from torch.utils.tensorboard import SummaryWriter
import pandas as pd


class RunBuilder:
    """Expand a mapping of option lists into one ``Run`` tuple per combination."""

    def __init__(self, params) -> None:
        # params: mapping of hyperparameter name -> iterable of candidate values.
        self.runs = self._get_runs(params)

    def __len__(self):
        return len(self.runs)

    def _get_runs(self, params):
        """Return the Cartesian product of all option lists as ``Run`` namedtuples.

        The namedtuple fields are the keys of ``params``, in mapping order.
        """
        run_type = namedtuple("Run", params.keys())
        return [run_type(*combination) for combination in product(*params.values())]


class RunManager:
    """Book-keeping for training runs: metrics, TensorBoard, CSV statistics, checkpoints.

    Call order for one run:
    ``begin_run`` -> (``begin_epoch`` -> ``track_*`` per batch -> ``end_epoch``)*
    -> ``end_run``.
    """

    # Single-pass equivalent of the character clean-up done on run names.
    _SANITIZE_TABLE = str.maketrans(
        {
            "(": None,
            ")": None,
            ",": "_",
            " ": "_",
            "<": None,
            ">": None,
            "'": None,
            ":": None,
        }
    )

    # Result keys that saveModel encodes into the model file name.
    _MODEL_FILE_KEYS = frozenset(
        (
            "run",
            "epoch",
            "model name",
            "train loss",
            "valid loss",
            "train accuracy",
            "valid accuracy",
        )
    )

    def __init__(self, statsFolderPath, statsFileName, earlyStop=True):
        """
        Args:
            statsFolderPath: Folder for the statistics CSV and ``error.txt``;
                created (with parents) if it does not exist.
            statsFileName: Name of the statistics file without the ``.csv`` suffix.
            earlyStop (bool): If True, ``end_epoch`` feeds the validation loss to
                the early-stopping helper and sets ``self.stop`` when it triggers.
        """
        # Per-epoch accumulators; reset in begin_epoch.
        self.epoch_count = 0
        self.epoch_train_loss = 0
        self.epoch_valid_loss = 0
        self.epoch_numTrain_correct = 0
        self.epoch_numValid_correct = 0
        self.epoch_start_time = None

        # Per-run state. run_data keeps one result dict per finished epoch and
        # is never cleared, so it accumulates across runs.
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        self.model = None
        self.train_loader = None
        self.valid_loader = None
        self.tb = None

        self.useEarlyStop = earlyStop
        self.earlyStop = None
        self.stop = False

        self.statsFolderPath = Path(statsFolderPath)
        self.statsFolderPath.mkdir(exist_ok=True, parents=True)
        self.statsFileCSV = Path(self.statsFolderPath, f"{statsFileName}.csv")
        self.errorPath = Path(statsFolderPath, "error.txt")
        # Restart the error file. write_text closes the handle again, unlike a
        # bare open(..., "w") whose handle is left to the garbage collector.
        self.errorPath.write_text("")

    def begin_run(self, run, model, trainLoader, validLoader):
        """Start a run: remember its parameters and open a TensorBoard writer.

        Args:
            run: ``Run`` namedtuple holding the hyperparameters of this run.
            model: Model whose graph, parameters and checkpoints are recorded.
            trainLoader: Training data loader.
            validLoader: Validation data loader.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.model = model
        self.train_loader = trainLoader
        self.valid_loader = validLoader

        # The model object is replaced by its class name in the run description.
        described = {
            k: v if k != 'model' else v.__class__.__name__
            for k, v in run._asdict().items()
        }
        comment = f"-{described}"
        self.tb = SummaryWriter(comment=self.sanitize_param_name(comment))

        # The graph is only recorded for the first run.
        if self.run_count == 1:
            sample_inputs = next(iter(self.train_loader))[0].to(self.model.device)
            self.tb.add_graph(self.model, sample_inputs, use_strict_trace=False)

        self.earlyStop = EarlyStopping()
        self.stop = False

    def end_run(self, savePath="", save=False):
        """Close the TensorBoard writer, flush statistics to CSV, optionally save the model.

        Args:
            savePath: Folder passed to ``saveModel`` when ``save`` is True.
            save (bool): If True, save the model named after the last epoch's results.
        """
        self.tb.close()
        self.epoch_count = 0
        self.writeToCSV()
        if save:
            self.saveModel(savePath, result=self.run_data[-1])

    def begin_epoch(self):
        """Start the epoch timer and reset the per-epoch accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_train_loss = 0
        self.epoch_numTrain_correct = 0
        self.epoch_valid_loss = 0
        self.epoch_numValid_correct = 0

    def end_epoch(self, checkptFolderPath):
        """Turn the epoch's accumulators into metrics, log them and check early stopping.

        Args:
            checkptFolderPath: Folder in which the early-stopping checkpoint is written.
        """
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        num_train = len(self.train_loader.dataset)
        num_valid = len(self.valid_loader.dataset)

        train_loss = self.epoch_train_loss / num_train
        train_accuracy = self.epoch_numTrain_correct / num_train

        valid_loss = self.epoch_valid_loss / num_valid
        valid_accuracy = self.epoch_numValid_correct / num_valid

        self.tb.add_scalars(
            "Loss", {"trainLoss": train_loss, "validLoss": valid_loss}, self.epoch_count
        )
        self.tb.add_scalars(
            "Accuracy",
            {"trainAcc": train_accuracy, "validAcc": valid_accuracy},
            self.epoch_count,
        )

        for name, param in self.model.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # Identity test: comparing a tensor to None with != is not a None check.
            if param.grad is not None:
                self.tb.add_histogram(f"{name}.grad", param.grad, self.epoch_count)

        results = {}
        results["run"] = self.run_count
        results['model name'] = self.run_params.model.__class__.__name__
        results["epoch"] = self.epoch_count
        results["train loss"] = train_loss
        results["valid loss"] = valid_loss
        results["train accuracy"] = train_accuracy
        results["valid accuracy"] = valid_accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        # Store the hyperparameters next to the metrics so every row of the
        # report is self-describing (the model object itself is left out).
        for k, v in self.run_params._asdict().items():
            if k != 'model':
                results[k] = v
        self.run_data.append(results)

        if self.useEarlyStop:
            self.checkEarlyStop(
                valid_loss, self.model, checkptFolderPath, self.run_count
            )
            if self.earlyStop.early_stop:
                self.stop = True

    def track_train_loss(self, loss, batch_size=None):
        """Add one training batch's loss, weighted by sample count, to the epoch total.

        ``end_epoch`` divides the total by the dataset size, which presumes
        ``loss`` is the mean over the batch (TODO confirm against the
        criterion's reduction).

        Args:
            loss: Scalar tensor (anything exposing ``.item()``).
            batch_size (int, optional): Actual number of samples in the batch.
                Defaults to the loader's nominal ``batch_size``, which
                over-weights a smaller final batch.
        """
        if batch_size is None:
            batch_size = self.train_loader.batch_size
        self.epoch_train_loss += loss.item() * batch_size

    def track_valid_loss(self, loss, batch_size=None):
        """Validation counterpart of ``track_train_loss``; same arguments."""
        if batch_size is None:
            batch_size = self.valid_loader.batch_size
        self.epoch_valid_loss += loss.item() * batch_size

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of ``preds`` whose argmax along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def track_numTrain_correct(self, preds, labels):
        """Add the number of correct training predictions in this batch."""
        self.epoch_numTrain_correct += self._get_num_correct(preds, labels)

    def track_numValid_correct(self, preds, labels):
        """Add the number of correct validation predictions in this batch."""
        self.epoch_numValid_correct += self._get_num_correct(preds, labels)

    def checkEarlyStop(self, valLoss, model, folderPath, run):
        """Feed ``valLoss`` to the early-stopping helper.

        The checkpoint goes to ``<folderPath>/earlyStop_run_<run>.pt``; the
        folder is created if needed.
        """
        fPath = Path(folderPath)
        fPath.mkdir(exist_ok=True, parents=True)
        filePath = fPath / f"earlyStop_run_{run}.pt"

        self.earlyStop(valLoss, model, filePath)

    def saveModel(self, pathName, result):
        """Save ``self.model``'s state dict under a name derived from ``result``.

        Args:
            pathName: Destination folder; created if needed.
            result (dict): Epoch results. Run number, model name, epoch, losses
                and accuracies are encoded into the file name in dict order.
        """
        moduleName = "".join(
            f"_{k}:{v}" for k, v in result.items() if k in self._MODEL_FILE_KEYS
        )
        folderPath = Path(pathName)
        folderPath.mkdir(exist_ok=True, parents=True)
        filePath = folderPath / f"{self.sanitize_param_name(moduleName)}.pt"
        torch.save(self.model.state_dict(), filePath)

    def writeError(self, msg=""):
        """Append an error record for the current run to the error file."""
        with open(self.errorPath, "a") as f:
            # Trailing newline so consecutive records do not run together.
            f.write(
                f"Error at runs: {self.run_count}\nParameters: {self.run_params}\nAdditional msg: {msg}\n"
            )

    def writeToCSV(self):
        """Write the collected statistics to the CSV file.

        If the file does not exist yet, every entry of ``run_data`` is written.
        If it exists, only the most recent entry is appended to its contents.
        """
        # NOTE(review): once the file exists only the last epoch of a run is
        # appended, while the first call writes every epoch. Kept as-is;
        # confirm whether per-epoch rows are wanted for later runs too.
        try:
            oldStatsDF = pd.read_csv(self.statsFileCSV)
        except FileNotFoundError:
            statsDF = pd.DataFrame(self.run_data)
        else:
            statsDF = pd.concat([oldStatsDF, pd.DataFrame([self.run_data[-1]])])

        try:
            # "w" creates the file if needed and replaces its previous content.
            with open(self.statsFileCSV, "w", newline="") as f:
                statsDF.to_csv(f, index=False)
        except Exception as e:
            # Best effort: a failed statistics dump must not abort training.
            print(f"Error in writeToCSV: {e}")
            self.writeError(f"Error in writeToCSV: {e}")

    def sanitize_param_name(self, param_name):
        """Strip ``( ) < > ' :`` from ``param_name`` and turn commas/spaces into ``_``."""
        return param_name.translate(self._SANITIZE_TABLE)


In [30]:
from typing import List, Tuple
from tqdm import tqdm
from transformers import AutoTokenizer
import torch
import pandas as pd
import os

# Maybe incorporate loading into embedding

# Folder the pnli_<split>.csv files are read from.
DATAFOLDER = "../data"
# Folder in which the tokenized splits are cached as pnli_<split>_tokenized.pt.
TOKEN_FOLDER = "./tokenized"


class PreconditionStatementDataset(torch.utils.data.Dataset):
    """Tokenized precondition/statement pairs for one split of the PNLI data.

    The split is read from ``DATAFOLDER/pnli_<type>.csv`` (no header row).
    Token ids are cached in ``TOKEN_FOLDER/pnli_<type>_tokenized.pt`` and
    reused on later instantiations. The ``"test_unlabeled"`` split has no
    label column, so its items are token tensors only.
    """

    # Id of RoBERTa's <pad> token. Id 0 is <s>, so padding with zeros would
    # append sequence-start tokens instead of padding.
    PAD_TOKEN_ID = 1

    def __init__(self, type="train") -> None:
        """
        Args:
            type (str): Split name used in the CSV and cache file names.
        """
        # `type` shadows the builtin; the name is kept because it is the
        # public keyword argument of this class.
        self.data = pd.read_csv(DATAFOLDER + f"/pnli_{type}.csv", header=None)
        self.dataset_type: str = type
        # Labelled splits hold (token_ids, label) tuples; the unlabeled split
        # holds plain token-id lists.
        self.tokenized_data: list = []

        tokenized_file = f"{TOKEN_FOLDER}/pnli_{self.dataset_type}_tokenized.pt"

        if os.path.exists(tokenized_file):
            # NOTE: torch.load unpickles the file; only load caches written by
            # tokenize_and_save.
            self.tokenized_data = torch.load(tokenized_file)
        else:
            self.tokenized_data = self.tokenize_and_save(tokenized_file, type)

    def __len__(self):
        return len(self.tokenized_data)

    def __getitem__(self, idx):
        """Return ``tokens`` for the unlabeled split, else ``(tokens, label)``, as tensors."""
        if self.dataset_type == "test_unlabeled":
            tokens = self.tokenized_data[idx]
            return torch.tensor(tokens)
        tokens, label = self.tokenized_data[idx]
        return torch.tensor(tokens), torch.tensor(label)

    def tokenize_and_save(self, tokenize_file, type) -> list:
        """Tokenize every row of ``self.data``, cache the result and return it.

        Args:
            tokenize_file: Path of the cache file to write.
            type (str): Split name; ``"test_unlabeled"`` rows carry no label.
        """
        tokenized_data = []
        tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large-mnli")
        for _, row in tqdm(
            self.data.iterrows(), "Tokenizing data", total=self.data.shape[0]
        ):
            if type == "test_unlabeled":
                precondition, statement = row
                label = None
            else:
                precondition, statement, label = row
            # truncation=True caps a pair at the tokenizer's maximum length so
            # an unusually long row cannot exceed what the model accepts.
            token_ids = tokenizer.encode(
                str(precondition), str(statement), truncation=True
            )
            if label is None:
                tokenized_data.append(token_ids)
            else:
                tokenized_data.append((token_ids, int(label)))

        # torch.save does not create missing folders.
        os.makedirs(os.path.dirname(tokenize_file) or ".", exist_ok=True)
        torch.save(tokenized_data, tokenize_file)
        return tokenized_data

    def _pad_and_stack(self, token_tensors):
        """Right-pad 1-D token tensors with ``PAD_TOKEN_ID`` to equal length and stack them."""
        longest = max(len(tokens) for tokens in token_tensors)
        padded = [
            torch.nn.functional.pad(
                tokens, (0, longest - len(tokens)), value=self.PAD_TOKEN_ID
            )
            for tokens in token_tensors
        ]
        return torch.stack(padded)

    def custom_collate_fn(self, batch):
        """Collate items of varying length into one padded batch.

        Returns a ``(batch, max_len)`` token tensor for the unlabeled split,
        otherwise that tensor together with a 1-D label tensor.
        """
        if self.dataset_type == "test_unlabeled":
            return self._pad_and_stack(list(batch))
        tokens_batch, labels_batch = zip(*batch)
        return self._pad_and_stack(tokens_batch), torch.tensor(labels_batch)

In [31]:
from collections import OrderedDict
import time
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import trange
from transformers import RobertaForSequenceClassification, get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score

In [32]:
# Output folders and a timestamp that names this session's statistics file.
SAVE_MODEL_PATH = "./savedModels"
STATISTIC_PATH = "./savedStatistics"
TIMESTAMP = time.strftime("%Y%m%d-%H%M%S")

# Hyperparameter grid: every key maps to a list of candidate values, and one
# run is generated per combination of values.
params = OrderedDict(
    [
        ("epoch", [100]),
        ("lr", [1e-6]),
        (
            "model",
            [
                RobertaForSequenceClassification.from_pretrained(
                    "FacebookAI/roberta-large-mnli",
                    num_labels=2,
                    ignore_mismatched_sizes=True,
                )
            ],
        ),
        ("optim", [torch.optim.Adam]),
        ("criterion", [torch.nn.CrossEntropyLoss]),
        ("batch_size", [32]),
        ("num_worker", [0]),
        ("num_layers_to_unfreeze", [6]),
    ]
)


class Train:
    """Fine-tune RoBERTa on the train split, validate on the dev split, predict the test split."""

    def freeze_roberta_layers_modified_layers(self, model, num_layers_to_unfreeze=0):
        """Freeze the embeddings and all but the last encoder layers of ``model``.

        Frozen modules are also put into eval mode so their dropout is
        disabled. The last ``num_layers_to_unfreeze`` encoder layers and the
        classifier head are made trainable.

        Args:
            model: Model exposing ``roberta.embeddings``,
                ``roberta.encoder.layer`` and ``classifier``.
            num_layers_to_unfreeze (int): Number of trailing encoder layers to train.
        """
        # Freeze the embedding layer.
        for param in model.roberta.embeddings.parameters():
            param.requires_grad = False
        model.roberta.embeddings.eval()

        # Freeze the leading encoder layers; keep the trailing ones trainable.
        encoder_layers = model.roberta.encoder.layer
        first_trainable = len(encoder_layers) - num_layers_to_unfreeze
        for i, layer in enumerate(encoder_layers):
            if i < first_trainable:
                for param in layer.parameters():
                    param.requires_grad = False
                layer.eval()
            else:
                # requires_grad must be set on the parameters; assigning it on
                # the module only creates an unrelated attribute.
                for param in layer.parameters():
                    param.requires_grad = True

        # The classifier head is always trained.
        for param in model.classifier.parameters():
            param.requires_grad = True

    def __init__(self, params, trainData: Dataset, devData: Dataset) -> None:
        """
        Args:
            params: Hyperparameter grid; one run is executed per combination.
            trainData: Training dataset; must provide ``custom_collate_fn``.
            devData: Validation dataset; must provide ``custom_collate_fn``.
        """
        self.manager = RunManager(STATISTIC_PATH, TIMESTAMP)
        self.runBuilder = RunBuilder(params)
        self.trainData = trainData
        self.devData = devData
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Mixed precision only applies on CUDA; disabling it explicitly on CPU
        # avoids the warnings the AMP helpers emit there.
        self.use_amp = self.device.type == "cuda"
        self.scaler = torch.cuda.amp.GradScaler(enabled=self.use_amp)
        self.model = RobertaForSequenceClassification.from_pretrained(
            "FacebookAI/roberta-large-mnli", num_labels=2, ignore_mismatched_sizes=True
        )

    @staticmethod
    def _forward_logits(model, inputs):
        """Run ``model`` on a batch of token ids and return the logits.

        Positions holding the model's pad token are masked out of attention.
        A batch without pad tokens gets an all-ones mask, which is what the
        model uses when no mask is passed.
        """
        pad_id = getattr(model.config, "pad_token_id", None)
        if pad_id is None:
            return model(inputs).logits
        attention_mask = inputs.ne(pad_id).long()
        return model(inputs, attention_mask=attention_mask).logits

    def run(self):
        """Train and validate once per hyperparameter combination."""
        for run in self.runBuilder.runs:
            # NOTE(review): every run continues training the same self.model;
            # reload the weights per run if independent runs are wanted.
            model = self.model.to(self.device)
            train_loader = DataLoader(
                self.trainData,
                batch_size=run.batch_size,
                num_workers=run.num_worker,
                collate_fn=self.trainData.custom_collate_fn,
                shuffle=True,
            )
            dev_loader = DataLoader(
                self.devData,
                batch_size=run.batch_size,
                num_workers=run.num_worker,
                collate_fn=self.devData.custom_collate_fn,
                shuffle=False,
            )
            criterion = run.criterion()
            optimizer = run.optim(
                model.parameters(),
                lr=run.lr,
                betas=(0.9, 0.98),
                eps=1e-6,
                weight_decay=0.01,
            )

            # Linear warm-up over the first 10% of steps, then linear decay.
            total_steps = len(train_loader) * run.epoch
            warmup_steps = int(0.1 * total_steps)
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
            )

            # Hand the manager the model that is actually trained, so that
            # histograms, early-stopping checkpoints and the saved model
            # contain the fine-tuned weights rather than an untouched copy.
            self.manager.begin_run(run, model, train_loader, dev_loader)
            for _ in trange(run.epoch, desc="Epoch progress"):
                self.manager.begin_epoch()
                # train() must come first: freezing puts the frozen sub-modules
                # back into eval mode afterwards.
                model.train()
                self.freeze_roberta_layers_modified_layers(
                    model, run.num_layers_to_unfreeze
                )
                for inputs, labels in train_loader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    optimizer.zero_grad()
                    with torch.cuda.amp.autocast(enabled=self.use_amp):
                        outputs = self._forward_logits(model, inputs)
                        loss = criterion(outputs, labels)
                    self.scaler.scale(loss).backward()
                    self.scaler.step(optimizer)
                    self.scaler.update()
                    scheduler.step()
                    self.manager.track_train_loss(loss)
                    self.manager.track_numTrain_correct(outputs, labels)

                model.eval()
                with torch.no_grad():
                    for dev_inputs, dev_labels in dev_loader:
                        dev_inputs, dev_labels = (
                            dev_inputs.to(self.device),
                            dev_labels.to(self.device),
                        )
                        with torch.cuda.amp.autocast(enabled=self.use_amp):
                            dev_outputs = self._forward_logits(model, dev_inputs)
                            dev_loss = criterion(dev_outputs, dev_labels)
                        self.manager.track_valid_loss(dev_loss)
                        self.manager.track_numValid_correct(dev_outputs, dev_labels)

                self.manager.end_epoch(SAVE_MODEL_PATH)
                if self.manager.stop:
                    break
            self.manager.end_run(SAVE_MODEL_PATH, save=True)

    def predict(self):
        """Return the predicted class index for every row of the unlabeled test split.

        NOTE(review): predictions use the weights of the last trained epoch,
        not the best early-stopping checkpoint; confirm that this is intended.
        """
        self.model.to(self.device)
        self.model.eval()
        testDataset = PreconditionStatementDataset("test_unlabeled")
        # One example per batch, so no padding is needed at inference time.
        test_loader = DataLoader(
            testDataset,
            batch_size=1,
            num_workers=0,
            collate_fn=testDataset.custom_collate_fn,
            shuffle=False,
        )
        predictions = []
        with torch.no_grad():
            for inputs in test_loader:
                inputs = inputs.to(self.device)
                outputs = self._forward_logits(self.model, inputs)
                predictions.append(torch.argmax(outputs, dim=1).item())
        return predictions

Some weights of the model checkpoint at FacebookAI/roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-large-mnli and are newly initialized because the shapes did not match:
- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instanti

In [33]:
# Train on the 'train' split, validate on the 'dev' split, then predict the
# unlabeled test split. `results` must end up as a list holding one 0/1
# prediction per test instance.
# NOTE(review): this comment originally said 2028 predictions, while the
# length check further down expects 4850; confirm the real test-set size.
trainDataset = PreconditionStatementDataset('train')
devDataset = PreconditionStatementDataset('dev')
trainer = Train(params, trainDataset, devDataset)
trainer.run()
results = trainer.predict()
print(results)

Some weights of the model checkpoint at FacebookAI/roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-large-mnli and are newly initialized because the shapes did not match:
- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instanti

KeyboardInterrupt: 

### Output Prediction Result File

You will need to submit a prediction result file. It should have 2028 lines, every line should be either 0 or 1, which is your model's prediction on the respective test set instance.

In [None]:
# `results` holds the model's predictions, one per test instance. Check the
# count before writing the submission file.
# NOTE(review): the assignment text mentions 2028 test cases but this check
# expects 4850; confirm the number against the actual test file.
assert len(results) == 4850, f"expected 4850 predictions, got {len(results)}"

In [None]:
# Make sure the results are integers (0 or 1), not floats.
results = list(map(int, results))

In [None]:
# Write one prediction per line to 'upload_predictions.txt', the file to upload.
with open('upload_predictions.txt', 'w', encoding='utf-8') as fp:
    fp.writelines(str(x) + '\n' for x in results)