This notebook inspects optimal hyperparameters for classification models finetuning.

In [1]:

import torch
from numba import cuda
cuda.select_device(0)
cuda.close()
cuda.select_device(0)
torch.cuda.empty_cache()

In [2]:
label_set = ['Negative', 'Positive', 'Neutral']
STR_TO_NUM = {k: i for i, k in enumerate(label_set)}
NUM_TO_STR = {i:k for i, k in enumerate(label_set)}

import pandas as pd
df = pd.read_json("bcs_polsent.jsonl", orient="records", lines=True)
# df["label"] = df.label.apply(lambda s: STR_TO_NUM[s])
df = df[["sentence", "label", "split"]].rename(columns={"sentence": "text", "label":"labels"})
train = df[df.split=="train"].drop(columns=["split"])
dev = df[df.split=="dev"].drop(columns=["split"])
test = df[df.split=="test"].drop(columns=["split"])


In [3]:
def fine_tune_and_evaluate(model_type, model_name, args):
    import torch
    from numba import cuda

    torch.cuda.empty_cache()
    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)
    torch.cuda.empty_cache()

    from simpletransformers.classification import ClassificationModel, ClassificationArgs
    model_args = ClassificationArgs()
    model_args.num_train_epochs = args["num_train_epochs"]
    model_args.train_batch_size = args["train_batch_size"]
    model_args.overwrite_output_dir  = True
    model_args.output_dir  = f"results/"
    model_args.silent  = False
    model_args.save_model_every_epoch  = False
    model_args.num_labels  = 3
    # model_args.labels_list  = label_set
    model = ClassificationModel(model_type, model_name, use_cuda=False, args=model_args)

    accuracies = list()
    f1s = list()

    model.train_model(
        train,
    )
    from sklearn.metrics import accuracy_score, f1_score

    y_true = dev["labels"]
    y_pred = model.predict(list(test["text"].values))[0]
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average="macro")

    accuracies.append(accuracy)
    f1s.append(f1)

    for i in range(6):
        model = ClassificationModel(
            model_type, model_name, use_cuda=True, args=model_args
        )
        model.train_model(
            train,
        )
        y_true = test["labels"]
        y_pred = model.predict(list(test["text"].values))[0]
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average="macro")
        accuracies.append(accuracy)
        f1s.append(f1)
    print(f"Model: {model_name}, {model_type}")
    print(f"Accuracies: {accuracies}")
    print(f"F1 scores: {f1s}")
    return accuracy, f1


roberta_args = {"num_train_epochs": 6, "train_batch_size": 69}
embeddia_args = {"num_train_epochs": 14, "train_batch_size": 21}
bertic_args = {"num_train_epochs": 12, "train_batch_size": 74}


config_list = [
    ("xlm-roberta-base", "xlmroberta", roberta_args),
    ("classla/bcms-bertic", "electra", bertic_args),
    ("EMBEDDIA/crosloengual-bert", "bert", embeddia_args),
]

fine_tune_and_evaluate(
    "xlmroberta", "xlm-roberta-base", {"num_train_epochs": 4, "train_batch_size": 16}
)


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.b

ValueError: too many dimensions 'str'

In [4]:
train.labels.unique()

array(['Negative', 'Neutral', 'Positive'], dtype=object)

In [8]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import pandas as pd
import logging


logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)



# Optional model configuration
model_args = ClassificationArgs()
model_args.num_train_epochs=1
model_args.overwrite_output_dir=True
model_args.labels_list = list(label_set)
model_args.use_cuda=True

# Create a ClassificationModel
model = ClassificationModel(
    "roberta", "roberta-base", args=model_args
)

# Train the model
model.train_model(train)



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.de

  0%|          | 0/2150 [00:00<?, ?it/s]

In [None]:
# Evaluate the model
result, model_outputs, wrong_predictions = model.eval_model(dev)

# Make predictions with the model
predictions, raw_outputs = model.predict(["Sam was a Wizard"])