In [None]:
import pandas as pd
import numpy as np
import torch

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')


def read_file(fname: str) -> pd.DataFrame:
    """Read a tab-separated dataset and format it for simpletransformers.

    The file is read without a header row; its three columns are named
    ``text``, ``labels`` and ``role``.

    Args:
        fname: Path to the TSV file.

    Returns:
        A DataFrame with two columns: ``text`` (unchanged) and ``labels``
        as ``int8``, where 0 means the original label was exactly
        ``"Acceptable speech"`` and 1 means anything else.
    """
    df = pd.read_table(fname, sep="\t", header=None, names=["text", "labels", "role"])

    # Binarise in a single vectorised step. The previous chained assignment
    # (``df.labels[mask] = ...``) triggers SettingWithCopyWarning and does not
    # write through to ``df`` when pandas copy-on-write is enabled.
    df["labels"] = (df["labels"] != "Acceptable speech").astype(np.int8)

    return df.drop(columns=["role"])

def fine_tune_and_evaluate(
    model_type,
    model_name,
    language,
    model_args,
    n_runs=11,
):
    """Fine-tune a classifier several times and report test-set metrics.

    For each run a fresh ``ClassificationModel`` is created, trained on
    ``../data/merged-{language}.train.tsv`` and evaluated on
    ``../data/merged-{language}.test.tsv``. Accuracy and macro-F1 of every
    run are collected and printed at the end. Whenever a run reaches the
    highest accuracy seen so far (ties included), its output directory is
    copied to ``finetuned_models/{model_type}__{model_name}_{language}_best``.

    Args:
        model_type: simpletransformers model type (e.g. ``"bert"``).
        model_name: Model identifier passed to ``ClassificationModel``.
        language: One of ``"sl"``, ``"hr"`` or ``"en"``.
        model_args: Dict of training arguments. It is copied, so the
            caller's dict is left unmodified.
        n_runs: Number of independent train/evaluate runs. Defaults to 11,
            matching the previous behaviour of one initial run plus ten
            repetitions.

    Returns:
        None. Results are printed.

    Raises:
        AttributeError: If ``language`` is not supported (exception type
            kept for backward compatibility).
        ValueError: If ``n_runs`` is smaller than 1.
    """
    # Validate cheap inputs before importing heavy dependencies or touching
    # the filesystem.
    if language not in {"sl", "hr", "en"}:
        raise AttributeError(f"Language {language} is not valid")
    if n_runs < 1:
        raise ValueError(f"n_runs must be at least 1, got {n_runs}")

    import shutil

    import torch
    from simpletransformers.classification import ClassificationModel
    from sklearn.metrics import accuracy_score, f1_score

    train = read_file(f"../data/merged-{language}.train.tsv")
    test = read_file(f"../data/merged-{language}.test.tsv")

    current_dir = f"finetuned_models/{model_type}__{model_name}_{language}_current"
    best_dir = f"finetuned_models/{model_type}__{model_name}_{language}_best"

    # Work on a private copy: the same args dict is shared by several
    # configurations, so it must not be mutated here.
    run_args = dict(model_args)
    run_args["overwrite_output_dir"] = True
    run_args["output_dir"] = current_dir + "/"
    run_args["silent"] = True
    run_args["save_model_every_epoch"] = False
    run_args["save_steps"] = -1
    #run_args["manual_seed"] = 42

    texts = test["text"].tolist()
    y_true = test["labels"]

    accuracies = []
    f1s = []
    best_accuracy = float("-inf")

    for _ in range(n_runs):
        torch.cuda.empty_cache()
        model = ClassificationModel(
            model_type, model_name, use_cuda=True, args=run_args
        )
        model.train_model(train)

        y_pred = model.predict(texts)[0]
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average="macro")
        accuracies.append(accuracy)
        f1s.append(f1)

        # Every run (including the first) is a candidate for "best".
        # NOTE(review): this relies on train_model having written the trained
        # model to ``output_dir`` - confirm against the simpletransformers docs.
        if accuracy >= best_accuracy:
            best_accuracy = accuracy
            # Remove any previous copy first: copying into an existing
            # directory would otherwise nest or mix old and new files.
            shutil.rmtree(best_dir, ignore_errors=True)
            shutil.copytree(current_dir, best_dir)

        # Drop the reference so the next run does not have to coexist with
        # this model object.
        del model

    print(f"Model: {model_name}, {model_type}, {language=}")
    print(f"Accuracies: {accuracies}")
    print(f"F1 scores: {f1s}")
    return #model, accuracy, f1


# Training hyperparameters, one dict per model family. Each dict is handed to
# fine_tune_and_evaluate as ``model_args`` and the same dict object is reused
# by every configuration entry that references it.
roberta_args = dict(num_train_epochs=6, learning_rate=3e-6, train_batch_size=69)
embeddia_args = dict(num_train_epochs=14, learning_rate=1e-5, train_batch_size=21)
bertic_args = dict(num_train_epochs=12, learning_rate=1e-5, train_batch_size=74)


# Language code -> list of (model_name, model_type, model_args) tuples.
# Note the tuple order: model name first, then model type.
config_dict = dict(
    sl=[
        ("xlm-roberta-base", "xlmroberta", roberta_args),
        # An earlier variant paired EMBEDDIA/sloberta with the "roberta"
        # model type; "camembert" is the one in use.
        ("EMBEDDIA/sloberta", "camembert", embeddia_args),
        ("EMBEDDIA/crosloengual-bert", "bert", embeddia_args),
    ],
    hr=[
        ("xlm-roberta-base", "xlmroberta", roberta_args),
        ("classla/bcms-bertic", "electra", bertic_args),
        ("EMBEDDIA/crosloengual-bert", "bert", embeddia_args),
    ],
    en=[
        ("xlm-roberta-base", "xlmroberta", roberta_args),
        ("xlm-roberta-large", "xlmroberta", roberta_args),
        ("roberta-base", "roberta", roberta_args),
    ],
)

# Run every configured model for every language. Each config entry is stored
# as (model_name, model_type, model_args), whereas fine_tune_and_evaluate
# takes the model type first, so the arguments are passed by keyword.
for language, model_list in config_dict.items():
    for model_name, model_type, model_args in model_list:
        fine_tune_and_evaluate(
            model_type=model_type,
            model_name=model_name,
            language=language,
            model_args=model_args,
        )

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.w

Model: xlm-roberta-base, xlmroberta, language='sl'
Accuracies: [0.660238751147842, 0.709366391184573, 0.6721763085399449, 0.6795224977043158, 0.6808999081726355, 0.6974288337924701, 0.689623507805326, 0.689623507805326, 0.657483930211203, 0.6960514233241506, 0.6887052341597796]
F1 scores: [0.6601559337129902, 0.7003895422308328, 0.6721738206544836, 0.6794249131212254, 0.6767424602390727, 0.6964607580962895, 0.688222518871888, 0.6885477968396336, 0.6572408613764471, 0.6921237931246541, 0.6873385012919897]


Some weights of the model checkpoint at EMBEDDIA/sloberta were not used when initializing CamembertForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at EMBEDDIA/sloberta and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 