In [11]:
import pandas as pd
import numpy as np
import torch

train_fname = "../data/lgbt-en.train.tsv"
test_fname = "../data/lgbt-en.test.tsv"

def read_file(fname: str) -> pd.DataFrame:
    """Reads a filename and formats it properly for simpletransformers"""
    df = pd.read_table(fname, sep="\t", header=None, names="text,labels,role".split(","))
    offensive_ids = df.labels != "Acceptable speech"

    df.labels[offensive_ids] = 1
    df.labels[~offensive_ids] = 0
    
    df["labels"] = df.labels.astype(np.int8)
    df = df.drop(columns=["role"])
    return df

def fine_tune_and_evaluate(
    model_type,
    model_name,
    language
                            ):
    import torch
    torch.cuda.empty_cache()
    if lang not in {"sl", "hr", "en"}:
        raise AttributeError(f"Language {lang} is not valid")
    eval_file, train_file = f"../data/lgbt-{lang}.test.tsv" , f"../data/lgbt-{lang}.train.tsv"
    train = read_file(train_file)
    test = read_file(eval_file)
    
    from simpletransformers.classification import ClassificationModel
    model_args = {
        "num_train_epochs": 5,
        "learning_rate": 1e-5,
        "overwrite_output_dir": True,
        "train_batch_size": 40,
        "no_save": True,
    }

    model = ClassificationModel(
        "roberta", "roberta-base", use_cuda=True,
        args=model_args

    )
    model.no_save  = True
    model.overwrite_output_dir = True
    model.train_model(train, )

    #print(model.eval_model(test))
    from sklearn.metrics import accuracy_score, f1_score
    y_true = test["labels"]
    y_pred = model.predict(list(test["text"].values))[0]

    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average="macro")
    return accuracy, f1


In [12]:
config_dict = {
    "sl": [
        ("xlm-roberta-base", "xlm-roberta"),
        ("EMBEDDIA/sloberta", "camembert"),
        ("EMBEDDIA/sloberta", "roberta"),
        ("EMBEDDIA/crosloengual-bert", "bert"),
        ],
    "hr": [
        ("xlm-roberta-base", "xlm-roberta"),
        ("classla/bcms-bertic", "electra"),
        ("EMBEDDIA/crosloengual-bert", "bert"),
    ],
    "en": [
        ("xlm-roberta-base", "xlm-roberta"),
        ("xlm-roberta-large", "xlm-roberta"),
        ("roberta-base", "roberta"),
    ]
}
output = ""
output += "|model name| model type | language | accuracy | macro F1|\n"
output += "|---|---|---|---|---|\n"
for lang, confs in config_dict.items():
    for conf in confs:
        print(output)
        model_name, model_type = conf
        try:
            a, f = fine_tune_and_evaluate(model_name, model_type, lang)
            output += f"|{model_name}|{model_type}|{lang}|{a:0.3f}|{f:0.3f}|\n"
        except Exception as e:
            output += f"|{model_name}|{model_type}|{lang}|Error|{e}|\n"
print(output)

|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=2844.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=72.0, style=ProgressStyle(desc…





HBox(children=(FloatProgress(value=0.0, max=900.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=113.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=2844.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=72.0, style=ProgressStyle(desc…





HBox(children=(FloatProgress(value=0.0, max=900.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=113.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=2844.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=72.0, style=ProgressStyle(desc…





HBox(children=(FloatProgress(value=0.0, max=900.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=113.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|
|EMBEDDIA/sloberta|roberta|sl|0.569|0.569|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=2844.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=72.0, style=ProgressStyle(desc…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=72.0, style=ProgressStyle(desc…





HBox(children=(FloatProgress(value=0.0, max=900.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=113.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|
|EMBEDDIA/sloberta|roberta|sl|0.569|0.569|
|EMBEDDIA/crosloengual-bert|bert|sl|0.597|0.596|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=4495.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=113.0, style=ProgressStyle(des…





HBox(children=(FloatProgress(value=0.0, max=1142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=143.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|
|EMBEDDIA/sloberta|roberta|sl|0.569|0.569|
|EMBEDDIA/crosloengual-bert|bert|sl|0.597|0.596|
|xlm-roberta-base|xlm-roberta|hr|0.783|0.756|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=4495.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=113.0, style=ProgressStyle(des…





HBox(children=(FloatProgress(value=0.0, max=1142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=143.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|
|EMBEDDIA/sloberta|roberta|sl|0.569|0.569|
|EMBEDDIA/crosloengual-bert|bert|sl|0.597|0.596|
|xlm-roberta-base|xlm-roberta|hr|0.783|0.756|
|classla/bcms-bertic|electra|hr|0.770|0.749|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=4495.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=113.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=113.0, style=ProgressStyle(des…





HBox(children=(FloatProgress(value=0.0, max=1142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=143.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|
|EMBEDDIA/sloberta|roberta|sl|0.569|0.569|
|EMBEDDIA/crosloengual-bert|bert|sl|0.597|0.596|
|xlm-roberta-base|xlm-roberta|hr|0.783|0.756|
|classla/bcms-bertic|electra|hr|0.770|0.749|
|EMBEDDIA/crosloengual-bert|bert|hr|0.785|0.764|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=4819.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=121.0, style=ProgressStyle(des…





HBox(children=(FloatProgress(value=0.0, max=1017.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=128.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|
|EMBEDDIA/sloberta|roberta|sl|0.569|0.569|
|EMBEDDIA/crosloengual-bert|bert|sl|0.597|0.596|
|xlm-roberta-base|xlm-roberta|hr|0.783|0.756|
|classla/bcms-bertic|electra|hr|0.770|0.749|
|EMBEDDIA/crosloengual-bert|bert|hr|0.785|0.764|
|xlm-roberta-base|xlm-roberta|en|0.845|0.793|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=4819.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=121.0, style=ProgressStyle(des…





HBox(children=(FloatProgress(value=0.0, max=1017.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=128.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|
|EMBEDDIA/sloberta|roberta|sl|0.569|0.569|
|EMBEDDIA/crosloengual-bert|bert|sl|0.597|0.596|
|xlm-roberta-base|xlm-roberta|hr|0.783|0.756|
|classla/bcms-bertic|electra|hr|0.770|0.749|
|EMBEDDIA/crosloengual-bert|bert|hr|0.785|0.764|
|xlm-roberta-base|xlm-roberta|en|0.845|0.793|
|xlm-roberta-large|xlm-roberta|en|0.843|0.791|



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, max=4819.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 5', max=121.0, style=ProgressStyle(des…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 5', max=121.0, style=ProgressStyle(des…





HBox(children=(FloatProgress(value=0.0, max=1017.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=128.0), HTML(value='')))


|model name| model type | language | accuracy | macro F1|
|---|---|---|---|---|
|xlm-roberta-base|xlm-roberta|sl|0.592|0.590|
|EMBEDDIA/sloberta|camembert|sl|0.579|0.578|
|EMBEDDIA/sloberta|roberta|sl|0.569|0.569|
|EMBEDDIA/crosloengual-bert|bert|sl|0.597|0.596|
|xlm-roberta-base|xlm-roberta|hr|0.783|0.756|
|classla/bcms-bertic|electra|hr|0.770|0.749|
|EMBEDDIA/crosloengual-bert|bert|hr|0.785|0.764|
|xlm-roberta-base|xlm-roberta|en|0.845|0.793|
|xlm-roberta-large|xlm-roberta|en|0.843|0.791|
|roberta-base|roberta|en|0.840|0.786|

