In [1]:
from transformers import XLMRobertaTokenizerFast, XLMRobertaForSequenceClassification, DataCollatorWithPadding, create_optimizer, TrainingArguments, Trainer
from datasets import load_dataset, load_from_disk, DatasetDict, ClassLabel
import evaluate
from evaluate import evaluator
import numpy as np
from huggingface_hub import notebook_login
from datetime import datetime
import torch
from optuna import trial
import json

2023-02-28 20:23:56.776600: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-28 20:23:56.965609: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-02-28 20:23:57.641284: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-02-28 20:23:57.641363: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinf

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
max_token_length = 512
BASE_MODEL = "xlm-roberta-base"
BASE_DATASET = "valurank/News_Articles_Categorization"
LOCAL_DIR = "./ml/"

# Load dataset
try:
    dataset = load_from_disk(LOCAL_DIR + "/datasets/public_multilingual")
    print("Loaded local dataset")
except Exception as e:
    # Adapted from https://discuss.huggingface.co/t/how-to-create-custom-classlabels/13650
    # Combine science and tech category
    label2id = {"Politics" : 2, "Tech": 3, "Entertainment": 0, "Sports": 1, "Business": 4, "Health" : 5, "science": 3}
    def label_to_id(batch):
        batch["label"] = [label2id[cat] for cat in batch["label"]]
        return batch

    dataset = load_dataset('json', data_files=LOCAL_DIR + "datasets/ml.json", split="train")
    # Drop "World" category
    dataset = dataset.filter(lambda i: i["label"] != "World")
    
    features = dataset.features.copy()
    text_category = ClassLabel(num_classes = 6, names=["Entertainment/Arts", "Sports", "Politics", "Science/Technology", "Business/Finance", "Health/Welfare"])
    features["label"] = text_category
    dataset = dataset.map(label_to_id, batched=True, features=features)
    
    dataset.save_to_disk(LOCAL_DIR + "/datasets/public_multilingual")
    print("Loaded dataset from original file")

print(dataset)

# Split dataset based on fractions
test_split = 0.2

# Split train and test
train_test = dataset.train_test_split(test_size=test_split)

eng_dataset = load_from_disk(LOCAL_DIR + "/datasets/public_en")
# Split train and test
eng_train_test = eng_dataset.train_test_split(test_size=test_split)

# Load tokenizer
tokenizer = XLMRobertaTokenizerFast.from_pretrained('xlm-roberta-base')
print("Tokenizer loaded from huggingface")

#From https://huggingface.co/docs/transformers/main/en/training
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True)

tokenized_dataset = train_test.map(tokenize_function, batched=True)
eng_tokenized_dataset = eng_train_test.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

label2id = {"Entertainment/Arts": 0, "Sports": 1, "Politics": 2, "Science/Technology": 3, "Business/Finance": 4, "Health/Welfare": 5}
id2label = {v:k for k, v in label2id.items()}

# Load model
model = XLMRobertaForSequenceClassification.from_pretrained(
'xlm-roberta-base', num_labels=6, id2label=id2label, label2id=label2id)
print("Model loaded from huggingface")

Loaded local dataset
Dataset({
    features: ['text', 'label'],
    num_rows: 18882
})
Tokenizer loaded from huggingface


  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Model loaded from huggingface


In [4]:
# Process collected data
# Load dataset
try:
    collected_dataset = load_from_disk(LOCAL_DIR + "/datasets/collected_multilingual")
    print("Loaded local dataset")
except Exception as e:
    # Adapted from https://discuss.huggingface.co/t/how-to-create-custom-classlabels/13650
    def label_to_id(batch):
        batch["label"] = [label2id[cat] for cat in batch["label"]]
        return batch
    label2id = {"Entertainment/Arts": 0, "Sports": 1, "Politics": 2, "Science/Technology": 3, "Business/Finance": 4, "Health/Welfare": 5}
    collected_dataset = load_dataset('json', data_files=LOCAL_DIR + "datasets/collected.json", split="train")
    features = collected_dataset.features.copy()
    text_category = ClassLabel(num_classes = 6, names=["Entertainment/Arts", "Sports", "Politics", "Science/Technology", "Business/Finance", "Health/Welfare"])
    features["label"] = text_category
    collected_dataset = collected_dataset.map(label_to_id, batched=True, features=features)
    collected_dataset = collected_dataset.shuffle(seed=42)
    
    collected_dataset.save_to_disk(LOCAL_DIR + "/datasets/collected_multilingual")
    print("Loaded dataset from original file")

print(collected_dataset)

# Split dataset based on fractions
test_split = 0.2

full_collected_tokenized = collected_dataset.map(tokenize_function, batched=True)

# Split train and test
collected_train_test = collected_dataset.train_test_split(test_size=test_split)
collected_tokenized_dataset = collected_train_test.map(tokenize_function, batched=True)

Loading cached processed dataset at ml/datasets/collected_multilingual/cache-6ea2aa911ddd93d8.arrow


Loaded local dataset
Dataset({
    features: ['text', 'label'],
    num_rows: 12846
})


  0%|          | 0/11 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

In [5]:
# Load evaluation metric
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")
# From https://huggingface.co/spaces/BucketHeadP65/confusion_matrix
cfm = evaluate.load("BucketHeadP65/confusion_matrix")

# From https://huggingface.co/docs/transformers/main/en/tasks/sequence_classification
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    accuracy_score = accuracy.compute(predictions=predictions, references=labels)["accuracy"]
    precision_score = precision.compute(predictions=predictions, references=labels, average="weighted")["precision"]
    recall_score = recall.compute(predictions=predictions, references=labels, average="weighted")["recall"]
    f1_score = f1.compute(predictions=predictions, references=labels, average="weighted")["f1"]
    #confusion_matrix = cfm.compute(predictions=predictions, references=labels)["confusion_matrix"]

    return {"precision": precision_score, "recall": recall_score, "f1": f1_score,
     "accuracy": accuracy_score
            #,"confusion_matrix": confusion_matrix
           }

In [6]:
# Adapted from https://huggingface.co/docs/transformers/hpo_train
def optuna_hp_space(trial):
    return {
        "learning_rate": trial.suggest_categorical("learning_rate", [5e-5, 4e-5, 3e-5, 2e-5]),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [16, 32]),
        "num_train_epochs": trial.suggest_categorical("num_train_epochs", [2, 3, 4]),
    }

def model_init(trial):
    return XLMRobertaForSequenceClassification.from_pretrained(
'xlm-roberta-base', num_labels=6, id2label=id2label, label2id=label2id)

In [None]:
# Tune hyperparameters
training_args = TrainingArguments(
    output_dir="ml/results/trained_roberta_model",
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch"
)

trainer = Trainer(
    model=None,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    model_init=model_init,
    data_collator=data_collator,
)

best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=24
)

with open("ml/best_hparams/roberta.txt", "w") as f:
    f.write(json.dumps(best_trial.hyperparameters))

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
Model config XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "Entertainment/Arts",
    "1": "Sports",
    "2": "Politics",
    "3": "Science/Technology",
    "4": "Business/Finance",
    "5": "Health/Welfare"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "Business/Finance": 4,
    "Entertainment/Arts": 0,
    "Health/Welfare": 5,
    "Politics": 2,
    "Science/Technology": 3,
    "Sports": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "outp

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.146745,0.959731,0.958962,0.958861,0.958962
2,0.338800,0.066526,0.982597,0.982261,0.982347,0.982261
3,0.068500,0.054702,0.987679,0.987556,0.987581,0.987556
4,0.028200,0.060172,0.988567,0.988351,0.988386,0.988351


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3777
  Batch size = 8
Saving model checkpoint to ml/results/trained_roberta_model/run-0/checkpoint-473
Configuration saved in ml/results/trained_roberta_model/run-0/checkpoint-473/config.json
Model weights saved in ml/results/trained_roberta_model/run-0/checkpoint-473/pytorch_model.bin
tokenizer config file saved in ml/results/trained_roberta_model/run-0/checkpoint-473/tokenizer_config.json
Special tokens file saved in ml/results/trained_roberta_model/run-0/checkpoint-473/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expecte

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.4783,0.130302,0.967873,0.966905,0.967049,0.966905
2,0.0839,0.08407,0.980649,0.980408,0.980428,0.980408


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3777
  Batch size = 8
Saving model checkpoint to ml/results/trained_roberta_model/run-1/checkpoint-945
Configuration saved in ml/results/trained_roberta_model/run-1/checkpoint-945/config.json
Model weights saved in ml/results/trained_roberta_model/run-1/checkpoint-945/pytorch_model.bin
tokenizer config file saved in ml/results/trained_roberta_model/run-1/checkpoint-945/tokenizer_config.json
Special tokens file saved in ml/results/trained_roberta_model/run-1/checkpoint-945/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expecte

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.4403,0.168781,0.956658,0.954991,0.954819,0.954991
2,0.0886,0.063368,0.983132,0.983055,0.983073,0.983055
3,0.0309,0.053887,0.987887,0.987821,0.987828,0.987821


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3777
  Batch size = 8
Saving model checkpoint to ml/results/trained_roberta_model/run-2/checkpoint-945
Configuration saved in ml/results/trained_roberta_model/run-2/checkpoint-945/config.json
Model weights saved in ml/results/trained_roberta_model/run-2/checkpoint-945/pytorch_model.bin
tokenizer config file saved in ml/results/trained_roberta_model/run-2/checkpoint-945/tokenizer_config.json
Special tokens file saved in ml/results/trained_roberta_model/run-2/checkpoint-945/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expecte

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.108015,0.966476,0.966375,0.966325,0.966375
2,0.318000,0.075217,0.981345,0.981202,0.981218,0.981202


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3777
  Batch size = 8
Saving model checkpoint to ml/results/trained_roberta_model/run-3/checkpoint-473
Configuration saved in ml/results/trained_roberta_model/run-3/checkpoint-473/config.json
Model weights saved in ml/results/trained_roberta_model/run-3/checkpoint-473/pytorch_model.bin
tokenizer config file saved in ml/results/trained_roberta_model/run-3/checkpoint-473/tokenizer_config.json
Special tokens file saved in ml/results/trained_roberta_model/run-3/checkpoint-473/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expecte

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.5025,0.183492,0.950751,0.948636,0.948599,0.948636
2,0.1169,0.103594,0.973158,0.972994,0.973028,0.972994


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3777
  Batch size = 8
Saving model checkpoint to ml/results/trained_roberta_model/run-4/checkpoint-945
Configuration saved in ml/results/trained_roberta_model/run-4/checkpoint-945/config.json
Model weights saved in ml/results/trained_roberta_model/run-4/checkpoint-945/pytorch_model.bin
tokenizer config file saved in ml/results/trained_roberta_model/run-4/checkpoint-945/tokenizer_config.json
Special tokens file saved in ml/results/trained_roberta_model/run-4/checkpoint-945/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expecte

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.10593,0.96689,0.96664,0.966631,0.96664


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3777
  Batch size = 8
Saving model checkpoint to ml/results/trained_roberta_model/run-5/checkpoint-473
Configuration saved in ml/results/trained_roberta_model/run-5/checkpoint-473/config.json
Model weights saved in ml/results/trained_roberta_model/run-5/checkpoint-473/pytorch_model.bin
tokenizer config file saved in ml/results/trained_roberta_model/run-5/checkpoint-473/tokenizer_config.json
Special tokens file saved in ml/results/trained_roberta_model/run-5/checkpoint-473/special_tokens_map.json


In [None]:
with open("./ml/best_hparams/roberta.txt") as f:
    for line in f:
        best_hparams = json.loads(line)

training_args = TrainingArguments(
    output_dir="ml/results/roberta_multilingual",
    learning_rate=best_hparams["learning_rate"],
    per_device_train_batch_size=best_hparams["per_device_train_batch_size"],
    per_device_eval_batch_size=best_hparams["per_device_train_batch_size"],
    num_train_epochs=best_hparams["num_train_epochs"],
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

best_hparams

In [None]:
effectiveness_acc = np.zeros(3)
effectiveness_f1 = np.zeros(3)
transferability_acc = np.zeros(3)
transferability_f1 = np.zeros(3)

for i in range(3):
    model = XLMRobertaForSequenceClassification.from_pretrained(
    'xlm-roberta-base', num_labels=6, id2label=id2label, label2id=label2id)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset["train"],
        eval_dataset=tokenized_dataset["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    mbert_results = trainer.evaluate()
    effectiveness_acc[i] = mbert_results["eval_accuracy"]
    effectiveness_f1[i] = mbert_results["eval_f1"]
    mbert_results = trainer.evaluate(eval_dataset=full_collected_tokenized)
    transferability_acc[i] = mbert_results["eval_accuracy"]
    transferability_f1[i] = mbert_results["eval_f1"]

# Convert to percentage
effectiveness_acc *= 100
effectiveness_f1 *= 100
transferability_acc *= 100
transferability_f1 *= 100

print(f"Roberta Accuracy: {np.mean(effectiveness_acc):.4f}% +/- {np.std(effectiveness_acc):.4f} Weighted F1: {np.mean(effectiveness_f1):.4f}% +/- {np.std(effectiveness_f1):.4f}")
print(f"Roberta Transferability: {np.mean(transferability_acc):.4f}% +/- {np.std(transferability_acc):.4f} Weighted F1: {np.mean(transferability_f1):.4f}% +/- {np.std(transferability_f1):.4f}")

with open("./ml/effectiveness/roberta.txt", "w") as f:
    f.write(f"Roberta Accuracy: {np.mean(effectiveness_acc):.4f}% +/- {np.std(effectiveness_acc):.4f} Weighted F1: {np.mean(effectiveness_f1):.4f}% +/- {np.std(effectiveness_f1):.4f}\n")
    f.write(f"Roberta Transferability: {np.mean(transferability_acc):.4f}% +/- {np.std(transferability_acc):.4f} Weighted F1: {np.mean(transferability_f1):.4f}% +/- {np.std(transferability_f1):.4f}")

In [None]:
# Tune hyperparameters
training_args = TrainingArguments(
    output_dir="ml/results/trained_roberta_eng",
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch"
)

trainer = Trainer(
    model=None,
    args=training_args,
    train_dataset=eng_tokenized_dataset["train"],
    eval_dataset=eng_tokenized_dataset["test"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    model_init=model_init,
    data_collator=data_collator,
)

best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=24
)

with open("ml/best_hparams/roberta_eng.txt", "w") as f:
    f.write(json.dumps(best_trial.hyperparameters))

In [None]:
with open("./ml/best_hparams/roberta_eng.txt") as f:
    for line in f:
        best_hparams = json.loads(line)

training_args = TrainingArguments(
    output_dir="ml/results/roberta_eng",
    learning_rate=best_hparams["learning_rate"],
    per_device_train_batch_size=best_hparams["per_device_train_batch_size"],
    per_device_eval_batch_size=best_hparams["per_device_train_batch_size"],
    num_train_epochs=best_hparams["num_train_epochs"],
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

best_hparams

In [None]:
effectiveness_acc = np.zeros(3)
effectiveness_f1 = np.zeros(3)
transferability_acc = np.zeros(3)
transferability_f1 = np.zeros(3)

for i in range(3):
    model = XLMRobertaForSequenceClassification.from_pretrained(
    'xlm-roberta-base', num_labels=6, id2label=id2label, label2id=label2id)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=eng_tokenized_dataset["train"],
        eval_dataset=eng_tokenized_dataset["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    mbert_results = trainer.evaluate()
    effectiveness_acc[i] = mbert_results["eval_accuracy"]
    effectiveness_f1[i] = mbert_results["eval_f1"]
    mbert_results = trainer.evaluate(eval_dataset=full_collected_tokenized)
    transferability_acc[i] = mbert_results["eval_accuracy"]
    transferability_f1[i] = mbert_results["eval_f1"]

# Convert to percentage
effectiveness_acc *= 100
effectiveness_f1 *= 100
transferability_acc *= 100
transferability_f1 *= 100

print(f"Roberta Accuracy: {np.mean(effectiveness_acc):.4f}% +/- {np.std(effectiveness_acc):.4f} Weighted F1: {np.mean(effectiveness_f1):.4f}% +/- {np.std(effectiveness_f1):.4f}")
print(f"Roberta Transferability: {np.mean(transferability_acc):.4f}% +/- {np.std(transferability_acc):.4f} Weighted F1: {np.mean(transferability_f1):.4f}% +/- {np.std(transferability_f1):.4f}")

with open("./ml/effectiveness/roberta_eng.txt", "w") as f:
    f.write(f"Roberta Accuracy: {np.mean(effectiveness_acc):.4f}% +/- {np.std(effectiveness_acc):.4f} Weighted F1: {np.mean(effectiveness_f1):.4f}% +/- {np.std(effectiveness_f1):.4f}\n")
    f.write(f"Roberta Transferability: {np.mean(transferability_acc):.4f}% +/- {np.std(transferability_acc):.4f} Weighted F1: {np.mean(transferability_f1):.4f}% +/- {np.std(transferability_f1):.4f}")

In [None]:
# Tune hyperparameters
training_args = TrainingArguments(
    output_dir="ml/results/trained_roberta_realworld",
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch"
)

trainer = Trainer(
    model=None,
    args=training_args,
    train_dataset=collected_tokenized_dataset["train"],
    eval_dataset=collected_tokenized_dataset["test"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    model_init=model_init,
    data_collator=data_collator,
)

best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=24
)

with open("ml/best_hparams/roberta_realworld.txt", "w") as f:
    f.write(json.dumps(best_trial.hyperparameters))

In [None]:
with open("./ml/best_hparams/roberta_realworld.txt") as f:
    for line in f:
        best_hparams = json.loads(line)

training_args = TrainingArguments(
    output_dir="ml/results/roberta_realworld",
    learning_rate=best_hparams["learning_rate"],
    per_device_train_batch_size=best_hparams["per_device_train_batch_size"],
    per_device_eval_batch_size=best_hparams["per_device_train_batch_size"],
    num_train_epochs=best_hparams["num_train_epochs"],
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

best_hparams

In [None]:
effectiveness_acc = np.zeros(3)
effectiveness_f1 = np.zeros(3)

for i in range(3):
    model = XLMRobertaForSequenceClassification.from_pretrained(
    'xlm-roberta-base', num_labels=6, id2label=id2label, label2id=label2id)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=collected_tokenized_dataset["train"],
        eval_dataset=collected_tokenized_dataset["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    mbert_results = trainer.evaluate()
    effectiveness_acc[i] = mbert_results["eval_accuracy"]
    effectiveness_f1[i] = mbert_results["eval_f1"]

# Convert to percentage
effectiveness_acc *= 100
effectiveness_f1 *= 100

print(f"Roberta Accuracy: {np.mean(effectiveness_acc):.4f}% +/- {np.std(effectiveness_acc):.4f} Weighted F1: {np.mean(effectiveness_f1):.4f}% +/- {np.std(effectiveness_f1):.4f}")

with open("./ml/effectiveness/roberta_realworld.txt", "w") as f:
    f.write(f"Roberta Accuracy: {np.mean(effectiveness_acc):.4f}% +/- {np.std(effectiveness_acc):.4f} Weighted F1: {np.mean(effectiveness_f1):.4f}% +/- {np.std(effectiveness_f1):.4f}\n")

In [15]:
with open("./ml/best_hparams/roberta_realworld.txt") as f:
    for line in f:
        best_hparams = json.loads(line)

training_args = TrainingArguments(
    output_dir="ml/results/roberta_realworld",
    learning_rate=best_hparams["learning_rate"],
    per_device_train_batch_size=best_hparams["per_device_train_batch_size"],
    per_device_eval_batch_size=best_hparams["per_device_train_batch_size"],
    num_train_epochs=best_hparams["num_train_epochs"],
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)
model = XLMRobertaForSequenceClassification.from_pretrained(
    'xlm-roberta-base', num_labels=6, id2label=id2label, label2id=label2id)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=collected_tokenized_dataset["train"],
    eval_dataset=collected_tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.save_model("./ml/trained_realworld_roberta_model")

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.6397,0.431684,0.866793,0.863813,0.862271,0.863813
2,0.3438,0.365479,0.891852,0.887549,0.887512,0.887549
3,0.2574,0.384069,0.893583,0.892996,0.893012,0.892996


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2570
  Batch size = 16
Saving model checkpoint to ml/results/roberta_realworld/checkpoint-643
Configuration saved in ml/results/roberta_realworld/checkpoint-643/config.json
Model weights saved in ml/results/roberta_realworld/checkpoint-643/pytorch_model.bin
tokenizer config file saved in ml/results/roberta_realworld/checkpoint-643/tokenizer_config.json
Special tokens file saved in ml/results/roberta_realworld/checkpoint-643/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward

In [16]:
from evaluate import evaluator
task_evaluator = evaluator("text-classification")

eval_results = task_evaluator.compute(
    model_or_pipeline=model,
    tokenizer=tokenizer,
    data=collected_tokenized_dataset["test"],
    metric=evaluate.combine(["BucketHeadP65/confusion_matrix", "accuracy"]),
    label_mapping=label2id
)

In [18]:
eval_results["confusion_matrix"]

array([[445,   6,   1,  19,   4,   6],
       [  6, 558,   0,   3,   2,   0],
       [  5,   1, 196,  11,  43,  15],
       [  4,   3,   0, 385,  39,   8],
       [  6,   3,   8,  55, 486,   8],
       [  3,   2,   0,  14,  14, 211]])