**Note: Cells can be run by section**
1. Select the section to run by its <u>header</u>.
2. Perform "Run all below" action.
3. Each section ends with a cell that raises an exception, so execution stops at the end of that section.
4. To run the entire notebook without interruption, set `run_without_interruption` to `True`.

In [1]:
# When False, the guard cell at the end of each section raises an exception to stop execution there.
run_without_interruption = True

In [2]:
# Core
import numpy as np
import pandas as pd
import os
import json

# Dataset preparation
#from transformers import TFAutoModel, AutoModel
from transformers import AutoTokenizer
from datasets import Dataset
from sklearn.model_selection import StratifiedKFold

# Model, hyperparameter search, evaluation
import torch
from transformers import BertPreTrainedModel, TrainingArguments
from transformers.models.roberta.modeling_roberta import (
    RobertaClassificationHead,
    RobertaConfig,
    RobertaModel,
)
from torch.nn import CrossEntropyLoss
import evaluate

from transformers import Trainer #get_scheduler, Trainer
#from torch.optim import AdamW
#from tqdm.auto import tqdm

# Simpletransformers for multilabel
from simpletransformers.classification import (
    MultiLabelClassificationModel, MultiLabelClassificationArgs
)
from sklearn.metrics import precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


# Define functions

These code blocks contain functions that are used multiple times, by each section.

In [3]:
# Prepare variables for indexing
# Names of the three top-level classes.
classes = ["Explicit", "Implicit", "Non-abusive"]
# Names of the ten sub-label columns; used later to drop those columns from the loaded DataFrame.
# NOTE(review): presumably E* are explicit and I* are implicit sub-labels — confirm against the dataset.
labels = ["E1", "E2", "E3", "I1", "I2", "I3", "I4", "I5", "I6", "I7"]

### `load_csv`
Loads a dataset split from a constant directory.
To change the directory, modify the path passed to `pd.read_csv` inside the function.

- `dataset: str (train, validate, test)` — Split to load.
- `use_stopwords: bool` — Whether to use a dataset with stopwords or not.

Returns: `DataFrame` containing the loaded CSV.

In [4]:
def load_csv(dataset, use_stopwords, data_dir="./Data"):
    """Load one split of the dataset as a DataFrame.

    Args:
        dataset: Split to load; one of "train", "validate" or "test".
        use_stopwords: True to read from the "With Stopwords" folder,
            False to read from the "Without Stopwords" folder.
        data_dir: Root folder that contains the two stop-word folders.
            Defaults to "./Data", the location that was previously
            hard-coded, so existing calls behave as before.

    Returns:
        DataFrame read from "<data_dir>/<stop-word folder>/<dataset>.csv".

    Raises:
        ValueError: If `dataset` is not one of the known splits.
        TypeError: If `use_stopwords` is not a bool.
    """
    if dataset not in ("train", "validate", "test"):
        raise ValueError("Invalid split.")
    if not isinstance(use_stopwords, bool):
        raise TypeError("Stop words must be specified in boolean.")

    stopwords = "With Stopwords" if use_stopwords else "Without Stopwords"
    return pd.read_csv(f"{data_dir}/{stopwords}/{dataset}.csv")

### `get_cv_indices`
Generates stratified CV indices given a DataFrame. Performs 5-fold CV.

- `df: DataFrame` — DataFrame to generate indices on.
- `stratifying_class: str` — Name of the column to use for stratification.

Returns: `list` containing indices for cross-validation.

In [5]:
def get_cv_indices(df, stratifying_class):
    """Build stratified cross-validation index pairs for `df`.

    Splits the row positions of `df` with a default-configured
    `StratifiedKFold`, stratifying on the `stratifying_class` column.

    Returns:
        A list with one dict per fold; each dict stores the row positions
        of that fold under the keys 'train' and 'test'.
    """
    row_positions = np.arange(df.shape[0])
    strata = df[stratifying_class]
    splitter = StratifiedKFold()

    return [
        {'train': train_rows, 'test': test_rows}
        for train_rows, test_rows in splitter.split(row_positions, strata)
    ]

### `format_dataset`
Formats a DataFrame for HuggingFace use.

- `df: DataFrame` — DataFrame to convert.
- `cv_indices: list` — Indices to create stratified CV datasets.

Returns: `list`, `list` containing HuggingFace `Datasets` for training and testing.

In [6]:
def format_dataset(df, cv_indices):
    """Convert a DataFrame into per-fold, tokenized HuggingFace datasets.

    Relies on the module-level `labels` list (sub-label columns to drop)
    and on the module-level `tokenize_function`.

    Args:
        df: DataFrame with "Text" and "Class" columns plus the sub-label
            columns named in `labels`. It is not modified.
        cv_indices: List of dicts with 'train' and 'test' row indices,
            one dict per fold.

    Returns:
        Two lists (train, test) holding one dataset per fold.
    """
    # Work on a renamed/trimmed copy: the previous in-place rename/drop
    # altered the caller's DataFrame, so calling this twice on the same
    # frame failed because the sub-label columns were already gone.
    formatted = df.rename(columns={"Text": "text", "Class": "labels"})
    formatted = formatted.drop(columns=labels)

    dataset = Dataset.from_pandas(formatted)

    # Convert pd labels to huggingface ClassLabels for stratifying
    dataset = dataset.class_encode_column("labels")

    dataset = dataset.map(tokenize_function, batched=True)

    # Convert datasets to pytorch format
    dataset = dataset.remove_columns(["text"])
    dataset.set_format("torch")

    train_datasets = [dataset.select(fold['train']) for fold in cv_indices]
    test_datasets = [dataset.select(fold['test']) for fold in cv_indices]

    return train_datasets, test_datasets

### `save_preds`
Saves the predictions of a model.

- `filename: str` — Filename to use. Must include the `.json` extension.
- `data: list` — Contains the data to save.

In [7]:
def save_preds(filename, data):
    """Write `data` to `filename` as JSON under the "predictions" key.

    The payload is serialized (4-space indent) before the file is opened,
    so a serialization error does not touch the target file.
    """
    payload = {"predictions": data}
    serialized = json.dumps(payload, indent=4)

    with open(filename, "w") as handle:
        handle.write(serialized)

### `load_metrics`
Loads all fold metrics of a model and inserts them in a dataframe.

- `directory: str` — Directory of the folder that contains the metric logs.

Returns: `DataFrame` containing all fold metrics.

In [8]:
# Loads all fold metrics and puts them in a dataframe
def load_metrics(directory):
    """Load every JSON metric file in `directory` into one DataFrame.

    Args:
        directory: Folder whose entries are JSON files, each holding a
            flat dict of metric name -> value.

    Returns:
        DataFrame with one row per file and one column per metric key.
        Rows follow the sorted file names; an empty folder yields an
        empty DataFrame.
    """
    records = []
    # os.listdir gives no ordering guarantee; sort so that row i is
    # reproducibly the i-th file name (fold_0, fold_1, ...).
    for name in sorted(os.listdir(directory)):
        # The context manager closes the file even if parsing fails.
        with open(os.path.join(directory, name)) as handle:
            records.append(json.load(handle))

    if not records:
        return pd.DataFrame()

    # Wrap every value in a list so each file becomes a single-row frame,
    # then concatenate once instead of growing a frame inside the loop.
    frames = [
        pd.DataFrame.from_dict({key: [value] for key, value in record.items()})
        for record in records
    ]
    return pd.concat(frames, ignore_index=True)

In [None]:
# Section guard: stops execution here unless the notebook is set to run without interruption.
if not run_without_interruption:
    raise Exception('End of repeated functions declarations')

# Prepare RoBERTa model

Prepares the RoBERTa model's classification head based on a repository by [Chanda](https://pchanda.github.io/Roberta-FineTuning-for-Classification/).

In addition prepares the `compute_metrics` function to be used by the HuggingFace `Trainer` class.

In [9]:
# Prepare classification head for pretrained RoBERTa
class RobertaAbusiveClassification(BertPreTrainedModel):
    """RoBERTa encoder followed by a `RobertaClassificationHead`.

    NOTE(review): the class derives from `BertPreTrainedModel` rather than
    a RoBERTa-specific base class; left unchanged — confirm it is intended.
    """

    def __init__(self, config):
        """Build the encoder and the classification head from `config`."""
        super().__init__(config)
        self.num_labels = config.num_labels
        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Run the encoder and classification head.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional attention mask passed to the encoder.
            labels: Optional target class indices. When given, the
                cross-entropy loss is computed and prepended to the
                output; when omitted (e.g. inference on unlabeled text),
                no loss is computed.

        Returns:
            Tuple `(loss), logits, (hidden_states), (attentions)`; `loss`
            is present only when `labels` is provided.
        """
        encoder_outputs = self.roberta(input_ids, attention_mask=attention_mask)
        sequence_output = encoder_outputs[0]
        logits = self.classifier(sequence_output)

        # Keep everything after the first two encoder outputs
        # (presumably the sequence output and the pooled output).
        outputs = (logits,) + encoder_outputs[2:]

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)

In [10]:
def compute_metrics(eval_pred):
    """Score one evaluation pass and record its predicted classes.

    `eval_pred` unpacks into `(logits, labels)`. The arg-max class of each
    logit row is appended to the module-level `predictions` list, which
    must already exist when this function is called.

    Returns:
        Dict with "accuracy" and weighted-average "precision", "recall"
        and "f1".
    """
    global predictions

    # Load all scorers up front, in the same order as before.
    scorers = {
        name: evaluate.load(name)
        for name in ("accuracy", "precision", "recall", "f1")
    }

    logits, labels = eval_pred
    model_preds = np.argmax(logits, axis=-1)
    predictions.append(model_preds)

    results = {
        "accuracy": scorers["accuracy"].compute(
            predictions=model_preds, references=labels)["accuracy"],
    }
    for name in ("precision", "recall", "f1"):
        results[name] = scorers[name].compute(
            predictions=model_preds, references=labels, average="weighted")[name]

    return results

In [11]:
# Section guard: stops execution here unless the notebook is set to run without interruption.
if not run_without_interruption:
    raise Exception('End of Model preparation')

# Train RoBERTa Base with stop words

Trains a RoBERTa Base model using HuggingFace `Trainer`. Hyperparameters are left at the `Trainer` defaults. Uses the train dataset that contains stop words.

To modify, change the values specified in the following cell.

This **WILL** overwrite existing models, metrics, and predictions if the directory contains existing data.

In [12]:
# Variables
num_labels = 3  # number of output classes passed to the model config
load_dir = "jcblaise/roberta-tagalog-base"  # checkpoint the tokenizer, config and model are loaded from
save_dir = "./Models/experiment-with-stopwords"  # root folder for models, metrics and predictions of this run

In [13]:
# Create the output folder and its metrics/predictions sub-folders when missing
for folder in (save_dir, f'{save_dir}/metrics', f'{save_dir}/predictions'):
    if not os.path.exists(folder):
        os.makedirs(folder)

In [14]:
# Get tokenizer from repository
# The tokenizer is loaded from `load_dir` with its maximum length set to 256.
tokenizer = AutoTokenizer.from_pretrained(load_dir, model_max_length=256)

def tokenize_function(data):
    """Tokenize the "text" field of `data`, padding to the maximum length and truncating."""
    return tokenizer(data["text"], padding="max_length", truncation=True)

In [15]:
# Load and format CSV
# Load the train split that keeps stop words.
train_df = load_csv("train", True)
# Stratify the folds on the "Class" column.
cv_indices = get_cv_indices(train_df, "Class")
# One tokenized train/test dataset pair per fold.
train_datasets, test_datasets = format_dataset(train_df, cv_indices)

                                                                                                                       

In [16]:
# Attach classification head and prepare model for trainer
config = RobertaConfig.from_pretrained(load_dir, num_labels=num_labels)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# All folds share the same output directory and TrainingArguments.
training_args = TrainingArguments(
    output_dir = save_dir,
    report_to = "none")

# Perform cross-validated training
for i, (train, test) in enumerate(zip(train_datasets, test_datasets)):
    # Reset model
    # A fresh model is loaded from the pretrained checkpoint for every fold.
    model = RobertaAbusiveClassification.from_pretrained(load_dir, config=config)
    model.to(device)
    model.train()

    # Initialize Trainer class and train
    trainer = Trainer(
        model = model,
        args = training_args,
        train_dataset = train,
        eval_dataset = test,
        compute_metrics = compute_metrics)
    result = trainer.train()

    # Evaluate model and save metrics and predictions
    model.eval()
    # compute_metrics appends to this global list; resetting it here makes
    # predictions[0] the predictions of the evaluation that follows.
    predictions = []
    metrics = trainer.evaluate()
    # NOTE(review): presumably written under output_dir (save_dir), i.e. into
    # the folder that load_metrics(f'{save_dir}/metrics') reads later — confirm.
    trainer.save_metrics(f'metrics/fold_{i}', metrics)
    save_preds(f'{save_dir}/predictions/fold_{i}.json', predictions[0].tolist())

    # Save trained model
    model.save_pretrained(f'{save_dir}/fold_{i}')
    tokenizer.save_pretrained(f'{save_dir}/fold_{i}')

Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


In [17]:
# Gather the metric files under this experiment's metrics folder into one DataFrame.
fold_metrics = load_metrics(f'{save_dir}/metrics')

print("Cross-validation fold metrics (with stopwords)")
# Last expression of the cell, so the notebook displays the table.
fold_metrics

Cross-validation fold metrics (with stopwords)


Unnamed: 0,epoch,eval_accuracy,eval_f1,eval_loss,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second
0,3.0,0.712264,0.714693,1.027306,0.720893,0.712264,7.1478,29.659,3.777
1,3.0,0.740566,0.741393,0.87952,0.742543,0.740566,6.6357,31.949,4.069
2,3.0,0.7109,0.700346,0.966438,0.703578,0.7109,7.5922,27.792,3.556
3,3.0,0.725118,0.71756,1.177028,0.723389,0.725118,6.9677,30.283,3.875
4,3.0,0.701422,0.701287,1.075079,0.703806,0.701422,7.8708,26.808,3.43


In [18]:
print("Cross-validation average (with stopwords)")
# Column-wise mean over the fold rows.
fold_metrics.mean()

Cross-validation average (with stopwords)


epoch                       3.000000
eval_accuracy               0.718054
eval_f1                     0.715056
eval_loss                   1.025074
eval_precision              0.718842
eval_recall                 0.718054
eval_runtime                7.242840
eval_samples_per_second    29.298200
eval_steps_per_second       3.741400
dtype: float64

In [19]:
# Section guard: stops execution here unless the notebook is set to run without interruption.
if not run_without_interruption:
    raise Exception('End of RoBERTa (with stopwords) training')

# Train RoBERTa without stop words


Trains a RoBERTa Base model using HuggingFace `Trainer`. Hyperparameters are left at the `Trainer` defaults.

To modify, change the values specified in the following cell.

This **WILL** overwrite existing models, metrics, and predictions if the directory contains existing data.

Additional note: This performs the exact same process as the training with stopwords, with the difference of using a train dataset that does not contain stop words.

In [20]:
# Variables
num_labels = 3  # number of output classes passed to the model config
load_dir = "jcblaise/roberta-tagalog-base"  # checkpoint the tokenizer, config and model are loaded from
save_dir = "./Models/experiment-without-stopwords"  # root folder for models, metrics and predictions of this run

In [21]:
# Create the output folder and its metrics/predictions sub-folders when missing
for folder in (save_dir, f'{save_dir}/metrics', f'{save_dir}/predictions'):
    if not os.path.exists(folder):
        os.makedirs(folder)

In [22]:
# Get tokenizer from repository
# The tokenizer is loaded from `load_dir` with its maximum length set to 256.
tokenizer = AutoTokenizer.from_pretrained(load_dir, model_max_length=256)

def tokenize_function(data):
    """Tokenize the "text" field of `data`, padding to the maximum length and truncating."""
    return tokenizer(data["text"], padding="max_length", truncation=True)

In [23]:
# Reload CV indices
# The folds are computed on the with-stop-words train split and reused below.
# NOTE(review): this assumes both CSVs list the same samples in the same row order — confirm.
train_df = load_csv("train", True)
cv_indices = get_cv_indices(train_df, "Class")

# Load and format CSV
# Replace the frame with the split that has stop words removed before tokenizing.
train_df = load_csv("train", False)
train_datasets, test_datasets = format_dataset(train_df, cv_indices)

                                                                                                                       

In [24]:
# Set configurations
num_labels=3

# Attach classification head and prepare model for trainer
config = RobertaConfig.from_pretrained(load_dir, num_labels=num_labels)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# All folds share the same output directory and TrainingArguments.
training_args = TrainingArguments(
    output_dir = save_dir,
    report_to = "none")

# Perform cross-validated training
for i, (train, test) in enumerate(zip(train_datasets, test_datasets)):
    # Reset model
    # A fresh model is loaded from the pretrained checkpoint for every fold.
    model = RobertaAbusiveClassification.from_pretrained(load_dir, config=config)
    model.to(device)
    model.train()

    # Initialize Trainer class and train
    trainer = Trainer(
        model = model,
        args = training_args,
        train_dataset = train,
        eval_dataset = test,
        compute_metrics = compute_metrics)
    result = trainer.train()

    # Evaluate model and save metrics and predictions
    model.eval()
    # compute_metrics appends to this global list. It must be reset for
    # every fold: without the reset, predictions[0] stays a stale array
    # from an earlier evaluation (or the name is undefined when this
    # section is run on its own) and the wrong predictions get saved.
    predictions = []
    metrics = trainer.evaluate()
    # NOTE(review): presumably written under output_dir (save_dir), i.e. into
    # the folder that load_metrics(f'{save_dir}/metrics') reads later — confirm.
    trainer.save_metrics(f'metrics/fold_{i}', metrics)
    save_preds(f'{save_dir}/predictions/fold_{i}.json', predictions[0].tolist())

    # Save trained model
    model.save_pretrained(f'{save_dir}/fold_{i}')
    tokenizer.save_pretrained(f'{save_dir}/fold_{i}')

Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaAbusiveClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAbusiveClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAbusiveClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'classifier.den

Step,Training Loss


In [25]:
# Gather the metric files under this experiment's metrics folder into one DataFrame.
fold_metrics = load_metrics(f'{save_dir}/metrics')

print("Cross-validation fold metrics (without stopwords)")
# Last expression of the cell, so the notebook displays the table.
fold_metrics

Cross-validation fold metrics (without stopwords)


Unnamed: 0,epoch,eval_accuracy,eval_f1,eval_loss,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second
0,3.0,0.650943,0.652751,1.333095,0.659968,0.650943,6.7365,31.47,4.008
1,3.0,0.707547,0.706585,1.000525,0.707356,0.707547,7.5897,27.933,3.557
2,3.0,0.725118,0.722906,0.924243,0.723174,0.725118,7.1306,29.591,3.786
3,3.0,0.701422,0.699391,1.001769,0.698663,0.701422,7.3607,28.666,3.668
4,3.0,0.663507,0.666403,1.127239,0.676998,0.663507,6.7111,31.441,4.023


In [26]:
print("Cross-validation average (without stopwords)")
# Column-wise mean over the fold rows.
fold_metrics.mean()

Cross-validation average (without stopwords)


epoch                       3.000000
eval_accuracy               0.689708
eval_f1                     0.689607
eval_loss                   1.077374
eval_precision              0.693232
eval_recall                 0.689708
eval_runtime                7.105720
eval_samples_per_second    29.820200
eval_steps_per_second       3.808400
dtype: float64

In [27]:
# Section guard: stops execution here unless the notebook is set to run without interruption.
if not run_without_interruption:
    raise Exception('End of RoBERTa (without stopwords) training')

# Train Sublabel Classification

Trains a RoBERTa Base model using `simpletransformers`. Parameters are set to the defaults of the HuggingFace `Trainer`. Two evaluations are performed: (1) on the entire test set and (2) on an *abusive-only* test set. The *abusive-only* test set is derived from the test set by removing texts that are classed as *non-abusive*. Testing on it therefore assumes that only abusive data is given to the model for prediction.

To modify, change the values specified in the following cell.

This does not save the trained models.

In [28]:
# Variables
num_labels = 10  # number of sub-labels predicted by the multilabel model
load_dir = "jcblaise/roberta-tagalog-base"  # checkpoint the model is loaded from
save_dir = "./Models/experiment-multilabel"  # root folder for metrics and predictions of this run

In [29]:
# Create directories
# Every leaf folder is created on its own. Previously the "all" and
# "abusive-only" sub-folders were only created when their parent folder
# did not exist yet, so a pre-existing (e.g. empty) "metrics" or
# "predictions" folder left them missing and the later saves failed.
for kind in ("metrics", "predictions"):
    for subset in ("all", "abusive-only"):
        # exist_ok makes this safe to re-run; parent folders (including
        # save_dir) are created as needed.
        os.makedirs(f'{save_dir}/{kind}/{subset}', exist_ok=True)

In [30]:
# Reload CV indices
train_df = load_csv("train", True)
cv_indices = get_cv_indices(train_df, "Class")

# Format dataset
# Collect the last ten values of every row into one list per row.
# NOTE(review): assumes the ten sub-label columns named in `labels` are the last ten columns of the CSV — confirm.
train_df["labels"] = train_df.apply(lambda x: x[-10:].values.tolist(), axis=1)
# The individual sub-label columns are dropped once they are combined into "labels".
train_df.drop(labels, axis=1, inplace=True)
train_df = train_df.rename({"Text": "text", "Class": "class"}, axis=1)

In [31]:
# Slice the formatted frame into one train and one test DataFrame per fold
train_datasets = [train_df.iloc[fold['train']] for fold in cv_indices]
test_datasets = [train_df.iloc[fold['test']] for fold in cv_indices]

### `save_metrics_multilabel`
Saves metrics of the `simpletransformers` model.

- `filename: str` — Filename to use.
- `data: tuple` — Precision, recall, F1, and support arrays (in that order), as returned by `precision_recall_fscore_support`.

In [32]:
def save_metrics_multilabel(filename, data):
    """Write per-label metrics to `filename` as indented JSON.

    Args:
        filename: Path of the JSON file to create or overwrite.
        data: Indexable whose items 0-3 are the precision, recall, f1 and
            support arrays, in that order; each must provide `.tolist()`.
    """
    metric_names = ("precision", "recall", "f1", "support")
    payload = {name: data[pos].tolist() for pos, name in enumerate(metric_names)}

    # Serialize before opening so a failure here never truncates an existing file.
    serialized = json.dumps(payload, indent=4)
    with open(filename, "w") as handle:
        handle.write(serialized)

In [33]:
# Prepare model
# The same argument object is reused for every fold; checkpoints are written
# under `save_dir` and overwritten from fold to fold.
model_args = MultiLabelClassificationArgs(
    learning_rate = 5e-5,
    num_train_epochs=3,
    output_dir = save_dir,
    overwrite_output_dir = True
)

# Perform cross-validated training
for i, (train, test) in enumerate(zip(train_datasets, test_datasets)):
    # Reset and train model: a fresh model is loaded from `load_dir` for each
    # fold so no weights carry over between folds.
    model = MultiLabelClassificationModel(
        "roberta",
        load_dir,
        num_labels=num_labels,
        args = model_args
    )
    model.train_model(train)

    # Predict, evaluate, and save
    # Raw dataset
    preds, outputs = model.predict(test["text"].tolist())
    metrics = precision_recall_fscore_support(test["labels"].tolist(), preds)

    save_preds(f'{save_dir}/predictions/all/fold_{i}.json', preds)
    # Use the multilabel saver here as well: `metrics` is the
    # (precision, recall, f1, support) tuple that `load_metrics_multilabel`
    # expects to find in both the `all` and `abusive-only` folders.
    save_metrics_multilabel(f'{save_dir}/metrics/all/fold_{i}.json', metrics)

    # Assume dataset is purely abusive
    # NOTE(review): presumably class 0 encodes "non-abusive" - confirm against
    # the dataset's class encoding.
    abusive_test = test[test["class"] != 0]
    preds, outputs = model.predict(abusive_test["text"].tolist())
    metrics = precision_recall_fscore_support(abusive_test["labels"].tolist(), preds)

    save_preds(f'{save_dir}/predictions/abusive-only/fold_{i}.json', preds)
    save_metrics_multilabel(f'{save_dir}/metrics/abusive-only/fold_{i}.json', metrics)

Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaForMultiLabelSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForMultiLabelSequenceClassification were not initialized from the model checkpoint at jcblaise/roberta-tagalog-base and are newly initialized: ['classifier.out_proj.weight', 'roberta.pooler.den

Epochs 0/3. Running Loss:    0.2229:  57%|████████████████████████▉                   | 60/106 [00:04<00:03, 13.57it/s][A
Epochs 0/3. Running Loss:    0.1924:  57%|████████████████████████▉                   | 60/106 [00:04<00:03, 13.57it/s][A
Epochs 0/3. Running Loss:    0.1924:  58%|█████████████████████████▎                  | 61/106 [00:04<00:03, 13.50it/s][A
Epochs 0/3. Running Loss:    0.1895:  58%|█████████████████████████▎                  | 61/106 [00:04<00:03, 13.50it/s][A
Epochs 0/3. Running Loss:    0.1895:  58%|█████████████████████████▋                  | 62/106 [00:04<00:03, 13.56it/s][A
Epochs 0/3. Running Loss:    0.1765:  58%|█████████████████████████▋                  | 62/106 [00:04<00:03, 13.56it/s][A
Epochs 0/3. Running Loss:    0.1765:  59%|██████████████████████████▏                 | 63/106 [00:04<00:03, 13.54it/s][A
Epochs 0/3. Running Loss:    0.2724:  59%|██████████████████████████▏                 | 63/106 [00:05<00:03, 13.54it/s][A
Epochs 0/3. Runn

Epochs 1/3. Running Loss:    0.1758:  18%|███████▉                                    | 19/106 [00:01<00:06, 13.53it/s][A
Epochs 1/3. Running Loss:    0.2616:  18%|███████▉                                    | 19/106 [00:01<00:06, 13.53it/s][A
Epochs 1/3. Running Loss:    0.2616:  19%|████████▎                                   | 20/106 [00:01<00:06, 13.52it/s][A
Epochs 1/3. Running Loss:    0.1655:  19%|████████▎                                   | 20/106 [00:01<00:06, 13.52it/s][A
Epochs 1/3. Running Loss:    0.1655:  20%|████████▋                                   | 21/106 [00:01<00:06, 13.52it/s][A
Epochs 1/3. Running Loss:    0.2735:  20%|████████▋                                   | 21/106 [00:01<00:06, 13.52it/s][A
Epochs 1/3. Running Loss:    0.2735:  21%|█████████▏                                  | 22/106 [00:01<00:06, 13.57it/s][A
Epochs 1/3. Running Loss:    0.2365:  21%|█████████▏                                  | 22/106 [00:01<00:06, 13.57it/s][A
Epochs 1/3. Runn

Epochs 1/3. Running Loss:    0.1821:  80%|███████████████████████████████████▎        | 85/106 [00:06<00:01, 13.57it/s][A
Epochs 1/3. Running Loss:    0.1925:  80%|███████████████████████████████████▎        | 85/106 [00:06<00:01, 13.57it/s][A
Epochs 1/3. Running Loss:    0.1925:  81%|███████████████████████████████████▋        | 86/106 [00:06<00:01, 13.55it/s][A
Epochs 1/3. Running Loss:    0.2152:  81%|███████████████████████████████████▋        | 86/106 [00:06<00:01, 13.55it/s][A
Epochs 1/3. Running Loss:    0.2152:  82%|████████████████████████████████████        | 87/106 [00:06<00:01, 13.54it/s][A
Epochs 1/3. Running Loss:    0.1809:  82%|████████████████████████████████████        | 87/106 [00:06<00:01, 13.54it/s][A
Epochs 1/3. Running Loss:    0.1809:  83%|████████████████████████████████████▌       | 88/106 [00:06<00:01, 13.53it/s][A
Epochs 1/3. Running Loss:    0.2081:  83%|████████████████████████████████████▌       | 88/106 [00:06<00:01, 13.53it/s][A
Epochs 1/3. Runn

Epochs 2/3. Running Loss:    0.1410:  42%|██████████████████▎                         | 44/106 [00:03<00:04, 13.51it/s][A
Epochs 2/3. Running Loss:    0.1539:  42%|██████████████████▎                         | 44/106 [00:03<00:04, 13.51it/s][A
Epochs 2/3. Running Loss:    0.1539:  42%|██████████████████▋                         | 45/106 [00:03<00:04, 13.51it/s][A
Epochs 2/3. Running Loss:    0.1266:  42%|██████████████████▋                         | 45/106 [00:03<00:04, 13.51it/s][A
Epochs 2/3. Running Loss:    0.1266:  43%|███████████████████                         | 46/106 [00:03<00:04, 13.51it/s][A
Epochs 2/3. Running Loss:    0.2401:  43%|███████████████████                         | 46/106 [00:03<00:04, 13.51it/s][A
Epochs 2/3. Running Loss:    0.2401:  44%|███████████████████▌                        | 47/106 [00:03<00:04, 13.56it/s][A
Epochs 2/3. Running Loss:    0.1258:  44%|███████████████████▌                        | 47/106 [00:03<00:04, 13.56it/s][A
Epochs 2/3. Runn

  1%|▌                                                                                 | 1/142 [00:05<11:46,  5.01s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 37.41it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaForMultiLabelSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model tha

Epochs 0/3. Running Loss:    0.2139:  53%|███████████████████████▏                    | 56/106 [00:04<00:03, 13.53it/s][A
Epochs 0/3. Running Loss:    0.2139:  54%|███████████████████████▋                    | 57/106 [00:04<00:03, 13.52it/s][A
Epochs 0/3. Running Loss:    0.2690:  54%|███████████████████████▋                    | 57/106 [00:04<00:03, 13.52it/s][A
Epochs 0/3. Running Loss:    0.2690:  55%|████████████████████████                    | 58/106 [00:04<00:03, 13.57it/s][A
Epochs 0/3. Running Loss:    0.2199:  55%|████████████████████████                    | 58/106 [00:04<00:03, 13.57it/s][A
Epochs 0/3. Running Loss:    0.2199:  56%|████████████████████████▍                   | 59/106 [00:04<00:03, 13.55it/s][A
Epochs 0/3. Running Loss:    0.1971:  56%|████████████████████████▍                   | 59/106 [00:04<00:03, 13.55it/s][A
Epochs 0/3. Running Loss:    0.1971:  57%|████████████████████████▉                   | 60/106 [00:04<00:03, 13.60it/s][A
Epochs 0/3. Runn

Epochs 1/3. Running Loss:    0.3149:  14%|██████▏                                     | 15/106 [00:01<00:06, 13.49it/s][A
Epochs 1/3. Running Loss:    0.3149:  15%|██████▋                                     | 16/106 [00:01<00:06, 13.49it/s][A
Epochs 1/3. Running Loss:    0.1432:  15%|██████▋                                     | 16/106 [00:01<00:06, 13.49it/s][A
Epochs 1/3. Running Loss:    0.1432:  16%|███████                                     | 17/106 [00:01<00:06, 13.50it/s][A
Epochs 1/3. Running Loss:    0.1443:  16%|███████                                     | 17/106 [00:01<00:06, 13.50it/s][A
Epochs 1/3. Running Loss:    0.1443:  17%|███████▍                                    | 18/106 [00:01<00:06, 13.50it/s][A
Epochs 1/3. Running Loss:    0.2453:  17%|███████▍                                    | 18/106 [00:01<00:06, 13.50it/s][A
Epochs 1/3. Running Loss:    0.2453:  18%|███████▉                                    | 19/106 [00:01<00:06, 13.56it/s][A
Epochs 1/3. Runn

Epochs 1/3. Running Loss:    0.2946:  76%|█████████████████████████████████▌          | 81/106 [00:06<00:01, 13.58it/s][A
Epochs 1/3. Running Loss:    0.2946:  77%|██████████████████████████████████          | 82/106 [00:06<00:01, 13.62it/s][A
Epochs 1/3. Running Loss:    0.1415:  77%|██████████████████████████████████          | 82/106 [00:06<00:01, 13.62it/s][A
Epochs 1/3. Running Loss:    0.1415:  78%|██████████████████████████████████▍         | 83/106 [00:06<00:01, 13.58it/s][A
Epochs 1/3. Running Loss:    0.1126:  78%|██████████████████████████████████▍         | 83/106 [00:06<00:01, 13.58it/s][A
Epochs 1/3. Running Loss:    0.1126:  79%|██████████████████████████████████▊         | 84/106 [00:06<00:01, 13.56it/s][A
Epochs 1/3. Running Loss:    0.2232:  79%|██████████████████████████████████▊         | 84/106 [00:06<00:01, 13.56it/s][A
Epochs 1/3. Running Loss:    0.2232:  80%|███████████████████████████████████▎        | 85/106 [00:06<00:01, 13.60it/s][A
Epochs 1/3. Runn

Epochs 2/3. Running Loss:    0.2156:  38%|████████████████▌                           | 40/106 [00:03<00:04, 13.57it/s][A
Epochs 2/3. Running Loss:    0.2156:  39%|█████████████████                           | 41/106 [00:03<00:04, 13.61it/s][A
Epochs 2/3. Running Loss:    0.1690:  39%|█████████████████                           | 41/106 [00:03<00:04, 13.61it/s][A
Epochs 2/3. Running Loss:    0.1690:  40%|█████████████████▍                          | 42/106 [00:03<00:04, 13.58it/s][A
Epochs 2/3. Running Loss:    0.2717:  40%|█████████████████▍                          | 42/106 [00:03<00:04, 13.58it/s][A
Epochs 2/3. Running Loss:    0.2717:  41%|█████████████████▊                          | 43/106 [00:03<00:04, 13.56it/s][A
Epochs 2/3. Running Loss:    0.1699:  41%|█████████████████▊                          | 43/106 [00:03<00:04, 13.56it/s][A
Epochs 2/3. Running Loss:    0.1699:  42%|██████████████████▎                         | 44/106 [00:03<00:04, 13.60it/s][A
Epochs 2/3. Runn

Epoch 3 of 3: 100%|██████████████████████████████████████████████████████████████████████| 3/3 [00:43<00:00, 14.58s/it]
  0%|▍                                                                                 | 1/212 [00:05<17:44,  5.05s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 33.70it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  1%|▌                                                                                 | 1/141 [00:05<11:40,  5.00s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 34.94it/s]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at jcblaise/roberta-tagalog-base were not used when initializing RobertaForMultiLabelSequenceClassification: ['lm_head.layer_norm.weight', 'lm_

Epochs 0/3. Running Loss:    0.2143:  49%|█████████████████████▌                      | 52/106 [00:04<00:03, 13.60it/s][A
Epochs 0/3. Running Loss:    0.2143:  50%|██████████████████████                      | 53/106 [00:04<00:03, 13.57it/s][A
Epochs 0/3. Running Loss:    0.2762:  50%|██████████████████████                      | 53/106 [00:04<00:03, 13.57it/s][A
Epochs 0/3. Running Loss:    0.2762:  51%|██████████████████████▍                     | 54/106 [00:04<00:03, 13.55it/s][A
Epochs 0/3. Running Loss:    0.2269:  51%|██████████████████████▍                     | 54/106 [00:04<00:03, 13.55it/s][A
Epochs 0/3. Running Loss:    0.2269:  52%|██████████████████████▊                     | 55/106 [00:04<00:03, 13.59it/s][A
Epochs 0/3. Running Loss:    0.2050:  52%|██████████████████████▊                     | 55/106 [00:04<00:03, 13.59it/s][A
Epochs 0/3. Running Loss:    0.2050:  53%|███████████████████████▏                    | 56/106 [00:04<00:03, 13.57it/s][A
Epochs 0/3. Runn

Epochs 1/3. Running Loss:    0.2193:  10%|████▌                                       | 11/106 [00:01<00:07, 13.08it/s][A
Epochs 1/3. Running Loss:    0.2193:  11%|████▉                                       | 12/106 [00:01<00:07, 13.26it/s][A
Epochs 1/3. Running Loss:    0.1506:  11%|████▉                                       | 12/106 [00:01<00:07, 13.26it/s][A
Epochs 1/3. Running Loss:    0.1506:  12%|█████▍                                      | 13/106 [00:01<00:06, 13.34it/s][A
Epochs 1/3. Running Loss:    0.2358:  12%|█████▍                                      | 13/106 [00:01<00:06, 13.34it/s][A
Epochs 1/3. Running Loss:    0.2358:  13%|█████▊                                      | 14/106 [00:01<00:06, 13.39it/s][A
Epochs 1/3. Running Loss:    0.1838:  13%|█████▊                                      | 14/106 [00:01<00:06, 13.39it/s][A
Epochs 1/3. Running Loss:    0.1838:  14%|██████▏                                     | 15/106 [00:01<00:06, 13.48it/s][A
Epochs 1/3. Runn

Epochs 1/3. Running Loss:    0.1927:  73%|███████████████████████████████▉            | 77/106 [00:05<00:02, 13.57it/s][A
Epochs 1/3. Running Loss:    0.1927:  74%|████████████████████████████████▍           | 78/106 [00:05<00:02, 13.55it/s][A
Epochs 1/3. Running Loss:    0.2178:  74%|████████████████████████████████▍           | 78/106 [00:06<00:02, 13.55it/s][A
Epochs 1/3. Running Loss:    0.2178:  75%|████████████████████████████████▊           | 79/106 [00:06<00:01, 13.54it/s][A
Epochs 1/3. Running Loss:    0.1723:  75%|████████████████████████████████▊           | 79/106 [00:06<00:01, 13.54it/s][A
Epochs 1/3. Running Loss:    0.1723:  75%|█████████████████████████████████▏          | 80/106 [00:06<00:01, 13.53it/s][A
Epochs 1/3. Running Loss:    0.2749:  75%|█████████████████████████████████▏          | 80/106 [00:06<00:01, 13.53it/s][A
Epochs 1/3. Running Loss:    0.2749:  76%|█████████████████████████████████▌          | 81/106 [00:06<00:01, 13.58it/s][A
Epochs 1/3. Runn

Epochs 2/3. Running Loss:    0.1538:  34%|██████████████▉                             | 36/106 [00:02<00:05, 13.58it/s][A
Epochs 2/3. Running Loss:    0.1538:  35%|███████████████▎                            | 37/106 [00:02<00:05, 13.56it/s][A
Epochs 2/3. Running Loss:    0.1568:  35%|███████████████▎                            | 37/106 [00:02<00:05, 13.56it/s][A
Epochs 2/3. Running Loss:    0.1568:  36%|███████████████▊                            | 38/106 [00:02<00:05, 13.54it/s][A
Epochs 2/3. Running Loss:    0.1398:  36%|███████████████▊                            | 38/106 [00:03<00:05, 13.54it/s][A
Epochs 2/3. Running Loss:    0.1398:  37%|████████████████▏                           | 39/106 [00:03<00:04, 13.53it/s][A
Epochs 2/3. Running Loss:    0.2168:  37%|████████████████▏                           | 39/106 [00:03<00:04, 13.53it/s][A
Epochs 2/3. Running Loss:    0.2168:  38%|████████████████▌                           | 40/106 [00:03<00:04, 13.58it/s][A
Epochs 2/3. Runn

Epochs 2/3. Running Loss:    0.1875:  96%|█████████████████████████████████████████▍ | 102/106 [00:07<00:00, 13.54it/s][A
Epochs 2/3. Running Loss:    0.1875:  97%|█████████████████████████████████████████▊ | 103/106 [00:07<00:00, 13.59it/s][A
Epochs 2/3. Running Loss:    0.1547:  97%|█████████████████████████████████████████▊ | 103/106 [00:07<00:00, 13.59it/s][A
Epochs 2/3. Running Loss:    0.1547:  98%|██████████████████████████████████████████▏| 104/106 [00:07<00:00, 13.56it/s][A
Epochs 2/3. Running Loss:    0.2003:  98%|██████████████████████████████████████████▏| 104/106 [00:07<00:00, 13.56it/s][A
Epochs 2/3. Running Loss:    0.2003:  99%|██████████████████████████████████████████▌| 105/106 [00:07<00:00, 13.55it/s][A
Epochs 2/3. Running Loss:    0.3384:  99%|██████████████████████████████████████████▌| 105/106 [00:07<00:00, 13.55it/s][A
Epochs 2/3. Running Loss:    0.3384: 100%|███████████████████████████████████████████| 106/106 [00:07<00:00, 13.27it/s][A
Epoch 3 of 3: 10

Epochs 0/3. Running Loss:    0.2115:  45%|███████████████████▉                        | 48/106 [00:03<00:04, 13.61it/s][A
Epochs 0/3. Running Loss:    0.2115:  46%|████████████████████▎                       | 49/106 [00:03<00:04, 13.64it/s][A
Epochs 0/3. Running Loss:    0.3054:  46%|████████████████████▎                       | 49/106 [00:03<00:04, 13.64it/s][A
Epochs 0/3. Running Loss:    0.3054:  47%|████████████████████▊                       | 50/106 [00:03<00:04, 13.60it/s][A
Epochs 0/3. Running Loss:    0.2424:  47%|████████████████████▊                       | 50/106 [00:04<00:04, 13.60it/s][A
Epochs 0/3. Running Loss:    0.2424:  48%|█████████████████████▏                      | 51/106 [00:04<00:04, 13.57it/s][A
Epochs 0/3. Running Loss:    0.2378:  48%|█████████████████████▏                      | 51/106 [00:04<00:04, 13.57it/s][A
Epochs 0/3. Running Loss:    0.2378:  49%|█████████████████████▌                      | 52/106 [00:04<00:03, 13.61it/s][A
Epochs 0/3. Runn

Epochs 1/3. Running Loss:    0.2570:   7%|██▉                                          | 7/106 [00:00<00:08, 12.21it/s][A
Epochs 1/3. Running Loss:    0.2570:   8%|███▍                                         | 8/106 [00:00<00:07, 12.60it/s][A
Epochs 1/3. Running Loss:    0.1839:   8%|███▍                                         | 8/106 [00:00<00:07, 12.60it/s][A
Epochs 1/3. Running Loss:    0.1839:   8%|███▊                                         | 9/106 [00:00<00:07, 12.87it/s][A
Epochs 1/3. Running Loss:    0.2820:   8%|███▊                                         | 9/106 [00:00<00:07, 12.87it/s][A
Epochs 1/3. Running Loss:    0.2820:   9%|████▏                                       | 10/106 [00:00<00:07, 13.11it/s][A
Epochs 1/3. Running Loss:    0.2630:   9%|████▏                                       | 10/106 [00:00<00:07, 13.11it/s][A
Epochs 1/3. Running Loss:    0.2630:  10%|████▌                                       | 11/106 [00:01<00:07, 13.23it/s][A
Epochs 1/3. Runn

Epochs 1/3. Running Loss:    0.1418:  69%|██████████████████████████████▎             | 73/106 [00:05<00:02, 13.61it/s][A
Epochs 1/3. Running Loss:    0.1418:  70%|██████████████████████████████▋             | 74/106 [00:05<00:02, 13.58it/s][A
Epochs 1/3. Running Loss:    0.1620:  70%|██████████████████████████████▋             | 74/106 [00:05<00:02, 13.58it/s][A
Epochs 1/3. Running Loss:    0.1620:  71%|███████████████████████████████▏            | 75/106 [00:05<00:02, 13.56it/s][A
Epochs 1/3. Running Loss:    0.2305:  71%|███████████████████████████████▏            | 75/106 [00:05<00:02, 13.56it/s][A
Epochs 1/3. Running Loss:    0.2305:  72%|███████████████████████████████▌            | 76/106 [00:05<00:02, 13.54it/s][A
Epochs 1/3. Running Loss:    0.2302:  72%|███████████████████████████████▌            | 76/106 [00:05<00:02, 13.54it/s][A
Epochs 1/3. Running Loss:    0.2302:  73%|███████████████████████████████▉            | 77/106 [00:05<00:02, 13.59it/s][A
Epochs 1/3. Runn

Epochs 2/3. Running Loss:    0.2097:  30%|█████████████▎                              | 32/106 [00:02<00:05, 13.58it/s][A
Epochs 2/3. Running Loss:    0.2097:  31%|█████████████▋                              | 33/106 [00:02<00:05, 13.56it/s][A
Epochs 2/3. Running Loss:    0.1542:  31%|█████████████▋                              | 33/106 [00:02<00:05, 13.56it/s][A
Epochs 2/3. Running Loss:    0.1542:  32%|██████████████                              | 34/106 [00:02<00:05, 13.49it/s][A
Epochs 2/3. Running Loss:    0.1924:  32%|██████████████                              | 34/106 [00:02<00:05, 13.49it/s][A
Epochs 2/3. Running Loss:    0.1924:  33%|██████████████▌                             | 35/106 [00:02<00:05, 13.55it/s][A
Epochs 2/3. Running Loss:    0.1949:  33%|██████████████▌                             | 35/106 [00:02<00:05, 13.55it/s][A
Epochs 2/3. Running Loss:    0.1949:  34%|██████████████▉                             | 36/106 [00:02<00:05, 13.54it/s][A
Epochs 2/3. Runn

Epochs 2/3. Running Loss:    0.2820:  92%|████████████████████████████████████████▋   | 98/106 [00:07<00:00, 13.52it/s][A
Epochs 2/3. Running Loss:    0.2820:  93%|█████████████████████████████████████████   | 99/106 [00:07<00:00, 13.57it/s][A
Epochs 2/3. Running Loss:    0.1217:  93%|█████████████████████████████████████████   | 99/106 [00:07<00:00, 13.57it/s][A
Epochs 2/3. Running Loss:    0.1217:  94%|████████████████████████████████████████▌  | 100/106 [00:07<00:00, 13.55it/s][A
Epochs 2/3. Running Loss:    0.2527:  94%|████████████████████████████████████████▌  | 100/106 [00:07<00:00, 13.55it/s][A
Epochs 2/3. Running Loss:    0.2527:  95%|████████████████████████████████████████▉  | 101/106 [00:07<00:00, 13.54it/s][A
Epochs 2/3. Running Loss:    0.1687:  95%|████████████████████████████████████████▉  | 101/106 [00:07<00:00, 13.54it/s][A
Epochs 2/3. Running Loss:    0.1687:  96%|█████████████████████████████████████████▍ | 102/106 [00:07<00:00, 13.59it/s][A
Epochs 2/3. Runn

Epochs 0/3. Running Loss:    0.2187:  42%|██████████████████▎                         | 44/106 [00:03<00:04, 13.54it/s][A
Epochs 0/3. Running Loss:    0.2187:  42%|██████████████████▋                         | 45/106 [00:03<00:04, 13.53it/s][A
Epochs 0/3. Running Loss:    0.2294:  42%|██████████████████▋                         | 45/106 [00:03<00:04, 13.53it/s][A
Epochs 0/3. Running Loss:    0.2294:  43%|███████████████████                         | 46/106 [00:03<00:04, 13.52it/s][A
Epochs 0/3. Running Loss:    0.1730:  43%|███████████████████                         | 46/106 [00:03<00:04, 13.52it/s][A
Epochs 0/3. Running Loss:    0.1730:  44%|███████████████████▌                        | 47/106 [00:03<00:04, 13.52it/s][A
Epochs 0/3. Running Loss:    0.1620:  44%|███████████████████▌                        | 47/106 [00:03<00:04, 13.52it/s][A
Epochs 0/3. Running Loss:    0.1620:  45%|███████████████████▉                        | 48/106 [00:03<00:04, 13.51it/s][A
Epochs 0/3. Runn

Epochs 1/3. Running Loss:    0.2994:   3%|█▎                                           | 3/106 [00:00<00:13,  7.92it/s][A
Epochs 1/3. Running Loss:    0.2994:   4%|█▋                                           | 4/106 [00:00<00:10,  9.47it/s][A
Epochs 1/3. Running Loss:    0.2660:   4%|█▋                                           | 4/106 [00:00<00:10,  9.47it/s][A
Epochs 1/3. Running Loss:    0.2660:   5%|██                                           | 5/106 [00:00<00:09, 10.57it/s][A
Epochs 1/3. Running Loss:    0.2295:   5%|██                                           | 5/106 [00:00<00:09, 10.57it/s][A
Epochs 1/3. Running Loss:    0.2295:   6%|██▌                                          | 6/106 [00:00<00:08, 11.33it/s][A
Epochs 1/3. Running Loss:    0.2179:   6%|██▌                                          | 6/106 [00:00<00:08, 11.33it/s][A
Epochs 1/3. Running Loss:    0.2179:   7%|██▉                                          | 7/106 [00:00<00:08, 12.00it/s][A
Epochs 1/3. Runn

Epochs 1/3. Running Loss:    0.1694:  65%|████████████████████████████▋               | 69/106 [00:05<00:02, 13.46it/s][A
Epochs 1/3. Running Loss:    0.1694:  66%|█████████████████████████████               | 70/106 [00:05<00:02, 13.48it/s][A
Epochs 1/3. Running Loss:    0.2183:  66%|█████████████████████████████               | 70/106 [00:05<00:02, 13.48it/s][A
Epochs 1/3. Running Loss:    0.2183:  67%|█████████████████████████████▍              | 71/106 [00:05<00:02, 13.54it/s][A
Epochs 1/3. Running Loss:    0.2688:  67%|█████████████████████████████▍              | 71/106 [00:05<00:02, 13.54it/s][A
Epochs 1/3. Running Loss:    0.2688:  68%|█████████████████████████████▉              | 72/106 [00:05<00:02, 13.53it/s][A
Epochs 1/3. Running Loss:    0.1781:  68%|█████████████████████████████▉              | 72/106 [00:05<00:02, 13.53it/s][A
Epochs 1/3. Running Loss:    0.1781:  69%|██████████████████████████████▎             | 73/106 [00:05<00:02, 13.52it/s][A
Epochs 1/3. Runn

Epochs 2/3. Running Loss:    0.1475:  26%|███████████▌                                | 28/106 [00:02<00:05, 13.50it/s][A
Epochs 2/3. Running Loss:    0.1475:  27%|████████████                                | 29/106 [00:02<00:05, 13.50it/s][A
Epochs 2/3. Running Loss:    0.2675:  27%|████████████                                | 29/106 [00:02<00:05, 13.50it/s][A
Epochs 2/3. Running Loss:    0.2675:  28%|████████████▍                               | 30/106 [00:02<00:05, 13.45it/s][A
Epochs 2/3. Running Loss:    0.1222:  28%|████████████▍                               | 30/106 [00:02<00:05, 13.45it/s][A
Epochs 2/3. Running Loss:    0.1222:  29%|████████████▊                               | 31/106 [00:02<00:05, 13.47it/s][A
Epochs 2/3. Running Loss:    0.2103:  29%|████████████▊                               | 31/106 [00:02<00:05, 13.47it/s][A
Epochs 2/3. Running Loss:    0.2103:  30%|█████████████▎                              | 32/106 [00:02<00:05, 13.37it/s][A
Epochs 2/3. Runn

Epochs 2/3. Running Loss:    0.1735:  89%|███████████████████████████████████████     | 94/106 [00:07<00:00, 13.49it/s][A
Epochs 2/3. Running Loss:    0.1735:  90%|███████████████████████████████████████▍    | 95/106 [00:07<00:00, 13.50it/s][A
Epochs 2/3. Running Loss:    0.3358:  90%|███████████████████████████████████████▍    | 95/106 [00:07<00:00, 13.50it/s][A
Epochs 2/3. Running Loss:    0.3358:  91%|███████████████████████████████████████▊    | 96/106 [00:07<00:00, 13.50it/s][A
Epochs 2/3. Running Loss:    0.1741:  91%|███████████████████████████████████████▊    | 96/106 [00:07<00:00, 13.50it/s][A
Epochs 2/3. Running Loss:    0.1741:  92%|████████████████████████████████████████▎   | 97/106 [00:07<00:00, 13.50it/s][A
Epochs 2/3. Running Loss:    0.1375:  92%|████████████████████████████████████████▎   | 97/106 [00:07<00:00, 13.50it/s][A
Epochs 2/3. Running Loss:    0.1375:  92%|████████████████████████████████████████▋   | 98/106 [00:07<00:00, 13.50it/s][A
Epochs 2/3. Runn

### `load_metrics_multilabel`
Loads all fold metrics of a model and inserts them in a dataframe. Specific to metrics saved by the `save_metrics_multilabel` function.

- `directory: str` — Directory of the folder that contains the metric logs.

Returns: `DataFrame` containing all fold metrics.

In [34]:
def load_metrics_multilabel(directory, label_names=None):
    """Load every fold's per-label metrics from `directory` into one DataFrame.

    Each `*.json` file in `directory` is expected to hold the keys
    "precision", "recall", "f1" and "support", each mapping to a list with one
    value per label (the layout written by `save_metrics_multilabel`).

    Args:
        directory: Folder containing one JSON metrics file per fold.
        label_names: Row labels, one per entry of each metric list. Defaults
            to the notebook-level `labels` list.

    Returns:
        DataFrame with columns Precision, Recall, F1 and Support, indexed by
        label name. The folds are stacked one after another, so each label
        appears once per fold.

    Raises:
        ValueError: If `directory` contains no JSON metrics files.
    """
    if label_names is None:
        label_names = labels

    # JSON key -> column name; spelled out so the column order never depends
    # on the key order inside the files.
    columns = {
        "precision": "Precision",
        "recall": "Recall",
        "f1": "F1",
        "support": "Support",
    }

    # `os.listdir` returns entries in arbitrary order and may include
    # non-metric entries, so keep only JSON files and read them in a
    # reproducible order. Sorting by (length, name) puts fold_2 before fold_10.
    filenames = sorted(
        (name for name in os.listdir(directory) if name.endswith(".json")),
        key=lambda name: (len(name), name),
    )
    if not filenames:
        raise ValueError(f"No metric files found in {directory!r}")

    fold_frames = []
    for name in filenames:
        with open(os.path.join(directory, name)) as f:
            fold = json.load(f)
        fold_frames.append(
            pd.DataFrame(
                {column: fold[key] for key, column in columns.items()},
                index=label_names,
            )
        )

    return pd.concat(fold_frames)

In [35]:
metric_df = load_metrics_multilabel(f'{save_dir}/metrics/all')
print("Cross-validation fold metrics (All classes)")

# Number the folds from the data instead of assuming 5. The stable sort keeps
# each label's rows in load order, so the fold numbers attached below refer
# to the same fold for every label.
n_folds = len(metric_df) // len(labels)
metric_df.sort_index(kind="mergesort").set_index(
    pd.MultiIndex.from_product([labels, range(n_folds)])
)

Cross-validation fold metrics (All classes)


Unnamed: 0,Unnamed: 1,Precision,Recall,F1,Support
E1,0,0.9,0.439024,0.590164,41
E1,1,0.586207,0.586207,0.586207,29
E1,2,0.678571,0.575758,0.622951,33
E1,3,0.758621,0.628571,0.6875,35
E1,4,0.740741,0.5,0.597015,40
E2,0,0.490909,0.613636,0.545455,44
E2,1,0.581395,0.520833,0.549451,48
E2,2,0.354839,0.305556,0.328358,36
E2,3,0.628571,0.44898,0.52381,49
E2,4,0.633333,0.38,0.475,50


In [36]:
print("Cross-validation average (All classes)")
# Average every metric column over the folds, one row per label.
metric_df.groupby(level=0).mean()

Cross-validation average (All classes)


Unnamed: 0,Precision,Recall,F1,Support
E1,0.732828,0.545912,0.616767,35.6
E2,0.53781,0.453801,0.484415,45.4
E3,0.0,0.0,0.0,0.8
I1,0.0,0.0,0.0,5.4
I2,0.0,0.0,0.0,22.6
I3,0.0,0.0,0.0,0.0
I4,0.0,0.0,0.0,3.6
I5,0.0,0.0,0.0,1.8
I6,0.0,0.0,0.0,6.6
I7,0.60525,0.278684,0.353205,59.4


In [37]:
metric_df = load_metrics_multilabel(f'{save_dir}/metrics/abusive-only')
print("Cross-validation fold metrics (Assuming abusive input)")

# Number the folds from the data instead of assuming 5. The stable sort keeps
# each label's rows in load order, so the fold numbers attached below refer
# to the same fold for every label.
n_folds = len(metric_df) // len(labels)
metric_df.sort_index(kind="mergesort").set_index(
    pd.MultiIndex.from_product([labels, range(n_folds)])
)

Cross-validation fold metrics (Assuming abusive input)


Unnamed: 0,Unnamed: 1,Precision,Recall,F1,Support
E1,0,0.9,0.439024,0.590164,41
E1,1,0.586207,0.586207,0.586207,29
E1,2,0.730769,0.575758,0.644068,33
E1,3,0.785714,0.628571,0.698413,35
E1,4,0.769231,0.5,0.606061,40
E2,0,0.5,0.613636,0.55102,44
E2,1,0.581395,0.520833,0.549451,48
E2,2,0.366667,0.305556,0.333333,36
E2,3,0.628571,0.44898,0.52381,49
E2,4,0.633333,0.38,0.475,50


In [38]:
print("Cross-validation average (Assuming abusive input)")
# Average every metric column over the folds, one row per label.
metric_df.groupby(level=0).mean()

Cross-validation average (Assuming abusive input)


Unnamed: 0,Precision,Recall,F1,Support
E1,0.754384,0.545912,0.624982,35.6
E2,0.541993,0.453801,0.486523,45.4
E3,0.0,0.0,0.0,0.8
I1,0.0,0.0,0.0,5.4
I2,0.0,0.0,0.0,22.6
I3,0.0,0.0,0.0,0.0
I4,0.0,0.0,0.0,3.6
I5,0.0,0.0,0.0,1.8
I6,0.0,0.0,0.0,6.6
I7,0.687203,0.278684,0.369699,59.4
