In [None]:
! pip install -q accelerate datasets evaluate

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
import torch.nn as nn
import numpy as np
from datasets import load_dataset, DatasetDict
import evaluate
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score

from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorWithPadding, EvalPrediction
from transformers.optimization import AdamW

from time import time
from transformers import set_seed
# Seed the random number generators through transformers' set_seed helper so
# that repeated runs of the notebook start from the same random state.
set_seed(365)

In [None]:
# Class names; a name's position in this list is its integer class id.
classes = ['HAPPY', 'SAD', 'ANGRY', 'FEAR', 'SURPRISE', 'HATE', 'OTHER']
# Lookup tables in both directions: name -> id and id -> name.
class2id = {name: idx for idx, name in enumerate(classes)}
id2class = dict(enumerate(classes))

def sigmoid(X):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-X)).

    Computed as exp(-log(1 + exp(-X))) via ``np.logaddexp`` so that large
    negative inputs do not overflow inside ``exp`` (the naive form emits an
    overflow RuntimeWarning for them).

    Args:
        X: scalar or array-like of real values (e.g. model logits).

    Returns:
        Values in [0, 1] with the same shape as ``X``.
    """
    # logaddexp(0, -X) == log(exp(0) + exp(-X)) == log(1 + exp(-X)), evaluated stably.
    return np.exp(-np.logaddexp(0.0, np.negative(X)))

def heaviside(X):
    """Binarise values with a step at 0.5.

    Entries strictly greater than 0.5 map to 1, entries below 0.5 map to 0,
    and entries exactly equal to 0.5 map to 0 (the second argument of
    ``np.heaviside`` is the value used at the step itself).
    """
    shifted = X - 0.5  # move the threshold from 0.5 to the step location 0
    return np.heaviside(shifted, 0)

def onehot(ids, size=len(classes)):
  """Turn an array of class indices into one-hot rows.

  Args:
    ids: integer index array; its first dimension gives the number of rows.
    size: width of each one-hot row (defaults to the number of classes).

  Returns:
    Float array of shape (ids.shape[0], size) that is 1 at column ids[r] of
    row r and 0 everywhere else.
  """
  n_rows = ids.shape[0]
  encoded = np.zeros((n_rows, size))
  # Fancy indexing: set (row r, column ids[r]) for every row at once.
  encoded[np.arange(n_rows), ids] = 1
  return encoded


def compute_metrics(eval_preds: EvalPrediction):
  """Compute accuracy plus per-class and macro precision/recall/F1.

  The highest-scoring class of every example is one-hot encoded and compared
  against ``eval_preds.label_ids``.
  NOTE(review): this presumably expects the labels to be 0/1 indicator rows
  of width len(classes) -- confirm against the dataset.

  Args:
    eval_preds: object exposing ``predictions`` (logits) and ``label_ids``.

  Returns:
    dict with 'accuracy', 'precision_macro', 'recall_macro', 'f1_macro' and
    the per-class entries 'f1_C{i}', 'recall_C{i}', 'precision_C{i}'.
  """
  labels = eval_preds.label_ids
  top_class = np.argmax(sigmoid(eval_preds.predictions), axis=-1)
  predictions = onehot(top_class)

  def macro(scorer):
    # Unweighted mean of the per-class scores.
    return scorer(labels, predictions, average='macro', zero_division=0.0)

  def per_class(prefix, scorer):
    # One entry per class, keyed '<prefix>_C<class index>'.
    scores = scorer(labels, predictions, average=None, zero_division=0.0)
    return {f'{prefix}_C{i}': scores[i] for i in range(len(scores))}

  results = {
      'accuracy': accuracy_score(labels, predictions),
      'precision_macro': macro(precision_score),
      'recall_macro': macro(recall_score),
      'f1_macro': macro(f1_score),
  }
  # Per-class entries follow in the order F1, recall, precision.
  for prefix, scorer in (('f1', f1_score), ('recall', recall_score), ('precision', precision_score)):
    results.update(per_class(prefix, scorer))
  return results

In [None]:
# Shuffled IMDB subsets: shard 0 of 10 from the train split and shard 0 of 50
# from the test split.
# NOTE: `ds` is reassigned by the TSV-loading cell that follows, so this
# assignment only matters when that cell is skipped.
imdb_train = (
    load_dataset("imdb", split='train')
    .shuffle(seed=0)
    .shard(num_shards=10, index=0)
)
imdb_test = (
    load_dataset("imdb", split='test')
    .shuffle(seed=0)
    .shard(num_shards=50, index=0)
)
imdb = DatasetDict({'train': imdb_train, 'test': imdb_test})
ds = imdb

In [None]:
# Directory holding the tab-separated train/test files.
ds_url = f'/content/'
ds_files = dict(
    train=ds_url + 'pptrain.tsv',
    test=ds_url + 'pptest.tsv',
)

# Read both splits with the CSV loader and switch the column names to the
# lower-case ones used by the rest of the notebook.
ds = load_dataset('csv', data_files=ds_files, delimiter='\t').rename_columns(
    {'ID': 'id', 'Text': 'text', 'Label': 'label'}
)

ds

DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'label'],
        num_rows: 6125
    })
    test: Dataset({
        features: ['id', 'text', 'label'],
        num_rows: 1151
    })
})

In [None]:
def convert_labels(example):
  """Parse the bracketed label string of one example into a list of floats.

  The label is expected to look like "[0 1 0 0 0 0 1]": one enclosing
  bracket on each side and whitespace-separated numbers in between. Any
  amount of whitespace (repeated spaces, padding inside the brackets,
  newlines) is accepted between the numbers.

  Args:
    example: mapping with a string under the key 'label'.

  Returns:
    The same mapping with 'label' replaced by a list of floats.

  Raises:
    ValueError: if a token between the brackets is not a valid number.
  """
  # strip() drops surrounding whitespace so [1:-1] removes exactly the brackets;
  # split() without an argument collapses runs of whitespace, so inputs such as
  # "[0.  1.]" no longer produce empty tokens that float() rejects.
  tokens = example['label'].strip()[1:-1].split()
  example["label"] = [float(token) for token in tokens]
  return example

def replace_none_with_str(example):
  """Replace a missing text field with the empty string.

  Args:
    example: mapping with a 'text' entry that may be None.

  Returns:
    The same mapping; 'text' is '' if it was None and unchanged otherwise.
  """
  # Identity test: `== None` would defer to the value's own __eq__.
  if example['text'] is None:
    example['text'] = ''
  return example

# Clean every split: parse the label strings first, then blank out missing texts.
# Not idempotent: convert_labels slices and splits a string, so re-running this
# cell on already-converted labels (lists) fails; reload `ds` first.
ds = ds.map(convert_labels).map(replace_none_with_str)

for split_name in ('train', 'test'):
  print(ds[split_name].features)


Map:   0%|          | 0/6125 [00:00<?, ? examples/s]

Map:   0%|          | 0/1151 [00:00<?, ? examples/s]

Map:   0%|          | 0/6125 [00:00<?, ? examples/s]

Map:   0%|          | 0/1151 [00:00<?, ? examples/s]

{'id': Value(dtype='int64', id=None), 'text': Value(dtype='string', id=None), 'label': Sequence(feature=Value(dtype='float64', id=None), length=-1, id=None)}
{'id': Value(dtype='int64', id=None), 'text': Value(dtype='string', id=None), 'label': Sequence(feature=Value(dtype='float64', id=None), length=-1, id=None)}


In [None]:
# Pretrained checkpoint that the tokenizer and the classifier are loaded from.
checkpoint = 'FacebookAI/xlm-roberta-base'
# Number of epochs passed to TrainingArguments for each trainer.train() call.
num_epochs = 5

In [None]:
# Tokenizer belonging to the pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
  """Tokenize the 'text' field, truncating to at most 256 tokens.

  Used with ``map(..., batched=True)``, in which case ``example['text']`` is a
  list of strings. Padding is not applied here.
  """
  texts = example['text']
  return tokenizer(texts, truncation=True, max_length=256, add_special_tokens=True)

# Tokenize every split in batches; padding is deferred to the collator, which
# pads each batch with the tokenizer's padding settings.
tokenized_datasets = ds.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer)

# Size the classification head from `classes` instead of a hard-coded 7 and
# store the label names in the model config, so the saved/pushed model reports
# class names rather than LABEL_0..LABEL_6.
# The labels are float vectors (see the printed dataset features), which
# transformers treats as multi-label classification; stating the problem type
# explicitly keeps that loss choice from depending on dtype inference.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=len(classes),
    id2label=id2class,
    label2id=class2id,
    problem_type='multi_label_classification',
)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training configuration: evaluate and checkpoint once per epoch, keep at most
# five checkpoints, reload the best one when training ends, and push every
# saved checkpoint to the Hugging Face Hub.
training_args = TrainingArguments(
    run_name=f'First Run-{time()}',  # timestamp suffix keeps run names unique
    output_dir='outputs-xml', overwrite_output_dir=True,
    # auto_find_batch_size=True,
    num_train_epochs=num_epochs,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=5, load_best_model_at_end=True,
    push_to_hub=True,
    hub_model_id='mohammad-osoolian/DL-xlm-roberta-base10',
    hub_strategy='every_save',
    hub_private_repo=False,
    # Never put an access token literal in the notebook. With None the token
    # cached by `huggingface-cli login` / `notebook_login()` (or the HF_TOKEN
    # environment variable) is used for pushing.
    hub_token=None,
)


In [None]:
# Wire the model, training configuration, tokenized splits, padding collator
# and metric function into a Trainer; the 'test' split is used for evaluation.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Ask PyTorch to release cached, currently unused GPU memory before training.
torch.cuda.empty_cache()

In [None]:
# Run training; the returned object is kept in `train_output`.
train_output = trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision Macro,Recall Macro,F1 Macro,F1 C0,F1 C1,F1 C2,F1 C3,F1 C4,F1 C5,F1 C6,Recall C0,Recall C1,Recall C2,Recall C3,Recall C4,Recall C5,Recall C6,Precision C0,Precision C1,Precision C2,Precision C3,Precision C4,Precision C5,Precision C6
1,0.3247,0.326075,0.50391,0.528971,0.497542,0.486044,0.5,0.478821,0.497006,0.671642,0.488688,0.214286,0.551867,0.458182,0.496183,0.538961,0.789474,0.372414,0.138462,0.689119,0.550218,0.462633,0.461111,0.584416,0.710526,0.473684,0.460208
2,0.2366,0.324474,0.498697,0.540036,0.496795,0.48377,0.456057,0.578824,0.5,0.621849,0.472868,0.263736,0.493056,0.349091,0.469466,0.668831,0.649123,0.42069,0.184615,0.735751,0.657534,0.754601,0.399225,0.596774,0.539823,0.461538,0.370757
3,0.2041,0.282938,0.615117,0.650267,0.608334,0.607669,0.643478,0.660317,0.548736,0.687023,0.524887,0.596491,0.592751,0.538182,0.793893,0.493506,0.789474,0.4,0.523077,0.720207,0.8,0.565217,0.617886,0.608108,0.763158,0.693878,0.503623
4,0.1529,0.297677,0.621199,0.655268,0.594882,0.603348,0.65812,0.675,0.56962,0.655462,0.518519,0.54717,0.599542,0.56,0.824427,0.584416,0.684211,0.386207,0.446154,0.678756,0.797927,0.571429,0.555556,0.629032,0.788732,0.707317,0.536885
5,0.1231,0.30951,0.624674,0.660845,0.613727,0.620989,0.648188,0.692967,0.564103,0.711864,0.561404,0.594595,0.573805,0.552727,0.770992,0.571429,0.736842,0.441379,0.507692,0.715026,0.783505,0.629283,0.556962,0.688525,0.771084,0.717391,0.479167


In [None]:
  # Second call to trainer.train() on the same trainer; it overwrites the
  # `train_output` stored by the previous training cell.
  # NOTE(review): this cell is indented; IPython appears to tolerate that,
  # but it would be an IndentationError in a plain .py export -- confirm and dedent.
  train_output = trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision Macro,Recall Macro,F1 Macro,F1 C0,F1 C1,F1 C2,F1 C3,F1 C4,F1 C5,F1 C6,Recall C0,Recall C1,Recall C2,Recall C3,Recall C4,Recall C5,Recall C6,Precision C0,Precision C1,Precision C2,Precision C3,Precision C4,Precision C5,Precision C6
1,0.1937,0.336754,0.553432,0.631496,0.529104,0.54484,0.650089,0.531056,0.554913,0.641509,0.421622,0.526316,0.488372,0.665455,0.652672,0.623377,0.596491,0.268966,0.461538,0.435233,0.635417,0.447644,0.5,0.693878,0.975,0.612245,0.556291
2,0.165,0.320431,0.590791,0.60869,0.57896,0.578764,0.652087,0.63961,0.52669,0.704,0.455814,0.528926,0.544218,0.596364,0.751908,0.480519,0.77193,0.337931,0.492308,0.621762,0.719298,0.556497,0.582677,0.647059,0.7,0.571429,0.483871
3,0.1223,0.352917,0.609036,0.643855,0.610921,0.60629,0.632287,0.647799,0.551237,0.677165,0.538813,0.615385,0.581345,0.512727,0.78626,0.506494,0.754386,0.406897,0.615385,0.694301,0.824561,0.550802,0.604651,0.614286,0.797297,0.615385,0.5
4,0.0828,0.437714,0.596872,0.664398,0.594933,0.592681,0.579572,0.696517,0.550388,0.698413,0.480769,0.598291,0.544815,0.443636,0.801527,0.461039,0.77193,0.344828,0.538462,0.803109,0.835616,0.615836,0.682692,0.637681,0.793651,0.673077,0.412234
5,0.0627,0.398735,0.638575,0.677999,0.635546,0.638005,0.659341,0.697509,0.62,0.715447,0.593886,0.603448,0.576402,0.545455,0.748092,0.603896,0.77193,0.468966,0.538462,0.772021,0.833333,0.653333,0.636986,0.666667,0.809524,0.686275,0.459877
