In [1]:
import numpy as np
import torch
import transformers
transformers.logging.set_verbosity_error()
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, DatasetDict, Dataset, load_metric
from sklearn.metrics import confusion_matrix
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding

In [2]:
from classes import RobertaForSequenceClassification2, StrIgnoreDevice, DataLoaderWithTaskname, MultitaskDataloader, MultitaskTrainer

In [3]:
# Notebook-level RoBERTa tokenizer; tokenize_function and the padding collator below both read it.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def tokenize_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` with truncation enabled.

    Args:
        examples: Mapping with a ``"text"`` entry (passed to ``Dataset.map``
            with ``batched=True`` by ``read_and_tokenize_data``).

    Returns:
        Whatever the notebook-level ``tokenizer`` returns for that text.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded

def read_and_tokenize_data(train,test,evaluate):
    """Load three files as the train/test/evaluate splits and tokenize them.

    Args:
        train: Path of the file used for the ``"train"`` split.
        test: Path of the file used for the ``"test"`` split.
        evaluate: Path of the file used for the ``"evaluate"`` split.

    Returns:
        The loaded splits after ``tokenize_function`` has been mapped over
        them in batched mode.
    """
    # NOTE(review): this notebook passes .pkl files to the 'pandas' loader;
    # unpickling can execute arbitrary code, so only load trusted files.
    splits = load_dataset('pandas', data_files={"train":train, "test":test,"evaluate":evaluate})
    for split_name in splits:
        # Drop the serialized index column only when it is actually present,
        # so a file saved without it does not abort the whole load.
        if '__index_level_0__' in splits[split_name].column_names:
            splits[split_name] = splits[split_name].remove_columns(column_names = ['__index_level_0__'])
    return splits.map(tokenize_function, batched=True)

# Padding collator built from the shared tokenizer (the training cell builds an identical one again).
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [4]:
# Tokenized train/test/evaluate splits, keyed by task name.
dataset_dict = {
    "author": read_and_tokenize_data(
        "data/authors_train.pkl", "data/authors_test.pkl", "data/authors_validation.pkl"
    ),
    "sentiment": read_and_tokenize_data(
        "data/imdb_train.pkl", "data/imdb_test.pkl", "data/imdb_validation.pkl"
    ),
}

# One shared model with a label count per task (4 for "author", 2 for "sentiment").
multitask_model = RobertaForSequenceClassification2.from_pretrained(
    "roberta-base",
    task_labels_map={"author": 4, "sentiment": 2},
)

# Expose only the model inputs and the label as torch tensors.
# Descriptive loop names (instead of `i`/`j`) avoid leaving single-letter
# globals behind that later cells could pick up by accident.
for task_splits in dataset_dict.values():
    for split in task_splits.values():
        split.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])


def _select_split(split_name):
    """Return ``{task_name: dataset}`` for one split name across all tasks."""
    return {
        task_name: task_splits[split_name]
        for task_name, task_splits in dataset_dict.items()
    }


train_dataset = _select_split("train")
eval_dataset = _select_split("evaluate")
test_dataset = _select_split("test")

Using custom data configuration default-308df153f829b177
Reusing dataset pandas (/Users/mgaulia/.cache/huggingface/datasets/pandas/default-308df153f829b177/0.0.0/6197c1e855b639d75a767140856841a562b7a71d129104973fe1962594877ade)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /Users/mgaulia/.cache/huggingface/datasets/pandas/default-308df153f829b177/0.0.0/6197c1e855b639d75a767140856841a562b7a71d129104973fe1962594877ade/cache-b02511b695f1c9e3.arrow
Loading cached processed dataset at /Users/mgaulia/.cache/huggingface/datasets/pandas/default-308df153f829b177/0.0.0/6197c1e855b639d75a767140856841a562b7a71d129104973fe1962594877ade/cache-68a2a10ec55d40ad.arrow


  0%|          | 0/1 [00:00<?, ?ba/s]

Using custom data configuration default-0192cbfa45180d60
Reusing dataset pandas (/Users/mgaulia/.cache/huggingface/datasets/pandas/default-0192cbfa45180d60/0.0.0/6197c1e855b639d75a767140856841a562b7a71d129104973fe1962594877ade)


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

#### Training the model

In [7]:
training_args = TrainingArguments(
    output_dir="results",
    # Optimisation settings.
    num_train_epochs=15,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Evaluate and checkpoint on the same 500-step cadence; the run log below
    # shows the checkpoint with the lowest eval_loss being reloaded at the end.
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=500,
    load_best_model_at_end=True,
    per_device_eval_batch_size=8,
)


# Accuracy metric consumed by compute_metrics.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the notebook-level ``metric``.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        The result of ``metric.compute`` for the argmax class of each row of
        ``logits`` against ``labels``.
    """
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=-1)
    scores = metric.compute(predictions=predicted_classes, references=labels)
    return scores


# Collator handed to the trainer, built from the shared tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Trainer over the per-task train/eval dataset dicts built earlier.
trainer = MultitaskTrainer(
    model=multitask_model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

In [7]:
# Run fine-tuning with the MultitaskTrainer configured in the previous cell.
trainer.train()

***** Running training *****
  Num examples = 5391
  Num Epochs = 15
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 10125
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.6787, 'learning_rate': 1.901234567901235e-05, 'epoch': 0.74}


Saving model checkpoint to results/checkpoint-500
Configuration saved in results/checkpoint-500/config.json


{'eval_loss': 0.5599084496498108, 'eval_accuracy': 0.8466666666666667, 'eval_runtime': 3.2378, 'eval_samples_per_second': 92.656, 'eval_steps_per_second': 11.736, 'epoch': 0.74}


Model weights saved in results/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.4569, 'learning_rate': 1.802469135802469e-05, 'epoch': 1.48}


Saving model checkpoint to results/checkpoint-1000
Configuration saved in results/checkpoint-1000/config.json


{'eval_loss': 0.48681876063346863, 'eval_accuracy': 0.8766666666666667, 'eval_runtime': 3.0574, 'eval_samples_per_second': 98.124, 'eval_steps_per_second': 12.429, 'epoch': 1.48}


Model weights saved in results/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.3162, 'learning_rate': 1.7037037037037038e-05, 'epoch': 2.22}


Saving model checkpoint to results/checkpoint-1500
Configuration saved in results/checkpoint-1500/config.json


{'eval_loss': 0.37365713715553284, 'eval_accuracy': 0.9033333333333333, 'eval_runtime': 3.0387, 'eval_samples_per_second': 98.728, 'eval_steps_per_second': 12.506, 'epoch': 2.22}


Model weights saved in results/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.1991, 'learning_rate': 1.6049382716049385e-05, 'epoch': 2.96}


Saving model checkpoint to results/checkpoint-2000
Configuration saved in results/checkpoint-2000/config.json


{'eval_loss': 0.4169445037841797, 'eval_accuracy': 0.9333333333333333, 'eval_runtime': 3.0751, 'eval_samples_per_second': 97.558, 'eval_steps_per_second': 12.357, 'epoch': 2.96}


Model weights saved in results/checkpoint-2000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.1429, 'learning_rate': 1.506172839506173e-05, 'epoch': 3.7}


Saving model checkpoint to results/checkpoint-2500
Configuration saved in results/checkpoint-2500/config.json


{'eval_loss': 0.42519184947013855, 'eval_accuracy': 0.92, 'eval_runtime': 3.0626, 'eval_samples_per_second': 97.955, 'eval_steps_per_second': 12.408, 'epoch': 3.7}


Model weights saved in results/checkpoint-2500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0857, 'learning_rate': 1.4074074074074075e-05, 'epoch': 4.44}


Saving model checkpoint to results/checkpoint-3000
Configuration saved in results/checkpoint-3000/config.json


{'eval_loss': 0.5788098573684692, 'eval_accuracy': 0.9166666666666666, 'eval_runtime': 3.1225, 'eval_samples_per_second': 96.076, 'eval_steps_per_second': 12.17, 'epoch': 4.44}


Model weights saved in results/checkpoint-3000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0634, 'learning_rate': 1.3086419753086422e-05, 'epoch': 5.19}


Saving model checkpoint to results/checkpoint-3500
Configuration saved in results/checkpoint-3500/config.json


{'eval_loss': 0.4006443917751312, 'eval_accuracy': 0.94, 'eval_runtime': 3.05, 'eval_samples_per_second': 98.362, 'eval_steps_per_second': 12.459, 'epoch': 5.19}


Model weights saved in results/checkpoint-3500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0424, 'learning_rate': 1.2098765432098767e-05, 'epoch': 5.93}


Saving model checkpoint to results/checkpoint-4000
Configuration saved in results/checkpoint-4000/config.json


{'eval_loss': 0.5864290595054626, 'eval_accuracy': 0.93, 'eval_runtime': 3.0659, 'eval_samples_per_second': 97.852, 'eval_steps_per_second': 12.395, 'epoch': 5.93}


Model weights saved in results/checkpoint-4000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0423, 'learning_rate': 1.1111111111111113e-05, 'epoch': 6.67}


Saving model checkpoint to results/checkpoint-4500
Configuration saved in results/checkpoint-4500/config.json


{'eval_loss': 0.41267475485801697, 'eval_accuracy': 0.9466666666666667, 'eval_runtime': 3.0452, 'eval_samples_per_second': 98.517, 'eval_steps_per_second': 12.479, 'epoch': 6.67}


Model weights saved in results/checkpoint-4500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0304, 'learning_rate': 1.0123456790123458e-05, 'epoch': 7.41}


Saving model checkpoint to results/checkpoint-5000
Configuration saved in results/checkpoint-5000/config.json


{'eval_loss': 0.6471032500267029, 'eval_accuracy': 0.92, 'eval_runtime': 3.0773, 'eval_samples_per_second': 97.49, 'eval_steps_per_second': 12.349, 'epoch': 7.41}


Model weights saved in results/checkpoint-5000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0129, 'learning_rate': 9.135802469135803e-06, 'epoch': 8.15}


Saving model checkpoint to results/checkpoint-5500
Configuration saved in results/checkpoint-5500/config.json


{'eval_loss': 0.6132160425186157, 'eval_accuracy': 0.9266666666666666, 'eval_runtime': 3.0603, 'eval_samples_per_second': 98.028, 'eval_steps_per_second': 12.417, 'epoch': 8.15}


Model weights saved in results/checkpoint-5500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0311, 'learning_rate': 8.148148148148148e-06, 'epoch': 8.89}


Saving model checkpoint to results/checkpoint-6000
Configuration saved in results/checkpoint-6000/config.json


{'eval_loss': 0.5779215693473816, 'eval_accuracy': 0.9333333333333333, 'eval_runtime': 3.0676, 'eval_samples_per_second': 97.797, 'eval_steps_per_second': 12.388, 'epoch': 8.89}


Model weights saved in results/checkpoint-6000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0133, 'learning_rate': 7.160493827160494e-06, 'epoch': 9.63}


Saving model checkpoint to results/checkpoint-6500
Configuration saved in results/checkpoint-6500/config.json


{'eval_loss': 0.5070252418518066, 'eval_accuracy': 0.94, 'eval_runtime': 3.0835, 'eval_samples_per_second': 97.291, 'eval_steps_per_second': 12.324, 'epoch': 9.63}


Model weights saved in results/checkpoint-6500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0299, 'learning_rate': 6.17283950617284e-06, 'epoch': 10.37}


Saving model checkpoint to results/checkpoint-7000
Configuration saved in results/checkpoint-7000/config.json


{'eval_loss': 0.6678612232208252, 'eval_accuracy': 0.9266666666666666, 'eval_runtime': 3.103, 'eval_samples_per_second': 96.681, 'eval_steps_per_second': 12.246, 'epoch': 10.37}


Model weights saved in results/checkpoint-7000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0109, 'learning_rate': 5.185185185185185e-06, 'epoch': 11.11}


Saving model checkpoint to results/checkpoint-7500
Configuration saved in results/checkpoint-7500/config.json


{'eval_loss': 0.5488567352294922, 'eval_accuracy': 0.9366666666666666, 'eval_runtime': 3.105, 'eval_samples_per_second': 96.619, 'eval_steps_per_second': 12.238, 'epoch': 11.11}


Model weights saved in results/checkpoint-7500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0018, 'learning_rate': 4.197530864197531e-06, 'epoch': 11.85}


Saving model checkpoint to results/checkpoint-8000
Configuration saved in results/checkpoint-8000/config.json


{'eval_loss': 0.6135303974151611, 'eval_accuracy': 0.9333333333333333, 'eval_runtime': 3.0652, 'eval_samples_per_second': 97.872, 'eval_steps_per_second': 12.397, 'epoch': 11.85}


Model weights saved in results/checkpoint-8000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0114, 'learning_rate': 3.2098765432098767e-06, 'epoch': 12.59}


Saving model checkpoint to results/checkpoint-8500
Configuration saved in results/checkpoint-8500/config.json


{'eval_loss': 0.578200101852417, 'eval_accuracy': 0.9433333333333334, 'eval_runtime': 3.1536, 'eval_samples_per_second': 95.129, 'eval_steps_per_second': 12.05, 'epoch': 12.59}


Model weights saved in results/checkpoint-8500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0, 'learning_rate': 2.222222222222222e-06, 'epoch': 13.33}


Saving model checkpoint to results/checkpoint-9000
Configuration saved in results/checkpoint-9000/config.json


{'eval_loss': 0.5728410482406616, 'eval_accuracy': 0.9433333333333334, 'eval_runtime': 3.0618, 'eval_samples_per_second': 97.98, 'eval_steps_per_second': 12.411, 'epoch': 13.33}


Model weights saved in results/checkpoint-9000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0, 'learning_rate': 1.234567901234568e-06, 'epoch': 14.07}


Saving model checkpoint to results/checkpoint-9500
Configuration saved in results/checkpoint-9500/config.json


{'eval_loss': 0.5762560367584229, 'eval_accuracy': 0.9433333333333334, 'eval_runtime': 3.0501, 'eval_samples_per_second': 98.358, 'eval_steps_per_second': 12.459, 'epoch': 14.07}


Model weights saved in results/checkpoint-9500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 300
  Batch size = 8


{'loss': 0.0, 'learning_rate': 2.469135802469136e-07, 'epoch': 14.81}


Saving model checkpoint to results/checkpoint-10000
Configuration saved in results/checkpoint-10000/config.json


{'eval_loss': 0.6036664843559265, 'eval_accuracy': 0.94, 'eval_runtime': 3.0673, 'eval_samples_per_second': 97.806, 'eval_steps_per_second': 12.389, 'epoch': 14.81}


Model weights saved in results/checkpoint-10000/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from results/checkpoint-1500 (score: 0.37365713715553284).


{'train_runtime': 2821.0705, 'train_samples_per_second': 0.011, 'train_steps_per_second': 3.589, 'train_loss': 0.10714140081626397, 'epoch': 15.0}


TrainOutput(global_step=10125, training_loss=0.10714140081626397, metrics={'train_runtime': 2821.0705, 'train_samples_per_second': 0.011, 'train_steps_per_second': 3.589, 'train_loss': 0.10714140081626397, 'epoch': 15.0})

#### Saving the model

In [8]:
def save_model(trainer, tokenizer, trainer_path, tokenizer_path):
    """Persist the trainer's model and the tokenizer to two directories.

    Args:
        trainer: Object exposing ``save_model(path)``.
        tokenizer: Object exposing ``save_pretrained(path)``.
        trainer_path: Destination passed to ``trainer.save_model``.
        tokenizer_path: Destination passed to ``tokenizer.save_pretrained``.

    Returns:
        None.
    """
    # Model first, tokenizer second — same order as the log output below.
    trainer.save_model(trainer_path)
    tokenizer.save_pretrained(tokenizer_path)
    
# Write the trained model and the tokenizer under models/multihead_classification/.
save_model(trainer,tokenizer,"models/multihead_classification/model","models/multihead_classification/tokenizer")

Saving model checkpoint to models/multihead_classification/model
Configuration saved in models/multihead_classification/model/config.json
Model weights saved in models/multihead_classification/model/pytorch_model.bin
tokenizer config file saved in models/multihead_classification/tokenizer/tokenizer_config.json
Special tokens file saved in models/multihead_classification/tokenizer/special_tokens_map.json


#### Loading the model

In [8]:
def load_model(model_path, tokenizer_path, task_labels_map=None):
    """Reload a saved multi-task model, wrap it in a Trainer, and load its tokenizer.

    Args:
        model_path: Directory handed to
            ``RobertaForSequenceClassification2.from_pretrained``.
        tokenizer_path: Directory handed to ``RobertaTokenizer.from_pretrained``.
        task_labels_map: Optional ``{task_name: num_labels}`` mapping forwarded
            to the model constructor. Defaults to the mapping used for training
            in this notebook: ``{"author": 4, "sentiment": 2}``.

    Returns:
        Tuple ``(model, trainer, tokenizer)``.
    """
    if task_labels_map is None:
        # Build the default per call so callers never share one mutable dict.
        task_labels_map = {"author": 4, "sentiment": 2}
    model = RobertaForSequenceClassification2.from_pretrained(
        model_path, task_labels_map=task_labels_map
    )
    tokenizer = RobertaTokenizer.from_pretrained(tokenizer_path)
    trainer = Trainer(model=model)
    return (model, trainer, tokenizer)
    

# Reload the saved artifacts; this rebinds the notebook-level `trainer` and `tokenizer` names.
model, trainer, tokenizer = load_model("models/multihead_classification/model","models/multihead_classification/tokenizer")

loading configuration file models/multihead_classification/model\config.json
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForSequenceClassification2"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.15.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading weights file models/multihead_classification/model\pytorch_model.bin
All model checkpoint weights were used when initializing RobertaForSequenceClassification2.

All the weights of RobertaForSequenceC

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def make_predictions(test_data):
    """Print a confusion matrix and accuracy for the "author" and "sentiment" tasks.

    Each example is run through the notebook-level ``model`` on its own, so
    sequences of different lengths need no padding. The previous body read the
    label from an undefined ``i`` and fed a whole unpadded column to the model.

    NOTE: a later cell defines ``make_predictions`` again; that definition
    replaces this one once it has been executed.

    Args:
        test_data: Mapping from task name to an iterable of examples, each
            providing ``"input_ids"``, ``"attention_mask"`` and ``"label"``.
    """
    metric = load_metric("accuracy")
    model.eval()
    for task in ["author", "sentiment"]:
        y_true = []
        y_pred = []
        # Inference only: no autograd graph is needed for any forward pass.
        with torch.no_grad():
            for example in test_data[task]:
                logits = model(
                    input_ids=torch.unsqueeze(example["input_ids"], 0).to(device),
                    attention_mask=torch.unsqueeze(example["attention_mask"], 0).to(device),
                    task_name=task,
                )[0]
                # Store plain ints so both lists are flat label sequences.
                y_true.append(int(example["label"]))
                y_pred.append(int(logits.argmax(dim=-1).item()))
        print(confusion_matrix(y_true, y_pred))
        acc = metric.compute(predictions=y_pred, references=y_true)
        print(f"Task: {task}", f"Accuracy: {acc}")

In [45]:
def make_predictions(test_data, tasks=("author", "sentiment")):
    """Print a confusion matrix and accuracy for each requested task.

    Every example is pushed through the notebook-level ``model`` one at a
    time (batch of one), so sequences of different lengths need no padding.

    Args:
        test_data: Mapping from task name to an iterable of examples, each
            providing ``"input_ids"``, ``"attention_mask"`` and ``"label"``.
        tasks: Task names to evaluate, in order. Defaults to the two tasks
            this notebook trains.
    """
    metric = load_metric("accuracy")
    model.eval()
    for task in tasks:
        y_true = []
        y_pred = []
        # Inference only: skip autograd bookkeeping for every forward pass.
        with torch.no_grad():
            for example in test_data[task]:
                logits = model(
                    input_ids=torch.unsqueeze(example["input_ids"], 0).to(device),
                    attention_mask=torch.unsqueeze(example["attention_mask"], 0).to(device),
                    task_name=task,
                )[0]
                # Plain ints rather than 0-dim tensors / 1-element arrays, so
                # both lists are flat label sequences.
                y_true.append(int(example["label"]))
                y_pred.append(int(logits.argmax(dim=-1).item()))
        print(confusion_matrix(y_true, y_pred))
        acc = metric.compute(predictions=y_pred, references=y_true)
        print(f"Task: {task}", f"Accuracy: {acc}")

In [48]:
# Print a confusion matrix and accuracy per task for the test splits.
make_predictions(test_dataset)

[[48  0  0  0]
 [ 1 20  0  0]
 [ 1  0  4  1]
 [ 2  8  0 64]]
Task: author Accuracy: {'accuracy': 0.912751677852349}
[[70  7]
 [ 4 69]]
Task: sentiment Accuracy: {'accuracy': 0.9266666666666666}


In [40]:
def pipeline(text, task):
    """Classify ``text`` with the head for ``task`` and print label and score.

    Args:
        text: Input passed to the notebook-level ``tokenizer``.
        task: Task name forwarded to the model as ``task_name``.

    Prints:
        ``{"label": <argmax class tensor>, "score": <its softmax probability>}``.
    """
    # truncation=True matches tokenize_function, so over-long text is cut
    # rather than handed to the model at full length.
    inputs = tokenizer(text, padding="longest", truncation=True, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
    model.eval()
    # Inference only: no autograd graph needed.
    with torch.no_grad():
        logits = model(**inputs, task_name=task)["logits"]
    probabilities = torch.softmax(logits, dim=1).cpu()
    # One reduction yields both the top probability and its class index.
    probability, result = torch.max(probabilities, dim=1)
    print({"label":result, "score":probability})

In [42]:
# Example: run one review through the sentiment head.
pipeline("Total dissapointment. I do not understand why would anyone watch this","sentiment")

{'label': tensor([0]), 'score': tensor([0.9992])}


In [43]:
# Example: a second review through the sentiment head.
pipeline("An all time classic","sentiment")

{'label': tensor([1]), 'score': tensor([0.9981])}
