In [1]:
import os
import random
import functools
import csv
import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import f1_score, precision_score, recall_score
from skmultilearn.model_selection import iterative_train_test_split
from sklearn.model_selection import StratifiedKFold


from datasets import Dataset, DatasetDict
from peft import (
    LoraConfig,
    prepare_model_for_kbit_training,
    get_peft_model
)
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)

In [2]:
def tokenize_examples(examples, tokenizer, classes):
    text = f"Issue: {examples['issue']}.\nAnswer: {examples['post_text']}"
    labels = [examples[label] for label in classes]
    tokenized_inputs = tokenizer(text, truncation=True, max_length=700, padding=True)
    tokenized_inputs['labels'] = labels
    return tokenized_inputs


# define custom batch preprocessor
def collate_fn(batch, tokenizer):
    dict_keys = ['input_ids', 'attention_mask', 'labels']
    d = {k: [dic[k] for dic in batch] for k in dict_keys}
    d['input_ids'] = torch.nn.utils.rnn.pad_sequence(
        d['input_ids'], batch_first=True, padding_value=tokenizer.pad_token_id
    )
    d['attention_mask'] = torch.nn.utils.rnn.pad_sequence(
        d['attention_mask'], batch_first=True, padding_value=0
    )
    d['labels'] = torch.stack(d['labels']).type(torch.float)
    return d


# define which metrics to compute for evaluation
def compute_metrics(p, id2class):
    predictions, labels = p
    predictions_binary = predictions > 0
    
    # Average metrics
    f1_micro = f1_score(labels, predictions_binary, average='micro')
    f1_macro = f1_score(labels, predictions_binary, average='macro')
    f1_weighted = f1_score(labels, predictions_binary, average='weighted')
    
    precision_micro = precision_score(labels, predictions_binary, average='micro')
    precision_macro = precision_score(labels, predictions_binary, average='macro')
    precision_weighted = precision_score(labels, predictions_binary, average='weighted')
    
    recall_micro = recall_score(labels, predictions_binary, average='micro')
    recall_macro = recall_score(labels, predictions_binary, average='macro')
    recall_weighted = recall_score(labels, predictions_binary, average='weighted')
    
    # Per-class metrics
    precision_per_id = precision_score(labels, predictions_binary, average=None)
    recall_per_id = recall_score(labels, predictions_binary, average=None)
    f1_per_id = f1_score(labels, predictions_binary, average=None)


    precision_per_class = {f"precision_{id2class[id]}": value for id, value in enumerate(precision_per_id)}
    recall_per_class = {f"recall_{id2class[id]}": value for id, value in enumerate(recall_per_id)}
    f1_per_class = {f"f1_{id2class[id]}": value for id, value in enumerate(f1_per_id)}
    
    return {
        'f1_micro': f1_micro,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        
        'precision_micro': precision_micro,
        'precision_macro': precision_macro,
        'precision_weighted': precision_weighted,
        
        'recall_micro': recall_micro,
        'recall_macro': recall_macro,
        'recall_weighted': recall_weighted,
        
        **precision_per_class,
        **recall_per_class,
        **f1_per_class
    }


# create custom trainer class to be able to pass label weights and calculate mutilabel loss
class CustomTrainer(Trainer):

    def __init__(self, label_weights, **kwargs):
        super().__init__(**kwargs)
        self.label_weights = label_weights
    
    def compute_loss(self, model, inputs, num_items_in_batch=1000, return_outputs=False):
        labels = inputs.pop("labels")
        
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        
        # compute custom loss
        loss = F.binary_cross_entropy_with_logits(logits, labels.to(torch.float32), pos_weight=self.label_weights)
        return (loss, outputs) if return_outputs else loss


In [None]:
from datasets import load_dataset
    
ds = load_dataset('timonziegenbein/appropriateness-corpus')

classes = [
    'Toxic Emotions',
    'Missing Commitment',
    'Missing Intelligibility',
    'Other Reasons',
    'Inappropriateness',
    'Excessive Intensity',
    'Emotional Deception',
    'Missing Seriousness',
    'Missing Openness',
    'Unclear Meaning',
    'Missing Relevance',
    'Confusing Reasoning',
    'Detrimental Orthography',
    'Reason Unclassified'
]
class2id = {class_:id for id, class_ in enumerate(classes)}
id2class = {id:class_ for class_, id in class2id.items()}


model_name = 'microsoft/deberta-v3-large'

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token #= tokenizer.special_tokens_map['pad_token']
tokenized_ds = ds.map(functools.partial(tokenize_examples, tokenizer=tokenizer, classes=classes), batched=False)
tokenized_ds = tokenized_ds.with_format('torch')

labels = tokenized_ds['train']['labels']
# label_weights = torch.ones(len(classes))
label_weights = 1 / labels.mean(dim=0, dtype=torch.float32)
print(label_weights)



tensor([ 3.7029,  2.9652,  2.8077, 20.1711,  1.8403,  5.4362,  5.0262, 11.8837,
         3.3326,  4.7757,  4.3183, 12.5656, 27.8727, 69.6818])


In [4]:
tokenized_ds = tokenized_ds.shuffle()

In [5]:
# qunatization config
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, # enable 4-bit quantization
    bnb_4bit_quant_type = 'nf4', # information theoretically optimal dtype for normally distributed weights
    bnb_4bit_use_double_quant = True, # quantize quantized weights //insert xzibit meme
    bnb_4bit_compute_dtype = torch.bfloat16 # optimized fp format for ML
)

# lora config
lora_config = LoraConfig(
    r = 8, # the dimension of the low-rank matrices
    lora_alpha = 16, # scaling factor for LoRA activations vs pre-trained weight activations
    target_modules="all-linear",
    # target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    # target_modules = ['query_proj', 'value_proj'],
    lora_dropout = 0.05, # dropout probability of the LoRA layers
    bias = 'none', # wether to train bias weights, set to 'none' for attention layers
    task_type = 'SEQ_CLS'
)

# load model
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="cuda:0",
    quantization_config=quantization_config,
    num_labels=len(classes),
    problem_type="multi_label_classification",
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.config.pad_token_id = tokenizer.pad_token_id

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# define training args
training_args = TrainingArguments(
    output_dir = 'multilabel_classification',
    logging_dir = 'multilabel_classification/logs',
    learning_rate = 1e-4,
    per_device_train_batch_size = 8, # tested with 16gb gpu ram
    per_device_eval_batch_size = 8,
    num_train_epochs = 10,
    # weight_decay = 0.01,
    eval_strategy = 'epoch',
    save_strategy = 'epoch',
    load_best_model_at_end = True
)

In [7]:
trainer = CustomTrainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_ds['train'],
    eval_dataset = tokenized_ds['validation'],
    tokenizer = tokenizer,
    data_collator = functools.partial(collate_fn, tokenizer=tokenizer),
    compute_metrics = functools.partial(compute_metrics, id2class=id2class),
    label_weights = torch.tensor(label_weights, device=model.device)
)

trainer.train()

  label_weights = torch.tensor(label_weights, device=model.device)
  super().__init__(**kwargs)


  0%|          | 0/1920 [00:00<?, ?it/s]

  return fn(*args, **kwargs)


  0%|          | 0/28 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 1.1859934329986572, 'eval_f1_micro': 0.44579226686884005, 'eval_f1_macro': 0.3501763283371336, 'eval_f1_weighted': 0.47025563124247177, 'eval_precision_micro': 0.29429429429429427, 'eval_precision_macro': 0.23742297090059766, 'eval_precision_weighted': 0.32668024780665267, 'eval_recall_micro': 0.91875, 'eval_recall_macro': 0.7270222318176282, 'eval_recall_weighted': 0.91875, 'eval_precision_Toxic Emotions': 0.2772727272727273, 'eval_precision_Missing Commitment': 0.33181818181818185, 'eval_precision_Missing Intelligibility': 0.35454545454545455, 'eval_precision_Other Reasons': 0.0967741935483871, 'eval_precision_Inappropriateness': 0.5636363636363636, 'eval_precision_Excessive Intensity': 0.18181818181818182, 'eval_precision_Emotional Deception': 0.2, 'eval_precision_Missing Seriousness': 0.3191489361702128, 'eval_precision_Missing Openness': 0.3, 'eval_precision_Unclear Meaning': 0.29347826086956524, 'eval_precision_Missing Relevance': 0.2318181818181818, 'eval_precision

  return fn(*args, **kwargs)


  0%|          | 0/28 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 1.1196519136428833, 'eval_f1_micro': 0.5297092288242731, 'eval_f1_macro': 0.42502435797895866, 'eval_f1_weighted': 0.55359791729568, 'eval_precision_micro': 0.4447983014861996, 'eval_precision_macro': 0.3570597314269878, 'eval_precision_weighted': 0.4925647033689664, 'eval_recall_micro': 0.6546875, 'eval_recall_macro': 0.5654605101256288, 'eval_recall_weighted': 0.6546875, 'eval_precision_Toxic Emotions': 0.46987951807228917, 'eval_precision_Missing Commitment': 0.5617977528089888, 'eval_precision_Missing Intelligibility': 0.550561797752809, 'eval_precision_Other Reasons': 0.14814814814814814, 'eval_precision_Inappropriateness': 0.734375, 'eval_precision_Excessive Intensity': 0.32894736842105265, 'eval_precision_Emotional Deception': 0.3472222222222222, 'eval_precision_Missing Seriousness': 0.2727272727272727, 'eval_precision_Missing Openness': 0.4888888888888889, 'eval_precision_Unclear Meaning': 0.4507042253521127, 'eval_precision_Missing Relevance': 0.3974358974358974,

  return fn(*args, **kwargs)


{'loss': 1.1416, 'grad_norm': 1.560382604598999, 'learning_rate': 7.395833333333335e-05, 'epoch': 2.6}


  0%|          | 0/28 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 1.112463116645813, 'eval_f1_micro': 0.5372960372960373, 'eval_f1_macro': 0.43136991730567076, 'eval_f1_weighted': 0.5663704279438035, 'eval_precision_micro': 0.4284386617100372, 'eval_precision_macro': 0.3473835421567258, 'eval_precision_weighted': 0.48837487541656743, 'eval_recall_micro': 0.7203125, 'eval_recall_macro': 0.6202862697093112, 'eval_recall_weighted': 0.7203125, 'eval_precision_Toxic Emotions': 0.46938775510204084, 'eval_precision_Missing Commitment': 0.5384615384615384, 'eval_precision_Missing Intelligibility': 0.5333333333333333, 'eval_precision_Other Reasons': 0.125, 'eval_precision_Inappropriateness': 0.7913043478260869, 'eval_precision_Excessive Intensity': 0.336734693877551, 'eval_precision_Emotional Deception': 0.36363636363636365, 'eval_precision_Missing Seriousness': 0.26785714285714285, 'eval_precision_Missing Openness': 0.47572815533980584, 'eval_precision_Unclear Meaning': 0.3655913978494624, 'eval_precision_Missing Relevance': 0.35051546391752575

  return fn(*args, **kwargs)


  0%|          | 0/28 [00:00<?, ?it/s]

{'eval_loss': 1.224991798400879, 'eval_f1_micro': 0.5421443020903574, 'eval_f1_macro': 0.4282900692599742, 'eval_f1_weighted': 0.5643766453757195, 'eval_precision_micro': 0.47686832740213525, 'eval_precision_macro': 0.3791189499220989, 'eval_precision_weighted': 0.5292253638446015, 'eval_recall_micro': 0.628125, 'eval_recall_macro': 0.5196118709132163, 'eval_recall_weighted': 0.628125, 'eval_precision_Toxic Emotions': 0.5128205128205128, 'eval_precision_Missing Commitment': 0.6172839506172839, 'eval_precision_Missing Intelligibility': 0.6, 'eval_precision_Other Reasons': 0.11764705882352941, 'eval_precision_Inappropriateness': 0.794392523364486, 'eval_precision_Excessive Intensity': 0.3815789473684211, 'eval_precision_Emotional Deception': 0.38666666666666666, 'eval_precision_Missing Seriousness': 0.3783783783783784, 'eval_precision_Missing Openness': 0.5421686746987951, 'eval_precision_Unclear Meaning': 0.42105263157894735, 'eval_precision_Missing Relevance': 0.3684210526315789, 'eval

  return fn(*args, **kwargs)


  0%|          | 0/28 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 1.2806110382080078, 'eval_f1_micro': 0.5518227305218013, 'eval_f1_macro': 0.43292697207319353, 'eval_f1_weighted': 0.5646602950742606, 'eval_precision_micro': 0.5085638998682477, 'eval_precision_macro': 0.39971427574364393, 'eval_precision_weighted': 0.5506542278688956, 'eval_recall_micro': 0.603125, 'eval_recall_macro': 0.49927019762886354, 'eval_recall_weighted': 0.603125, 'eval_precision_Toxic Emotions': 0.5507246376811594, 'eval_precision_Missing Commitment': 0.6282051282051282, 'eval_precision_Missing Intelligibility': 0.5921052631578947, 'eval_precision_Other Reasons': 0.14285714285714285, 'eval_precision_Inappropriateness': 0.8404255319148937, 'eval_precision_Excessive Intensity': 0.4153846153846154, 'eval_precision_Emotional Deception': 0.43548387096774194, 'eval_precision_Missing Seriousness': 0.3783783783783784, 'eval_precision_Missing Openness': 0.5375, 'eval_precision_Unclear Meaning': 0.4090909090909091, 'eval_precision_Missing Relevance': 0.4057971014492754,

  return fn(*args, **kwargs)


{'loss': 0.9657, 'grad_norm': 2.0372555255889893, 'learning_rate': 4.791666666666667e-05, 'epoch': 5.21}


  0%|          | 0/28 [00:00<?, ?it/s]

{'eval_loss': 1.2615774869918823, 'eval_f1_micro': 0.5551747772446881, 'eval_f1_macro': 0.44037459499585646, 'eval_f1_weighted': 0.5729626216672903, 'eval_precision_micro': 0.4945054945054945, 'eval_precision_macro': 0.3913950669213344, 'eval_precision_weighted': 0.5371770730544345, 'eval_recall_micro': 0.6328125, 'eval_recall_macro': 0.525145973392248, 'eval_recall_weighted': 0.6328125, 'eval_precision_Toxic Emotions': 0.5571428571428572, 'eval_precision_Missing Commitment': 0.620253164556962, 'eval_precision_Missing Intelligibility': 0.5647058823529412, 'eval_precision_Other Reasons': 0.07692307692307693, 'eval_precision_Inappropriateness': 0.8073394495412844, 'eval_precision_Excessive Intensity': 0.4375, 'eval_precision_Emotional Deception': 0.40298507462686567, 'eval_precision_Missing Seriousness': 0.3888888888888889, 'eval_precision_Missing Openness': 0.5180722891566265, 'eval_precision_Unclear Meaning': 0.4166666666666667, 'eval_precision_Missing Relevance': 0.3972602739726027, '

  return fn(*args, **kwargs)


  0%|          | 0/28 [00:00<?, ?it/s]

{'eval_loss': 1.3427879810333252, 'eval_f1_micro': 0.5461309523809523, 'eval_f1_macro': 0.44625960581117335, 'eval_f1_weighted': 0.5587902100311911, 'eval_precision_micro': 0.5213068181818182, 'eval_precision_macro': 0.42499442701074397, 'eval_precision_weighted': 0.5621910760465927, 'eval_recall_micro': 0.5734375, 'eval_recall_macro': 0.4912088926771257, 'eval_recall_weighted': 0.5734375, 'eval_precision_Toxic Emotions': 0.578125, 'eval_precision_Missing Commitment': 0.6521739130434783, 'eval_precision_Missing Intelligibility': 0.5733333333333334, 'eval_precision_Other Reasons': 0.2222222222222222, 'eval_precision_Inappropriateness': 0.8241758241758241, 'eval_precision_Excessive Intensity': 0.42857142857142855, 'eval_precision_Emotional Deception': 0.4642857142857143, 'eval_precision_Missing Seriousness': 0.4, 'eval_precision_Missing Openness': 0.5571428571428572, 'eval_precision_Unclear Meaning': 0.4126984126984127, 'eval_precision_Missing Relevance': 0.43548387096774194, 'eval_preci

  return fn(*args, **kwargs)


{'loss': 0.8392, 'grad_norm': 1.8036552667617798, 'learning_rate': 2.1875e-05, 'epoch': 7.81}


  0%|          | 0/28 [00:00<?, ?it/s]

{'eval_loss': 1.4119999408721924, 'eval_f1_micro': 0.5483146067415731, 'eval_f1_macro': 0.44661143785906493, 'eval_f1_weighted': 0.5636767545430397, 'eval_precision_micro': 0.5266187050359712, 'eval_precision_macro': 0.42634654971509695, 'eval_precision_weighted': 0.5748896818112349, 'eval_recall_micro': 0.571875, 'eval_recall_macro': 0.4909608487800514, 'eval_recall_weighted': 0.571875, 'eval_precision_Toxic Emotions': 0.5714285714285714, 'eval_precision_Missing Commitment': 0.6666666666666666, 'eval_precision_Missing Intelligibility': 0.6086956521739131, 'eval_precision_Other Reasons': 0.16666666666666666, 'eval_precision_Inappropriateness': 0.8390804597701149, 'eval_precision_Excessive Intensity': 0.42857142857142855, 'eval_precision_Emotional Deception': 0.4642857142857143, 'eval_precision_Missing Seriousness': 0.4, 'eval_precision_Missing Openness': 0.5774647887323944, 'eval_precision_Unclear Meaning': 0.43103448275862066, 'eval_precision_Missing Relevance': 0.4666666666666667, 'e

  return fn(*args, **kwargs)


  0%|          | 0/28 [00:00<?, ?it/s]

{'eval_loss': 1.435486078262329, 'eval_f1_micro': 0.556745182012848, 'eval_f1_macro': 0.45113400295619327, 'eval_f1_weighted': 0.5728046952282786, 'eval_precision_micro': 0.5124835742444153, 'eval_precision_macro': 0.4131804377588419, 'eval_precision_weighted': 0.5568753684509229, 'eval_recall_micro': 0.609375, 'eval_recall_macro': 0.5174938636538999, 'eval_recall_weighted': 0.609375, 'eval_precision_Toxic Emotions': 0.5797101449275363, 'eval_precision_Missing Commitment': 0.6363636363636364, 'eval_precision_Missing Intelligibility': 0.5733333333333334, 'eval_precision_Other Reasons': 0.15384615384615385, 'eval_precision_Inappropriateness': 0.8247422680412371, 'eval_precision_Excessive Intensity': 0.45161290322580644, 'eval_precision_Emotional Deception': 0.4126984126984127, 'eval_precision_Missing Seriousness': 0.4117647058823529, 'eval_precision_Missing Openness': 0.5584415584415584, 'eval_precision_Unclear Meaning': 0.4307692307692308, 'eval_precision_Missing Relevance': 0.417910447

  return fn(*args, **kwargs)


  0%|          | 0/28 [00:00<?, ?it/s]

{'eval_loss': 1.4513543844223022, 'eval_f1_micro': 0.5613783201722901, 'eval_f1_macro': 0.4543419727059398, 'eval_f1_weighted': 0.5766803172799573, 'eval_precision_micro': 0.5192563081009296, 'eval_precision_macro': 0.4180394184046369, 'eval_precision_weighted': 0.5622052302604653, 'eval_recall_micro': 0.6109375, 'eval_recall_macro': 0.5178267545028288, 'eval_recall_weighted': 0.6109375, 'eval_precision_Toxic Emotions': 0.6029411764705882, 'eval_precision_Missing Commitment': 0.64, 'eval_precision_Missing Intelligibility': 0.5733333333333334, 'eval_precision_Other Reasons': 0.16666666666666666, 'eval_precision_Inappropriateness': 0.8367346938775511, 'eval_precision_Excessive Intensity': 0.4444444444444444, 'eval_precision_Emotional Deception': 0.421875, 'eval_precision_Missing Seriousness': 0.4, 'eval_precision_Missing Openness': 0.5526315789473685, 'eval_precision_Unclear Meaning': 0.42857142857142855, 'eval_precision_Missing Relevance': 0.42424242424242425, 'eval_precision_Confusing 

TrainOutput(global_step=1920, training_loss=0.9292195479075114, metrics={'train_runtime': 2781.0473, 'train_samples_per_second': 5.512, 'train_steps_per_second': 0.69, 'total_flos': 5695145604936000.0, 'train_loss': 0.9292195479075114, 'epoch': 10.0})

In [None]:
from pathlib import Path
import pandas as pd


results_dir = Path("../results/deberta-v3-large")
if not results_dir.exists():
    results_dir.mkdir(parents=True)
    
val_metrics = trainer.evaluate(tokenized_ds['validation'], metric_key_prefix="validation")
print(f"{val_metrics=}")
pd.DataFrame(val_metrics, index=[0]).to_csv(results_dir / "validation.csv")

  0%|          | 0/28 [00:00<?, ?it/s]

val_metrics={'validation_loss': 1.112463116645813, 'validation_f1_micro': 0.5372960372960373, 'validation_f1_macro': 0.43136991730567076, 'validation_f1_weighted': 0.5663704279438035, 'validation_precision_micro': 0.4284386617100372, 'validation_precision_macro': 0.3473835421567258, 'validation_precision_weighted': 0.48837487541656743, 'validation_recall_micro': 0.7203125, 'validation_recall_macro': 0.6202862697093112, 'validation_recall_weighted': 0.7203125, 'validation_precision_Toxic Emotions': 0.46938775510204084, 'validation_precision_Missing Commitment': 0.5384615384615384, 'validation_precision_Missing Intelligibility': 0.5333333333333333, 'validation_precision_Other Reasons': 0.125, 'validation_precision_Inappropriateness': 0.7913043478260869, 'validation_precision_Excessive Intensity': 0.336734693877551, 'validation_precision_Emotional Deception': 0.36363636363636365, 'validation_precision_Missing Seriousness': 0.26785714285714285, 'validation_precision_Missing Openness': 0.47

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [15]:
test_metrics = trainer.evaluate(tokenized_ds['test'], metric_key_prefix="test")
print(f"{test_metrics=}")
pd.DataFrame(test_metrics, index=[0]).to_csv(results_dir / "test.csv")

  0%|          | 0/55 [00:00<?, ?it/s]

test_metrics={'test_loss': 1.0224368572235107, 'test_f1_micro': 0.5624483043837882, 'test_f1_macro': 0.46208281148774627, 'test_f1_weighted': 0.5967191005610047, 'test_precision_micro': 0.42659974905897113, 'test_precision_macro': 0.3539288546484118, 'test_precision_weighted': 0.48532947433990115, 'test_recall_micro': 0.8252427184466019, 'test_recall_macro': 0.7368117178933515, 'test_recall_weighted': 0.8252427184466019, 'test_precision_Toxic Emotions': 0.4672897196261682, 'test_precision_Missing Commitment': 0.5701357466063348, 'test_precision_Missing Intelligibility': 0.5377777777777778, 'test_precision_Other Reasons': 0.21052631578947367, 'test_precision_Inappropriateness': 0.7559055118110236, 'test_precision_Excessive Intensity': 0.32710280373831774, 'test_precision_Emotional Deception': 0.3088235294117647, 'test_precision_Missing Seriousness': 0.22727272727272727, 'test_precision_Missing Openness': 0.5246636771300448, 'test_precision_Unclear Meaning': 0.34782608695652173, 'test_pr

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
# save model
peft_model_id = 'multilabel_deberta_v3_large_peft'
trainer.model.save_pretrained(peft_model_id)
tokenizer.save_pretrained(peft_model_id)

('multilabel_deberta_v3_large_peft/tokenizer_config.json',
 'multilabel_deberta_v3_large_peft/special_tokens_map.json',
 'multilabel_deberta_v3_large_peft/spm.model',
 'multilabel_deberta_v3_large_peft/added_tokens.json',
 'multilabel_deberta_v3_large_peft/tokenizer.json')

In [13]:
from huggingface_hub import login
token = "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"
login(token)

repository_id = 'anismk/' + peft_model_id
model.push_to_hub(repository_id)
tokenizer.push_to_hub(repository_id)

adapter_model.safetensors:   0%|          | 0.00/14.3M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/anismk/multilabel_deberta_v3_large_peft/commit/0ec877a5769576a19e052fdf1da61c69feaf671b', commit_message='Upload tokenizer', commit_description='', oid='0ec877a5769576a19e052fdf1da61c69feaf671b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/anismk/multilabel_deberta_v3_large_peft', endpoint='https://huggingface.co', repo_type='model', repo_id='anismk/multilabel_deberta_v3_large_peft'), pr_revision=None, pr_num=None)

In [None]:
# load model
peft_model_id = 'multilabel_mistral'
model = AutoModelForSequenceClassification.from_pretrained(peft_model_id)