In [1]:
import torch
import torch.nn.functional as F
from src.utils import *
from src.models.models import Model_Phrase_Concatenation, Model_Phrase_Extraction, Model_concat_nopooling
from src.models.baseline import Baseline
from datasets import concatenate_datasets
from sklearn.utils.class_weight import compute_class_weight
from transformers import DefaultDataCollator, AutoTokenizer, TrainingArguments, Trainer






In [2]:
RANDOM_SEED = 42
set_seeds(RANDOM_SEED)

print(torch.__version__)
print(torch.cuda.is_available())

2.1.0+cu121
True


In [3]:
url = "https://drive.google.com/uc?export=download&id=1wVNU2XvvhqjaGXZM-JLJwOt97gt4g9j2"
dataset_name = "MELD_train_efr.json"

df_manager = DataframeManager(url, dataset_name)

df = df_manager.produce_df()
df

Current work directory: c:\Users\marco\OneDrive\Immagini\Documenti\GitHub\ediref


Unnamed: 0,episode,emotions,utterances,triggers,emotions_id
0,utterance_0,"[neutral, neutral, neutral, neutral, surprise]",[also I was the point person on my company's t...,"[0, 0, 0, 1, 0]","[4, 4, 4, 4, 3]"
1,utterance_1,"[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0, 0, 0, 0, 0, 1, 0]","[4, 4, 4, 4, 3, 4, 4]"
2,utterance_2,"[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]","[4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 1]"
3,utterance_3,"[neutral, neutral, neutral, neutral, surprise,...",[also I was the point person on my company's t...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]","[4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 1, 4, 3]"
4,utterance_4,"[surprise, sadness, surprise, fear]",[But then who? The waitress I went out with la...,"[0, 0, 1, 0]","[3, 0, 3, 1]"
...,...,...,...,...,...
3995,utterance_3995,"[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]","[4, 6, 4, 4, 3, 2, 4, 2, 3, 4, 4, 2]"
3996,utterance_3996,"[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]","[4, 6, 4, 4, 3, 2, 4, 2, 3, 4, 4, 2, 2, 4]"
3997,utterance_3997,"[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]","[4, 6, 4, 4, 3, 2, 4, 2, 3, 4, 4, 2, 2, 4, 4]"
3998,utterance_3998,"[neutral, joy, neutral, neutral, surprise, dis...","[Hey., Hey!, So how was Joan?, I broke up with...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]","[4, 6, 4, 4, 3, 2, 4, 2, 3, 4, 4, 2, 2, 4, 4, 3]"


In [4]:
model_card = 'google/electra-base-discriminator'

tokenizer = AutoTokenizer.from_pretrained(model_card)

model_dir = "./model_dir/"+model_card.split("/")[-1]+"/"
data_collator = DefaultDataCollator()

In [5]:
train_data_tokenized, val_data_tokenized, test_data_tokenized = df_manager.produce_dataset(tokenizer, RANDOM_SEED)

In [6]:
len(train_data_tokenized)

28062

In [7]:
def init_pos_weight(data, labels_column, class_weights=True, factor=1):
    y = data[labels_column].numpy()
    if class_weights:
        return torch.tensor(compute_class_weight(class_weight="balanced", classes=np.unique(y), y=y)).to("cuda")
    else:
        return torch.tensor(compute_class_weight(class_weight=None, classes=np.unique(y), y=y)).to("cuda")

In [8]:
class MultiLabelTrainer(Trainer):
    def __init__(self, pos_weight=None, **kwargs):
        self.emotions_pos_weight, self.triggers_pos_weight = pos_weight
        super().__init__(**kwargs)

    def compute_loss(self, model, inputs, return_outputs=False):
        emotions_true = inputs["emotions_id_one_hot_encoding"].to("cuda")
        triggers_true = inputs["triggers"].unsqueeze(1).float().to("cuda")

        result = model(**inputs)
        
        emotion_logits = result['emotion_logits'].to("cuda")
        trigger_logits = result['trigger_logits'].to("cuda")
        
        loss_fct_emotions = torch.nn.CrossEntropyLoss(weight=self.emotions_pos_weight).to("cuda")        
        loss_fct_triggers = torch.nn.BCEWithLogitsLoss(pos_weight=self.triggers_pos_weight[1]).to("cuda")
        
        loss_emotions = loss_fct_emotions(emotion_logits, emotions_true.float())
        loss_triggers = loss_fct_triggers(trigger_logits, triggers_true)


        loss_emotions_wt = 0.5
        loss_triggers_wt = 0.5

        loss = loss_emotions_wt*loss_emotions + loss_triggers_wt*loss_triggers
        return (loss, {'emotion_logits': emotion_logits, 'trigger_logits': trigger_logits}) if return_outputs else loss

def get_trainer(model, train, val, model_dir, class_weights=True, batch_size=1, epochs=20):
    training_args = TrainingArguments(
        output_dir=model_dir,
        learning_rate=2e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=1,
        lr_scheduler_type="cosine",
        metric_for_best_model='u_avg_f1',
        report_to='none',
        label_names=["emotions_id", "triggers", "dialogue_index"],
    )

    full_dataset = concatenate_datasets([train_data_tokenized, val_data_tokenized, test_data_tokenized])
    pos_weight = (init_pos_weight(full_dataset, df_manager.column_emotions_id, class_weights), init_pos_weight(full_dataset, df_manager.column_triggers, class_weights))

    trainer = MultiLabelTrainer(
        pos_weight=pos_weight,
        model=model,
        args=training_args,
        train_dataset=train,
        eval_dataset=val,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=lambda pred: compute_metrics(pred, len(df_manager.emotion2id.keys()))
    )

    return trainer

In [8]:
# How to load a model
# b = Model_Phrase_Extraction(len(df_manager.unique_emotions), tokenizer.sep_token_id)
# model_path = model_dir + "bert_extraction_51"
# b.load_state_dict(torch.load(model_path+"/bert_extraction_51.pth"))
# tr = get_trainer(b, train_data_tokenized, val_data_tokenized, model_path, class_weights=True, batch_size=4, epochs=5)
# tr.evaluate(val_data_tokenized)

In [11]:
def train_extr_model(seed, model_card, freeze=False):
    set_seeds(seed)
    base_model = Model_Phrase_Extraction(len(df_manager.unique_emotions), tokenizer.sep_token_id, model_card, freeze)
    model_path = model_dir+"electra_extraction_freeze_"+str(seed) if freeze else model_dir+"electra_extraction_"+str(seed)

    trainer = get_trainer(base_model, train_data_tokenized, val_data_tokenized, model_path, class_weights=True, batch_size=4, epochs=5)

    print(f'Training EXTRACTION MODEL with seed {seed}:')

    trainer.train()
    save_name = "/electra_extraction_freeze_"+str(seed)+".pth" if freeze else "/electra_extraction_"+str(seed)+".pth"
    torch.save(base_model.state_dict(), model_path+save_name)

In [10]:
seeds = [49, 666, 51, 77, 111]

In [12]:
# Training model with seed 49
train_extr_model(49, model_card)

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Training EXTRACTION MODEL with seed 49:


  0%|          | 0/35080 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 1.5503, 'grad_norm': 23.523033142089844, 'learning_rate': 1.9989976531650042e-05, 'epoch': 0.07}
{'loss': 1.4815, 'grad_norm': 14.407891273498535, 'learning_rate': 1.9959926220583713e-05, 'epoch': 0.14}
{'loss': 1.3513, 'grad_norm': 5.53718900680542, 'learning_rate': 1.9909909308469398e-05, 'epoch': 0.21}
{'loss': 1.3482, 'grad_norm': 13.497732162475586, 'learning_rate': 1.9840026063894193e-05, 'epoch': 0.29}
{'loss': 1.3739, 'grad_norm': 4.764459609985352, 'learning_rate': 1.9750416581356147e-05, 'epoch': 0.36}
{'loss': 1.3332, 'grad_norm': 2.6084022521972656, 'learning_rate': 1.9641260500417672e-05, 'epoch': 0.43}
{'loss': 1.3642, 'grad_norm': 7.200868129730225, 'learning_rate': 1.9512776645583266e-05, 'epoch': 0.5}
{'loss': 1.2698, 'grad_norm': 28.135955810546875, 'learning_rate': 1.9365222587623407e-05, 'epoch': 0.57}
{'loss': 1.3, 'grad_norm': 4.9596781730651855, 'learning_rate': 1.9198894127224075e-05, 'epoch': 0.64}
{'loss': 1.3046, 'grad_norm': 8.886441230773926, 'lear

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.2258921397153542, 'eval_accuracy_emotions': 0.5278, 'eval_accuracy_triggers': 0.828, 'eval_u_avg_f1': 0.5334, 'eval_u_f1scores_emotions': 0.4036, 'eval_u_f1scores_triggers': 0.6633, 'eval_d_f1scores_emotions': 0.5369, 'eval_d_f1scores_triggers': 0.7931, 'eval_runtime': 687.9695, 'eval_samples_per_second': 4.996, 'eval_steps_per_second': 1.25, 'epoch': 1.0}
{'loss': 1.1847, 'grad_norm': 4.730344295501709, 'learning_rate': 1.7827877610796515e-05, 'epoch': 1.07}
{'loss': 1.1864, 'grad_norm': 8.70012092590332, 'learning_rate': 1.7541478823146328e-05, 'epoch': 1.14}
{'loss': 1.1209, 'grad_norm': 12.081023216247559, 'learning_rate': 1.7239961680637002e-05, 'epoch': 1.21}
{'loss': 1.1398, 'grad_norm': 7.351411819458008, 'learning_rate': 1.6923930632775517e-05, 'epoch': 1.28}
{'loss': 1.0389, 'grad_norm': 5.166856288909912, 'learning_rate': 1.6594019225003043e-05, 'epoch': 1.35}
{'loss': 1.0742, 'grad_norm': 6.098517417907715, 'learning_rate': 1.62508888286304e-05, 'epoch': 1.4

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.0037077160306693, 'eval_accuracy_emotions': 0.6026, 'eval_accuracy_triggers': 0.8164, 'eval_u_avg_f1': 0.6162, 'eval_u_f1scores_emotions': 0.5538, 'eval_u_f1scores_triggers': 0.6787, 'eval_d_f1scores_emotions': 0.6017, 'eval_d_f1scores_triggers': 0.7807, 'eval_runtime': 687.9829, 'eval_samples_per_second': 4.996, 'eval_steps_per_second': 1.25, 'epoch': 2.0}
{'loss': 0.8511, 'grad_norm': 7.77825403213501, 'learning_rate': 1.2688968189084494e-05, 'epoch': 2.07}
{'loss': 0.8637, 'grad_norm': 5.907987117767334, 'learning_rate': 1.225513357792187e-05, 'epoch': 2.14}
{'loss': 0.8893, 'grad_norm': 67.90797424316406, 'learning_rate': 1.1816778114750594e-05, 'epoch': 2.21}
{'loss': 0.8565, 'grad_norm': 5.908543586730957, 'learning_rate': 1.13747805679929e-05, 'epoch': 2.28}
{'loss': 0.8985, 'grad_norm': 13.548846244812012, 'learning_rate': 1.0930027007332924e-05, 'epoch': 2.35}
{'loss': 0.8602, 'grad_norm': 20.146947860717773, 'learning_rate': 1.0483409027418426e-05, 'epoch': 2.

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7938696966618242, 'eval_accuracy_emotions': 0.7373, 'eval_accuracy_triggers': 0.8086, 'eval_u_avg_f1': 0.695, 'eval_u_f1scores_emotions': 0.6911, 'eval_u_f1scores_triggers': 0.6988, 'eval_d_f1scores_emotions': 0.7215, 'eval_d_f1scores_triggers': 0.7629, 'eval_runtime': 688.0721, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 3.0}
{'loss': 0.6747, 'grad_norm': 21.591777801513672, 'learning_rate': 6.527489286891458e-06, 'epoch': 3.06}
{'loss': 0.7254, 'grad_norm': 8.639361381530762, 'learning_rate': 6.111198874944846e-06, 'epoch': 3.14}
{'loss': 0.6415, 'grad_norm': 36.58537292480469, 'learning_rate': 5.702704317997491e-06, 'epoch': 3.21}
{'loss': 0.6582, 'grad_norm': 9.00691032409668, 'learning_rate': 5.302824522501931e-06, 'epoch': 3.28}
{'loss': 0.6819, 'grad_norm': 78.05694580078125, 'learning_rate': 4.912361124952949e-06, 'epoch': 3.35}
{'loss': 0.6355, 'grad_norm': 44.683773040771484, 'learning_rate': 4.532096884851978e-06, 'epoch': 3.42}


  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7240359774864472, 'eval_accuracy_emotions': 0.8013, 'eval_accuracy_triggers': 0.8179, 'eval_u_avg_f1': 0.7388, 'eval_u_f1scores_emotions': 0.7804, 'eval_u_f1scores_triggers': 0.6973, 'eval_d_f1scores_emotions': 0.7912, 'eval_d_f1scores_triggers': 0.7727, 'eval_runtime': 688.1218, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 4.0}
{'loss': 0.5582, 'grad_norm': 9.661345481872559, 'learning_rate': 1.6865479389478545e-06, 'epoch': 4.06}
{'loss': 0.5658, 'grad_norm': 99.94718170166016, 'learning_rate': 1.4461099843816684e-06, 'epoch': 4.13}
{'loss': 0.562, 'grad_norm': 30.64739418029785, 'learning_rate': 1.2228199589835998e-06, 'epoch': 4.2}
{'loss': 0.5751, 'grad_norm': 15.452826499938965, 'learning_rate': 1.0171254908541373e-06, 'epoch': 4.28}
{'loss': 0.5748, 'grad_norm': 11.17940616607666, 'learning_rate': 8.294389343914899e-07, 'epoch': 4.35}
{'loss': 0.5607, 'grad_norm': 7.925429344177246, 'learning_rate': 6.601365436473439e-07, 'epoch': 4.4

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7348935926066839, 'eval_accuracy_emotions': 0.8179, 'eval_accuracy_triggers': 0.8222, 'eval_u_avg_f1': 0.7448, 'eval_u_f1scores_emotions': 0.7986, 'eval_u_f1scores_triggers': 0.691, 'eval_d_f1scores_emotions': 0.8057, 'eval_d_f1scores_triggers': 0.7765, 'eval_runtime': 688.0526, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 5.0}
{'train_runtime': 11173.1558, 'train_samples_per_second': 12.558, 'train_steps_per_second': 3.14, 'train_loss': 0.8870776417861642, 'epoch': 5.0}


In [13]:
# Training model with seed 666
train_extr_model(666, model_card)

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Training EXTRACTION MODEL with seed 666:


  0%|          | 0/35080 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 1.5579, 'grad_norm': 18.56740379333496, 'learning_rate': 1.9989976531650042e-05, 'epoch': 0.07}
{'loss': 1.4716, 'grad_norm': 7.754338264465332, 'learning_rate': 1.9959926220583713e-05, 'epoch': 0.14}
{'loss': 1.3423, 'grad_norm': 4.732564926147461, 'learning_rate': 1.9909909308469398e-05, 'epoch': 0.21}
{'loss': 1.3347, 'grad_norm': 12.34620189666748, 'learning_rate': 1.9840026063894193e-05, 'epoch': 0.29}
{'loss': 1.3627, 'grad_norm': 4.137117862701416, 'learning_rate': 1.9750416581356147e-05, 'epoch': 0.36}
{'loss': 1.3194, 'grad_norm': 1.6340515613555908, 'learning_rate': 1.9641260500417672e-05, 'epoch': 0.43}
{'loss': 1.3324, 'grad_norm': 7.403370380401611, 'learning_rate': 1.9512776645583266e-05, 'epoch': 0.5}
{'loss': 1.2725, 'grad_norm': 16.576576232910156, 'learning_rate': 1.9365222587623407e-05, 'epoch': 0.57}
{'loss': 1.3033, 'grad_norm': 5.041884422302246, 'learning_rate': 1.9198894127224075e-05, 'epoch': 0.64}
{'loss': 1.302, 'grad_norm': 5.951294898986816, 'learn

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.1969608082962853, 'eval_accuracy_emotions': 0.4745, 'eval_accuracy_triggers': 0.8118, 'eval_u_avg_f1': 0.5246, 'eval_u_f1scores_emotions': 0.3685, 'eval_u_f1scores_triggers': 0.6807, 'eval_d_f1scores_emotions': 0.4856, 'eval_d_f1scores_triggers': 0.7736, 'eval_runtime': 688.2279, 'eval_samples_per_second': 4.994, 'eval_steps_per_second': 1.25, 'epoch': 1.0}
{'loss': 1.2048, 'grad_norm': 6.208807945251465, 'learning_rate': 1.7827877610796515e-05, 'epoch': 1.07}
{'loss': 1.1869, 'grad_norm': 7.521693229675293, 'learning_rate': 1.7541478823146328e-05, 'epoch': 1.14}
{'loss': 1.1449, 'grad_norm': 11.467703819274902, 'learning_rate': 1.7239961680637002e-05, 'epoch': 1.21}
{'loss': 1.1537, 'grad_norm': 5.670267105102539, 'learning_rate': 1.6923930632775517e-05, 'epoch': 1.28}
{'loss': 1.073, 'grad_norm': 6.837320327758789, 'learning_rate': 1.6594019225003043e-05, 'epoch': 1.35}
{'loss': 1.1039, 'grad_norm': 7.393533706665039, 'learning_rate': 1.62508888286304e-05, 'epoch': 1.

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.0389991013595463, 'eval_accuracy_emotions': 0.5985, 'eval_accuracy_triggers': 0.8208, 'eval_u_avg_f1': 0.608, 'eval_u_f1scores_emotions': 0.5337, 'eval_u_f1scores_triggers': 0.6823, 'eval_d_f1scores_emotions': 0.6016, 'eval_d_f1scores_triggers': 0.7795, 'eval_runtime': 688.1068, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 2.0}
{'loss': 0.8677, 'grad_norm': 5.7922868728637695, 'learning_rate': 1.2688968189084494e-05, 'epoch': 2.07}
{'loss': 0.9171, 'grad_norm': 6.638861656188965, 'learning_rate': 1.225513357792187e-05, 'epoch': 2.14}
{'loss': 0.8998, 'grad_norm': 41.45229721069336, 'learning_rate': 1.1816778114750594e-05, 'epoch': 2.21}
{'loss': 0.8851, 'grad_norm': 7.716005802154541, 'learning_rate': 1.13747805679929e-05, 'epoch': 2.28}
{'loss': 0.8937, 'grad_norm': 22.192787170410156, 'learning_rate': 1.0930027007332924e-05, 'epoch': 2.35}
{'loss': 0.9022, 'grad_norm': 60.65625, 'learning_rate': 1.0483409027418426e-05, 'epoch': 2.42}
{'los

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.8131707961034943, 'eval_accuracy_emotions': 0.7163, 'eval_accuracy_triggers': 0.815, 'eval_u_avg_f1': 0.6889, 'eval_u_f1scores_emotions': 0.6799, 'eval_u_f1scores_triggers': 0.698, 'eval_d_f1scores_emotions': 0.7073, 'eval_d_f1scores_triggers': 0.7753, 'eval_runtime': 688.1819, 'eval_samples_per_second': 4.994, 'eval_steps_per_second': 1.25, 'epoch': 3.0}
{'loss': 0.675, 'grad_norm': 35.65281677246094, 'learning_rate': 6.527489286891458e-06, 'epoch': 3.06}
{'loss': 0.7426, 'grad_norm': 9.14625072479248, 'learning_rate': 6.111198874944846e-06, 'epoch': 3.14}
{'loss': 0.6422, 'grad_norm': 38.97269058227539, 'learning_rate': 5.702704317997491e-06, 'epoch': 3.21}
{'loss': 0.669, 'grad_norm': 5.674678325653076, 'learning_rate': 5.302824522501931e-06, 'epoch': 3.28}
{'loss': 0.7189, 'grad_norm': 126.68789672851562, 'learning_rate': 4.912361124952949e-06, 'epoch': 3.35}
{'loss': 0.6437, 'grad_norm': 41.55527877807617, 'learning_rate': 4.532096884851978e-06, 'epoch': 3.42}
{'lo

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7415418211046875, 'eval_accuracy_emotions': 0.7914, 'eval_accuracy_triggers': 0.8161, 'eval_u_avg_f1': 0.7334, 'eval_u_f1scores_emotions': 0.7663, 'eval_u_f1scores_triggers': 0.7006, 'eval_d_f1scores_emotions': 0.7914, 'eval_d_f1scores_triggers': 0.7736, 'eval_runtime': 688.0581, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 4.0}
{'loss': 0.5735, 'grad_norm': 19.47225570678711, 'learning_rate': 1.6865479389478545e-06, 'epoch': 4.06}
{'loss': 0.5784, 'grad_norm': 89.74699401855469, 'learning_rate': 1.4461099843816684e-06, 'epoch': 4.13}
{'loss': 0.5634, 'grad_norm': 17.595260620117188, 'learning_rate': 1.2228199589835998e-06, 'epoch': 4.2}
{'loss': 0.6189, 'grad_norm': 17.248289108276367, 'learning_rate': 1.0171254908541373e-06, 'epoch': 4.28}
{'loss': 0.5946, 'grad_norm': 7.25916862487793, 'learning_rate': 8.294389343914899e-07, 'epoch': 4.35}
{'loss': 0.5863, 'grad_norm': 5.552279472351074, 'learning_rate': 6.601365436473439e-07, 'epoch': 4.

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.755768913634494, 'eval_accuracy_emotions': 0.8059, 'eval_accuracy_triggers': 0.8219, 'eval_u_avg_f1': 0.738, 'eval_u_f1scores_emotions': 0.7837, 'eval_u_f1scores_triggers': 0.6923, 'eval_d_f1scores_emotions': 0.7979, 'eval_d_f1scores_triggers': 0.7812, 'eval_runtime': 687.817, 'eval_samples_per_second': 4.997, 'eval_steps_per_second': 1.25, 'epoch': 5.0}
{'train_runtime': 11113.1561, 'train_samples_per_second': 12.626, 'train_steps_per_second': 3.157, 'train_loss': 0.9015629147450219, 'epoch': 5.0}


In [14]:
# Training model with seed 51
train_extr_model(51, model_card)

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Training EXTRACTION MODEL with seed 51:


  0%|          | 0/35080 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 1.5472, 'grad_norm': 19.608644485473633, 'learning_rate': 1.9989976531650042e-05, 'epoch': 0.07}
{'loss': 1.4529, 'grad_norm': 8.400200843811035, 'learning_rate': 1.9959926220583713e-05, 'epoch': 0.14}
{'loss': 1.3483, 'grad_norm': 3.9104936122894287, 'learning_rate': 1.9909909308469398e-05, 'epoch': 0.21}
{'loss': 1.3438, 'grad_norm': 14.281896591186523, 'learning_rate': 1.9840026063894193e-05, 'epoch': 0.29}
{'loss': 1.3507, 'grad_norm': 4.627607345581055, 'learning_rate': 1.9750416581356147e-05, 'epoch': 0.36}
{'loss': 1.3079, 'grad_norm': 2.688399314880371, 'learning_rate': 1.9641260500417672e-05, 'epoch': 0.43}
{'loss': 1.343, 'grad_norm': 9.18236255645752, 'learning_rate': 1.9512776645583266e-05, 'epoch': 0.5}
{'loss': 1.2604, 'grad_norm': 22.711729049682617, 'learning_rate': 1.9365222587623407e-05, 'epoch': 0.57}
{'loss': 1.277, 'grad_norm': 4.372272491455078, 'learning_rate': 1.9198894127224075e-05, 'epoch': 0.64}
{'loss': 1.2629, 'grad_norm': 4.408370494842529, 'learn

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.161741598481461, 'eval_accuracy_emotions': 0.5333, 'eval_accuracy_triggers': 0.8109, 'eval_u_avg_f1': 0.5549, 'eval_u_f1scores_emotions': 0.4288, 'eval_u_f1scores_triggers': 0.681, 'eval_d_f1scores_emotions': 0.5411, 'eval_d_f1scores_triggers': 0.7734, 'eval_runtime': 688.0708, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 1.0}
{'loss': 1.1526, 'grad_norm': 5.3143439292907715, 'learning_rate': 1.7827877610796515e-05, 'epoch': 1.07}
{'loss': 1.0983, 'grad_norm': 10.162651062011719, 'learning_rate': 1.7541478823146328e-05, 'epoch': 1.14}
{'loss': 1.0819, 'grad_norm': 16.898889541625977, 'learning_rate': 1.7239961680637002e-05, 'epoch': 1.21}
{'loss': 1.0812, 'grad_norm': 7.683900833129883, 'learning_rate': 1.6923930632775517e-05, 'epoch': 1.28}
{'loss': 1.0102, 'grad_norm': 8.674668312072754, 'learning_rate': 1.6594019225003043e-05, 'epoch': 1.35}
{'loss': 1.0457, 'grad_norm': 6.801219940185547, 'learning_rate': 1.62508888286304e-05, 'epoch': 1

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.9618715810653982, 'eval_accuracy_emotions': 0.6375, 'eval_accuracy_triggers': 0.8152, 'eval_u_avg_f1': 0.636, 'eval_u_f1scores_emotions': 0.5889, 'eval_u_f1scores_triggers': 0.683, 'eval_d_f1scores_emotions': 0.6395, 'eval_d_f1scores_triggers': 0.7797, 'eval_runtime': 688.1187, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 2.0}
{'loss': 0.8177, 'grad_norm': 6.732967376708984, 'learning_rate': 1.2688968189084494e-05, 'epoch': 2.07}
{'loss': 0.8295, 'grad_norm': 5.495974540710449, 'learning_rate': 1.225513357792187e-05, 'epoch': 2.14}
{'loss': 0.8213, 'grad_norm': 70.8066177368164, 'learning_rate': 1.1816778114750594e-05, 'epoch': 2.21}
{'loss': 0.8152, 'grad_norm': 22.77823257446289, 'learning_rate': 1.13747805679929e-05, 'epoch': 2.28}
{'loss': 0.8456, 'grad_norm': 27.501218795776367, 'learning_rate': 1.0930027007332924e-05, 'epoch': 2.35}
{'loss': 0.8107, 'grad_norm': 36.165592193603516, 'learning_rate': 1.0483409027418426e-05, 'epoch': 2.42

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7649085547133277, 'eval_accuracy_emotions': 0.7664, 'eval_accuracy_triggers': 0.8042, 'eval_u_avg_f1': 0.7132, 'eval_u_f1scores_emotions': 0.7321, 'eval_u_f1scores_triggers': 0.6944, 'eval_d_f1scores_emotions': 0.7548, 'eval_d_f1scores_triggers': 0.7604, 'eval_runtime': 688.0941, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 3.0}
{'loss': 0.6233, 'grad_norm': 21.648862838745117, 'learning_rate': 6.527489286891458e-06, 'epoch': 3.06}
{'loss': 0.6654, 'grad_norm': 5.890345096588135, 'learning_rate': 6.111198874944846e-06, 'epoch': 3.14}
{'loss': 0.6085, 'grad_norm': 22.171417236328125, 'learning_rate': 5.702704317997491e-06, 'epoch': 3.21}
{'loss': 0.6064, 'grad_norm': 7.3182525634765625, 'learning_rate': 5.302824522501931e-06, 'epoch': 3.28}
{'loss': 0.6259, 'grad_norm': 57.36540603637695, 'learning_rate': 4.912361124952949e-06, 'epoch': 3.35}
{'loss': 0.5876, 'grad_norm': 34.90696334838867, 'learning_rate': 4.532096884851978e-06, 'epoch': 3.4

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7131614371977548, 'eval_accuracy_emotions': 0.826, 'eval_accuracy_triggers': 0.8164, 'eval_u_avg_f1': 0.7475, 'eval_u_f1scores_emotions': 0.8023, 'eval_u_f1scores_triggers': 0.6928, 'eval_d_f1scores_emotions': 0.8181, 'eval_d_f1scores_triggers': 0.774, 'eval_runtime': 688.1333, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 4.0}
{'loss': 0.5226, 'grad_norm': 13.5438871383667, 'learning_rate': 1.6865479389478545e-06, 'epoch': 4.06}
{'loss': 0.5298, 'grad_norm': 35.197288513183594, 'learning_rate': 1.4461099843816684e-06, 'epoch': 4.13}
{'loss': 0.5156, 'grad_norm': 15.259615898132324, 'learning_rate': 1.2228199589835998e-06, 'epoch': 4.2}
{'loss': 0.5412, 'grad_norm': 20.333227157592773, 'learning_rate': 1.0171254908541373e-06, 'epoch': 4.28}
{'loss': 0.5267, 'grad_norm': 12.428129196166992, 'learning_rate': 8.294389343914899e-07, 'epoch': 4.35}
{'loss': 0.5161, 'grad_norm': 9.315380096435547, 'learning_rate': 6.601365436473439e-07, 'epoch': 4.

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7381752438258306, 'eval_accuracy_emotions': 0.8374, 'eval_accuracy_triggers': 0.8272, 'eval_u_avg_f1': 0.7532, 'eval_u_f1scores_emotions': 0.8174, 'eval_u_f1scores_triggers': 0.689, 'eval_d_f1scores_emotions': 0.8268, 'eval_d_f1scores_triggers': 0.7854, 'eval_runtime': 688.1447, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 5.0}
{'train_runtime': 11098.1396, 'train_samples_per_second': 12.643, 'train_steps_per_second': 3.161, 'train_loss': 0.845754943986847, 'epoch': 5.0}


In [15]:
# Training model with seed 77
train_extr_model(77, model_card)

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Training EXTRACTION MODEL with seed 77:


  0%|          | 0/35080 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 1.55, 'grad_norm': 20.195926666259766, 'learning_rate': 1.9989976531650042e-05, 'epoch': 0.07}
{'loss': 1.4791, 'grad_norm': 7.536732196807861, 'learning_rate': 1.9959926220583713e-05, 'epoch': 0.14}
{'loss': 1.3439, 'grad_norm': 6.286160469055176, 'learning_rate': 1.9909909308469398e-05, 'epoch': 0.21}
{'loss': 1.3465, 'grad_norm': 11.505781173706055, 'learning_rate': 1.9840026063894193e-05, 'epoch': 0.29}
{'loss': 1.3554, 'grad_norm': 4.116151809692383, 'learning_rate': 1.9750416581356147e-05, 'epoch': 0.36}
{'loss': 1.3187, 'grad_norm': 2.221175193786621, 'learning_rate': 1.9641260500417672e-05, 'epoch': 0.43}
{'loss': 1.3309, 'grad_norm': 7.87340784072876, 'learning_rate': 1.9512776645583266e-05, 'epoch': 0.5}
{'loss': 1.2638, 'grad_norm': 18.67973518371582, 'learning_rate': 1.9365222587623407e-05, 'epoch': 0.57}
{'loss': 1.2878, 'grad_norm': 4.627801895141602, 'learning_rate': 1.9198894127224075e-05, 'epoch': 0.64}
{'loss': 1.2847, 'grad_norm': 5.233429908752441, 'learnin

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.1897829642751019, 'eval_accuracy_emotions': 0.5412, 'eval_accuracy_triggers': 0.8123, 'eval_u_avg_f1': 0.5357, 'eval_u_f1scores_emotions': 0.4148, 'eval_u_f1scores_triggers': 0.6567, 'eval_d_f1scores_emotions': 0.5484, 'eval_d_f1scores_triggers': 0.7753, 'eval_runtime': 688.1403, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 1.0}
{'loss': 1.1705, 'grad_norm': 5.365209579467773, 'learning_rate': 1.7827877610796515e-05, 'epoch': 1.07}
{'loss': 1.1518, 'grad_norm': 9.975178718566895, 'learning_rate': 1.7541478823146328e-05, 'epoch': 1.14}
{'loss': 1.1006, 'grad_norm': 9.860774040222168, 'learning_rate': 1.7239961680637002e-05, 'epoch': 1.21}
{'loss': 1.1273, 'grad_norm': 10.404792785644531, 'learning_rate': 1.6923930632775517e-05, 'epoch': 1.28}
{'loss': 1.0385, 'grad_norm': 11.451273918151855, 'learning_rate': 1.6594019225003043e-05, 'epoch': 1.35}
{'loss': 1.0651, 'grad_norm': 4.713058948516846, 'learning_rate': 1.62508888286304e-05, 'epoch': 

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.0289282789212422, 'eval_accuracy_emotions': 0.6119, 'eval_accuracy_triggers': 0.8234, 'eval_u_avg_f1': 0.6069, 'eval_u_f1scores_emotions': 0.5585, 'eval_u_f1scores_triggers': 0.6553, 'eval_d_f1scores_emotions': 0.6132, 'eval_d_f1scores_triggers': 0.7867, 'eval_runtime': 687.989, 'eval_samples_per_second': 4.996, 'eval_steps_per_second': 1.25, 'epoch': 2.0}
{'loss': 0.8471, 'grad_norm': 9.569546699523926, 'learning_rate': 1.2688968189084494e-05, 'epoch': 2.07}
{'loss': 0.8615, 'grad_norm': 4.843546390533447, 'learning_rate': 1.225513357792187e-05, 'epoch': 2.14}
{'loss': 0.8635, 'grad_norm': 35.08657455444336, 'learning_rate': 1.1816778114750594e-05, 'epoch': 2.21}
{'loss': 0.8269, 'grad_norm': 8.162013053894043, 'learning_rate': 1.13747805679929e-05, 'epoch': 2.28}
{'loss': 0.8743, 'grad_norm': 9.844584465026855, 'learning_rate': 1.0930027007332924e-05, 'epoch': 2.35}
{'loss': 0.8348, 'grad_norm': 54.52912521362305, 'learning_rate': 1.0483409027418426e-05, 'epoch': 2.42

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.779824101229457, 'eval_accuracy_emotions': 0.7445, 'eval_accuracy_triggers': 0.8144, 'eval_u_avg_f1': 0.7058, 'eval_u_f1scores_emotions': 0.713, 'eval_u_f1scores_triggers': 0.6985, 'eval_d_f1scores_emotions': 0.7299, 'eval_d_f1scores_triggers': 0.7743, 'eval_runtime': 688.1385, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 3.0}
{'loss': 0.6464, 'grad_norm': 15.554058074951172, 'learning_rate': 6.527489286891458e-06, 'epoch': 3.06}
{'loss': 0.6993, 'grad_norm': 7.9339094161987305, 'learning_rate': 6.111198874944846e-06, 'epoch': 3.14}
{'loss': 0.6084, 'grad_norm': 37.627559661865234, 'learning_rate': 5.702704317997491e-06, 'epoch': 3.21}
{'loss': 0.6303, 'grad_norm': 6.557735919952393, 'learning_rate': 5.302824522501931e-06, 'epoch': 3.28}
{'loss': 0.6743, 'grad_norm': 52.76289749145508, 'learning_rate': 4.912361124952949e-06, 'epoch': 3.35}
{'loss': 0.6129, 'grad_norm': 58.15658950805664, 'learning_rate': 4.532096884851978e-06, 'epoch': 3.42}

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7137157415534108, 'eval_accuracy_emotions': 0.8056, 'eval_accuracy_triggers': 0.8161, 'eval_u_avg_f1': 0.7374, 'eval_u_f1scores_emotions': 0.7853, 'eval_u_f1scores_triggers': 0.6895, 'eval_d_f1scores_emotions': 0.7971, 'eval_d_f1scores_triggers': 0.7721, 'eval_runtime': 688.0824, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 4.0}
{'loss': 0.5375, 'grad_norm': 14.192309379577637, 'learning_rate': 1.6865479389478545e-06, 'epoch': 4.06}
{'loss': 0.5463, 'grad_norm': 62.915950775146484, 'learning_rate': 1.4461099843816684e-06, 'epoch': 4.13}
{'loss': 0.5498, 'grad_norm': 14.207857131958008, 'learning_rate': 1.2228199589835998e-06, 'epoch': 4.2}
{'loss': 0.5701, 'grad_norm': 19.491853713989258, 'learning_rate': 1.0171254908541373e-06, 'epoch': 4.28}
{'loss': 0.5497, 'grad_norm': 11.624287605285645, 'learning_rate': 8.294389343914899e-07, 'epoch': 4.35}
{'loss': 0.5366, 'grad_norm': 8.002408027648926, 'learning_rate': 6.601365436473439e-07, 'epoch'

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7223600885174735, 'eval_accuracy_emotions': 0.8152, 'eval_accuracy_triggers': 0.8219, 'eval_u_avg_f1': 0.7443, 'eval_u_f1scores_emotions': 0.8025, 'eval_u_f1scores_triggers': 0.6861, 'eval_d_f1scores_emotions': 0.8021, 'eval_d_f1scores_triggers': 0.7791, 'eval_runtime': 687.9513, 'eval_samples_per_second': 4.996, 'eval_steps_per_second': 1.25, 'epoch': 5.0}
{'train_runtime': 11142.1656, 'train_samples_per_second': 12.593, 'train_steps_per_second': 3.148, 'train_loss': 0.865916392196951, 'epoch': 5.0}


In [16]:
# Training model with seed 111
train_extr_model(111, model_card)

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Training EXTRACTION MODEL with seed 111:


  0%|          | 0/35080 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 1.5547, 'grad_norm': 17.89653778076172, 'learning_rate': 1.9989976531650042e-05, 'epoch': 0.07}
{'loss': 1.5031, 'grad_norm': 28.77094841003418, 'learning_rate': 1.9959926220583713e-05, 'epoch': 0.14}
{'loss': 1.3796, 'grad_norm': 3.3197789192199707, 'learning_rate': 1.9909909308469398e-05, 'epoch': 0.21}
{'loss': 1.3599, 'grad_norm': 21.892202377319336, 'learning_rate': 1.9840026063894193e-05, 'epoch': 0.29}
{'loss': 1.3732, 'grad_norm': 3.5873613357543945, 'learning_rate': 1.9750416581356147e-05, 'epoch': 0.36}
{'loss': 1.3494, 'grad_norm': 2.694654703140259, 'learning_rate': 1.9641260500417672e-05, 'epoch': 0.43}
{'loss': 1.3449, 'grad_norm': 4.54103946685791, 'learning_rate': 1.9512776645583266e-05, 'epoch': 0.5}
{'loss': 1.2746, 'grad_norm': 24.71185302734375, 'learning_rate': 1.9365222587623407e-05, 'epoch': 0.57}
{'loss': 1.3182, 'grad_norm': 4.146399974822998, 'learning_rate': 1.9198894127224075e-05, 'epoch': 0.64}
{'loss': 1.3174, 'grad_norm': 5.558351039886475, 'lear

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.228665440281951, 'eval_accuracy_emotions': 0.4489, 'eval_accuracy_triggers': 0.817, 'eval_u_avg_f1': 0.5128, 'eval_u_f1scores_emotions': 0.3529, 'eval_u_f1scores_triggers': 0.6727, 'eval_d_f1scores_emotions': 0.4622, 'eval_d_f1scores_triggers': 0.779, 'eval_runtime': 688.0627, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 1.0}
{'loss': 1.2239, 'grad_norm': 4.233496189117432, 'learning_rate': 1.7827877610796515e-05, 'epoch': 1.07}
{'loss': 1.2358, 'grad_norm': 8.18569278717041, 'learning_rate': 1.7541478823146328e-05, 'epoch': 1.14}
{'loss': 1.1641, 'grad_norm': 17.71514129638672, 'learning_rate': 1.7239961680637002e-05, 'epoch': 1.21}
{'loss': 1.1927, 'grad_norm': 6.716207504272461, 'learning_rate': 1.6923930632775517e-05, 'epoch': 1.28}
{'loss': 1.1201, 'grad_norm': 7.243381500244141, 'learning_rate': 1.6594019225003043e-05, 'epoch': 1.35}
{'loss': 1.149, 'grad_norm': 7.283588886260986, 'learning_rate': 1.62508888286304e-05, 'epoch': 1.43}
{

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 1.0796683882507376, 'eval_accuracy_emotions': 0.5921, 'eval_accuracy_triggers': 0.8254, 'eval_u_avg_f1': 0.5933, 'eval_u_f1scores_emotions': 0.5269, 'eval_u_f1scores_triggers': 0.6596, 'eval_d_f1scores_emotions': 0.5911, 'eval_d_f1scores_triggers': 0.7868, 'eval_runtime': 688.0894, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 2.0}
{'loss': 0.9636, 'grad_norm': 5.827549934387207, 'learning_rate': 1.2688968189084494e-05, 'epoch': 2.07}
{'loss': 0.9599, 'grad_norm': 4.711796760559082, 'learning_rate': 1.225513357792187e-05, 'epoch': 2.14}
{'loss': 0.9357, 'grad_norm': 31.187774658203125, 'learning_rate': 1.1816778114750594e-05, 'epoch': 2.21}
{'loss': 0.9407, 'grad_norm': 5.3724775314331055, 'learning_rate': 1.13747805679929e-05, 'epoch': 2.28}
{'loss': 0.9871, 'grad_norm': 17.622352600097656, 'learning_rate': 1.0930027007332924e-05, 'epoch': 2.35}
{'loss': 0.9455, 'grad_norm': 73.21424865722656, 'learning_rate': 1.0483409027418426e-05, 'epoch': 

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.8567845772288228, 'eval_accuracy_emotions': 0.6864, 'eval_accuracy_triggers': 0.8048, 'eval_u_avg_f1': 0.6641, 'eval_u_f1scores_emotions': 0.6303, 'eval_u_f1scores_triggers': 0.6979, 'eval_d_f1scores_emotions': 0.6738, 'eval_d_f1scores_triggers': 0.7637, 'eval_runtime': 688.0888, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 3.0}
{'loss': 0.7443, 'grad_norm': 12.245230674743652, 'learning_rate': 6.527489286891458e-06, 'epoch': 3.06}
{'loss': 0.7907, 'grad_norm': 8.418944358825684, 'learning_rate': 6.111198874944846e-06, 'epoch': 3.14}
{'loss': 0.7325, 'grad_norm': 29.323875427246094, 'learning_rate': 5.702704317997491e-06, 'epoch': 3.21}
{'loss': 0.7363, 'grad_norm': 7.589890003204346, 'learning_rate': 5.302824522501931e-06, 'epoch': 3.28}
{'loss': 0.7442, 'grad_norm': 84.50823974609375, 'learning_rate': 4.912361124952949e-06, 'epoch': 3.35}
{'loss': 0.6948, 'grad_norm': 47.93317413330078, 'learning_rate': 4.532096884851978e-06, 'epoch': 3.42

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.799468365038622, 'eval_accuracy_emotions': 0.7617, 'eval_accuracy_triggers': 0.8164, 'eval_u_avg_f1': 0.7096, 'eval_u_f1scores_emotions': 0.723, 'eval_u_f1scores_triggers': 0.6962, 'eval_d_f1scores_emotions': 0.7513, 'eval_d_f1scores_triggers': 0.7785, 'eval_runtime': 688.0545, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 4.0}
{'loss': 0.6102, 'grad_norm': 12.554176330566406, 'learning_rate': 1.6865479389478545e-06, 'epoch': 4.06}
{'loss': 0.6394, 'grad_norm': 144.2042694091797, 'learning_rate': 1.4461099843816684e-06, 'epoch': 4.13}
{'loss': 0.6233, 'grad_norm': 26.036357879638672, 'learning_rate': 1.2228199589835998e-06, 'epoch': 4.2}
{'loss': 0.678, 'grad_norm': 15.784579277038574, 'learning_rate': 1.0171254908541373e-06, 'epoch': 4.28}
{'loss': 0.6432, 'grad_norm': 7.162163734436035, 'learning_rate': 8.294389343914899e-07, 'epoch': 4.35}
{'loss': 0.6358, 'grad_norm': 8.224676132202148, 'learning_rate': 6.601365436473439e-07, 'epoch': 4.4

  0%|          | 0/860 [00:00<?, ?it/s]

{'eval_loss': 0.7984874646421296, 'eval_accuracy_emotions': 0.776, 'eval_accuracy_triggers': 0.8167, 'eval_u_avg_f1': 0.7161, 'eval_u_f1scores_emotions': 0.7453, 'eval_u_f1scores_triggers': 0.6869, 'eval_d_f1scores_emotions': 0.7621, 'eval_d_f1scores_triggers': 0.776, 'eval_runtime': 688.0548, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 1.25, 'epoch': 5.0}
{'train_runtime': 11182.3991, 'train_samples_per_second': 12.547, 'train_steps_per_second': 3.137, 'train_loss': 0.9473706626022147, 'epoch': 5.0}


Train freezed model

In [None]:
# Training model with seed 49
train_extr_model(49, True)

In [None]:
# Training model with seed 666
train_extr_model(666, True)

In [None]:
# Training model with seed 51
train_extr_model(51, True)

In [None]:
# Training model with seed 77
train_extr_model(77, True)

In [None]:
# Training model with seed 111
train_extr_model(111, True)