In [41]:
from datasets.dataset_dict import DatasetDict
from datasets import Dataset, concatenate_datasets
import evaluate
import os
import re
import json
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split 


In [19]:
# OPTIONAL IF YOU WANT TO PUSH TO THE HUB!
from huggingface_hub import notebook_login
# IF RUNNING THIS CELL DOES NOT WORK:
# copy and paste this code in the terminal: huggingface-cli login 
# then paste this token: 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# It may not show up but still paste the token in and press enter


notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Import the dataset

In [42]:
path = "../../../eHRAF_Scraper-Analysis-and-Prep/Data/"
dataFolder = r"(subjects-(contracts_OR_disabilities_OR_disasters_OR_friendships_OR_gift_giving_OR_infant_feeding_OR_lineages_OR_local_officials_OR_luck_and_chance_OR_magicians_and_diviners_OR_mortuary_specialists_OR_nuclear_family_OR_priesthood_OR_prophet/"
# dataFolder = r'subjects-(sickness)_FILTERS-culture_level_samples(PSF)'

#load df (only load one of these commented out lines)
# df = pd.read_excel(f"{path}{dataFolder}/_Altogether_Dataset_RACoded.xlsx", header=[0,1], index_col=0) # Fall 2023 sickness + non-sickness
df = pd.read_excel(f"{path}{dataFolder}/_Altogether_Dataset_RACoded_Combined.xlsx", header=[0,1], index_col=0) # Spring 2023 - Spring 2024  sickness + nonsickness dataset
df.head(3)

Unnamed: 0_level_0,CULTURE,CULTURE,CULTURE,CULTURE,CULTURE,CULTURE,CULTURE,CULTURE,CULTURE,CULTURE,...,ACTION,ACTION,ACTION,ACTION,OTHER,CODER,CODER,CODER,CODER,CODER
Unnamed: 0_level_1,Passage Number,Region,SubRegion,Culture,DocTitle,Section,Author,Page,Year,OCM,...,Priest_High_Religion,Other,Description,Local_terms,Other_Comments,Run_Number,Finished,Coder,Dataset,Info
0,1392,Asia,South Asia,Andamans,Hygiene and medical practices among the Onge (...,1. Habitation,"Cipriani, Lidio",484,1961,"['171', '301', '727', '751', '765', '775', '777']",...,0,1,Several customs are believed to connect with t...,ibidanghe: made from decorated human jawbone ...,General note of this spreadsheet - many of the...,1,True,YM,1,"Dataset 1: ['750', '751', '752', '753'] Coun..."
1,1393,Asia,South Asia,Andamans,Hygiene and medical practices among the Onge (...,3. Food,"Cipriani, Lidio",487,1961,"['136', '231', '271', '312', '415', '516', '751']",...,0,0,No action is mentioned,0,,1,True,YM,1,"Dataset 2: ['784', '731', '732', '777', '791',..."
2,1395,Asia,South Asia,Andamans,Hygiene and medical practices among the Onge (...,3. Food,"Cipriani, Lidio",490,1961,"['114', '137', '164', '262', '273', '751', '825']",...,0,0,"Certain foods, such as Pteropus (giant bat) an...",Pteropus: a giant bat eaten by the Andaman Is...,,1,True,YM,1,Run 1: Spring 2023 Coding of Sickness dataset ...


In [43]:
df["CODER"][["Run_Number", "Dataset"]].value_counts(sort=False, dropna=False)

Run_Number  Dataset
1           1          1926
2           1            51
3           1          4193
            2          3305
Name: count, dtype: int64

In [44]:


# subdivide into just passage and outcome
df_small = pd.DataFrame()
df_small[["ID","passage","EVENT","CAUSE","ACTION"]] = df[[('CULTURE', "Passage Number"), ('CULTURE', "Passage"), ('EVENT', "No_Info"), ('CAUSE', "No_Info"), ('ACTION', "No_Info")]]
# Flip the lable of "no_info"
df_small[["EVENT","CAUSE","ACTION"]]  = df_small[["EVENT","CAUSE","ACTION"]].replace({0:1, 1:0})

# Remove certain passages which should not be in training or inference (these are duplicates that had to be manually found by a human)
values_to_remove = [3252, 33681, 6758, 10104]
df_small = df_small[~df_small['ID'].isin(values_to_remove)]

df_small

# # create train and validation/test sets
# train_val, test = train_test_split(df_small, test_size=0.2, random_state=10)

# create train and validation/test sets
train_val, test = train_test_split(df_small, test_size=0.2, random_state=10)
# # do it again to get the test and validation sets (15% = 50% * 30%)
# test, validation = train_test_split(test_val, test_size=0.5, random_state=10)




# Create an NLP friendly dataset
Hraf = DatasetDict(
    {'train':Dataset.from_dict(train_val.to_dict(orient= 'list')),
     'test':Dataset.from_dict(test.to_dict(orient= 'list'))})
Hraf


DatasetDict({
    train: Dataset({
        features: ['ID', 'passage', 'EVENT', 'CAUSE', 'ACTION'],
        num_rows: 7578
    })
    test: Dataset({
        features: ['ID', 'passage', 'EVENT', 'CAUSE', 'ACTION'],
        num_rows: 1895
    })
})

Make sure the training set is as biased as our groups (we want to train on as or less biased data as the groups they come from) <br>
We are shooting for equivelent biases across test, train, and validation (if it exists at this step)


In [45]:
# extract the total proportion
def totalProportion(df, col, present=1):
    value_counts = df[col].value_counts()
    percentage = round(value_counts[present]/len(df)*100,2)
    return percentage

# extracts percentages per datafaframe
def colProportion(Hraf, col):
    percentage_list = []
    for dataframe in Hraf.keys():
        percentage_list += [round(sum(Hraf[dataframe][col]) / (len(Hraf[dataframe]))*100,2)]
    return percentage_list



# print bias per label
dataframe_keys= Hraf.keys()
labels = [label for label in Hraf['train'].features.keys() if label not in ['ID', 'passage']]
header = "                                TOTAL"
for key in dataframe_keys:
    header += f"     {key}"
print(header)
print('_'*(len(header)+4))
for col in labels:
    totalPercentage =  totalProportion(df_small, col)
    percentage_list =  colProportion(Hraf, col)
    spacing = 10
    percentage_str = f"{totalPercentage}{' '* (spacing-len(str(totalPercentage)))}"
    for index, key in enumerate(dataframe_keys):
        percentage_str += f"{(len(key)-5)*' '}{percentage_list[index]}{' '* (spacing-len(str(percentage_list[index])))}"
    print(f"{col}:{' ' * (30- len(col))} {percentage_str}")

                                TOTAL     train     test
____________________________________________________________
EVENT:                          65.41     65.47     65.17     
CAUSE:                          48.87     49.26     47.28     
ACTION:                         49.74     49.72     49.82     


## Preprocess

Create labels for training and preprocessing

In [46]:

labels = [label for label in Hraf['train'].features.keys() if label not in ['ID', 'passage']]
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}
id2label

{0: 'EVENT', 1: 'CAUSE', 2: 'ACTION'}

load a DistilBERT tokenizer to preprocess the text field: <br>

Create a preprocessing function to tokenize text and truncate sequences to be no longer than DistilBERT’s maximum input length:<br>
Guidelines were followed from NielsRogge found <a href= "https://github.com/NielsRogge/Transformers-Tutorials/blob/master/BERT/Fine_tuning_BERT_(and_friends)_for_multi_label_text_classification.ipynb"> here </a>

In [47]:
from transformers import AutoTokenizer
import numpy as np


tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def preprocess_data(examples):
  # take a batch of texts
  text = examples["passage"]
  # encode them
  encoding = tokenizer(text, max_length=512, truncation=True) #max length for BERT is 512
  # add labels
  labels_batch = {k: examples[k] for k in examples.keys() if k in labels}
  # create numpy array of shape (batch_size, num_labels)
  labels_matrix = np.zeros((len(text), len(labels)))
  # fill numpy array
  for idx, label in enumerate(labels):
    labels_matrix[:, idx] = labels_batch[label]

  encoding["labels"] = labels_matrix.tolist()

  return encoding

To apply the preprocessing function over the entire dataset, use 🤗 Datasets map function. You can speed up map by setting batched=True to process multiple elements of the dataset at once:

In [48]:
# Tokenize data, remove all columns and give new ones
tokenized_Hraf = Hraf.map(preprocess_data, batched=True, remove_columns=Hraf['train'].column_names)
tokenized_Hraf

Map:   0%|          | 0/7578 [00:00<?, ? examples/s]

Map:   0%|          | 0/1895 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 7578
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 1895
    })
})

In [49]:
# sample decoding
example = tokenized_Hraf['train'][1]
print(example.keys())
print(tokenizer.decode(example['input_ids']))


dict_keys(['input_ids', 'attention_mask', 'labels'])
[CLS] the religion has certainly operated to discourage scientific investigation and the progress of arts and technology. for example : the ifugao ’ s medical knowledge is little more than nil and lies principally in the setting of broken bones and the reduction of dislocations. the art descends from ancestor to son or, in some cases, daughter. some of the bone - setters do a fairly good job of opposing the ends of fractures. i was once talking to a young practitioner, a bright enough fellow by natural endowment, but a conservative in the art. he belittled the art of bone manipulation : “ the prayer is the important thing, ” he said, “ the prayer and the sacrifice. if these be right, the recovery will be right. ” [SEP]


In [50]:
print(example['labels'])
[id2label[idx] for idx, label in enumerate(example['labels']) if label == 1.0]

[1.0, 0.0, 1.0]


['EVENT', 'ACTION']

In [51]:
# Number of passages longer than 512 tokens (and therefore truncated)
sequence_i = []
for i, tx in enumerate(tokenized_Hraf['train']):
    if len(tx['input_ids']) >= 512:
        sequence_i.append(i)
print('Number Truncated: ', len(sequence_i))
print(f'Percentage Truncated: {round(len(sequence_i)/len(tokenized_Hraf["train"])*100,1)}%')
print(sequence_i)

Number Truncated:  151
Percentage Truncated: 2.0%
[24, 42, 75, 129, 256, 267, 284, 307, 405, 487, 493, 517, 586, 673, 689, 804, 853, 902, 925, 933, 980, 988, 1017, 1063, 1102, 1272, 1289, 1344, 1392, 1440, 1539, 1551, 1556, 1719, 1885, 1886, 1915, 1965, 2013, 2069, 2228, 2254, 2356, 2384, 2422, 2498, 2501, 2548, 2632, 2860, 2866, 2888, 2918, 2935, 3120, 3123, 3177, 3499, 3607, 3805, 3808, 3859, 3868, 3875, 3891, 3894, 3909, 3963, 3969, 4054, 4072, 4108, 4163, 4194, 4258, 4392, 4402, 4430, 4476, 4479, 4480, 4491, 4562, 4739, 4779, 4874, 4896, 4937, 4961, 4968, 5008, 5025, 5033, 5059, 5068, 5156, 5164, 5176, 5189, 5316, 5373, 5389, 5413, 5414, 5439, 5526, 5546, 5578, 5579, 5615, 5653, 5704, 5720, 5791, 5823, 5840, 5903, 5952, 5996, 6144, 6147, 6158, 6173, 6211, 6251, 6252, 6419, 6514, 6571, 6647, 6661, 6682, 6702, 6743, 6828, 6890, 6975, 6995, 6996, 7027, 7049, 7098, 7105, 7166, 7200, 7215, 7308, 7407, 7410, 7499, 7539]


### Create Splits

 Stratification using multilabels is a difficult process as the number of unique bins of stratification increases exponentially by the number of labels (see more info and potential ways to conduct multilabel sttratification sampling <a href="https://dl.acm.org/doi/10.5555/2034161.2034172"> HERE  </a>). We will currently disregard focusing on stratification of all the labels/classifications and just use a single label for stratification. Currently, this is still giving decent splits that do not deviate far from the true proportion or between n_splits. Still, one should check the proportional deviation of each label to make sure

In [52]:
#  Splitting
from sklearn.model_selection import StratifiedKFold
# folds = StratifiedKFold(n_splits=5)
folds = StratifiedKFold(n_splits=5, shuffle= True, random_state=10)
splits = folds.split(np.zeros(Hraf['train'].num_rows), Hraf['train']['ACTION'])


train_list = []
val_list = []

for fold, (train_idxs, val_idxs) in enumerate(splits, start=1):
    train_list += [train_idxs]
    val_list += [val_idxs]
    print("Fold:",fold)
    print(f"EVENT:  {np.mean(Hraf['train'][train_idxs]['EVENT'])}\nCAUSE:  {np.mean(Hraf['train'][train_idxs]['CAUSE'])}\nACTION: {np.mean(Hraf['train'][train_idxs]['ACTION'])}\n")
    
# print(train_list,"\n", val_list)
# print(train_idxs)

Fold: 1
EVENT:  0.6591883866710656
CAUSE:  0.4947212141207522
ACTION: 0.49719564500164964

Fold: 2
EVENT:  0.6517650940283735
CAUSE:  0.4907621247113164
ACTION: 0.49719564500164964

Fold: 3
EVENT:  0.6547344110854504
CAUSE:  0.4915869350049489
ACTION: 0.49719564500164964

Fold: 4
EVENT:  0.6523173346528122
CAUSE:  0.49348507339600856
ACTION: 0.4972785749628897

Fold: 5
EVENT:  0.6552861619660234
CAUSE:  0.4924954642916048
ACTION: 0.4972785749628897



Now create a batch of examples using <a href="https://huggingface.co/docs/transformers/v4.29.0/en/main_classes/data_collator#transformers.DataCollatorWithPadding"> DataCollatorWithPadding</a>. It’s more efficient to dynamically pad the sentences to the longest length in a batch during collation, instead of padding the whole dataset to the maximum length.

In [53]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Set tokenized passages to PyTorch Tensor

In [54]:
tokenized_Hraf.set_format("torch")

## Evaluate

Obtain F1 score for evaluation

In [55]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction, TrainerCallback
import torch

# Get Metric performance
# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

# Compute evaluation
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, 
            tuple) else p.predictions
    result = multi_label_metrics(
        predictions=preds, 
        labels=p.label_ids)
    return result


# # Retrieving best model
# class BestCheckpointCallback(TrainerCallback):
#     def __init__(self):
#         self.best_checkpoint = None

#     def on_save(self, args, state, control, **kwargs):
#         # Update the best_checkpoint variable when a new best checkpoint is saved
#         self.best_checkpoint = control.value
# # Initialize the callback
# best_checkpoint_callback = BestCheckpointCallback()


## Train
Before you start training your model, create a map of the expected ids to their labels with id2label and label2id:

In [56]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", 
    problem_type='multi_label_classification',
    num_labels = len(labels), 
    id2label=id2label, 
    label2id=label2id
)
# Get initial state (this is for later kfolds loops which appear to have data leakage)
initial_model_state = {name: param.data.clone() for name, param in model.named_parameters()}

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [57]:
#forward pass (NOT IMPLEMENTED YET, JUST A TEST)
outputs = model(input_ids=tokenized_Hraf['train']['input_ids'][0].unsqueeze(0), labels=tokenized_Hraf['train'][0]['labels'].unsqueeze(0))
outputs

SequenceClassifierOutput(loss=tensor(0.6838, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), logits=tensor([[-0.0804,  0.0529, -0.0312]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

### Initialize Training 

In [None]:
# ### Optional Paramters

# ### OPTIONAL If it crashes on a fold, you may skip the fold by specifying the start.
# ### Set to 1 after a successful run
# # start_fold = 1
# # # # Set it to true if you want the model to start from a checkpoint, 
# # # # You can also specify a specific checkpoint. Otherwise choose False, normally, this may be good to set False unless a crash occurs
# # resume_bool = '/checkpoint-434' 
# # #set True if you want to start at the beginnning
# # overwrite_training = False 


# # Create Eval_dataset (be aware this may raise a prompt you must fill in)
# # only create a new eval_df if one does not exist (this step is useful in case of a crash)
# if 'eval_df' not in locals(): 
#     eval_df = pd.DataFrame()
# else:
#     eval_inputAppend = input("eval_df found, would you like to append to there? (y/n)")
#     if eval_inputAppend.lower() =='y':
#         print("Appending to old eval dataframe, this is useful if you must restart training")
#         # fix issues with starting fold
#         if max(eval_df['fold']) >= start_fold: 
#             Start_fold_input = input(f"Your starting fold ({start_fold}) is not greater than the largest fold in the eval_df ({max(eval_df['fold'])}), this may mean redoing folds. Is this what you want (y/n)")
#             if Start_fold_input.lower() != 'y':
#                 raise Exception('Quitting run, please redo your start_fold parameter')
#     else:
#         eval_df = pd.DataFrame()

### Training Versions (run only 1)

#### Normal training

In [None]:
training_args = TrainingArguments(
    output_dir="HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    push_to_hub=False,
)



# # Train the model
assert (start_fold >0) and (start_fold <= len(train_list)), f"Incorrect Starting fold, must be greater than or equal to 1 and less than or equal to {len(train_list)}"
for fold, (train_idxs, val_idxs) in enumerate(zip(train_list, val_list), start=1): # K-fold loop
    
    #Skip folds if desired
    if start_fold >fold:
        print('\033[93m'+ f"Skipping Fold {fold}"+ '\033[0m')
        continue

    print(f"------Fold {fold}--------\n")
    train_ds = tokenized_Hraf["train"].select(train_idxs)
    val_ds = tokenized_Hraf["train"].select(val_idxs)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        
    )
    try:
        trainer.train() 
    except:
        print('\033[93m'+ f"A crash occurred, restarting fold from checkpoint"+ '\033[0m')
        trainer.train(resume_from_checkpoint=True) #This is the same thing above but often restarting can make all the difference so let's try it

    # Evaluate and then concatinate results to a dataframe
    eval_dict = trainer.evaluate()
    eval_df_line = pd.DataFrame([eval_dict])
    eval_df_line["fold"] = fold
    eval_df_line["train_count"] = len(train_ds)
    eval_df_line["val_count"] = len(val_ds)
    eval_df_line["total_count"] = eval_df_line["val_count"] + eval_df_line["train_count"]
    eval_df = pd.concat([eval_df, eval_df_line])



# Save the model to disk
trainer.save_model()

***** Running training *****
  Num examples = 3472


  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2170
  Number of trainable parameters = 66955779


------Fold 1--------



  0%|          | 0/2170 [00:00<?, ?it/s]

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434


{'eval_loss': 0.423545241355896, 'eval_f1': 0.8451061133987963, 'eval_roc_auc': 0.799144889130704, 'eval_accuracy': 0.5875576036866359, 'eval_runtime': 197.0423, 'eval_samples_per_second': 4.405, 'eval_steps_per_second': 0.553, 'epoch': 1.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/special_tokens_map.json


{'loss': 0.501, 'learning_rate': 1.5391705069124425e-05, 'epoch': 1.15}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868


{'eval_loss': 0.43711310625076294, 'eval_f1': 0.8546744831842024, 'eval_roc_auc': 0.800649621934365, 'eval_accuracy': 0.6059907834101382, 'eval_runtime': 198.5329, 'eval_samples_per_second': 4.372, 'eval_steps_per_second': 0.549, 'epoch': 2.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/special_tokens_map.json


{'loss': 0.3398, 'learning_rate': 1.0783410138248848e-05, 'epoch': 2.3}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302


{'eval_loss': 0.445345401763916, 'eval_f1': 0.8537477148080439, 'eval_roc_auc': 0.7943531834996758, 'eval_accuracy': 0.5933179723502304, 'eval_runtime': 197.4677, 'eval_samples_per_second': 4.396, 'eval_steps_per_second': 0.552, 'epoch': 3.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/special_tokens_map.json


{'loss': 0.2458, 'learning_rate': 6.175115207373272e-06, 'epoch': 3.46}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736


{'eval_loss': 0.44958022236824036, 'eval_f1': 0.8570538772967923, 'eval_roc_auc': 0.8073972373154303, 'eval_accuracy': 0.6071428571428571, 'eval_runtime': 197.553, 'eval_samples_per_second': 4.394, 'eval_steps_per_second': 0.552, 'epoch': 4.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/special_tokens_map.json


{'loss': 0.1775, 'learning_rate': 1.5668202764976959e-06, 'epoch': 4.61}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170


{'eval_loss': 0.45325857400894165, 'eval_f1': 0.8584240871236386, 'eval_roc_auc': 0.8200623753646457, 'eval_accuracy': 0.618663594470046, 'eval_runtime': 196.8385, 'eval_samples_per_second': 4.41, 'eval_steps_per_second': 0.554, 'epoch': 5.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170 (score: 0.8584240871236386).
***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


{'train_runtime': 14017.5835, 'train_samples_per_second': 1.238, 'train_steps_per_second': 0.155, 'train_loss': 0.3030908848283478, 'epoch': 5.0}


  0%|          | 0/109 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 3472
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2170
  Number of trainable parameters = 66955779


------Fold 2--------



  0%|          | 0/2170 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/config.json


{'eval_loss': 0.1457630842924118, 'eval_f1': 0.957901554404145, 'eval_roc_auc': 0.947026282762066, 'eval_accuracy': 0.868663594470046, 'eval_runtime': 192.4012, 'eval_samples_per_second': 4.511, 'eval_steps_per_second': 0.567, 'epoch': 1.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/special_tokens_map.json


{'loss': 0.2371, 'learning_rate': 1.5391705069124425e-05, 'epoch': 1.15}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/config.json


{'eval_loss': 0.14711026847362518, 'eval_f1': 0.9567446331304069, 'eval_roc_auc': 0.9427110114659616, 'eval_accuracy': 0.8675115207373272, 'eval_runtime': 190.7769, 'eval_samples_per_second': 4.55, 'eval_steps_per_second': 0.571, 'epoch': 2.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/special_tokens_map.json


{'loss': 0.1446, 'learning_rate': 1.0783410138248848e-05, 'epoch': 2.3}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/config.json


{'eval_loss': 0.15874063968658447, 'eval_f1': 0.9527760051052967, 'eval_roc_auc': 0.9366362451108214, 'eval_accuracy': 0.8536866359447005, 'eval_runtime': 191.2927, 'eval_samples_per_second': 4.538, 'eval_steps_per_second': 0.57, 'epoch': 3.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/special_tokens_map.json


{'loss': 0.0791, 'learning_rate': 6.175115207373272e-06, 'epoch': 3.46}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/config.json


{'eval_loss': 0.15558165311813354, 'eval_f1': 0.9584273283918788, 'eval_roc_auc': 0.9463628166542787, 'eval_accuracy': 0.868663594470046, 'eval_runtime': 191.4648, 'eval_samples_per_second': 4.533, 'eval_steps_per_second': 0.569, 'epoch': 4.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/special_tokens_map.json


{'loss': 0.0482, 'learning_rate': 1.5668202764976959e-06, 'epoch': 4.61}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/config.json


{'eval_loss': 0.1686871498823166, 'eval_f1': 0.9556052379431491, 'eval_roc_auc': 0.9404178191521768, 'eval_accuracy': 0.8582949308755761, 'eval_runtime': 191.7016, 'eval_samples_per_second': 4.528, 'eval_steps_per_second': 0.569, 'epoch': 5.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736 (score: 0.9584273283918788).
***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


{'train_runtime': 13973.7607, 'train_samples_per_second': 1.242, 'train_steps_per_second': 0.155, 'train_loss': 0.12036735297348093, 'epoch': 5.0}


  0%|          | 0/109 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 3472
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2170
  Number of trainable parameters = 66955779


------Fold 3--------



  0%|          | 0/2170 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/config.json


{'eval_loss': 0.04312153905630112, 'eval_f1': 0.989540412044374, 'eval_roc_auc': 0.9852195945945947, 'eval_accuracy': 0.9642857142857143, 'eval_runtime': 196.3134, 'eval_samples_per_second': 4.422, 'eval_steps_per_second': 0.555, 'epoch': 1.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/special_tokens_map.json


{'loss': 0.1097, 'learning_rate': 1.5391705069124425e-05, 'epoch': 1.15}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/config.json


{'eval_loss': 0.035966116935014725, 'eval_f1': 0.9911280101394169, 'eval_roc_auc': 0.9871414782129069, 'eval_accuracy': 0.9711981566820277, 'eval_runtime': 196.4649, 'eval_samples_per_second': 4.418, 'eval_steps_per_second': 0.555, 'epoch': 2.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/special_tokens_map.json


{'loss': 0.0631, 'learning_rate': 1.0783410138248848e-05, 'epoch': 2.3}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/config.json


{'eval_loss': 0.02918880246579647, 'eval_f1': 0.9923906150919467, 'eval_roc_auc': 0.9889082322118036, 'eval_accuracy': 0.9758064516129032, 'eval_runtime': 196.1773, 'eval_samples_per_second': 4.425, 'eval_steps_per_second': 0.556, 'epoch': 3.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/special_tokens_map.json


{'loss': 0.0286, 'learning_rate': 6.175115207373272e-06, 'epoch': 3.46}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/config.json


{'eval_loss': 0.029055986553430557, 'eval_f1': 0.9911280101394169, 'eval_roc_auc': 0.9871414782129069, 'eval_accuracy': 0.9711981566820277, 'eval_runtime': 196.0693, 'eval_samples_per_second': 4.427, 'eval_steps_per_second': 0.556, 'epoch': 4.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/special_tokens_map.json


{'loss': 0.0113, 'learning_rate': 1.5668202764976959e-06, 'epoch': 4.61}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/config.json


{'eval_loss': 0.02619864046573639, 'eval_f1': 0.9926960939980947, 'eval_roc_auc': 0.9897183535576393, 'eval_accuracy': 0.978110599078341, 'eval_runtime': 195.6949, 'eval_samples_per_second': 4.435, 'eval_steps_per_second': 0.557, 'epoch': 5.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170 (score: 0.9926960939980947).
***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


{'train_runtime': 13842.7568, 'train_samples_per_second': 1.254, 'train_steps_per_second': 0.157, 'train_loss': 0.04961150511069232, 'epoch': 5.0}


  0%|          | 0/109 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 3472
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2170
  Number of trainable parameters = 66955779


------Fold 4--------



  0%|          | 0/2170 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/config.json


{'eval_loss': 0.009037306532263756, 'eval_f1': 0.996968676321994, 'eval_roc_auc': 0.9962058065773236, 'eval_accuracy': 0.9896313364055299, 'eval_runtime': 199.131, 'eval_samples_per_second': 4.359, 'eval_steps_per_second': 0.547, 'epoch': 1.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/special_tokens_map.json


{'loss': 0.0354, 'learning_rate': 1.5391705069124425e-05, 'epoch': 1.15}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/config.json


{'eval_loss': 0.00467693293467164, 'eval_f1': 0.9986522911051213, 'eval_roc_auc': 0.9982174688057042, 'eval_accuracy': 0.9953917050691244, 'eval_runtime': 199.0299, 'eval_samples_per_second': 4.361, 'eval_steps_per_second': 0.548, 'epoch': 2.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/special_tokens_map.json


{'loss': 0.0265, 'learning_rate': 1.0783410138248848e-05, 'epoch': 2.3}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/config.json


{'eval_loss': 0.004247164353728294, 'eval_f1': 0.9986504723346828, 'eval_roc_auc': 0.9984339705701936, 'eval_accuracy': 0.9953917050691244, 'eval_runtime': 199.0542, 'eval_samples_per_second': 4.361, 'eval_steps_per_second': 0.548, 'epoch': 3.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/special_tokens_map.json


{'loss': 0.0101, 'learning_rate': 6.175115207373272e-06, 'epoch': 3.46}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/config.json


{'eval_loss': 0.0020585672464221716, 'eval_f1': 0.9989875126560918, 'eval_roc_auc': 0.9988796033687675, 'eval_accuracy': 0.9965437788018433, 'eval_runtime': 198.7473, 'eval_samples_per_second': 4.367, 'eval_steps_per_second': 0.548, 'epoch': 4.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/special_tokens_map.json


{'loss': 0.003, 'learning_rate': 1.5668202764976959e-06, 'epoch': 4.61}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/config.json


{'eval_loss': 0.0025650858879089355, 'eval_f1': 0.9993252361673415, 'eval_roc_auc': 0.9992169852850968, 'eval_accuracy': 0.9976958525345622, 'eval_runtime': 198.499, 'eval_samples_per_second': 4.373, 'eval_steps_per_second': 0.549, 'epoch': 5.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170 (score: 0.9993252361673415).
***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


{'train_runtime': 13692.8755, 'train_samples_per_second': 1.268, 'train_steps_per_second': 0.158, 'train_loss': 0.017497836540920943, 'epoch': 5.0}


  0%|          | 0/109 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 3472
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2170
  Number of trainable parameters = 66955779


------Fold 5--------



  0%|          | 0/2170 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/config.json


{'eval_loss': 0.0006783527787774801, 'eval_f1': 1.0, 'eval_roc_auc': 1.0, 'eval_accuracy': 1.0, 'eval_runtime': 199.8549, 'eval_samples_per_second': 4.343, 'eval_steps_per_second': 0.545, 'epoch': 1.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/special_tokens_map.json


{'loss': 0.0209, 'learning_rate': 1.5391705069124425e-05, 'epoch': 1.15}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/config.json


{'eval_loss': 0.001404578099027276, 'eval_f1': 0.9996743731683491, 'eval_roc_auc': 0.9996744791666667, 'eval_accuracy': 0.9988479262672811, 'eval_runtime': 200.5795, 'eval_samples_per_second': 4.327, 'eval_steps_per_second': 0.543, 'epoch': 2.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-868/special_tokens_map.json


{'loss': 0.0173, 'learning_rate': 1.0783410138248848e-05, 'epoch': 2.3}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/config.json


{'eval_loss': 0.0003970277030020952, 'eval_f1': 1.0, 'eval_roc_auc': 1.0, 'eval_accuracy': 1.0, 'eval_runtime': 201.8827, 'eval_samples_per_second': 4.3, 'eval_steps_per_second': 0.54, 'epoch': 3.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1302/special_tokens_map.json


{'loss': 0.0061, 'learning_rate': 6.175115207373272e-06, 'epoch': 3.46}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/config.json


{'eval_loss': 0.0003000223950948566, 'eval_f1': 1.0, 'eval_roc_auc': 1.0, 'eval_accuracy': 1.0, 'eval_runtime': 202.0393, 'eval_samples_per_second': 4.296, 'eval_steps_per_second': 0.539, 'epoch': 4.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1736/special_tokens_map.json


{'loss': 0.0026, 'learning_rate': 1.5668202764976959e-06, 'epoch': 4.61}


***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


  0%|          | 0/109 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/config.json


{'eval_loss': 0.00017800420755520463, 'eval_f1': 1.0, 'eval_roc_auc': 1.0, 'eval_accuracy': 1.0, 'eval_runtime': 203.0652, 'eval_samples_per_second': 4.274, 'eval_steps_per_second': 0.537, 'epoch': 5.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-2170/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434 (score: 1.0).
***** Running Evaluation *****
  Num examples = 868
  Batch size = 8


{'train_runtime': 13688.5703, 'train_samples_per_second': 1.268, 'train_steps_per_second': 0.159, 'train_loss': 0.01084896839022087, 'epoch': 5.0}


  0%|          | 0/109 [00:00<?, ?it/s]

In [None]:
# Push to hub (I have not gotten this to work so alternatively you can manually add in the best checkpoint by uploading the checkpoint into your hugging face account)
trainer.push_to_hub()

TypeError: Repository.__init__() got an unexpected keyword argument 'private'

#### Weight Decay Checking

The following code attempts to investigate the best weight decay for the model by initiallizing training for each weight decay.

In [58]:
def eval_save(eval_df, dataset=Hraf, overwrite_training=True):
    # Augment Evaluation File 
    from datetime import date

    today = date.today()
    date_tm = today.strftime("%y/%m/%d")

    #reorganize columns
    cols = list(eval_df.columns.values) 
    remove_list = ["fold", "epoch","weight_decay", "learning_rate"]
    for removal in remove_list:
        cols.remove(removal)
    cols = remove_list+cols
    eval_df = eval_df[cols]


    trainingStatus = 'Initial Training' if overwrite_training == True else 'Continue Training'

    info_df  = pd.DataFrame({"Date":len(eval_df)*[date_tm],"Train_status":len(eval_df)*[trainingStatus]})
    eval_df = eval_df.reset_index(drop=True)
    eval_df = pd.concat([info_df, eval_df], axis=1)

    

    # import evaluation if it exists
    if os.path.exists("Evaluation.xlsx"):
        old_eval = pd.read_excel("Evaluation.xlsx", sheet_name="Sheet1", index_col=0)
        eval_df = pd.concat([old_eval, eval_df])

    eval_df.to_excel('Evaluation.xlsx')
# Run this if your model crashes and you want to save after the fact
# eval_save(dataset=Hraf, eval_df=eval_df)

In [None]:
### DELETE

# eval_df = pd.DataFrame()
# fold_f1s = []
# fold_f1 = eval_dict['eval_f1']
# fold_f1s.append(fold_f1)
# print(f"Fold {fold} Accuracy: {fold_f1}")

# eval_df_line = pd.DataFrame([eval_dict])
# eval_df_line["fold"] = fold
# eval_df_line["weight_decay"] = weight_decay
# eval_df_line["learning_rate"] = learning_rate
# eval_df_line["train_count"] = len(train_ds)
# eval_df_line["val_count"] = len(val_ds)
# eval_df_line["total_count"] = eval_df_line["val_count"] + eval_df_line["train_count"]
# eval_df = pd.concat([eval_df, eval_df_line])
# eval_df

Note: If you are re-running the model, you must respecify the model. For reasons that I have yet to determine, Huggingface uses a cached model (I believe) every loop and thus causes a data leak that makes folds not indpendent of each other. This is a bad thing and can make the data overfitted and a less reliable estimate of model performance.

In [40]:
# torch.cuda.is_available()
# device = torch.device('mps')
# device

device(type='mps')

In [59]:
import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

# PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.7

tensor([1.], device='mps:0')


In [60]:

model_name = "HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO_WeightInvestigation"


# Define a range of weight decay values to test
weight_decay_values = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
# weight_decay_values = [1e-2]
learning_rate = 2e-5
# Track the average validation accuracy for each weight decay value
avg_validation_f1s = []



for weight_decay in weight_decay_values:
    fold_f1s = []


    eval_df = pd.DataFrame()
    # # Train the model
    # assert (start_fold >0) and (start_fold <= len(train_list)), f"Incorrect Starting fold, must be greater than or equal to 1 and less than or equal to {len(train_list)}"
    for fold, (train_idxs, val_idxs) in enumerate(zip(train_list, val_list), start=1): # K-fold loop
        

        output_dir = f"{model_name}/output_dir_{weight_decay}_fold_{fold}"

        resume_bool = False
        

        #Skip folds already completed
        if os.path.exists(f"{output_dir}"):
            if os.path.exists(f"{output_dir}/finished.txt"):
                print('\033[93m'+ f"Skipping {output_dir} as it is indicated as finished" + '\033[0m')
                continue
            else:
                print('\033[93m'+ f"Starting from last checkpoint {output_dir}"+ '\033[0m')
                resume_bool = True # resume from the last checkpoint if there is an output folder but it is not finished.





        print(f"------Fold {fold}/{len(train_list)}--------\n")

        #reinitialize the model (since it appears to be dataleaking over loops)
        model.load_state_dict(initial_model_state)
        for name, param in model.named_parameters():
                try:
                    assert (np.array(initial_model_state[name]) == np.array(param.data)).all(), "Parameters differ from original model"
                except:
                    print(name, "Differs from initial model")
        model.load_state_dict(initial_model_state)

        train_ds = tokenized_Hraf["train"].select(train_idxs)
        val_ds = tokenized_Hraf["train"].select(val_idxs)


        training_args = TrainingArguments(
            output_dir=output_dir,
            learning_rate=learning_rate,
            per_device_train_batch_size=8,  # should be multiples of 8
            per_device_eval_batch_size=8, # should be multiples of 8
            num_train_epochs=5,
            weight_decay=weight_decay,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            metric_for_best_model='f1',
            push_to_hub=False,
            logging_dir=f"{model_name}/logs_{weight_decay}_fold_{fold}",
            logging_steps=100,
            use_cpu=True, # set True or False depending on if you want ot use the GPU, which is faster but has been unreliable on Macs
        )


        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_ds,
            eval_dataset=val_ds,
            tokenizer=tokenizer,
            data_collator=data_collator,
            # callbacks=[best_checkpoint_callback], 
            compute_metrics=compute_metrics,
            
        )
        try:
            trainer.train(resume_from_checkpoint = resume_bool) 
        except:
            print('\033[91m'+ f"A crash occurred, restarting fold from checkpoint"+ '\033[0m')
            trainer.train(resume_from_checkpoint=True) #This is the same thing above but often restarting can make all the difference so let's try it

        # Evaluate and then concatinate results to a dataframe

        # Evaluate on validation set for this fold
        eval_dict = trainer.evaluate(val_ds)
        fold_f1 = eval_dict['eval_f1']
        fold_f1s.append(fold_f1)
        print(f"Fold {fold} F1: {fold_f1}")

        eval_df_line = pd.DataFrame([eval_dict])
        eval_df_line["fold"] = fold
        eval_df_line["weight_decay"] = weight_decay
        eval_df_line["learning_rate"] = learning_rate
        eval_df_line["fold_f1"] = fold_f1
        eval_df_line["train_count"] = len(train_ds)
        eval_df_line["val_count"] = len(val_ds)
        eval_df_line["total_count"] = eval_df_line["val_count"] + eval_df_line["train_count"]
        eval_df = pd.concat([eval_df, eval_df_line])

        # # Get best model and then finish
        # best_checkpoint = best_checkpoint_callback.best_checkpoint
        # print("Best Checkpoint:", best_checkpoint)
        
        # Save Best model
        f = open(f"{output_dir}/finished.txt", "w")
        f.write(f"Best Model: TBD code incomplete")
        f.close()

        
    # Calculate average accuracy for this weight decay value
    if len(fold_f1s) == 0:
        print('\033[93m'+ f"No F1's in list, this likely means all the folds were skipped" + '\033[0m')
        continue
    elif len(fold_f1s) < len(train_list):
        print('\033[93m'+ f"Warning less F1's than expected, likely some folds were skipped and thus the mean f1 may be off" + '\033[0m')
    else:
        pass

    avg_f1 = np.mean(fold_f1s)
    avg_validation_f1s.append(avg_f1)
    print(f"Average Accuracy for Weight Decay {weight_decay}: {avg_f1}")
    #Save evaluation File
    eval_save(dataset=Hraf, eval_df=eval_df)




# Choose the weight decay with the highest average validation accuracy
best_weight_decay = weight_decay_values[np.argmax(avg_validation_f1s)]
print(f"Best Weight Decay: {best_weight_decay}")


# Save the model to disk
# trainer.save_model()

[93mSkipping HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO_WeightInvestigation/output_dir_1e-06_fold_1 as it is indicated as finished[0m
[93mSkipping HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO_WeightInvestigation/output_dir_1e-06_fold_2 as it is indicated as finished[0m
[93mSkipping HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO_WeightInvestigation/output_dir_1e-06_fold_3 as it is indicated as finished[0m
[93mSkipping HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO_WeightInvestigation/output_dir_1e-06_fold_4 as it is indicated as finished[0m
[93mSkipping HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO_WeightInvestigation/output_dir_1e-06_fold_5 as it is indicated as finished[0m
[93mNo F1's in list, this likely means all the folds were skipped[0m
[93mSkipping HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO_WeightInvestigation/output_dir_1e-05_fold_1 as it is indicated as finished[0m
[93mSkipping HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO_

  0%|          | 0/3790 [00:00<?, ?it/s]

{'loss': 0.1733, 'grad_norm': 3.3595187664031982, 'learning_rate': 3.6411609498680746e-06, 'epoch': 4.09}
{'loss': 0.1311, 'grad_norm': 13.486773490905762, 'learning_rate': 3.1134564643799476e-06, 'epoch': 4.22}
{'loss': 0.1109, 'grad_norm': 3.1867263317108154, 'learning_rate': 2.5857519788918206e-06, 'epoch': 4.35}
{'loss': 0.1307, 'grad_norm': 2.514549493789673, 'learning_rate': 2.058047493403694e-06, 'epoch': 4.49}
{'loss': 0.1282, 'grad_norm': 5.564957141876221, 'learning_rate': 1.5303430079155673e-06, 'epoch': 4.62}
{'loss': 0.1379, 'grad_norm': 4.532524585723877, 'learning_rate': 1.0026385224274407e-06, 'epoch': 4.75}
{'loss': 0.1322, 'grad_norm': 3.9431843757629395, 'learning_rate': 4.7493403693931397e-07, 'epoch': 4.88}


  0%|          | 0/190 [00:00<?, ?it/s]

{'eval_loss': 0.46793022751808167, 'eval_f1': 0.8606955508062949, 'eval_roc_auc': 0.8373765281173594, 'eval_accuracy': 0.6580858085808581, 'eval_runtime': 330.9321, 'eval_samples_per_second': 4.578, 'eval_steps_per_second': 0.574, 'epoch': 5.0}
{'train_runtime': 5248.3053, 'train_samples_per_second': 5.776, 'train_steps_per_second': 0.722, 'train_loss': 0.025964097297285983, 'epoch': 5.0}


  0%|          | 0/190 [00:00<?, ?it/s]

Fold 4 F1: 0.8606955508062949
------Fold 5/5--------



  0%|          | 0/3790 [00:00<?, ?it/s]

{'loss': 0.6056, 'grad_norm': 1.8332290649414062, 'learning_rate': 1.9472295514511874e-05, 'epoch': 0.13}
{'loss': 0.4921, 'grad_norm': 3.7581775188446045, 'learning_rate': 1.894459102902375e-05, 'epoch': 0.26}
{'loss': 0.4665, 'grad_norm': 5.812331199645996, 'learning_rate': 1.8416886543535623e-05, 'epoch': 0.4}
{'loss': 0.4206, 'grad_norm': 6.1683549880981445, 'learning_rate': 1.7889182058047495e-05, 'epoch': 0.53}
{'loss': 0.41, 'grad_norm': 8.479280471801758, 'learning_rate': 1.7361477572559368e-05, 'epoch': 0.66}
{'loss': 0.4293, 'grad_norm': 2.9700231552124023, 'learning_rate': 1.683377308707124e-05, 'epoch': 0.79}
{'loss': 0.404, 'grad_norm': 4.1815924644470215, 'learning_rate': 1.6306068601583113e-05, 'epoch': 0.92}


  0%|          | 0/190 [00:00<?, ?it/s]

{'eval_loss': 0.38801276683807373, 'eval_f1': 0.8456402994133118, 'eval_roc_auc': 0.8312946381815993, 'eval_accuracy': 0.633003300330033, 'eval_runtime': 330.8984, 'eval_samples_per_second': 4.578, 'eval_steps_per_second': 0.574, 'epoch': 1.0}
{'loss': 0.3571, 'grad_norm': 2.0961878299713135, 'learning_rate': 1.577836411609499e-05, 'epoch': 1.06}
{'loss': 0.3467, 'grad_norm': 2.2172343730926514, 'learning_rate': 1.5250659630606862e-05, 'epoch': 1.19}
{'loss': 0.3376, 'grad_norm': 2.6702523231506348, 'learning_rate': 1.4722955145118736e-05, 'epoch': 1.32}
{'loss': 0.3185, 'grad_norm': 4.551942348480225, 'learning_rate': 1.4195250659630609e-05, 'epoch': 1.45}
{'loss': 0.3049, 'grad_norm': 9.708532333374023, 'learning_rate': 1.3667546174142481e-05, 'epoch': 1.58}
{'loss': 0.3304, 'grad_norm': 8.946589469909668, 'learning_rate': 1.3139841688654355e-05, 'epoch': 1.72}
{'loss': 0.3155, 'grad_norm': 5.656391143798828, 'learning_rate': 1.2612137203166228e-05, 'epoch': 1.85}
{'loss': 0.3284, 'g

  0%|          | 0/190 [00:00<?, ?it/s]

{'eval_loss': 0.3947567045688629, 'eval_f1': 0.8493256262042389, 'eval_roc_auc': 0.8218759720943467, 'eval_accuracy': 0.6323432343234323, 'eval_runtime': 330.0171, 'eval_samples_per_second': 4.591, 'eval_steps_per_second': 0.576, 'epoch': 2.0}
{'loss': 0.2259, 'grad_norm': 11.120250701904297, 'learning_rate': 1.1556728232189975e-05, 'epoch': 2.11}
{'loss': 0.2618, 'grad_norm': 2.992271900177002, 'learning_rate': 1.1029023746701847e-05, 'epoch': 2.24}
{'loss': 0.2263, 'grad_norm': 5.019251823425293, 'learning_rate': 1.050131926121372e-05, 'epoch': 2.37}
{'loss': 0.2316, 'grad_norm': 4.047245025634766, 'learning_rate': 9.973614775725594e-06, 'epoch': 2.51}
{'loss': 0.2574, 'grad_norm': 13.14600944519043, 'learning_rate': 9.445910290237469e-06, 'epoch': 2.64}
{'loss': 0.2447, 'grad_norm': 8.431661605834961, 'learning_rate': 8.918205804749341e-06, 'epoch': 2.77}
{'loss': 0.2491, 'grad_norm': 4.99027156829834, 'learning_rate': 8.390501319261214e-06, 'epoch': 2.9}


  0%|          | 0/190 [00:00<?, ?it/s]

{'eval_loss': 0.4343133568763733, 'eval_f1': 0.8503453568687643, 'eval_roc_auc': 0.8218568233011895, 'eval_accuracy': 0.6336633663366337, 'eval_runtime': 331.6739, 'eval_samples_per_second': 4.568, 'eval_steps_per_second': 0.573, 'epoch': 3.0}
{'loss': 0.212, 'grad_norm': 4.191608428955078, 'learning_rate': 7.862796833773088e-06, 'epoch': 3.03}
{'loss': 0.1583, 'grad_norm': 12.622949600219727, 'learning_rate': 7.3350923482849614e-06, 'epoch': 3.17}
{'loss': 0.1924, 'grad_norm': 9.256453514099121, 'learning_rate': 6.807387862796835e-06, 'epoch': 3.3}
{'loss': 0.1573, 'grad_norm': 5.695972442626953, 'learning_rate': 6.2796833773087074e-06, 'epoch': 3.43}
{'loss': 0.1692, 'grad_norm': 6.834404468536377, 'learning_rate': 5.751978891820581e-06, 'epoch': 3.56}
{'loss': 0.1607, 'grad_norm': 3.8109517097473145, 'learning_rate': 5.224274406332454e-06, 'epoch': 3.69}
{'loss': 0.1728, 'grad_norm': 2.854853630065918, 'learning_rate': 4.696569920844328e-06, 'epoch': 3.83}
{'loss': 0.1951, 'grad_nor

  0%|          | 0/190 [00:00<?, ?it/s]

{'eval_loss': 0.5068909525871277, 'eval_f1': 0.8467620481927711, 'eval_roc_auc': 0.8122028029143681, 'eval_accuracy': 0.6257425742574257, 'eval_runtime': 329.3474, 'eval_samples_per_second': 4.6, 'eval_steps_per_second': 0.577, 'epoch': 4.0}
{'loss': 0.1413, 'grad_norm': 4.882196426391602, 'learning_rate': 3.6411609498680746e-06, 'epoch': 4.09}
{'loss': 0.1497, 'grad_norm': 15.466547012329102, 'learning_rate': 3.1134564643799476e-06, 'epoch': 4.22}
{'loss': 0.1288, 'grad_norm': 4.238325119018555, 'learning_rate': 2.5857519788918206e-06, 'epoch': 4.35}
{'loss': 0.1371, 'grad_norm': 0.7318606972694397, 'learning_rate': 2.058047493403694e-06, 'epoch': 4.49}
{'loss': 0.132, 'grad_norm': 1.088075876235962, 'learning_rate': 1.5303430079155673e-06, 'epoch': 4.62}
{'loss': 0.1154, 'grad_norm': 1.0877991914749146, 'learning_rate': 1.0026385224274407e-06, 'epoch': 4.75}
{'loss': 0.1264, 'grad_norm': 3.2048089504241943, 'learning_rate': 4.7493403693931397e-07, 'epoch': 4.88}


  0%|          | 0/190 [00:00<?, ?it/s]

{'eval_loss': 0.5063793063163757, 'eval_f1': 0.8489180834621329, 'eval_roc_auc': 0.8221707267318716, 'eval_accuracy': 0.6356435643564357, 'eval_runtime': 328.7802, 'eval_samples_per_second': 4.608, 'eval_steps_per_second': 0.578, 'epoch': 5.0}
{'train_runtime': 23628.9965, 'train_samples_per_second': 1.283, 'train_steps_per_second': 0.16, 'train_loss': 0.2641790729713943, 'epoch': 5.0}


  0%|          | 0/190 [00:00<?, ?it/s]

Fold 5 F1: 0.8489180834621329
Average Accuracy for Weight Decay 0.01: 0.8548068171342139
Best Weight Decay: 1e-06


### Save Evaluation Dataset columns

In [None]:
# Augment Evaluation File 

from datetime import date

today = date.today()
date_tm = today.strftime("%y/%m/%d")

#reorganize columns
cols = list(eval_df.columns.values) 
remove_list = ["fold", "epoch"]
for removal in remove_list:
    cols.remove(removal)
cols = ["fold","epoch"]+cols
eval_df = eval_df[cols]

numrows = sum(Hraf.num_rows.values())

trainingStatus = 'Initial Training' if overwrite_training == True else 'Continue Training'

info_df  = pd.DataFrame({"Date":len(eval_df)*[date_tm],"Train_status":len(eval_df)*[trainingStatus]})
eval_df = eval_df.reset_index(drop=True)
eval_df = pd.concat([info_df, eval_df], axis=1)
eval_df




Unnamed: 0,eval_loss,eval_f1,eval_roc_auc,eval_accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,Fold
0,0.453259,0.858424,0.820062,0.618664,198.1796,4.38,0.55,5.0,1
0,0.155582,0.958427,0.946363,0.868664,193.1072,4.495,0.564,5.0,2
0,0.026199,0.992696,0.989718,0.978111,197.2923,4.4,0.552,5.0,3
0,0.002565,0.999325,0.999217,0.997696,201.194,4.314,0.542,5.0,4
0,0.000678,1.0,1.0,1.0,204.5292,4.244,0.533,5.0,5


In [None]:
# import evaluation if it exists
if os.path.exists("Evaluation.xlsx"):
    old_eval = pd.read_excel("Evaluation.xlsx", index_col=0)
    eval_df = pd.concat([old_eval, eval_df])

eval_df.to_excel('Evaluation.xlsx')

### Save Partitioned Dataset 

In [40]:
def make_dir(path):
    import os
    # Check whether the specified path exists or not
    isExist = os.path.exists(path)
    if not isExist:
    # Create a new directory because it does not exist
        os.makedirs(path)

# make folder if it does not exist yet
path = os.getcwd() + '/Datasets'
make_dir(path)
# save to Json
for key in Hraf.keys():
    Hraf_dict = Hraf[key].to_dict()
    file_path = f"{path}/{key}_dataset.json"
    with open(file_path, "w") as outfile:
        json.dump(Hraf_dict, outfile)
        print(len(Hraf_dict['ID']), f"Rows for \'{key}\' succesfully saved to {file_path}")

7578 Rows for 'train' succesfully saved to /Users/ericchantland/Library/CloudStorage/Dropbox/MEM-DEV-LAB-Current/2023-eHRAF-Misf/HRAF-Misf-NaturalLanguageProcessing/HRAF_NLP/HRAF_MultiLabel_ThreeLargeClasses_kfoldsDemo/Datasets/train_dataset.json
1895 Rows for 'test' succesfully saved to /Users/ericchantland/Library/CloudStorage/Dropbox/MEM-DEV-LAB-Current/2023-eHRAF-Misf/HRAF-Misf-NaturalLanguageProcessing/HRAF_NLP/HRAF_MultiLabel_ThreeLargeClasses_kfoldsDemo/Datasets/test_dataset.json


## Continue Training


This code is meant for if you want to continue training from where you left off. It will for the most part have extremely similar code to the initial run code and even ask you to run some cells above which have needed functions. If you do not have an active dataset or model yet, this is not for you!

In [None]:
import json
import pandas as pd

path = "../../../eHRAF_Scraper-Analysis-and-Prep/Data/"
dataFolder = r"(subjects-(contracts_OR_disabilities_OR_disasters_OR_friendships_OR_gift_giving_OR_infant_feeding_OR_lineages_OR_local_officials_OR_luck_and_chance_OR_magicians_and_diviners_OR_mortuary_specialists_OR_nuclear_family_OR_priesthood_OR_prophet/"
# dataFolder = r'subjects-(sickness)_FILTERS-culture_level_samples(PSF)'

#load df (only load one of these commented out lines)
# df = pd.read_excel(f"{path}{dataFolder}/_Altogether_Dataset_RACoded.xlsx", header=[0,1], index_col=0) # Fall 2023 sickness + non-sickness
df = pd.read_excel(f"{path}{dataFolder}/_Altogether_Dataset_RACoded_Combined.xlsx", header=[0,1], index_col=0) # Spring 2023 - Spring 2024  sickness + nonsickness dataset
# df.head(3)


# Remove duplicates
# Ideally, we want no duplicates. If there is a duplicate, prefer run 3 over run 1.
#Take only the run number 1 and 3
df = df.loc[(df[("CODER","Run_Number")]==1) | (df[("CODER","Run_Number")]==3)]
dup1 = df[("CULTURE","Passage Number")].duplicated(keep=False) #passage number duplicates
print("Passage Number Duplicates before filtering:", sum(dup1))
dup2 = df[("CODER","Run_Number")] != 3 # select all that are not run 3 (as we want to use run 3)
df = df[~(dup1 & dup2)]
print("Passage Number Duplicates after filtering:", sum(df[("CULTURE","Passage Number")].duplicated(keep=False)))
print("Passage Duplicates after filtering:", sum(df[("CULTURE","Passage")].duplicated(keep=False)),"\n")

# subdivide into just passage and outcome
df_small = pd.DataFrame()
df_small[["ID","passage","EVENT","CAUSE","ACTION"]] = df[[('CULTURE', "Passage Number"), ('CULTURE', "Passage"), ('EVENT', "No_Info"), ('CAUSE', "No_Info"), ('ACTION', "No_Info")]]
# Flip the lable of "no_info"
df_small[["EVENT","CAUSE","ACTION"]]  = df_small[["EVENT","CAUSE","ACTION"]].replace({0:1, 1:0})

# Remove certain passages which should not be in training or inference (these are duplicates that had to be manually found by a human)
values_to_remove = [3252, 33681, 6758, 10104]
df_small = df_small[~df_small['ID'].isin(values_to_remove)]
df_small.head(3)



Passage Number Duplicates before filtering: 654
Passage Number Duplicates after filtering: 0
Passage Duplicates after filtering: 0 



Unnamed: 0,ID,passage,EVENT,CAUSE,ACTION
0,1392,The communal huts are used for another purpose...,1,0,1
1,1393,Although the Onge observe few hygienic precaut...,1,0,0
2,1395,Foods considered healthy by the Onge include h...,1,1,1


### Load and compare old dataset from JSON datasets

In [None]:
loc = ""

Hraf_prev = DatasetDict()

dataset_names = ['train', 'test']

df_prev = pd.DataFrame([])
for name in dataset_names:
    f = open(loc+f"Datasets/{name}_dataset.json")
    data = json.load(f)
    df_prev = pd.concat([df_prev, pd.DataFrame(data)])
    Hraf_prev[name] = Dataset.from_dict(data) # load to hugging face dataset dict
    # Closing file
    f.close()
# f = open(loc+"Datasets/train_dataset.json")
# # f = open("../HRAF_MultiLabel_ThreeLargeClasses/Datasets/test_dataset.json") #load old threemain class (comment this out unless you specifically are using it)
# data = json.load(f)
# Hraf_train = pd.DataFrame(data)
df_prev.head(3)


Unnamed: 0,ID,passage,EVENT,CAUSE,ACTION
0,66159,"Ancestors of the other side,/ who are also pri...",0,0,0
1,8756,“So we call all these things. But each obia ha...,0,0,0
2,25937,"In certain cases, scrutiny of recent events in...",1,1,1


#### Check difference between the two datasets

In [None]:
# Make sure all the rows in the original/previous dataset appear in the new one

assert len(set(df_small.columns) - set(df_small.columns)) == 0, "Dataframe columns do not match"
diff_count = len(df_small) - len(df_prev)
# df_small['ID'].isin(df_prev['ID'])

dif_df = df_prev[~df_prev['ID'].isin(df_small['ID'])] #get all ID's which are in the original dataset but not the new one
if len(dif_df) != 0:
    print('\033[93m'+ "WARNING, Not all rows of original dataset are within new dataset." + '\033[0m')
    print("IDs:\n",dif_df[['ID','passage']])
    print('\033[93m'+ "Including unknown extra rows to new dataset, stop here if this is not desired." + '\033[0m')
    df_small = pd.concat([df_small, dif_df])
    diff_count = len(df_small) - len(df_prev)

IDs:
         ID                                            passage
4361  1016  Prior to the twentieth century Catholic priest...
[93mIncluding unknown extra rows to new dataset, stop here if this is not desired.[0m


In [None]:
# extract only the new rows which do not appear in the original dataset
df_new = df_small[~df_small['ID'].isin(df_prev['ID'])]


# Divide them and turn them into HRAF
# create train and validation/test sets
train_val, test = train_test_split(df_new, test_size=0.2, random_state=10)
# # do it again to get the test and validation sets (15% = 50% * 30%)
# test, validation = train_test_split(test_val, test_size=0.5, random_state=10)




# Create an NLP friendly dataset
Hraf = DatasetDict(
    {'train':Dataset.from_dict(train_val.to_dict(orient= 'list')),
     'test':Dataset.from_dict(test.to_dict(orient= 'list'))})
Hraf

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

### Conduct filtering much like original training (copied and pasted or ran above cells when they are functions)

In [None]:
# Get labels
labels = [label for label in Hraf['train'].features.keys() if label not in ['ID', 'passage']]
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}
id2label

{0: 'EVENT', 1: 'CAUSE', 2: 'ACTION'}

In [None]:
from transformers import pipeline, AutoTokenizer

# CHANGE Model name
model = "MultiLabel_ThreeLargeClasses_kfoldsDEMO"

# set up the pipeline from local
import os
path =os.path.abspath(f"HRAF_Model_{model}")
# classifier = pipeline("text-classification", model=path, top_k=None)

In [None]:
# Get tokenizer from old model
model = "MultiLabel_ThreeLargeClasses_kfoldsDEMO"
# set up the pipeline from local
import os
path =os.path.abspath(f"HRAF_Model_{model}") #the need to specify checkpoints may not be needed now with setting the load best checkpoint at the end, regardless, consider specifying
# Note for above, the last accurate model was checkpoint-1176 so consider adding that into the path if you want to assure it uses it. Although I believe it should automatically load the best model!
tokenizer = AutoTokenizer.from_pretrained(path)


# Tokenize data, remove all columns and give new ones (GET FUNCTION FROM ABOVE)
tokenized_Hraf = Hraf.map(preprocess_data, batched=True, remove_columns=Hraf['train'].column_names)
tokenized_Hraf

loading file vocab.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json


Map:   0%|          | 0/2937 [00:00<?, ? examples/s]

Map:   0%|          | 0/735 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 2937
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 735
    })
})

### Create Splits

In [None]:
#  Splitting
from sklearn.model_selection import StratifiedKFold
# folds = StratifiedKFold(n_splits=5)
folds = StratifiedKFold(n_splits=5, shuffle= True, random_state=10)
splits = folds.split(np.zeros(Hraf['train'].num_rows), Hraf['train']['ACTION'])


train_list = []
val_list = []

for train_idxs, val_idxs in splits:
    train_list += [train_idxs]
    val_list += [val_idxs]
    print(f"EVENT:  {np.mean(Hraf['train'][train_idxs]['EVENT'])}\nCAUSE:  {np.mean(Hraf['train'][train_idxs]['CAUSE'])}\nACTION: {np.mean(Hraf['train'][train_idxs]['ACTION'])}\n")

EVENT:  0.5755640698169434
CAUSE:  0.3959131545338442
ACTION: 0.4108131119625372

EVENT:  0.5683269476372924
CAUSE:  0.3818646232439336
ACTION: 0.4108131119625372

EVENT:  0.5702127659574469
CAUSE:  0.38382978723404254
ACTION: 0.41106382978723405

EVENT:  0.574468085106383
CAUSE:  0.3880851063829787
ACTION: 0.41106382978723405

EVENT:  0.5714893617021276
CAUSE:  0.38765957446808513
ACTION: 0.41106382978723405



### Collate and create torch

In [None]:
from transformers import DataCollatorWithPadding
# Pad data
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# set to torch
tokenized_Hraf.set_format("torch")


### Load model then Train

In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained(
    path, 
    problem_type='multi_label_classification',
    num_labels = len(labels), 
    id2label=id2label, 
    label2id=label2id
)

loading configuration file /Users/ericchantland/Library/CloudStorage/Dropbox/MEM-DEV-LAB-Current/2023-eHRAF-Misf/HRAF-Misf-NaturalLanguageProcessing/HRAF_NLP/HRAF_MultiLabel_ThreeLargeClasses_kfoldsDemo/HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434/config.json
Model config DistilBertConfig {
  "_name_or_path": "/Users/ericchantland/Library/CloudStorage/Dropbox/MEM-DEV-LAB-Current/2023-eHRAF-Misf/HRAF-Misf-NaturalLanguageProcessing/HRAF_NLP/HRAF_MultiLabel_ThreeLargeClasses_kfoldsDemo/HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-434",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "EVENT",
    "1": "CAUSE",
    "2": "ACTION"
  },
  "initializer_range": 0.02,
  "label2id": {
    "ACTION": 2,
    "CAUSE": 1,
    "EVENT": 0
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads":

Make sure you run the functions above like the "evaluate" function

In [None]:
### Optional Paramters

### OPTIONAL If it crashes on a fold, you may skip the fold by specifying the start.
### Set to 1 after a successful run
start_fold = 1
# # Set it to true if you want the model to start from a checkpoint, 
# # You can also specify a specific checkpoint. Otherwise choose False, normally, this may be good to set False unless a crash occurs
resume_bool = False
#set True if you want to start at the beginnning
overwrite_training = False 


# Create Eval_dataset (be aware this may raise a prompt you must fill in)
# only create a new eval_df if one does not exist (this step is useful in case of a crash)
if 'eval_df' not in locals(): 
    eval_df = pd.DataFrame()
else:
    eval_inputAppend = input("eval_df found, would you like to append to there? (y/n)")
    if eval_inputAppend.lower() =='y':
        print("Appending to old eval dataframe, this is useful if you must restart training")
        # fix issues with starting fold
        if max(eval_df['fold']) >= start_fold: 
            Start_fold_input = input(f"Your starting fold ({start_fold}) is not greater than the largest fold in the eval_df ({max(eval_df['fold'])}), this may mean redoing folds. Is this what you want (y/n)")
            if Start_fold_input.lower() != 'y':
                raise Exception('Quitting run, please redo your start_fold parameter')
    else:
        eval_df = pd.DataFrame()

In [None]:
training_args = TrainingArguments(
    output_dir="HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    push_to_hub=False,
)



# # Train the model
assert (start_fold >0) and (start_fold <= len(train_list)), f"Incorrect Starting fold, must be greater than or equal to 1 and less than or equal to {len(train_list)}"
for fold, (train_idxs, val_idxs) in enumerate(zip(train_list, val_list), start=1): # K-fold loop
    
    #Skip folds if desired
    if start_fold >fold:
        print('\033[93m'+ f"Skipping Fold {fold}"+ '\033[0m')
        continue

    print(f"------Fold {fold}--------\n")
    train_ds = tokenized_Hraf["train"].select(train_idxs)
    val_ds = tokenized_Hraf["train"].select(val_idxs)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        
    )
    try:
        trainer.train() 
    except:
        print('\033[93m'+ f"A crash occurred, restarting fold from checkpoint"+ '\033[0m')
        trainer.train(resume_from_checkpoint=True) #This is the same thing above but often restarting can make all the difference so let's try it

    # Evaluate and then concatinate results to a dataframe
    eval_dict = trainer.evaluate()
    eval_df_line = pd.DataFrame([eval_dict])
    eval_df_line["fold"] = fold
    eval_df_line["train_count"] = len(train_ds)
    eval_df_line["val_count"] = len(val_ds)
    eval_df_line["total_count"] = eval_df_line["val_count"] + eval_df_line["train_count"]
    eval_df = pd.concat([eval_df, eval_df_line])



# Save the model to disk
trainer.save_model()

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 2349
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1470
  Number of trainable parameters = 66955779


------Fold 1--------



  0%|          | 0/1470 [00:00<?, ?it/s]

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294


{'eval_loss': 0.3263649344444275, 'eval_f1': 0.8439450686641699, 'eval_roc_auc': 0.8593965908320601, 'eval_accuracy': 0.685374149659864, 'eval_runtime': 119.4519, 'eval_samples_per_second': 4.922, 'eval_steps_per_second': 0.619, 'epoch': 1.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/special_tokens_map.json


{'loss': 0.4052, 'learning_rate': 1.3197278911564626e-05, 'epoch': 1.7}


***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588


{'eval_loss': 0.37282049655914307, 'eval_f1': 0.847457627118644, 'eval_roc_auc': 0.8628107124420551, 'eval_accuracy': 0.6972789115646258, 'eval_runtime': 119.6105, 'eval_samples_per_second': 4.916, 'eval_steps_per_second': 0.619, 'epoch': 2.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882


{'eval_loss': 0.514999508857727, 'eval_f1': 0.8489028213166143, 'eval_roc_auc': 0.864096059485757, 'eval_accuracy': 0.6989795918367347, 'eval_runtime': 118.9022, 'eval_samples_per_second': 4.945, 'eval_steps_per_second': 0.622, 'epoch': 3.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/special_tokens_map.json


{'loss': 0.1277, 'learning_rate': 6.394557823129253e-06, 'epoch': 3.4}


***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176


{'eval_loss': 0.6913695335388184, 'eval_f1': 0.8412291933418694, 'eval_roc_auc': 0.8578348811379884, 'eval_accuracy': 0.685374149659864, 'eval_runtime': 118.494, 'eval_samples_per_second': 4.962, 'eval_steps_per_second': 0.625, 'epoch': 4.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470


{'eval_loss': 0.757633626461029, 'eval_f1': 0.8413098236775819, 'eval_roc_auc': 0.8572977990061373, 'eval_accuracy': 0.6870748299319728, 'eval_runtime': 118.8199, 'eval_samples_per_second': 4.949, 'eval_steps_per_second': 0.623, 'epoch': 5.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882 (score: 0.8489028213166143).
***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


{'train_runtime': 8431.8808, 'train_samples_per_second': 1.393, 'train_steps_per_second': 0.174, 'train_loss': 0.1985794482587957, 'epoch': 5.0}


  0%|          | 0/74 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 2349
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1470
  Number of trainable parameters = 66955779


------Fold 2--------



  0%|          | 0/1470 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294


{'eval_loss': 0.08184368908405304, 'eval_f1': 0.9680094786729859, 'eval_roc_auc': 0.9703874024526199, 'eval_accuracy': 0.9183673469387755, 'eval_runtime': 108.1964, 'eval_samples_per_second': 5.435, 'eval_steps_per_second': 0.684, 'epoch': 1.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/special_tokens_map.json


{'loss': 0.1517, 'learning_rate': 1.3197278911564626e-05, 'epoch': 1.7}


***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/config.json


{'eval_loss': 0.10213024169206619, 'eval_f1': 0.9692307692307691, 'eval_roc_auc': 0.9715951319212188, 'eval_accuracy': 0.923469387755102, 'eval_runtime': 107.9429, 'eval_samples_per_second': 5.447, 'eval_steps_per_second': 0.686, 'epoch': 2.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/config.json


{'eval_loss': 0.09630405902862549, 'eval_f1': 0.9695158398087268, 'eval_roc_auc': 0.9715719063545151, 'eval_accuracy': 0.9251700680272109, 'eval_runtime': 107.929, 'eval_samples_per_second': 5.448, 'eval_steps_per_second': 0.686, 'epoch': 3.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/special_tokens_map.json


{'loss': 0.0558, 'learning_rate': 6.394557823129253e-06, 'epoch': 3.4}


***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/config.json


{'eval_loss': 0.10796252638101578, 'eval_f1': 0.9682063587282543, 'eval_roc_auc': 0.9702248234856932, 'eval_accuracy': 0.9251700680272109, 'eval_runtime': 107.8884, 'eval_samples_per_second': 5.45, 'eval_steps_per_second': 0.686, 'epoch': 4.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/config.json


{'eval_loss': 0.13214153051376343, 'eval_f1': 0.9663716814159292, 'eval_roc_auc': 0.9689241917502787, 'eval_accuracy': 0.9166666666666666, 'eval_runtime': 107.9752, 'eval_samples_per_second': 5.446, 'eval_steps_per_second': 0.685, 'epoch': 5.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882 (score: 0.9695158398087268).
***** Running Evaluation *****
  Num examples = 588
  Batch size = 8


{'train_runtime': 8278.3771, 'train_samples_per_second': 1.419, 'train_steps_per_second': 0.178, 'train_loss': 0.07752862722695279, 'epoch': 5.0}


  0%|          | 0/74 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 2350
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1470
  Number of trainable parameters = 66955779


------Fold 3--------



  0%|          | 0/1470 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/config.json


{'eval_loss': 0.006697264965623617, 'eval_f1': 0.9993876301285977, 'eval_roc_auc': 0.9993880048959609, 'eval_accuracy': 0.9982964224872232, 'eval_runtime': 116.2527, 'eval_samples_per_second': 5.049, 'eval_steps_per_second': 0.637, 'epoch': 1.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/special_tokens_map.json


{'loss': 0.0698, 'learning_rate': 1.3197278911564626e-05, 'epoch': 1.7}


***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/config.json


{'eval_loss': 0.013444957323372364, 'eval_f1': 0.9945155393053016, 'eval_roc_auc': 0.9951507167603677, 'eval_accuracy': 0.9846678023850085, 'eval_runtime': 115.7219, 'eval_samples_per_second': 5.073, 'eval_steps_per_second': 0.639, 'epoch': 2.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/config.json


{'eval_loss': 0.012307526543736458, 'eval_f1': 0.9963325183374084, 'eval_roc_auc': 0.996657365724125, 'eval_accuracy': 0.989778534923339, 'eval_runtime': 117.4584, 'eval_samples_per_second': 4.998, 'eval_steps_per_second': 0.63, 'epoch': 3.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/special_tokens_map.json


{'loss': 0.0225, 'learning_rate': 6.394557823129253e-06, 'epoch': 3.4}


***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/config.json


{'eval_loss': 0.007532469928264618, 'eval_f1': 0.998165137614679, 'eval_roc_auc': 0.9983286828620626, 'eval_accuracy': 0.9948892674616695, 'eval_runtime': 116.3298, 'eval_samples_per_second': 5.046, 'eval_steps_per_second': 0.636, 'epoch': 4.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/config.json


{'eval_loss': 0.007237689569592476, 'eval_f1': 0.998165137614679, 'eval_roc_auc': 0.9983286828620626, 'eval_accuracy': 0.9948892674616695, 'eval_runtime': 116.2664, 'eval_samples_per_second': 5.049, 'eval_steps_per_second': 0.636, 'epoch': 5.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294 (score: 0.9993876301285977).
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


{'train_runtime': 8252.0489, 'train_samples_per_second': 1.424, 'train_steps_per_second': 0.178, 'train_loss': 0.03379697296895137, 'epoch': 5.0}


  0%|          | 0/74 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 2350
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1470
  Number of trainable parameters = 66955779


------Fold 4--------



  0%|          | 0/1470 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/config.json


{'eval_loss': 0.021204331889748573, 'eval_f1': 0.993734335839599, 'eval_roc_auc': 0.9943785565163971, 'eval_accuracy': 0.9846678023850085, 'eval_runtime': 111.6766, 'eval_samples_per_second': 5.256, 'eval_steps_per_second': 0.663, 'epoch': 1.0}


Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/special_tokens_map.json


{'loss': 0.0464, 'learning_rate': 1.3197278911564626e-05, 'epoch': 1.7}


***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588


{'eval_loss': 0.027905471622943878, 'eval_f1': 0.9925093632958802, 'eval_roc_auc': 0.9935585728640077, 'eval_accuracy': 0.9812606473594548, 'eval_runtime': 111.5736, 'eval_samples_per_second': 5.261, 'eval_steps_per_second': 0.663, 'epoch': 2.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882


{'eval_loss': 0.03012479841709137, 'eval_f1': 0.9925187032418953, 'eval_roc_auc': 0.9936672532369831, 'eval_accuracy': 0.9812606473594548, 'eval_runtime': 111.3146, 'eval_samples_per_second': 5.273, 'eval_steps_per_second': 0.665, 'epoch': 3.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/special_tokens_map.json


{'loss': 0.018, 'learning_rate': 6.394557823129253e-06, 'epoch': 3.4}


***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176


{'eval_loss': 0.02782345376908779, 'eval_f1': 0.9931378665003119, 'eval_roc_auc': 0.9941859254361534, 'eval_accuracy': 0.9829642248722317, 'eval_runtime': 112.199, 'eval_samples_per_second': 5.232, 'eval_steps_per_second': 0.66, 'epoch': 4.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470


{'eval_loss': 0.026869775727391243, 'eval_f1': 0.9950062421972534, 'eval_roc_auc': 0.995850622406639, 'eval_accuracy': 0.9880749574105622, 'eval_runtime': 110.7282, 'eval_samples_per_second': 5.301, 'eval_steps_per_second': 0.668, 'epoch': 5.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470 (score: 0.9950062421972534).
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


{'train_runtime': 8511.5823, 'train_samples_per_second': 1.38, 'train_steps_per_second': 0.173, 'train_loss': 0.02251990786215075, 'epoch': 5.0}


  0%|          | 0/74 [00:00<?, ?it/s]

***** Running training *****
  Num examples = 2350
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 1470
  Number of trainable parameters = 66955779


------Fold 5--------



  0%|          | 0/1470 [00:00<?, ?it/s]

***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294


{'eval_loss': 0.009680557064712048, 'eval_f1': 0.9962871287128713, 'eval_roc_auc': 0.9968619246861924, 'eval_accuracy': 0.9931856899488927, 'eval_runtime': 117.0172, 'eval_samples_per_second': 5.016, 'eval_steps_per_second': 0.632, 'epoch': 1.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-294/special_tokens_map.json


{'loss': 0.0259, 'learning_rate': 1.3197278911564626e-05, 'epoch': 1.7}


***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588


{'eval_loss': 0.0021150456741452217, 'eval_f1': 0.9987577639751553, 'eval_roc_auc': 0.9988558694352764, 'eval_accuracy': 0.9965928449744463, 'eval_runtime': 116.9366, 'eval_samples_per_second': 5.02, 'eval_steps_per_second': 0.633, 'epoch': 2.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-588/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882


{'eval_loss': 0.001561467070132494, 'eval_f1': 0.9987593052109182, 'eval_roc_auc': 0.9989539748953975, 'eval_accuracy': 0.9965928449744463, 'eval_runtime': 117.1931, 'eval_samples_per_second': 5.009, 'eval_steps_per_second': 0.631, 'epoch': 3.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-882/special_tokens_map.json


{'loss': 0.0119, 'learning_rate': 6.394557823129253e-06, 'epoch': 3.4}


***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176


{'eval_loss': 0.0005287817912176251, 'eval_f1': 1.0, 'eval_roc_auc': 1.0, 'eval_accuracy': 1.0, 'eval_runtime': 117.9743, 'eval_samples_per_second': 4.976, 'eval_steps_per_second': 0.627, 'epoch': 4.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470


{'eval_loss': 0.0002490115584805608, 'eval_f1': 1.0, 'eval_roc_auc': 1.0, 'eval_accuracy': 1.0, 'eval_runtime': 116.6999, 'eval_samples_per_second': 5.03, 'eval_steps_per_second': 0.634, 'epoch': 5.0}


Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1470/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/checkpoint-1176 (score: 1.0).
***** Running Evaluation *****
  Num examples = 587
  Batch size = 8


{'train_runtime': 8330.8436, 'train_samples_per_second': 1.41, 'train_steps_per_second': 0.176, 'train_loss': 0.013535653085124735, 'epoch': 5.0}


  0%|          | 0/74 [00:00<?, ?it/s]

Saving model checkpoint to HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO
Configuration saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/config.json
Model weights saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/pytorch_model.bin
tokenizer config file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/tokenizer_config.json
Special tokens file saved in HRAF_Model_MultiLabel_ThreeLargeClasses_kfoldsDEMO/special_tokens_map.json


In [None]:
# def train_loop(train_list=train_list, val_list=val_list, model=model, training_args=training_args, tokenizer=tokenizer, data_collator=data_collator, compute_metrics=compute_metrics, start_fold=1):
#     assert (start_fold >0) and (start_fold <= len(train_list)), f"Incorrect Starting fold, must be no lower than 1 and no higher than {len(train_list)}"

# train_loop(start_fold=1)

### Save Evaluation

In [None]:
# eval_df = eval_df.drop(columns=['train_Count','val_Count',])

In [None]:
# Augment Evaluation File 

from datetime import date

today = date.today()
date_tm = today.strftime("%y/%m/%d")

#reorganize columns
cols = list(eval_df.columns.values) 
remove_list = ["fold", "epoch"]
for removal in remove_list:
    cols.remove(removal)
cols = ["fold","epoch"]+cols
eval_df = eval_df[cols]

numrows = sum(Hraf.num_rows.values())

trainingStatus = 'Initial Training' if overwrite_training == True else 'Continue Training'

info_df  = pd.DataFrame({"Date":len(eval_df)*[date_tm],"Train_status":len(eval_df)*[trainingStatus]})
eval_df = eval_df.reset_index(drop=True)
eval_df = pd.concat([info_df, eval_df], axis=1)
eval_df


Unnamed: 0,Date,Train_status,fold,epoch,eval_loss,eval_f1,eval_roc_auc,eval_accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_count,val_count,total_count
0,24/04/04,Continue Training,1,5.0,0.515,0.848903,0.864096,0.69898,119.5624,4.918,0.619,2349,588,2937
1,24/04/04,Continue Training,2,5.0,0.096304,0.969516,0.971572,0.92517,108.4121,5.424,0.683,2349,588,2937
2,24/04/04,Continue Training,3,5.0,0.006697,0.999388,0.999388,0.998296,116.6438,5.032,0.634,2350,587,2937
3,24/04/04,Continue Training,4,5.0,0.02687,0.995006,0.995851,0.988075,112.146,5.234,0.66,2350,587,2937
4,24/04/04,Continue Training,5,5.0,0.000529,1.0,1.0,1.0,118.3625,4.959,0.625,2350,587,2937


In [None]:
# import evalutaion if it exists
if os.path.exists("Evaluation.xlsx"):
    old_eval = pd.read_excel("Evaluation.xlsx", index_col=0)
    eval_df = pd.concat([old_eval, eval_df])

eval_df.to_excel('Evaluation.xlsx')

### Save Paritioned Datasets

In [None]:
def make_dir(path):
    import os
    # Check whether the specified path exists or not
    isExist = os.path.exists(path)
    if not isExist:
    # Create a new directory because it does not exist
        os.makedirs(path)


# make folder if it does not exist yet
path = os.getcwd() + '/Datasets'
make_dir(path)
# save to Json
for key in Hraf.keys():
    Hraf_dict = Hraf[key]
    Hraf_dict = concatenate_datasets([Hraf_dict, Hraf_prev[key]])
    Hraf_dict = Hraf_dict.to_dict()
    file_path = f"{path}/{key}_dataset.json"
    with open(file_path, "w") as outfile:
        json.dump(Hraf_dict, outfile)
        print(len(Hraf_dict['ID']), f"Rows for \'{key}\' succesfully saved to {file_path}")

7277 Rows for 'train' succesfully saved to /Users/ericchantland/Library/CloudStorage/Dropbox/MEM-DEV-LAB-Current/2023-eHRAF-Misf/HRAF-Misf-NaturalLanguageProcessing/HRAF_NLP/HRAF_MultiLabel_ThreeLargeClasses_kfoldsDemo/Datasets/train_dataset.json
1820 Rows for 'test' succesfully saved to /Users/ericchantland/Library/CloudStorage/Dropbox/MEM-DEV-LAB-Current/2023-eHRAF-Misf/HRAF-Misf-NaturalLanguageProcessing/HRAF_NLP/HRAF_MultiLabel_ThreeLargeClasses_kfoldsDemo/Datasets/test_dataset.json


In [None]:
Hraf

DatasetDict({
    train: Dataset({
        features: ['ID', 'passage', 'EVENT', 'CAUSE', 'ACTION'],
        num_rows: 2937
    })
    test: Dataset({
        features: ['ID', 'passage', 'EVENT', 'CAUSE', 'ACTION'],
        num_rows: 735
    })
})

In [None]:
Hraf_prev

DatasetDict({
    train: Dataset({
        features: ['ID', 'passage', 'EVENT', 'CAUSE', 'ACTION'],
        num_rows: 4340
    })
    test: Dataset({
        features: ['ID', 'passage', 'EVENT', 'CAUSE', 'ACTION'],
        num_rows: 1085
    })
})

In [None]:

Hraf_dummy = Hraf['train']
Hraf_dummy = concatenate_datasets([Hraf_dummy, Hraf_prev['train']])
Hraf_dummy

Dataset({
    features: ['ID', 'passage', 'EVENT', 'CAUSE', 'ACTION'],
    num_rows: 7277
})

In [None]:
Hraf_dummy = Hraf
Hraf_dummy = concatenate_datasets([Hraf_dummy, Hraf_prev])
Hraf_dummy

ValueError: Expected a list of Dataset objects or a list of IterableDataset objects, but first element is a <class 'datasets.dataset_dict.DatasetDict'>