In [1]:
!bash /home/azureuser/cloudfiles/code/blobfuse/blobfuse_raadsinformatie.sh


In [2]:
import sys
sys.path.append("..")

# Select where to run notebook: "azure" or "local"
my_run = "azure"

# import my_secrets as sc
import settings as st

if my_run == "azure":
    import config_azure as cf
elif my_run == "local":
    import config as cf


import os
if my_run == "azure":
    if not os.path.exists(cf.HUGGING_CACHE):
        os.mkdir(cf.HUGGING_CACHE)
    os.environ["TRANSFORMERS_CACHE"] = cf.HUGGING_CACHE

import pandas as pd

In [3]:
import torch
torch.cuda.empty_cache()

## Notebook Overview
Goal: Fine-tune models for document classification.

Method: the documents are shortened by taking the first 200 tokens. Then the shortened doc is formatted using the zero-shot prompt, without template. Then the ideal response is formatted according to JSON format. Formatted doc and response are combined into conversation using the apply_chat_template function. 

*Previous notebook: FinetuningDataFormatting*

*Next notebook: GetPredictions*

In [4]:
# necesarry to log in to huggingface, to save models there
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦

### Finetuning GEITje

In [5]:
# Load GEITje formatted data

from datasets import load_dataset
chat_dataset = load_dataset('FemkeBakker/AmsterdamBalancedFirst200Tokens')


In [6]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# basemodel_name = 'Rijgersberg/GEITje-7B-chat-v2'
basemodel_name = 'mistralai/Mistral-7B-Instruct-v0.2'
# basemodel_name = "stabilityai/stablelm-2-1_6b"
# basemodel_name = 'meta-llama/Llama-2-7b-chat-hf'
model = AutoModelForCausalLM.from_pretrained(basemodel_name, torch_dtype=torch.bfloat16,
                                                low_cpu_mem_usage=True, attn_implementation="sdpa",
                                                device_map='cpu')


tokenizer = AutoTokenizer.from_pretrained(basemodel_name)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = 'right'

model.config.pad_token_id = tokenizer.unk_token_id



Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
import pandas as pd

# funcation to load previous saved dataframe and combine with current model, then save again
def combine_and_save_df(model_df, save_to_path):
    
    # combine with earlier runs if exists
    if os.path.exists(save_to_path):
        original = pd.read_pickle(save_to_path)
        model_df = pd.concat([original, model_df])

    model_df.to_pickle(save_to_path)

In [14]:
import torch
from datasets import DatasetDict, load_dataset, concatenate_datasets
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer
import time

import sys
sys.path.append('../src/') 
import prediction_helperfunctions as ph

def train(model, model_name, tokenizer, chat_dataset, chat_dataset_name, new_model_name, output_directory, train_set, test_set, n_epochs, run_id='No_id', save_to_hub=True, resume=False):
    start_time = time.time()

    # format conversations
    def format(examples):
        return [tokenizer.apply_chat_template(conversation, tokenize=False)
                for conversation in examples['message']]

    per_device_train_batch_size = 2
    gradient_accumulation_steps = 8
    steps_per_epoch = len(chat_dataset[train_set])\
                // (torch.cuda.device_count() * per_device_train_batch_size * gradient_accumulation_steps)
    eval_steps = steps_per_epoch // 5

    training_args = TrainingArguments(
        optim='adamw_bnb_8bit',
        num_train_epochs=n_epochs,
        learning_rate=1e-5,
        lr_scheduler_type='cosine',
        warmup_ratio=0.1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,
        gradient_checkpointing=True,
        evaluation_strategy='steps',
        eval_steps=eval_steps,
        save_strategy='epoch',
        bf16=False, #bf16=True require CUDA 11 -> original code bf16=True
        output_dir=output_directory,
        report_to=["tensorboard", 'wandb'],
        logging_steps=1,
        logging_first_step=True,
        hub_model_id=new_model_name,
        push_to_hub=True,
        hub_private_repo=True,
        hub_strategy='all_checkpoints',
    )

    trainer = SFTTrainer(
        model=model,
        args=training_args,
        tokenizer=tokenizer,
        max_seq_length=8192,
        train_dataset=chat_dataset[train_set],
        eval_dataset=chat_dataset[test_set],
        formatting_func=format,
        neftune_noise_alpha=5,
    )

  
    dict_info = {
        'model':new_model_name,
        'base_model':model_name,
        'chat_dataset':chat_dataset_name,
        'train_set':train_set,
        'test_set': test_set,
        'training_args': training_args,
        'resume_from_checkpoint':resume,
        'date':ph.get_datetime(),
        'runtime': False,
        'Error': False,
        'run_id':run_id,
        'save_to_hub':save_to_hub,
        'output_dir': output_directory,
        'num_train_epochs':n_epochs
        }

    data = pd.DataFrame(columns=dict_info.keys())

    # if no error during training, save run in overview_models and push to hub
    try:
        trainer.train(resume_from_checkpoint=resume)
        if save_to_hub == True:
            trainer.push_to_hub()
            
        dict_info['runtime'] = time.time()-start_time

        data.loc[len(data)] = dict_info
        combine_and_save_df(data, f'{cf.output_path}/finetuning_output/overview_models.pkl')
        print("Finished without error!")

    # if keyboardinterrupted or an error is thrown, save run in overview_models
    except KeyboardInterrupt:
        dict_info['Error'] = 'KeyboardInterrupt'        
        dict_info['runtime'] = time.time()-start_time

        data.loc[len(data)] = dict_info
        combine_and_save_df(data, f'{cf.output_path}/finetuning_output/overview_models.pkl')

    except Exception  as e:
        print(e)
        dict_info['Error'] = e
        dict_info['runtime'] = time.time()-start_time

        data.loc[len(data)] = dict_info
        combine_and_save_df(data, f'{cf.output_path}/finetuning_output/overview_models.pkl')


        model_df = pd.DataFrame(dict_info)
        combine_and_save_df(model_df, f'{cf.output_path}/finetuning_output/overview_models.pkl')


**Note**

To use resume_from_checkpoint, the epoch must be complete; otherwise, it will throw an error. If an error occurs even after an epoch is complete, remove the last checkpoint folder to resolve this. This means you can only resume training from a completed checkpoint. Since each epoch took about 30 minutes, this was not an issue.

MAKE SURE: run_id is unique, for each seperate run. Check overview_models.pkl to find which run ids have already been used.

In [15]:
# path to folder where the checkpoint of the model need to be saved
output_directory = f'{cf.output_path}/finetuning_output/MistralTry2epochs'

# the name of the chat dataset
chat_dataset_name = 'FemkeBakker/AmsterdamBalancedFirst200Tokens'

training_set = 'train' 
validation_set = 'val'

new_model_name = 'FemkeBakker/MistralTry2epochs'
n_epochs = 2

In [16]:
train(model, basemodel_name, tokenizer, chat_dataset, chat_dataset_name,new_model_name,
          output_directory, training_set, validation_set,  n_epochs, run_id=31, save_to_hub=True, resume=False)



## TODO: clean up overview file

In [13]:
overview = pd.read_pickle(f'{cf.output_path}/finetuning_output/overview_models.pkl')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# yeet = overview.loc[overview['num_train_epochs']==3]
# yeet = yeet.loc[yeet['Error']!= 'KeyboardInterrupt']
# yeet = yeet.drop(columns=['training_args'])
display(overview)

Unnamed: 0,model,base_model,chat_dataset,train_set,test_set,training_args,resume_from_checkpoint,date,runtime,Error,run_id,save_to_hub,output_dir,num_train_epochs
0,FemkeBakker/AmsterdamDocClassificationGEITje200T,Rijgersberg/GEITje-7B-chat-v2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationGEITje200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T/runs/May29_12-07-01_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=1,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",False,2024-05-29 14:07:04.642821+02:00,2926.216714,False,15,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,1.0
0,FemkeBakker/AmsterdamDocClassificationMistral200T,mistralai/Mistral-7B-Instruct-v0.2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationMistral200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T/runs/May29_13-36-42_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=1,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",False,2024-05-29 15:36:45.085202+02:00,2879.482625,False,16,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,1.0
0,FemkeBakker/AmsterdamDocClassificationLlama200T,meta-llama/Llama-2-7b-chat-hf,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationLlama200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T/runs/May29_14-44-46_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=1,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",False,2024-05-29 16:44:49.778730+02:00,2459.635145,False,17,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,1.0
0,FemkeBakker/AmsterdamDocClassificationLlama200T,meta-llama/Llama-2-7b-chat-hf,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationLlama200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T/runs/May29_15-32-33_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 17:32:34.996990+02:00,2435.851116,False,17,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,2.0
0,FemkeBakker/AmsterdamDocClassificationMistrallama200T,mistralai/Mistral-7B-Instruct-v0.2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationMistrallama200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T/runs/May29_16-17-39_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 18:17:40.189426+02:00,2909.715319,False,16,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,2.0
0,FemkeBakker/AmsterdamDocClassificationMistral200T,mistralai/Mistral-7B-Instruct-v0.2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationMistral200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T/runs/May29_17-20-43_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 19:20:43.997402+02:00,19.168457,KeyboardInterrupt,16,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,2.0
0,FemkeBakker/AmsterdamDocClassificationMistral200T,mistralai/Mistral-7B-Instruct-v0.2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationMistral200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T/runs/May29_17-21-11_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 19:21:12.049071+02:00,370.747607,False,16,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,2.0
0,FemkeBakker/AmsterdamDocClassificationGEITje200T,Rijgersberg/GEITje-7B-chat-v2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationGEITje200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T/runs/May29_17-39-27_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 19:39:27.535325+02:00,2821.834527,False,15,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,2.0
0,FemkeBakker/AmsterdamDocClassificationGEITje200T2Epochs,Rijgersberg/GEITje-7B-chat-v2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationGEITje200T2Epochs,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T/runs/Jun03_06-33-43_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-06-03 08:33:45.251941+02:00,419.937012,False,18,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,2.0
0,FemkeBakker/AmsterdamDocClassificationLlama200T2Epochs,meta-llama/Llama-2-7b-chat-hf,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbatch_eval_metrics=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\neval_strategy=steps,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationLlama200T2Epochs,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T/runs/Jun03_06-52-31_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nrestore_callback_states_from_checkpoint=False,\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-06-03 08:52:32.234614+02:00,378.556517,False,19,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,2.0


In [15]:
for i in set(overview['model']):
    print(i)

FemkeBakker/AmsterdamDocClassificationMistral200T2Epochs
FemkeBakker/AmsterdamDocClassificationGEITje200T2Epochs
FemkeBakker/AmsterdamDocClassificationLlama200T
FemkeBakker/AmsterdamDocClassificationMistral200T3Epochs
FemkeBakker/AmsterdamDocClassificationMistral200T
FemkeBakker/AmsterdamDocClassificationLlama200T2Epochs
FemkeBakker/AmsterdamDocClassificationMistrallama200T
FemkeBakker/AmsterdamDocClassificationGEITje200T1Epochs
FemkeBakker/AmsterdamDocClassificationMistral200T1Epochs
FemkeBakker/AmsterdamDocClassificationLlama200T3Epochs
FemkeBakker/AmsterdamDocClassificationGEITje200T
FemkeBakker/Try2epochGEITje
FemkeBakker/AmsterdamDocClassificationGEITje200T3Epochs
FemkeBakker/AmsterdamDocClassificationLlama200T1Epochs


In [14]:
display(overview)

Unnamed: 0,model,base_model,chat_dataset,train_set,test_set,training_args,resume_from_checkpoint,date,runtime,Error,run_id,save_to_hub,output_dir
0,FemkeBakker/AmsterdamDocClassificationGEITje200T,Rijgersberg/GEITje-7B-chat-v2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationGEITje200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T/runs/May29_12-07-01_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=1,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",False,2024-05-29 14:07:04.642821+02:00,2926.216714,False,15,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T
0,FemkeBakker/AmsterdamDocClassificationMistral200T,mistralai/Mistral-7B-Instruct-v0.2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationMistral200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T/runs/May29_13-36-42_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=1,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",False,2024-05-29 15:36:45.085202+02:00,2879.482625,False,16,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T
0,FemkeBakker/AmsterdamDocClassificationLlama200T,meta-llama/Llama-2-7b-chat-hf,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationLlama200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T/runs/May29_14-44-46_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=1,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",False,2024-05-29 16:44:49.778730+02:00,2459.635145,False,17,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T
0,FemkeBakker/AmsterdamDocClassificationLlama200T,meta-llama/Llama-2-7b-chat-hf,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationLlama200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T/runs/May29_15-32-33_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 17:32:34.996990+02:00,2435.851116,False,17,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T
0,FemkeBakker/AmsterdamDocClassificationMistrallama200T,mistralai/Mistral-7B-Instruct-v0.2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationMistrallama200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T/runs/May29_16-17-39_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 18:17:40.189426+02:00,2909.715319,False,16,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T
0,FemkeBakker/AmsterdamDocClassificationMistral200T,mistralai/Mistral-7B-Instruct-v0.2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationMistral200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T/runs/May29_17-20-43_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 19:20:43.997402+02:00,19.168457,KeyboardInterrupt,16,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T
0,FemkeBakker/AmsterdamDocClassificationMistral200T,mistralai/Mistral-7B-Instruct-v0.2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationMistral200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T/runs/May29_17-21-11_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 19:21:12.049071+02:00,370.747607,False,16,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationMistral200T
0,FemkeBakker/AmsterdamDocClassificationGEITje200T,Rijgersberg/GEITje-7B-chat-v2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationGEITje200T,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T/runs/May29_17-39-27_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-05-29 19:39:27.535325+02:00,2821.834527,False,15,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T
0,FemkeBakker/AmsterdamDocClassificationGEITje200T2Epochs,Rijgersberg/GEITje-7B-chat-v2,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationGEITje200T2Epochs,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T/runs/Jun03_06-33-43_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-06-03 08:33:45.251941+02:00,419.937012,False,18,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationGEITje200T
0,FemkeBakker/AmsterdamDocClassificationLlama200T2Epochs,meta-llama/Llama-2-7b-chat-hf,FemkeBakker/AmsterdamBalancedFirst200Tokens,train,val,"TrainingArguments(\n_n_gpu=1,\naccelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},\nadafactor=False,\nadam_beta1=0.9,\nadam_beta2=0.999,\nadam_epsilon=1e-08,\nauto_find_batch_size=False,\nbf16=False,\nbf16_full_eval=False,\ndata_seed=None,\ndataloader_drop_last=False,\ndataloader_num_workers=0,\ndataloader_persistent_workers=False,\ndataloader_pin_memory=True,\ndataloader_prefetch_factor=None,\nddp_backend=None,\nddp_broadcast_buffers=None,\nddp_bucket_cap_mb=None,\nddp_find_unused_parameters=None,\nddp_timeout=1800,\ndebug=[],\ndeepspeed=None,\ndisable_tqdm=False,\ndispatch_batches=None,\ndo_eval=True,\ndo_predict=False,\ndo_train=False,\neval_accumulation_steps=None,\neval_delay=0,\neval_do_concat_batches=True,\neval_steps=123,\nevaluation_strategy=steps,\nfp16=False,\nfp16_backend=auto,\nfp16_full_eval=False,\nfp16_opt_level=O1,\nfsdp=[],\nfsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\nfsdp_min_num_params=0,\nfsdp_transformer_layer_cls_to_wrap=None,\nfull_determinism=False,\ngradient_accumulation_steps=8,\ngradient_checkpointing=True,\ngradient_checkpointing_kwargs=None,\ngreater_is_better=None,\ngroup_by_length=False,\nhalf_precision_backend=auto,\nhub_always_push=False,\nhub_model_id=FemkeBakker/AmsterdamDocClassificationLlama200T2Epochs,\nhub_private_repo=True,\nhub_strategy=all_checkpoints,\nhub_token=<HUB_TOKEN>,\nignore_data_skip=False,\ninclude_inputs_for_metrics=False,\ninclude_num_input_tokens_seen=False,\ninclude_tokens_per_second=False,\njit_mode_eval=False,\nlabel_names=None,\nlabel_smoothing_factor=0.0,\nlearning_rate=1e-05,\nlength_column_name=length,\nload_best_model_at_end=False,\nlocal_rank=0,\nlog_level=passive,\nlog_level_replica=warning,\nlog_on_each_node=True,\nlogging_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T/runs/Jun03_06-52-31_femke-gpu-24cores-220ram,\nlogging_first_step=True,\nlogging_nan_inf_filter=True,\nlogging_steps=1,\nlogging_strategy=steps,\nlr_scheduler_kwargs={},\nlr_scheduler_type=cosine,\nmax_grad_norm=1.0,\nmax_steps=-1,\nmetric_for_best_model=None,\nmp_parameters=,\nneftune_noise_alpha=5,\nno_cuda=False,\nnum_train_epochs=2,\noptim=adamw_bnb_8bit,\noptim_args=None,\noptim_target_modules=None,\noutput_dir=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\noverwrite_output_dir=False,\npast_index=-1,\nper_device_eval_batch_size=8,\nper_device_train_batch_size=2,\nprediction_loss_only=False,\npush_to_hub=True,\npush_to_hub_model_id=None,\npush_to_hub_organization=None,\npush_to_hub_token=<PUSH_TO_HUB_TOKEN>,\nray_scope=last,\nremove_unused_columns=True,\nreport_to=['tensorboard', 'wandb'],\nresume_from_checkpoint=None,\nrun_name=/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T,\nsave_on_each_node=False,\nsave_only_model=False,\nsave_safetensors=True,\nsave_steps=500,\nsave_strategy=epoch,\nsave_total_limit=None,\nseed=42,\nskip_memory_metrics=True,\nsplit_batches=None,\ntf32=None,\ntorch_compile=False,\ntorch_compile_backend=None,\ntorch_compile_mode=None,\ntorchdynamo=None,\ntpu_metrics_debug=False,\ntpu_num_cores=None,\nuse_cpu=False,\nuse_ipex=False,\nuse_legacy_prediction_loop=False,\nuse_mps_device=False,\nwarmup_ratio=0.1,\nwarmup_steps=0,\nweight_decay=0.0,\n)",True,2024-06-03 08:52:32.234614+02:00,378.556517,False,19,True,/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/finetuning_output/AmsterdamDocClassificationLlama200T
