## Install and Import Modules

In [1]:
# Clone BAdam Repository
# NOTE(review): the clone is not pinned to a commit or tag, so a later run may
# pick up a different block_optim.py — consider pinning a revision.
!git clone https://github.com/Ledzy/BAdam.git
# Copy the repository's top-level Python modules next to the notebook so that
# `block_optim` can be imported directly in the import cell.
!cp -R ./BAdam/*.py ./
# List the working directory to confirm the copy.
!ls

Cloning into 'BAdam'...
remote: Enumerating objects: 8, done.[K
remote: Counting objects: 100% (8/8), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 8 (delta 2), reused 8 (delta 2), pack-reused 0[K
Receiving objects: 100% (8/8), 8.71 KiB | 8.71 MiB/s, done.
Resolving deltas: 100% (2/2), done.
BAdam					 block_optim.py  README.md
BAdam_Qwen1_5_1.8B_Dutch_Chat_SFT.ipynb  LICENSE


In [2]:
# Install Modules
# The pinned installs below are disabled; uncomment them when running in an
# environment where these packages are not already available.
# NOTE(review): presumably these are the versions the notebook was run with — confirm.
#!pip install -q accelerate==0.27.2
#!pip install -q bitsandbytes==0.43.0
#!pip install -q datasets==2.17.1
#!pip install -q peft==0.9.0
#!pip install -q transformers==4.38.2
#!pip install -q trl==0.8.1

In [3]:
# Import Modules
from datasets import load_dataset
from huggingface_hub import notebook_login
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          DataCollatorForLanguageModeling,
                          TrainingArguments)
import torch
from trl import SFTTrainer

# Import BAdam
from block_optim import BlockOptimizer

## Constants

In [4]:
# Name constants
# Base checkpoint that the tokenizer and the model are loaded from.
model_name = "Qwen/Qwen1.5-1.8B-Chat"
# Name of the fine-tuned model; reused as the training output directory and as
# the Hub repository id.
hf_model_name = "BAdam-Qwen1.5-1.8B-Dutch-Chat-Sft"

## HuggingFace Login

In [5]:
# HuggingFace Hub Login
# Displays the huggingface_hub login widget, so no access token is hard-coded
# in the notebook. The login is needed later because training pushes to the Hub.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Tokenizer

In [6]:
# Max Length: passed to the SFTTrainer as max_seq_length
MAX_LEN = 1024

# Create Tokenizer from the base checkpoint
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
)

# Tokenizer Summary
print(tokenizer)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Qwen2TokenizerFast(name_or_path='Qwen/Qwen1.5-1.8B-Chat', vocab_size=151643, model_max_length=32768, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>']}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}


## Create Model Based on Qwen1.5-1.8B-Chat

In [7]:
# Create Model: load the base checkpoint in bfloat16 with automatic device placement
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype = torch.bfloat16,
    device_map = "auto",
)

# Turn off use_cache on the model config
# NOTE(review): presumably because training runs with gradient checkpointing — confirm.
model.config.use_cache = False

# Show Model Summary (module tree)
print(model)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 2048)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (k_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): Qwen2RotaryEmbedding()
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=2048, out_features=5504, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5504, bias=False)
          (down_proj): Linear(in_features=5504, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm()
        (post_attention_layernorm): Qwen2RMSNorm()
      )
    )
    (norm): Qwen2RMSNorm()
  )
  (lm_head): Line

## Modify Default Chat Template to Dutch

In [8]:
# Chat template with a Dutch default system message.
# - When the first message is not a system message, the system prompt
#   'Je bent een behulpzame AI assistent' is emitted first.
# - Every message is rendered as '<|im_start|>' + role + newline + content.
# - '<|im_end|>' plus newline closes every message except the last one; the
#   last one is closed only when add_generation_prompt is set.
# - With add_generation_prompt set and a non-assistant final message, an
#   opening '<|im_start|>assistant' line is appended.
# The adjacent literals below concatenate into one single-line template string.
tokenizer.chat_template = (
    "{% for message in messages %}"
    "{% if loop.first and messages[0]['role'] != 'system' %}"
    "{{ '<|im_start|>system\nJe bent een behulpzame AI assistent<|im_end|>\n' }}"
    "{% endif %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content']}}"
    "{% if (loop.last and add_generation_prompt) or not loop.last %}"
    "{{ '<|im_end|>' + '\n'}}"
    "{% endif %}"
    "{% endfor %}"
    "{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}"
    "{{ '<|im_start|>assistant\n' }}"
    "{% endif %}"
)

# Summary Chat Template
tokenizer.chat_template

"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nJe bent een behulpzame AI assistent<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}"

## Test Dataset

In [9]:
# Load Test Dataset: take the 'test_sft' split, keep only its first 3072 rows
# to limit the time spent on validation, and drop the prompt columns so that
# only 'messages' remains.
test_data = (
    load_dataset('BramVanroy/ultrachat_200k_dutch', split = 'test_sft')
    .select(range(3072))
    .remove_columns(["prompt", "prompt_id"])
)

# Summary
print(test_data)

Dataset({
    features: ['messages'],
    num_rows: 3072
})


## Training Dataset

In [10]:
# Load Train Dataset: the full 'train_sft' split, with the prompt columns
# dropped so that only 'messages' remains.
train_data = (
    load_dataset('BramVanroy/ultrachat_200k_dutch', split = 'train_sft')
    .remove_columns(["prompt", "prompt_id"])
)

# Summary
print(train_data)

Dataset({
    features: ['messages'],
    num_rows: 192598
})


## Chat Template Example

In [11]:
# Show sample: render the first training conversation through the chat
# template as plain text (tokenize = False) to inspect the prompt format.
item_data = train_data[0]
chat_template_string = tokenizer.apply_chat_template(
    item_data["messages"],
    tokenize = False,
)
print(chat_template_string)

<|im_start|>system
Je bent een behulpzame AI assistent<|im_end|>
<|im_start|>user
Kan je mij vertellen welke versie van mijn website thema ik gebruik? Er staat iets over sectie-gebaseerde thema's zoals Responsive 6.0+, Retina 4.0+, Parallax 3.0+ Turbo 2.0+, Mobilia 5.0+. En ook, hoe zit het met die functie die toelaat het tweede productafbeelding te tonen wanneer ik erover hover? Is dat voor alle secties of enkel de genoemden?<|im_end|>
<|im_start|>assistant
Om te bepalen welke themaversie u gebruikt, moet u meestal in de instellingen van uw websiteachtergrond kijken. Deze kunnen vaak gevonden worden in de thema-editor of het dashboard waar informatie staat over het huidige thema. Wat betreft de functie om de secundaire afbeelding van een product te tonen bij het erover hoveren; dit is een instellingsmogelijkheid die vaak in de secties 'Collecties pagina's' en 'Uitgelichte Collecties' zich bevindt. Deze functie is doorgaans beperkt tot de secties die in het materiaal genoemd worden, te

## Create BAdam Block Optimizer

In [12]:
# Create BAdam Optimizer: an AdamW base optimizer over all model parameters,
# wrapped by BlockOptimizer.
optimizer = BlockOptimizer(
    base_optimizer = torch.optim.AdamW(
        model.parameters(),
        lr = 5.0e-5,
        weight_decay = 0.001,
    ),
    named_parameters_list = list(model.named_parameters()),
    block_prefix_list = None,
    # NOTE(review): presumably the active block changes every 16 optimizer
    # steps, visiting blocks in descending order — confirm against block_optim.py.
    switch_block_every = 16,
    switch_mode = "descending",
    verbose = 0,
)

## Train Model

In [13]:
# Step intervals for logging, evaluation and checkpointing
logging_steps = 128
eval_steps = 256
save_steps = 1024

# Set TrainingArguments
training_args = TrainingArguments(
    # Run length
    # NOTE(review): per the Transformers docs a positive max_steps overrides
    # num_train_epochs — confirm the epoch setting is intentionally kept.
    num_train_epochs = 1,
    max_steps = 8192,
    # Logging / evaluation / checkpoint cadence
    logging_steps = logging_steps,
    evaluation_strategy = "steps",
    eval_steps = eval_steps,
    save_strategy = 'steps',
    save_steps = save_steps,
    save_total_limit = 1,
    # Batch sizes: 2 per device, with gradients accumulated over 8 steps
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    gradient_accumulation_steps = 8,
    # Memory / precision
    gradient_checkpointing = True,
    gradient_checkpointing_kwargs = {'use_reentrant': False},
    bf16 = True,
    # Output location and Hub upload (private repository)
    output_dir = hf_model_name,
    hub_model_id = hf_model_name,
    push_to_hub = True,
    hub_private_repo = True,
    report_to = 'tensorboard',
)

# Config SFTTrainer
trainer = SFTTrainer(
    model,
    args = training_args,
    train_dataset = train_data,
    eval_dataset = test_data,
    tokenizer = tokenizer,
    # Training samples are packed; evaluation samples are not
    packing = True,
    eval_packing = False,
    max_seq_length = MAX_LEN,
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm = False),
    # BAdam optimizer is supplied explicitly; no scheduler is supplied (None)
    optimizers = (optimizer, None),
)

# Perform Training
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


  0%|          | 0/8192 [00:00<?, ?it/s]

{'loss': 2.4304, 'grad_norm': 0.400390625, 'learning_rate': 4.921875e-05, 'epoch': 0.01}
{'loss': 2.198, 'grad_norm': 0.43359375, 'learning_rate': 4.8437500000000005e-05, 'epoch': 0.02}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 2.0903122425079346, 'eval_runtime': 339.4339, 'eval_samples_per_second': 9.05, 'eval_steps_per_second': 4.525, 'epoch': 0.02}
{'loss': 2.1101, 'grad_norm': 0.6015625, 'learning_rate': 4.765625e-05, 'epoch': 0.03}
{'loss': 2.0581, 'grad_norm': 0.388671875, 'learning_rate': 4.6875e-05, 'epoch': 0.04}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.9722604751586914, 'eval_runtime': 339.6458, 'eval_samples_per_second': 9.045, 'eval_steps_per_second': 4.522, 'epoch': 0.04}
{'loss': 2.0034, 'grad_norm': 0.419921875, 'learning_rate': 4.609375e-05, 'epoch': 0.05}
{'loss': 1.9868, 'grad_norm': 0.453125, 'learning_rate': 4.5312500000000004e-05, 'epoch': 0.07}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.9184621572494507, 'eval_runtime': 339.453, 'eval_samples_per_second': 9.05, 'eval_steps_per_second': 4.525, 'epoch': 0.07}
{'loss': 1.9784, 'grad_norm': 0.408203125, 'learning_rate': 4.453125e-05, 'epoch': 0.08}
{'loss': 1.9414, 'grad_norm': 0.4296875, 'learning_rate': 4.375e-05, 'epoch': 0.09}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.8734697103500366, 'eval_runtime': 340.2689, 'eval_samples_per_second': 9.028, 'eval_steps_per_second': 4.514, 'epoch': 0.09}
{'loss': 1.9216, 'grad_norm': 0.51953125, 'learning_rate': 4.2968750000000004e-05, 'epoch': 0.1}
{'loss': 1.9182, 'grad_norm': 0.5234375, 'learning_rate': 4.21875e-05, 'epoch': 0.11}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.8662651777267456, 'eval_runtime': 340.5849, 'eval_samples_per_second': 9.02, 'eval_steps_per_second': 4.51, 'epoch': 0.11}
{'loss': 1.9047, 'grad_norm': 0.3984375, 'learning_rate': 4.140625e-05, 'epoch': 0.12}
{'loss': 1.8836, 'grad_norm': 1.3359375, 'learning_rate': 4.0625000000000005e-05, 'epoch': 0.13}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.8209768533706665, 'eval_runtime': 340.3774, 'eval_samples_per_second': 9.025, 'eval_steps_per_second': 4.513, 'epoch': 0.13}
{'loss': 1.8764, 'grad_norm': 1.3515625, 'learning_rate': 3.984375e-05, 'epoch': 0.14}
{'loss': 1.8817, 'grad_norm': 0.376953125, 'learning_rate': 3.90625e-05, 'epoch': 0.15}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.8031729459762573, 'eval_runtime': 340.614, 'eval_samples_per_second': 9.019, 'eval_steps_per_second': 4.51, 'epoch': 0.15}
{'loss': 1.8535, 'grad_norm': 0.416015625, 'learning_rate': 3.828125e-05, 'epoch': 0.16}
{'loss': 1.8516, 'grad_norm': 0.50390625, 'learning_rate': 3.7500000000000003e-05, 'epoch': 0.17}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.786013126373291, 'eval_runtime': 340.745, 'eval_samples_per_second': 9.016, 'eval_steps_per_second': 4.508, 'epoch': 0.17}


'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/0f/7c/0f7cac763a264ce2ee9a8886fd2d1cbbfbc07c6af2c73d70617957f4c22db9a0/607b169e49b1863c99f025e245e50055e7307ef87e948c132c9a59c7a9cc3d63?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2FTF47%2F20240405%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240405T231312Z&X-Amz-Expires=86400&X-Amz-Signature=ea0dffb8c1643ec889cc844c32cf00382a45bb5a551ecaae8adb5252d3329d1f&X-Amz-SignedHeaders=host&partNumber=1&uploadId=.AFWtvOUCmi1L1_fciOM6avXnrCA3gtytHj4Q4EK84VNyrzLlTutAj76PxjdholLGXKfawsAE60LYhDimgofVRESiPZnx.7mMjKp3ssdFHT3VxbsPfP8v3MloqwHHHy_&x-id=UploadPart (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2426)')))"), '(Request ID: 8e7d52c5-be47-443f-9025-07b021b0719d)')' thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/0f/7c/0

{'loss': 1.8457, 'grad_norm': 0.3828125, 'learning_rate': 3.671875e-05, 'epoch': 0.19}
{'loss': 1.8233, 'grad_norm': 0.396484375, 'learning_rate': 3.59375e-05, 'epoch': 0.2}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.770159125328064, 'eval_runtime': 340.5447, 'eval_samples_per_second': 9.021, 'eval_steps_per_second': 4.51, 'epoch': 0.2}
{'loss': 1.8291, 'grad_norm': 0.392578125, 'learning_rate': 3.5156250000000004e-05, 'epoch': 0.21}
{'loss': 1.8181, 'grad_norm': 0.390625, 'learning_rate': 3.4375e-05, 'epoch': 0.22}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.768544316291809, 'eval_runtime': 339.7486, 'eval_samples_per_second': 9.042, 'eval_steps_per_second': 4.521, 'epoch': 0.22}
{'loss': 1.8213, 'grad_norm': 0.43359375, 'learning_rate': 3.359375e-05, 'epoch': 0.23}
{'loss': 1.7951, 'grad_norm': 0.5078125, 'learning_rate': 3.2812500000000005e-05, 'epoch': 0.24}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.746395468711853, 'eval_runtime': 340.5577, 'eval_samples_per_second': 9.02, 'eval_steps_per_second': 4.51, 'epoch': 0.24}
{'loss': 1.8114, 'grad_norm': 0.48046875, 'learning_rate': 3.203125e-05, 'epoch': 0.25}
{'loss': 1.8068, 'grad_norm': 0.3984375, 'learning_rate': 3.125e-05, 'epoch': 0.26}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.7382639646530151, 'eval_runtime': 338.9646, 'eval_samples_per_second': 9.063, 'eval_steps_per_second': 4.531, 'epoch': 0.26}


'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/0f/7c/0f7cac763a264ce2ee9a8886fd2d1cbbfbc07c6af2c73d70617957f4c22db9a0/2039e48b64a7c2bcac935ea77e16751213d75518a6bc3fac0594185967feb2e0?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2FTF47%2F20240406%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240406T004548Z&X-Amz-Expires=86400&X-Amz-Signature=ef599a4f992ebe78f2b8e8ed55161dc5a38c4d9cbcc54d1037f3931fc290b535&X-Amz-SignedHeaders=host&partNumber=1&uploadId=QklIYha2NWn6PYfMH.ptcNT41CDJFQbvLbBEoKxEKnAKxsrKSAylCHa_NVSTqzTSyB_NC_NXSe3h6nWYNoKQZxEM03I75b40RnDaV2IqTiLePlMiETwlSNYQdOum3nc7&x-id=UploadPart (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2426)')))"), '(Request ID: ba390cf5-65b9-41a3-9114-8d478517b0d4)')' thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/0f/7c/0

{'loss': 1.7897, 'grad_norm': 1.34375, 'learning_rate': 3.0468750000000002e-05, 'epoch': 0.27}
{'loss': 1.7925, 'grad_norm': 1.28125, 'learning_rate': 2.96875e-05, 'epoch': 0.28}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.7308464050292969, 'eval_runtime': 340.0049, 'eval_samples_per_second': 9.035, 'eval_steps_per_second': 4.518, 'epoch': 0.28}
{'loss': 1.7999, 'grad_norm': 0.375, 'learning_rate': 2.890625e-05, 'epoch': 0.29}
{'loss': 1.769, 'grad_norm': 0.451171875, 'learning_rate': 2.8125000000000003e-05, 'epoch': 0.31}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.7187190055847168, 'eval_runtime': 339.474, 'eval_samples_per_second': 9.049, 'eval_steps_per_second': 4.525, 'epoch': 0.31}
{'loss': 1.7755, 'grad_norm': 0.515625, 'learning_rate': 2.734375e-05, 'epoch': 0.32}
{'loss': 1.7799, 'grad_norm': 0.380859375, 'learning_rate': 2.6562500000000002e-05, 'epoch': 0.33}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.7177022695541382, 'eval_runtime': 340.224, 'eval_samples_per_second': 9.029, 'eval_steps_per_second': 4.515, 'epoch': 0.33}
{'loss': 1.766, 'grad_norm': 0.412109375, 'learning_rate': 2.578125e-05, 'epoch': 0.34}
{'loss': 1.7591, 'grad_norm': 0.37109375, 'learning_rate': 2.5e-05, 'epoch': 0.35}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.704742431640625, 'eval_runtime': 340.167, 'eval_samples_per_second': 9.031, 'eval_steps_per_second': 4.515, 'epoch': 0.35}
{'loss': 1.7581, 'grad_norm': 0.388671875, 'learning_rate': 2.4218750000000003e-05, 'epoch': 0.36}
{'loss': 1.7568, 'grad_norm': 0.412109375, 'learning_rate': 2.34375e-05, 'epoch': 0.37}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6954189538955688, 'eval_runtime': 341.0297, 'eval_samples_per_second': 9.008, 'eval_steps_per_second': 4.504, 'epoch': 0.37}
{'loss': 1.7418, 'grad_norm': 0.46484375, 'learning_rate': 2.2656250000000002e-05, 'epoch': 0.38}
{'loss': 1.7458, 'grad_norm': 0.4765625, 'learning_rate': 2.1875e-05, 'epoch': 0.39}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.7067569494247437, 'eval_runtime': 340.5158, 'eval_samples_per_second': 9.022, 'eval_steps_per_second': 4.511, 'epoch': 0.39}
{'loss': 1.7599, 'grad_norm': 0.392578125, 'learning_rate': 2.109375e-05, 'epoch': 0.4}
{'loss': 1.748, 'grad_norm': 1.2578125, 'learning_rate': 2.0312500000000002e-05, 'epoch': 0.41}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6854361295700073, 'eval_runtime': 340.1953, 'eval_samples_per_second': 9.03, 'eval_steps_per_second': 4.515, 'epoch': 0.41}
{'loss': 1.742, 'grad_norm': 1.2109375, 'learning_rate': 1.953125e-05, 'epoch': 0.43}
{'loss': 1.747, 'grad_norm': 0.376953125, 'learning_rate': 1.8750000000000002e-05, 'epoch': 0.44}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6785920858383179, 'eval_runtime': 340.7219, 'eval_samples_per_second': 9.016, 'eval_steps_per_second': 4.508, 'epoch': 0.44}


'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/0f/7c/0f7cac763a264ce2ee9a8886fd2d1cbbfbc07c6af2c73d70617957f4c22db9a0/fe9f909a9bbc5886b51e9962fff96ec0c7ad2810d60c96b24c494e034d40ade2?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2FTF47%2F20240406%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240406T035517Z&X-Amz-Expires=86400&X-Amz-Signature=56a881a7de5608d55bc4c642251c7a0000bcda2043635b479f9ce84d8f8a28a0&X-Amz-SignedHeaders=host&partNumber=1&uploadId=5VgJa7wEG.0BrX56.3JAIX9_eSR1grd..L8C49lupyfHGjB.N4cFuphWn2nwWmoGdX5z2Q_HzM3PJUkcRCsirrBBQ40q497P_FNN9e54vijx5TfOObjrDW1TMbHX6Id2&x-id=UploadPart (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2426)')))"), '(Request ID: b75ebbb1-f60f-4523-b89d-348d70048eda)')' thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/0f/7c/0

{'loss': 1.7438, 'grad_norm': 0.42578125, 'learning_rate': 1.796875e-05, 'epoch': 0.45}
{'loss': 1.7256, 'grad_norm': 0.5234375, 'learning_rate': 1.71875e-05, 'epoch': 0.46}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6701663732528687, 'eval_runtime': 339.956, 'eval_samples_per_second': 9.036, 'eval_steps_per_second': 4.518, 'epoch': 0.46}
{'loss': 1.7443, 'grad_norm': 0.375, 'learning_rate': 1.6406250000000002e-05, 'epoch': 0.47}
{'loss': 1.7325, 'grad_norm': 0.408203125, 'learning_rate': 1.5625e-05, 'epoch': 0.48}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.664176344871521, 'eval_runtime': 339.8348, 'eval_samples_per_second': 9.04, 'eval_steps_per_second': 4.52, 'epoch': 0.48}
{'loss': 1.724, 'grad_norm': 0.384765625, 'learning_rate': 1.484375e-05, 'epoch': 0.49}
{'loss': 1.7341, 'grad_norm': 0.369140625, 'learning_rate': 1.4062500000000001e-05, 'epoch': 0.5}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6714180707931519, 'eval_runtime': 340.468, 'eval_samples_per_second': 9.023, 'eval_steps_per_second': 4.511, 'epoch': 0.5}
{'loss': 1.7293, 'grad_norm': 0.400390625, 'learning_rate': 1.3281250000000001e-05, 'epoch': 0.51}
{'loss': 1.7097, 'grad_norm': 0.44140625, 'learning_rate': 1.25e-05, 'epoch': 0.52}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.656798243522644, 'eval_runtime': 339.7976, 'eval_samples_per_second': 9.041, 'eval_steps_per_second': 4.52, 'epoch': 0.52}


'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/0f/7c/0f7cac763a264ce2ee9a8886fd2d1cbbfbc07c6af2c73d70617957f4c22db9a0/74a651129338e7afa2d45196937ad629030a756ba363c977ed536cc0507ebd32?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2FTF47%2F20240406%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240406T053016Z&X-Amz-Expires=86400&X-Amz-Signature=1d3a5c972cd1ff5a0946e124b965cc87ecc727328adf2caee0e0e9dd0ad725ec&X-Amz-SignedHeaders=host&partNumber=1&uploadId=DSX0eo2jM5L_hnxg1iWEhFRqOchBsshvReMtpA1VmQrtggShLTDPAzM1x3o9PLyS_cpK6y.Ew6oonPw_IjRVbZ5ic.kWPGhKkGvyZa.CWMbN2kXVctkapMfPt5DeCW_Y&x-id=UploadPart (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2426)')))"), '(Request ID: 973ddb1d-5bff-4b9f-87a8-59fb8ccef4a3)')' thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/0f/7c/0

{'loss': 1.7209, 'grad_norm': 0.52734375, 'learning_rate': 1.171875e-05, 'epoch': 0.53}
{'loss': 1.7139, 'grad_norm': 0.404296875, 'learning_rate': 1.09375e-05, 'epoch': 0.55}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6550045013427734, 'eval_runtime': 338.9321, 'eval_samples_per_second': 9.064, 'eval_steps_per_second': 4.532, 'epoch': 0.55}
{'loss': 1.6972, 'grad_norm': 1.2265625, 'learning_rate': 1.0156250000000001e-05, 'epoch': 0.56}
{'loss': 1.715, 'grad_norm': 1.3203125, 'learning_rate': 9.375000000000001e-06, 'epoch': 0.57}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6512317657470703, 'eval_runtime': 340.1179, 'eval_samples_per_second': 9.032, 'eval_steps_per_second': 4.516, 'epoch': 0.57}
{'loss': 1.7205, 'grad_norm': 0.375, 'learning_rate': 8.59375e-06, 'epoch': 0.58}
{'loss': 1.7056, 'grad_norm': 0.490234375, 'learning_rate': 7.8125e-06, 'epoch': 0.59}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6477681398391724, 'eval_runtime': 339.9375, 'eval_samples_per_second': 9.037, 'eval_steps_per_second': 4.518, 'epoch': 0.59}
{'loss': 1.6886, 'grad_norm': 0.462890625, 'learning_rate': 7.031250000000001e-06, 'epoch': 0.6}
{'loss': 1.7105, 'grad_norm': 0.37109375, 'learning_rate': 6.25e-06, 'epoch': 0.61}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6511720418930054, 'eval_runtime': 339.8367, 'eval_samples_per_second': 9.04, 'eval_steps_per_second': 4.52, 'epoch': 0.61}


'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/0f/7c/0f7cac763a264ce2ee9a8886fd2d1cbbfbc07c6af2c73d70617957f4c22db9a0/04399e40af7eace2c2f8f4a7ab0eac1cb74ac7f2321a61eb94482b19e7a956c6?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2FTF47%2F20240406%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240406T070452Z&X-Amz-Expires=86400&X-Amz-Signature=31896d7acbb3472b21125859edfa48a46ea232d43c1fd0909774173416229a63&X-Amz-SignedHeaders=host&partNumber=1&uploadId=x_MBuq6m1bhP5WmeDUIdEK5z_UN6_nWOTggPpXk7faRl9V2kDtc4TAcaRW4pzkIE9VaQPrt2FIr6uHE0BrJ1V5Pln8WoZbiwfpNLuUIYn6H0u_hKmkbsvBRm7O1HmqQA&x-id=UploadPart (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2426)')))"), '(Request ID: 14091714-a246-474d-b959-680e82be18da)')' thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/0f/7c/0

{'loss': 1.7072, 'grad_norm': 0.388671875, 'learning_rate': 5.46875e-06, 'epoch': 0.62}
{'loss': 1.712, 'grad_norm': 0.3359375, 'learning_rate': 4.6875000000000004e-06, 'epoch': 0.63}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.64041268825531, 'eval_runtime': 339.643, 'eval_samples_per_second': 9.045, 'eval_steps_per_second': 4.522, 'epoch': 0.63}
{'loss': 1.7011, 'grad_norm': 0.359375, 'learning_rate': 3.90625e-06, 'epoch': 0.64}
{'loss': 1.7068, 'grad_norm': 0.412109375, 'learning_rate': 3.125e-06, 'epoch': 0.65}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.6382999420166016, 'eval_runtime': 340.5821, 'eval_samples_per_second': 9.02, 'eval_steps_per_second': 4.51, 'epoch': 0.65}
{'loss': 1.6878, 'grad_norm': 0.45703125, 'learning_rate': 2.3437500000000002e-06, 'epoch': 0.67}
{'loss': 1.6968, 'grad_norm': 0.484375, 'learning_rate': 1.5625e-06, 'epoch': 0.68}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.651187539100647, 'eval_runtime': 340.9535, 'eval_samples_per_second': 9.01, 'eval_steps_per_second': 4.505, 'epoch': 0.68}
{'loss': 1.7104, 'grad_norm': 0.40234375, 'learning_rate': 7.8125e-07, 'epoch': 0.69}
{'loss': 1.6912, 'grad_norm': 1.1328125, 'learning_rate': 0.0, 'epoch': 0.7}


  0%|          | 0/1536 [00:00<?, ?it/s]

{'eval_loss': 1.632299780845642, 'eval_runtime': 341.1944, 'eval_samples_per_second': 9.004, 'eval_steps_per_second': 4.502, 'epoch': 0.7}
{'train_runtime': 45259.3186, 'train_samples_per_second': 2.896, 'train_steps_per_second': 0.181, 'train_loss': 1.8079456202685833, 'epoch': 0.7}


TrainOutput(global_step=8192, training_loss=1.8079456202685833, metrics={'train_runtime': 45259.3186, 'train_samples_per_second': 2.896, 'train_steps_per_second': 0.181, 'train_loss': 1.8079456202685833, 'epoch': 0.7})

## Push to Hub

In [14]:
# Push tokenizer to hub: upload to the private repository named by hf_model_name
tokenizer.push_to_hub(
    repo_id = hf_model_name,
    private = True,
)

# Push model to hub: the trainer uploads using the Hub settings from its TrainingArguments
trainer.push_to_hub()

README.md:   0%|          | 0.00/2.99k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/robinsmits/BAdam-Qwen1.5-1.8B-Dutch-Chat-Sft/commit/ac75d3924593a631854344f6e3220485b7908572', commit_message='End of training', commit_description='', oid='ac75d3924593a631854344f6e3220485b7908572', pr_url=None, pr_revision=None, pr_num=None)