<a href="https://www.kaggle.com/code/loubl00m/llama-chatbot?scriptVersionId=194552821" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Loading modules

In [1]:
!pip install -U transformers datasets accelerate peft trl bitsandbytes wandb

Collecting transformers
  Downloading transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting accelerate
  Downloading accelerate-0.33.0-py3-none-any.whl.metadata (18 kB)
Collecting peft
  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)
Collecting trl
  Downloading trl-0.9.6-py3-none-any.whl.metadata (12 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting wandb
  Downloading wandb-0.17.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.8.10-py3-none-any.whl.metadata (8.4 kB)
Collecting docstring-parser>=0.16 (from tyro>=0.5.11->trl)
  Downloading docstring_parser-0.16-py3-none-any.whl.metadata (3.

In [2]:
import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import wandb
from huggingface_hub import login, Repository, create_repo
from trl import SFTConfig, SFTTrainer
from kaggle_secrets import UserSecretsClient

2024-08-29 11:52:10.607982: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-29 11:52:10.608097: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-29 11:52:10.737998: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# Authenticate with Weights & Biases using the key stored in Kaggle secrets,
# so the credential never appears in the notebook source.
user_secrets = UserSecretsClient()
wb_key = user_secrets.get_secret("wandb_key")
wandb.login(key=wb_key)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
# Authenticate with the Hugging Face Hub using the token stored in Kaggle secrets.
hf_token = user_secrets.get_secret("hf_token")
login(
    token=hf_token,
    add_to_git_credential=True,
)

Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m
Token has not been saved to git credential helper.
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Loading the model

In [5]:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# QLoRA config: 4-bit NF4 weights with double quantization; matmuls are
# computed in the dtype selected for this GPU (`torch_dtype`).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
)

# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "up_proj",
    "down_proj",
    "gate_proj",
    "k_proj",
    "q_proj",
    "v_proj",
    "o_proj",
]

# LoRA config
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [7]:
base_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load the 4-bit quantized model.
# `torch_dtype` is passed explicitly so the non-quantized weights use the same
# half-precision dtype as the bitsandbytes compute dtype chosen for this GPU
# (FlashAttention-2 only supports fp16/bf16 inputs).
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch_dtype,
    device_map="auto",
    attn_implementation=attn_implementation,
)

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [8]:
# Prepare the quantized model for k-bit training; gradient checkpointing is
# requested explicitly here (it is also the function's default).
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# Preparing the dataset

The dataset was shuffled, split into training and test files, and converted to the required format in advance.

In [9]:
# JSONL files from the attached Kaggle dataset; the test file is used for validation.
train_path = "/kaggle/input/student-assistance/train.jsonl"
val_path = "/kaggle/input/student-assistance/test.jsonl"

# A single-file JSON dataset exposes its rows under the "train" split,
# so both files are read with split="train".
train_set, val_set = (
    load_dataset("json", data_files=jsonl_path, split="train")
    for jsonl_path in (train_path, val_path)
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

# Model training

In [10]:
# Reuse the end-of-sequence token as the padding token.
pad_token_map = {"pad_token": tokenizer.eos_token}
tokenizer.add_special_tokens(pad_token_map)

0

In [11]:
# Hyperparameters a reader is most likely to tune.
lr = 1e-5
batch_size = 8
max_seq_length = 512
epochs = 10
seed = 42
repo_name = "llama-edu"

# Derive the mixed-precision mode from the dtype the model was configured with
# (`torch_dtype`), rather than probing the GPU a second time with a different
# check, so the trainer and the quantization compute dtype always agree.
use_bf16 = torch_dtype == torch.bfloat16

training_args = SFTConfig(
    # Batching: effective train batch = batch_size * gradient_accumulation_steps.
    per_device_train_batch_size = batch_size,
    per_device_eval_batch_size = batch_size,
    gradient_accumulation_steps = 2,
    # Schedule and optimizer.
    num_train_epochs = epochs,
    learning_rate = lr,
    warmup_steps = 20,
    lr_scheduler_type = "linear",
    optim = "adamw_8bit",
    weight_decay = 0.01,
    # Mixed precision: exactly one of fp16 / bf16 is enabled.
    fp16 = not use_bf16,
    bf16 = use_bf16,
    # Evaluation and logging cadence.
    eval_strategy = "steps",
    eval_steps = 20,
    logging_strategy = "steps",
    logging_steps = 1,
    # Dataset handling (SFT-specific options belong in SFTConfig).
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    # Bookkeeping.
    seed = seed,
    run_name = "llama_finetuning",
    output_dir = repo_name,
)

# The trainer wraps the quantized base model with the LoRA adapters described
# by `peft_config`.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_set,
    eval_dataset = val_set,
    args = training_args,
    peft_config = peft_config,
)

  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/310 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

In [12]:
# Baseline: evaluation metrics before any fine-tuning step has run.
baseline_metrics = trainer.evaluate()
baseline_metrics

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


[34m[1mwandb[0m: Currently logged in as: [33mlouay-yahyaoui[0m ([33mlouay-yahyaoui-insat[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.17.8
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20240829_115510-mppzyfpl[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mllama_finetuning[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/louay-yahyaoui-insat/huggingface[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/louay-yahyaoui-insat/huggingface/runs/mppzyfpl[0m


{'eval_loss': 2.886655807495117,
 'eval_model_preparation_time': 0.0161,
 'eval_runtime': 47.5261,
 'eval_samples_per_second': 2.104,
 'eval_steps_per_second': 0.274}

In [13]:
# Run the fine-tuning loop and show the resulting training summary.
train_result = trainer.train()
train_result

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss,Model Preparation Time
20,2.2418,2.056357,0.0161
40,1.2511,1.200573,0.0161
60,0.9906,0.955159,0.0161
80,0.8046,0.8716,0.0161
100,0.7893,0.837618,0.0161
120,0.8012,0.816706,0.0161
140,0.7734,0.803767,0.0161
160,0.78,0.796952,0.0161
180,0.7576,0.793896,0.0161


TrainOutput(global_step=190, training_loss=1.1031482912992177, metrics={'train_runtime': 4447.651, 'train_samples_per_second': 0.697, 'train_steps_per_second': 0.043, 'total_flos': 2.4531002935443456e+16, 'train_loss': 1.1031482912992177, 'epoch': 9.743589743589745})

In [14]:
# Evaluation metrics after fine-tuning, for comparison with the baseline above.
final_metrics = trainer.evaluate()
final_metrics

{'eval_loss': 0.7933565378189087,
 'eval_model_preparation_time': 0.0161,
 'eval_runtime': 46.474,
 'eval_samples_per_second': 2.152,
 'eval_steps_per_second': 0.28,
 'epoch': 9.743589743589745}

In [15]:
# Upload the trained adapter and training artifacts to the Hugging Face Hub.
trainer.push_to_hub(commit_message="add: llama for education")

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

events.out.tfevents.1724937021.98b9c91667d9.24.1:   0%|          | 0.00/425 [00:00<?, ?B/s]

events.out.tfevents.1724932510.98b9c91667d9.24.0:   0%|          | 0.00/49.2k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/LouayYahyaoui/llama-edu/commit/523c87a06ea456857d0509100c7d88ed443c48c4', commit_message='add: llama for education', commit_description='', oid='523c87a06ea456857d0509100c7d88ed443c48c4', pr_url=None, pr_revision=None, pr_num=None)