# Llama-3 8B Healthcare Assistant (QLoRA Fine-Tuning)

## Libraries

In [1]:
!pip install -q -U bitsandbytes transformers peft accelerate datasets trl tqdm

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
beatrix-jupyterlab 2024.66.154055 requires jupyterlab~=3.6.0, but you have jupyterlab 4.2.4 which is incompatible.
conda 24.7.1 requires packaging>=23.0, but you have packaging 21.3 which is incompatible.
momepy 0.7.2 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.
spopt 0.6.1 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.
ydata-profiling 4.9.0 requires scipy<1.14,>=1.4.1, but you have scipy 1.14.0 which is incompatible.[0m[31m
[0m

In [2]:
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login
import wandb

# Fetch the API credentials from Kaggle's secret store instead of hard-coding them.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_TOKEN")
wandb_token = user_secrets.get_secret("WANDB_TOKEN")

# Authenticate against the Hugging Face Hub.
login(token=hf_token)

# Authenticate against Weights & Biases, then open the run that the
# training logs are reported to.
wandb.login(key=wandb_token)
wandb.init(
    project="llama3-healthcare-assistant",
    name='llama3-8b',
)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mbuitanphuong10c13[0m ([33mbtp712[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
import os
from dataclasses import dataclass, field
from typing import Optional

In [4]:
import torch
from datasets import load_dataset, load_from_disk
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model
)

from trl import SFTTrainer, setup_chat_format

In [5]:
from tqdm.notebook import tqdm

## Load model & tokenizer

In [6]:
# Attention backend passed to `from_pretrained`.
attn_implementation = "eager"
# Hub id of the base checkpoint that is fine-tuned.
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
# Name of the fine-tuned model; also used as the training output directory.
finetuned_model_name = "Llama-3-8b-healthcare-assistant"

**QLoRA parameters**

In [7]:
lora_r = 16          # LoRA attention dimension (rank of the update matrices)
lora_alpha = 32      # LoRA scaling factor
lora_dropout = 0.05  # dropout probability applied in the LoRA layers

**`bitsandbytes` parameters**

In [8]:
# Load the base model weights in 4-bit precision.
use_4bit = True
# 4-bit quantization data type.
bnb_4bit_quant_type = "nf4"
# Nested (double) quantization flag.
use_nested_quant = True
# dtype used for computation on top of the 4-bit weights.
bnb_4bit_compute_dtype = torch.bfloat16

In [9]:
# Bundle the quantization flags defined above into the config object that is
# handed to `from_pretrained`.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_use_double_quant=use_nested_quant,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=bnb_4bit_compute_dtype,
)

In [10]:
# Load the base checkpoint, quantized according to `bnb_config`;
# device_map="auto" leaves layer placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    attn_implementation=attn_implementation,
    device_map="auto",
    quantization_config=bnb_config,
)

# Tokenizer belonging to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [11]:
# Switch the model and tokenizer to TRL's chat format. The model is prepared
# for k-bit training once, in the Train section below, so that call is not
# repeated here.
# NOTE(review): presumably this adds chat special tokens and resizes the
# embeddings; the LoRA config does not target the embedding layers, so confirm
# that the added tokens are actually being learned.
model, tokenizer = setup_chat_format(model, tokenizer)

## Prepare Dataset

In [12]:
# Download the counseling conversations and shuffle them with a fixed seed so
# the example order is reproducible.
dataset = load_dataset("Amod/mental_health_counseling_conversations").shuffle(seed=65)

README.md:   0%|          | 0.00/2.82k [00:00<?, ?B/s]

combined_dataset.json:   0%|          | 0.00/4.79M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3512 [00:00<?, ? examples/s]

In [13]:
# Display the dataset's splits, columns and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['Context', 'Response'],
        num_rows: 3512
    })
})

In [14]:
def format_chat_template(row):
    """Render one Context/Response pair as a single chat-formatted string.

    The row's 'Context' becomes the user turn and its 'Response' the assistant
    turn. The conversation is rendered (not tokenized) with the notebook-level
    `tokenizer`'s chat template and stored under the new 'text' key.

    Args:
        row: mapping with 'Context' and 'Response' entries.

    Returns:
        The same `row`, with 'text' added.
    """
    messages = [
        {'role': 'user', 'content': row['Context']},
        {'role': 'assistant', 'content': row['Response']},
    ]
    row['text'] = tokenizer.apply_chat_template(messages, tokenize=False)
    return row

In [15]:
# Apply format_chat_template to every example, adding the chat-formatted 'text' column.
dataset = dataset.map(format_chat_template)

Map:   0%|          | 0/3512 [00:00<?, ? examples/s]

In [16]:
# Keep only the 'train' split and hold out 10% of it for evaluation.
# The fixed seed makes the train/test partition reproducible across kernel
# restarts (it matches the seed used for the shuffle above).
dataset = dataset['train'].train_test_split(test_size=0.1, seed=65)

In [17]:
# Display the train/test splits with their columns and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['Context', 'Response', 'text'],
        num_rows: 3160
    })
    test: Dataset({
        features: ['Context', 'Response', 'text'],
        num_rows: 352
    })
})

## Train

In [18]:
# Prepare the quantized base model for k-bit training before it is handed to the trainer.
model = prepare_model_for_kbit_training(model)

In [19]:
# LoRA adapter configuration. Adapters are attached to every attention
# projection (q, k, v, o) and every MLP projection (gate, up, down).
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
    # Each projection is listed exactly once; 'q_proj' must be present for the
    # query projection to receive an adapter.
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj',
                    'gate_proj', 'up_proj', 'down_proj']
)

In [22]:
training_arguments = TrainingArguments(
    # Where checkpoints and outputs are written.
    output_dir=finetuned_model_name,
    # Schedule and optimisation.
    num_train_epochs=3,
    learning_rate=2e-4,
    warmup_steps=10,
    optim="paged_adamw_32bit",
    # Batching: 4 samples per device, gradients accumulated over 2 steps.
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    group_by_length=True,
    # Mixed precision is left disabled.
    fp16=False,
    bf16=False,
    # Evaluation: a float below 1 is read as a fraction of the total training steps.
    eval_strategy="steps",
    eval_steps=0.2,
    # Logging: every step, reported to Weights & Biases.
    logging_strategy="steps",
    logging_steps=1,
    report_to='wandb',
    run_name='llama3-8b',
)

In [23]:
# Supervised fine-tuning trainer: it receives the LoRA config and trains on the
# pre-rendered "text" column, with packing turned off.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    dataset_text_field="text",
    packing=False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/3160 [00:00<?, ? examples/s]

Map:   0%|          | 0/352 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning; loss is logged every step and evaluation runs at the configured interval.
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
