<a href="https://colab.research.google.com/github/nyp-sit/iti107-2024S2/blob/main/Finetune_LLAMA3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install -U wandb

In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [2]:
# Interactive Hugging Face Hub login (shows a login widget in the notebook).
import huggingface_hub

huggingface_hub.notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# Authenticate with Weights & Biases without embedding the API key in the notebook.
# The key is read from the WANDB_API_KEY environment variable; when it is unset,
# `key=None` lets wandb fall back to its own credential lookup / interactive prompt.
# NOTE(review): the API key that used to be hardcoded on this line has been
# published with the notebook and should be revoked in the W&B account settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Start the run that training metrics are reported to (see report_to="wandb").
run = wandb.init(
    project='llama3',
    job_type="training",
    anonymous="allow"
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmarkk[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc


In [4]:
# Identifiers shared by the rest of the notebook.
base_model = "meta-llama/Llama-3.1-8B-Instruct"  # checkpoint that gets fine-tuned
dataset_name = "ruslanmv/ai-medical-chatbot"     # dataset with Patient / Doctor columns
new_model = "llama-3.1-8b-mkk-doctor"            # output dir and Hub name for the adapter

In [5]:
# Compute dtype for the quantized layers and the attention backend to request.
torch_dtype = torch.float16
attn_implementation = "eager"

# 4-bit NF4 quantization with double quantization; matmuls run in `torch_dtype`.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
)

# Load the base checkpoint with the quantization config applied;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    attn_implementation=attn_implementation,
    device_map="auto",
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
# Tokenizer that matches the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Apply trl's chat format; the call hands back the updated model and tokenizer,
# which replace the previous objects.
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

In [8]:
# Display the tokenizer's current chat template to confirm which format is active.
tokenizer.get_chat_template()

"{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"

In [7]:
# LoRA adapter settings: rank 16, alpha 32, applied to the attention
# projections (q/k/v/o) and the MLP projections (gate/up/down).
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# Wrap the loaded model with the LoRA adapters.
model = get_peft_model(model, peft_config)

In [8]:
# Load every split of the dataset as a single Dataset.
dataset = load_dataset(dataset_name, split="all")

# Shuffle with a fixed seed, then keep the first 1000 rows for a quick demo.
dataset = dataset.shuffle(seed=65)
dataset = dataset.select(range(1000))

def format_chat_template(row):
    """Add a ``text`` field containing the row rendered with the chat template.

    The ``Patient`` value becomes the user turn and the ``Doctor`` value the
    assistant turn. The rendered string (``tokenize=False``, so not token ids)
    is stored under ``row["text"]`` and the same row object is returned.
    """
    user_turn = {"role": "user", "content": row["Patient"]}
    assistant_turn = {"role": "assistant", "content": row["Doctor"]}
    row["text"] = tokenizer.apply_chat_template(
        [user_turn, assistant_turn],
        tokenize=False,
    )
    return row

# Render every example with the chat template, using 4 worker processes.
dataset = dataset.map(format_chat_template, num_proc=4)

# Show one rendered example as a sanity check.
dataset["text"][3]

'<|im_start|>user\nFell on sidewalk face first about 8 hrs ago. Swollen, cut lip bruised and cut knee, and hurt pride initially. Now have muscle and shoulder pain, stiff jaw(think this is from the really swollen lip),pain in wrist, and headache. I assume this is all normal but are there specific things I should look for or will I just be in pain for a while given the hard fall?<|im_end|>\n<|im_start|>assistant\nHello and welcome to HCM,The injuries caused on various body parts have to be managed.The cut and swollen lip has to be managed by sterile dressing.The body pains, pain on injured site and jaw pain should be managed by pain killer and muscle relaxant.I suggest you to consult your primary healthcare provider for clinical assessment.In case there is evidence of infection in any of the injured sites, a course of antibiotics may have to be started to control the infection.Thanks and take careDr Shailja P Wahal<|im_end|>\n'

In [9]:
# Hold out 10% of the examples for evaluation. The split is seeded so that a
# fresh "Restart & Run All" produces the same train/test partition every time
# (the unseeded call reshuffled differently on each run).
dataset = dataset.train_test_split(test_size=0.1, seed=65)

In [10]:
# Hyper-parameters for the fine-tuning run.
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=5,
    per_device_eval_batch_size=5,
    # Two accumulation steps give an effective batch of 10 sequences per device.
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    # `eval_strategy` is the current name of this option; the former
    # `evaluation_strategy` spelling is deprecated in transformers.
    eval_strategy="steps",
    # A float below 1 is a fraction of the total training steps,
    # so evaluation runs five times over the run.
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    # Trainer-level mixed precision is switched off for both half-precision modes.
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)



In [11]:
# Supervised fine-tuning trainer over the pre-rendered "text" column.
# Each example is handled on its own (packing disabled) with a 512-token
# maximum sequence length.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [12]:
# Run the fine-tuning loop configured by `training_arguments`; the returned
# TrainOutput is displayed as the cell result.
trainer.train()



Step,Training Loss,Validation Loss
18,2.3849,2.583171
36,2.6766,2.536918
54,2.3141,2.523287
72,2.556,2.508971
90,2.6892,2.500993




TrainOutput(global_step=90, training_loss=2.5662101030349733, metrics={'train_runtime': 217.7877, 'train_samples_per_second': 4.132, 'train_steps_per_second': 0.413, 'total_flos': 9496611293798400.0, 'train_loss': 2.5662101030349733, 'epoch': 1.0})

In [13]:
# Close the Weights & Biases run started earlier in the notebook.
wandb.finish()
# Set use_cache on the model config before the generation cells that follow.
model.config.use_cache = True

VBox(children=(Label(value='0.018 MB of 0.018 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▄▃▂▁
eval/runtime,▁▅▇▇█
eval/samples_per_second,█▄▂▂▁
eval/steps_per_second,█▄▂▁▁
train/epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇████
train/grad_norm,▆█▆▆▇▆▅▄▅▄▁▁▂▂▂▂▃▃▃▃▃▆▂▂▂▂▃▃▃▃▆▁▁▂▂▂▂▃▃▅
train/learning_rate,▃▅▅▆▇████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁
train/loss,█▄▃▃▂▂▃▄▄▃▅▄▄▃▃▄▂▂▄▄▄▁▃▄▂▂▄▂▅▄▃▅▃▁▂▄▃▂▂▅

0,1
eval/loss,2.50099
eval/runtime,10.854
eval/samples_per_second,9.213
eval/steps_per_second,1.843
total_flos,9496611293798400.0
train/epoch,1.0
train/global_step,90.0
train/grad_norm,0.92514
train/learning_rate,0.0
train/loss,2.6892


In [17]:
# Quick qualitative check of the fine-tuned model on a single user message.
messages = [
    {
        "role": "user",
        "content": "Hello doctor, my knee has been painful for weeks and I can't squat at all. "
    }
]

# Render the conversation and append the assistant header so the model answers.
prompt = tokenizer.apply_chat_template(messages, tokenize=False,
                                       add_generation_prompt=True)

# Move the inputs to the device the model is on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt', padding=True,
                   truncation=True).to(model.device)

# max_length caps the total length, prompt tokens included.
outputs = model.generate(**inputs, max_length=150,
                         num_return_sequences=1)

# Decode only the tokens produced after the prompt. Splitting the full decoded
# text on the word "assistant" breaks as soon as the question or the answer
# itself contains that word; slicing by prompt length does not.
prompt_length = inputs["input_ids"].shape[1]
text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(text)


Hello,Thanks for asking to HCMI would suggest you to take X-ray of your knee joint and get it evaluated by an orthopedic surgeon. Hope I have answered your query. Let me know if I can assist you further.Regards,Dr. Arunachalam D.Orthopedic Surgeon, Chennai, India.  For more information consult an orthopedic surgeon online --> https://www.drugs.com/doctor/orthopedic-surgeon-consultants.html --> https://www.drugs.com/consultant/Dr-Arunachalam-D-Orthopedic-Surgeon-CH


In [19]:
# Save the trained adapter locally, then publish it to the Hub under the same name.
adapter_model = trainer.model
adapter_model.save_pretrained(new_model)
adapter_model.push_to_hub(new_model, use_temp_dir=False)



adapter_model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/khengkok/llama-3.1-8b-mkk-doctor/commit/a367fa2e0d4cee0eada578d16cca7a463bf530e0', commit_message='Upload model', commit_description='', oid='a367fa2e0d4cee0eada578d16cca7a463bf530e0', pr_url=None, repo_url=RepoUrl('https://huggingface.co/khengkok/llama-3.1-8b-mkk-doctor', endpoint='https://huggingface.co', repo_type='model', repo_id='khengkok/llama-3.1-8b-mkk-doctor'), pr_revision=None, pr_num=None)

## Merging the base model with the adapter

In [20]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch
from trl import setup_chat_format
# Reload the tokenizer and the base model in float16 (no 4-bit quantization
# config this time) so the adapter weights can be merged into it.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# trust_remote_code is deliberately not enabled: the same checkpoint loads
# without it earlier in this notebook, so the flag would only grant the
# repository permission to run its own Python code on this machine.
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Apply the same chat format that was applied before training, so the reloaded
# model and tokenizer match the ones the adapter was trained with.
base_model_reload, tokenizer = setup_chat_format(base_model_reload, tokenizer)

# Attach the adapter saved under `new_model` to the reloaded base model.
model = PeftModel.from_pretrained(base_model_reload, new_model)

# Fold the adapter weights into the base weights and drop the PEFT wrapper.
model = model.merge_and_unload()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [22]:
# Single-turn prompt for the merged model.
messages = [
    {
        "role": "user",
        "content": "Hello doctor, I have bad acne. How do I get rid of it?",
    }
]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Text-generation pipeline around the already-loaded model and tokenizer.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Sampling settings for this generation call.
sampling_kwargs = dict(
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
)
outputs = pipe(prompt, **sampling_kwargs)
print(outputs[0]["generated_text"])

<|im_start|>user
Hello doctor, I have bad acne. How do I get rid of it?<|im_end|>
<|im_start|>assistant
Hi, Welcome to HCM! I have gone through your query. For treating acne you need to do the following: Clean your face properly with a face wash. Use a scrub once or twice in a week. Use a moisturizer. You should consult a dermatologist for proper treatment. For more information consult a dermatologist online --> https://www.hcm.com/consult/doctors/dermatologist?utm_source=healthcaremagic&utm_medium=referral&utm_campaign=dermatologist Regards, Dr. Praveen Kulkarni, Dermatologist, Indore. For more information consult a dermatologist online --> https://www.hcm.com/consult/doctors/dermatologist?utm_source=healthcaremagic&utm_medium=referral&utm_campaign=dermatologist Regards, Dr. Praveen Kulkarni, Dermatologist, Indore. For more information consult a dermatologist online --> https://www.hcm.com/consult/doctors/dermatologist?utm_source=healthcaremagic&utm_medium=referral&utm_campaign=dermat

In [23]:
# Write the merged model and its tokenizer into the same local directory.
merged_dir = "llama-3-8b-chat-doctor"
model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)

('llama-3-8b-chat-doctor/tokenizer_config.json',
 'llama-3-8b-chat-doctor/special_tokens_map.json',
 'llama-3-8b-chat-doctor/tokenizer.json')

In [24]:
# Publish the merged model and its tokenizer to the same Hub repository.
merged_repo = "llama-3-8b-chat-doctor"
model.push_to_hub(merged_repo, use_temp_dir=False)
tokenizer.push_to_hub(merged_repo, use_temp_dir=False)

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/khengkok/llama-3-8b-chat-doctor/commit/63078d4bf934dada3251799cf9806690f3d8de54', commit_message='Upload tokenizer', commit_description='', oid='63078d4bf934dada3251799cf9806690f3d8de54', pr_url=None, repo_url=RepoUrl('https://huggingface.co/khengkok/llama-3-8b-chat-doctor', endpoint='https://huggingface.co', repo_type='model', repo_id='khengkok/llama-3-8b-chat-doctor'), pr_revision=None, pr_num=None)

## Upload the GGUF file to the Hugging Face Hub

In [29]:
import os
from getpass import getpass

from huggingface_hub import login
from huggingface_hub import HfApi

# Never embed the access token in the notebook: read it from the HF_TOKEN
# environment variable, or prompt for it when the variable is not set.
# NOTE(review): the token that used to be hardcoded on this line has been
# published with the notebook and should be revoked in the Hugging Face
# account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

# Local GGUF file to upload; it is stored in the repo under its own file name.
gguf_path = "/home/ubuntu/git/llama.cpp/llama-3-1-8b-chat-doctor-Q4_K_M.gguf"

api = HfApi()
api.upload_file(
    path_or_fileobj=gguf_path,
    path_in_repo=os.path.basename(gguf_path),
    repo_id="khengkok/llama-3-1-8b-chat-doctor",
    repo_type="model",
)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /home/ubuntu/.cache/huggingface/token
Login successful


llama-3-1-8b-chat-doctor-Q4_K_M.gguf:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/khengkok/llama-3-1-8b-chat-doctor/commit/37b47059108c6085a6c09101d7d2294e51922f62', commit_message='Upload llama-3-1-8b-chat-doctor-Q4_K_M with huggingface_hub', commit_description='', oid='37b47059108c6085a6c09101d7d2294e51922f62', pr_url=None, repo_url=RepoUrl('https://huggingface.co/khengkok/llama-3-1-8b-chat-doctor', endpoint='https://huggingface.co', repo_type='model', repo_id='khengkok/llama-3-1-8b-chat-doctor'), pr_revision=None, pr_num=None)