In [1]:
!pip install accelerate transformers torch bitsandbytes datasets peft trl wandb



In [2]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb, getpass
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [4]:
from huggingface_hub import login
import getpass
hf_token = getpass.getpass("Enter your Hugging Face token: ")

login(token = hf_token)
wb_token = getpass.getpass("Enter your wandb token: ")
wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune Llama 3 8B on Medical Dataset', 
    job_type="training", 
    anonymous="allow"
)


Enter your Hugging Face token:  ········
Enter your wandb token:  ········


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmuneebzain484[0m ([33mmuneebzain484-university-of-the-punjab[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [5]:
dataset_name = "ruslanmv/ai-medical-chatbot"
new_model = "llama-3-8b-chat-doctor"
base_model='meta-llama/Llama-3.2-3B-Instruct'

In [6]:
torch_dtype = torch.float16
attn_implementation = "eager"

In [7]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation,
    local_files_only=False
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [9]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)

In [10]:
#Importing the dataset
dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=65).select(range(1000)) # Only use 1000 samples for quick demo

def format_chat_template(row):
    row_json = [{"role": "user", "content": row["Patient"]},
               {"role": "assistant", "content": row["Doctor"]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc=4,
)

dataset['text'][3]

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 17 Jan 2025\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nFell on sidewalk face first about 8 hrs ago. Swollen, cut lip bruised and cut knee, and hurt pride initially. Now have muscle and shoulder pain, stiff jaw(think this is from the really swollen lip),pain in wrist, and headache. I assume this is all normal but are there specific things I should look for or will I just be in pain for a while given the hard fall?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello and welcome to HCM,The injuries caused on various body parts have to be managed.The cut and swollen lip has to be managed by sterile dressing.The body pains, pain on injured site and jaw pain should be managed by pain killer and muscle relaxant.I suggest you to consult your primary healthcare provider for clinical assessment.In case there is evidence of infection in any of the injured si

In [11]:
dataset = dataset.train_test_split(test_size=0.1)

In [12]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)



In [14]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=training_arguments,
)

  trainer = SFTTrainer(


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [17]:
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

1

In [18]:
trainer.train()

Step,Training Loss,Validation Loss
90,4.5347,2.434954
180,5.095,2.410472
270,4.3104,2.377518
360,5.6771,2.358709
450,3.607,2.348392


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=450, training_loss=4.897308553589715, metrics={'train_runtime': 891.2229, 'train_samples_per_second': 1.01, 'train_steps_per_second': 0.505, 'total_flos': 3941762267289600.0, 'train_loss': 4.897308553589715, 'epoch': 1.0})

In [19]:
wandb.finish()
model.config.use_cache = True

0,1
eval/loss,█▆▃▂▁
eval/runtime,▁█▄▇▄
eval/samples_per_second,█▁▅▁▅
eval/steps_per_second,█▁▅▁▅
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇████
train/grad_norm,▄▂▃▄▂▅▃▄▄▄▂▂▃▃▃█▆▂▂▄▃▁▂▂▂▃▃▂▂▄▃▃▃▂▂▄▄▂▃▄
train/learning_rate,▅▅▆███▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▆▄▄▄▄▃▂▄▃▅▂▅▄▃▄▄▅▅▆▃▄▁▂▆▅▅▁▂▃▄▂▃▄▁▃▂▄▄▄

0,1
eval/loss,2.34839
eval/runtime,38.0165
eval/samples_per_second,2.63
eval/steps_per_second,2.63
total_flos,3941762267289600.0
train/epoch,1.0
train/global_step,450.0
train/grad_norm,3.30848
train/learning_rate,0.0
train/loss,3.607


In [23]:
messages = [
    {
        "role": "user",
        "content": "Hello doctor, I have bad acne. How do I get rid of it?"
    }
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, 
                                       add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors='pt', padding=True, 
                   truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=150, 
                         num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




Hi,Thanks for writing to us. I can understand your concern. I suggest you to follow the following treatment:1. Do not touch or squeeze the acne lesions. It will lead to further inflammation and scarring.2. Use a gentle cleanser and lukewarm water to clean your face.3. Apply a non-comedogenic moisturizer after cleaning your face.4. Use a spot treatment for the acne lesions.5. Avoid using harsh soaps and face washes
