<a href="https://www.kaggle.com/code/ankitkumarcse67/finetune-phi3-with-lora?scriptVersionId=223932799" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
!pip install -qqq --upgrade bitsandbytes transformers peft accelerate datasets trl 

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m96.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m342.1/342.1 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.4/485.4 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.9/318.9 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
import torch
from transformers import AutoTokenizer,BitsAndBytesConfig,AutoModelForCausalLM,TrainingArguments,pipeline
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model,AutoPeftModelForCausalLM
from datasets import load_dataset
from trl import SFTTrainer,SFTConfig

In [7]:
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
tokenizer.pad_token = "<PAD>"
tokenizer.padding_side = "left"

In [22]:
def format_propmt(example):
    messages = []

    user = {"role" : "user","content" : example['input']}
    messages.append(user)

    assistant = {"role" : "assistant","content" : example['output']}
    messages.append(assistant)
    
    return {'text' : tokenizer.apply_chat_template(messages,tokenize=False)}
    

In [19]:
ds = load_dataset("medalpaca/medical_meadow_wikidoc_patient_information")

In [20]:
ds = ds.filter(lambda example: example['input'] is not None and example['output'] is not None)

In [23]:
dataset = ds.map(format_propmt)

In [24]:
dataset['train']['text'][0]

'<|user|>\nWhat are the symptoms of Allergy?<|end|>\n<|assistant|>\nAllergy symptoms vary, but may include:\nBreathing problems (coughing, shortness of breath) Burning, tearing, or itchy eyes Conjunctivitis (red, swollen eyes) Coughing Diarrhea Headache Hives Itching of the nose, mouth, throat, skin, or any other area Runny nose Skin rashes Stomach cramps Vomiting Wheezing\nWhat part of the body is contacted by the allergen plays a role in the symptoms you develop. For example:\nAllergens that are breathed in often cause a stuffy nose, itchy nose and throat, mucus production, cough, or wheezing. Allergens that touch the eyes may cause itchy, watery, red, swollen eyes. Eating something you are allergic to can cause nausea, vomiting, abdominal pain, cramping, diarrhea, or a severe, life-threatening reaction. Allergens that touch the skin can cause a skin rash, hives, itching, blisters, or even skin peeling. Drug allergies usually involve the whole body and can lead to a variety of sympto

In [25]:
dataset = dataset['train'].select(range(2_000)).train_test_split(test_size=0.1)

In [26]:
dataset

DatasetDict({
    train: Dataset({
        features: ['input', 'output', 'instruction', 'text'],
        num_rows: 1800
    })
    test: Dataset({
        features: ['input', 'output', 'instruction', 'text'],
        num_rows: 200
    })
})

In [27]:
bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                               bnb_4bit_quant_type="nf4",
                               bnb_4bit_compute_dtype="float16",
                                bnb_4bit_use_double_quant=True)

In [None]:
model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct",
                                             device_map = 'auto',
                                             quantization_config = bnb_config)

In [15]:
model.config.use_cache = False
model.config.pretraining_tp = 1

In [16]:
peft_config = LoraConfig(lora_alpha = 32,      # Controls the amount of change that is added to the original weights
                         lora_dropout = 0.1,
                         r = 64,               #  This is the rank of the compressed matrices
                         bias = 'none',
                         task_type="CAUSAL_LM",
                         target_modules= ['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"])

In [17]:
model = prepare_model_for_kbit_training(model)

In [18]:
model = get_peft_model(model,peft_config)

In [19]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Phi3ForCausalLM(
      (model): Phi3Model(
        (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
        (layers): ModuleList(
          (0-31): 32 x Phi3DecoderLayer(
            (self_attn): Phi3Attention(
              (o_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              

In [20]:
training_arguments = SFTConfig(
    output_dir = "results/",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=2,
    optim = "paged_adamw_32bit",
    learning_rate = 2e-4,
    num_train_epochs = 5,
    lr_scheduler_type="cosine",
    logging_steps=20,
    fp16=True,
    gradient_checkpointing=True,
    save_strategy="epoch",
    report_to = 'tensorboard'
)

In [21]:
trainer = SFTTrainer(
        model=model,
        train_dataset=dataset['train'],
        eval_dataset=dataset['test'],
        peft_config=peft_config,
        processing_class=tokenizer,
        args=training_arguments
)

Converting train dataset to ChatML:   0%|          | 0/1800 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/1800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1800 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/200 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [22]:
trainer.train()

Step,Training Loss
20,1.6249
40,1.3722
60,1.3505
80,1.327
100,1.2911
120,1.3073
140,1.2629
160,1.3265
180,1.235
200,1.3152


TrainOutput(global_step=1125, training_loss=1.1191874220106337, metrics={'train_runtime': 9256.4385, 'train_samples_per_second': 0.972, 'train_steps_per_second': 0.122, 'total_flos': 4.087274133771878e+16, 'train_loss': 1.1191874220106337})

In [23]:
trainer.model.save_pretrained("phi3_mini_finetune")

In [31]:
# del model
# del trainer
torch.cuda.empty_cache()

In [3]:
model = AutoPeftModelForCausalLM.from_pretrained(
 "phi3_mini_finetune",
 low_cpu_mem_usage=True,
 return_dict=True,
 trust_remote_code=True
 )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Phi3ForCausalLM(
      (model): Phi3Model(
        (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
        (embed_dropout): Dropout(p=0.0, inplace=False)
        (layers): ModuleList(
          (0-31): 32 x Phi3DecoderLayer(
            (self_attn): Phi3Attention(
              (o_proj): lora.Linear(
                (base_layer): Linear(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vecto

In [5]:
merged_model = model.merge_and_unload()

In [None]:
merged_model.save_pretrained('merged_model', trust_remote_code=True, safe_serialization=True)

In [8]:
tokenizer.save_pretrained("merged_model")

('merged_model/tokenizer_config.json',
 'merged_model/special_tokens_map.json',
 'merged_model/tokenizer.model',
 'merged_model/added_tokens.json',
 'merged_model/tokenizer.json')

In [9]:
del merged_model
del model

In [10]:
final_model = AutoModelForCausalLM.from_pretrained("/kaggle/working/merged_model")
final_tokenizer = AutoTokenizer.from_pretrained("/kaggle/working/merged_model")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [11]:
pipe = pipeline("text-generation",model=final_model,tokenizer=final_tokenizer)

Device set to use cuda:0


In [12]:
question = "what are major causes of brain tumor"
prompt = pipe.tokenizer.apply_chat_template([{'role':'user','content':question}],tokenize=False, add_generation_prompt=True)

In [16]:
outputs = pipe(prompt,max_new_tokens=256, do_sample=True, num_beams=1, temperature=0.3, top_k=50, top_p=0.95)

In [17]:
print(outputs)

[{'generated_text': '<|user|>\nwhat are major causes of brain tumor<|end|>\n<|assistant|>\n The exact cause of brain tumors is unknown. Some tumors are caused by exposure to radiation or certain chemicals. Some brain tumors are linked to genetic diseases, such as neurofibromatosis, tuberous sclerosis, and von Hippel-Lindau disease.\nIn children, brain tumors are most often found in the lower parts of the brain. In adults, they are most often found in the upper parts of the brain.'}]


In [18]:
print(outputs[0]['generated_text'][len(prompt):].strip())

The exact cause of brain tumors is unknown. Some tumors are caused by exposure to radiation or certain chemicals. Some brain tumors are linked to genetic diseases, such as neurofibromatosis, tuberous sclerosis, and von Hippel-Lindau disease.
In children, brain tumors are most often found in the lower parts of the brain. In adults, they are most often found in the upper parts of the brain.
