In [1]:
import os
import torch
from dotenv import load_dotenv
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_type=torch.float32
)

Lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    bias="none",
    task_type="CAUSAL_LM",
    lora_dropout=0.05,
    target_modules=["o_proj", "qkv_proj", "gate_up_proj", "down_proj"],
)

sft_config = SFTConfig(
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    gradient_accumulation_steps=1,
    per_device_train_batch_size=16,
    auto_find_batch_size=True,
    max_length=1024,
    packing=True,
    num_train_epochs=1,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=10,
    logging_dir="./logs",
    output_dir="./medifox-fine-tuned",
    report_to="none"
)

In [3]:
repo_id = "microsoft/Phi-3-mini-4k-instruct"

In [4]:
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map="cuda:0",
    # local_files_only = True,
    quantization_config=bnb_config,
)

print(model.get_memory_footprint()/1e6, "GB")
print(model)

Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.57s/it]


2206.341312 GB
Phi3ForCausalLM(
  (model): Phi3Model(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear4bit(in_features=3072, out_features=9216, bias=False)
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear4bit(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLUActivation()
        )
        (input_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
      )
    )
    (norm): Phi3RMSNorm((3072,), eps=1e-05)
    (rotary_emb): Phi3RotaryEmbedding()
  )
  (lm_head): Line

In [5]:
model_peft = prepare_model_for_kbit_training(model)

In [6]:
model_peft = get_peft_model(model_peft, Lora_config)

In [7]:
model_peft

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Phi3ForCausalLM(
      (model): Phi3Model(
        (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
        (layers): ModuleList(
          (0-31): 32 x Phi3DecoderLayer(
            (self_attn): Phi3Attention(
              (o_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (

In [8]:
dataset = load_dataset(
    "Malikeh1375/medical-question-answering-datasets",
    'all-processed',
    split="train",
    
)

In [9]:
print(dataset)
print(dataset[0])

Dataset({
    features: ['instruction', 'input', 'output', '__index_level_0__'],
    num_rows: 246678
})
{'instruction': "If you are a doctor, please answer the medical questions based on the patient's description.", 'input': 'Hey Just wondering.  I am a 39 year old female, pretty smallMy heart rate is around 97 to 106 at rest, and my BP is 140/90 and twice I get 175/118I did visit a doctor because I  didnt feel well past month or twoThen the doctor gave me a heart medicine to take the pulse down and BP  (its still in further examination.)But I wondering what it can be? Do I need the medicine really?  Is that bad ?', 'output': "hello and thank you for using chatbot. i carefully read your question and i understand your concern. i will try to explain you something and give you my opinion. we talk about hypertension if we have mean value that exceeds 140 / 90 mmhg. a person might have high value during emotional and physicals trees so it's mandatory to judge on mean values. usaly hyperten

In [10]:
dataset = dataset.remove_columns("__index_level_0__")
dataset = dataset.remove_columns("instruction")
# dataset = dataset.rename_column("instruction", "system")
# dataset = dataset.rename_column("input", "user")
# dataset = dataset.rename_column("output", "assistant")

In [11]:
dataset

Dataset({
    features: ['input', 'output'],
    num_rows: 246678
})

In [12]:
tokenizer = AutoTokenizer.from_pretrained(repo_id)
tokenizer.chat_template

"{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}"

In [13]:
# messages = [
#     {"role": "system", "content": dataset[0]["system"]},
#     {"role": "user", "content": dataset[0]["user"]},
#     {"role": "assistant", "content": dataset[0]["assistant"]},
# ]
# messages

messages = [
    {"role": "user", "content": dataset[0]["input"]},
    {"role": "assistant", "content": dataset[0]["output"]},
]

# Tokenizer

In [14]:
tokenizer = AutoTokenizer.from_pretrained(repo_id)
tokenizer.chat_template
tokenizer.apply_chat_template(messages, tokenize=False)

"<|user|>\nHey Just wondering.  I am a 39 year old female, pretty smallMy heart rate is around 97 to 106 at rest, and my BP is 140/90 and twice I get 175/118I did visit a doctor because I  didnt feel well past month or twoThen the doctor gave me a heart medicine to take the pulse down and BP  (its still in further examination.)But I wondering what it can be? Do I need the medicine really?  Is that bad ?<|end|>\n<|assistant|>\nhello and thank you for using chatbot. i carefully read your question and i understand your concern. i will try to explain you something and give you my opinion. we talk about hypertension if we have mean value that exceeds 140 / 90 mmhg. a person might have high value during emotional and physicals trees so it's mandatory to judge on mean values. usaly hypertension does not give any symptoms but left untreated he slowly modifies the heart. according to heart rhythm, the normal rate is between 50-100 beat for minute. when it exceeds 100 we talk about sinus tachyca

In [15]:
tokenizer.chat_template

"{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}"

In [16]:
def formatting_func(example):
    messages = [
        {"role": "user", "content": example["input"]},
        {"role": "assistant", "content": example["output"]},
    ]
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )

In [17]:
trainer = SFTTrainer(
    model=model_peft,
    processing_class=tokenizer,
    args=sft_config,
    train_dataset=dataset,
    formatting_func=formatting_func,
)

Padding-free training is enabled, but the attention implementation is not set to a supported flash attention variant. Padding-free training flattens batches into a single sequence, and only the following implementations are known to reliably support this: flash_attention_2, flash_attention_3, kernels-community/flash-attn, kernels-community/flash-attn3, kernels-community/vllm-flash-attn3. Using other implementations may lead to unexpected behavior. To ensure compatibility, set `attn_implementation` in the model configuration to one of these supported options or verify that your attention mechanism can handle flattened sequences.
You are using packing, but the attention implementation is not set to a supported flash attention variant. Packing gathers multiple samples into a single sequence, and only the following implementations are known to reliably support this: flash_attention_2, flash_attention_3, kernels-community/flash-attn, kernels-community/flash-attn3, kernels-community/vllm-fla

In [18]:
trainer.train()

Step,Training Loss
10,2.1837
20,2.0043
30,2.0108
40,1.9812
50,1.9613
60,1.8824
70,1.8711
80,1.9613
90,1.8749
100,1.9008


TrainOutput(global_step=7823, training_loss=1.75296197662295, metrics={'train_runtime': 99394.6045, 'train_samples_per_second': 0.63, 'train_steps_per_second': 0.079, 'total_flos': 1.4316326474661642e+18, 'train_loss': 1.75296197662295, 'entropy': 1.6590767701466878, 'num_tokens': 63900320.0, 'mean_token_accuracy': 0.6102907856305441, 'epoch': 1.0})

In [20]:
trainer.save_model("medical-phi3-mini-4k")

In [39]:
from dotenv import load_dotenv
from huggingface_hub import login

load_dotenv()
login(token=os.environ.get("HF_TOKEN"))

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [38]:
trainer.push_to_hub()

Processing Files (2 / 2): 100%|██████████| 50.4MB / 50.4MB, 2.09MB/s  
New Data Upload: 100%|██████████| 50.4MB / 50.4MB, 2.09MB/s  


CommitInfo(commit_url='https://huggingface.co/Peds/medifox-fine-tuned/commit/77fd0804c9bdcdd0f9a46c2a77415d3be4878bc3', commit_message='End of training', commit_description='', oid='77fd0804c9bdcdd0f9a46c2a77415d3be4878bc3', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Peds/medifox-fine-tuned', endpoint='https://huggingface.co', repo_type='model', repo_id='Peds/medifox-fine-tuned'), pr_revision=None, pr_num=None)