## Mount Google Drive

In [7]:
from google.colab import drive

# Mount Google Drive at /content/drive so files under /content/drive/MyDrive
# can be read like local files by later cells.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Install Dependencies

In [8]:
!pip install unsloth

## Import Libraries

In [9]:
import json
from datasets import Dataset # Dataset class used to transform our JSON data into a Hugging Face dataset
from unsloth import FastLanguageModel

## Load Base Model and Tokenizer


In [10]:
# Options for loading the base checkpoint. The checkpoint name carries the
# "bnb-4bit" suffix and 4-bit loading is requested explicitly.
load_options = dict(
    model_name="unsloth/Phi-3-mini-4k-instruct-bnb-4bit",
    max_seq_length=2048,
    dtype=None,  # NOTE(review): presumably lets Unsloth choose the dtype itself; confirm
    load_in_4bit=True,
)

# The loader hands back the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

==((====))==  Unsloth 2025.9.1: Fast Mistral patching. Transformers: 4.56.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


## Converting JSON Data into a Chat-Formatted Dataset Using the Tokenizer's Chat Template

In [18]:
# Read the raw records from the JSON file on the mounted Drive.
with open(
    "/content/drive/MyDrive/patient_admission.json",
    mode="r",
    encoding="utf-8",
) as f:
    data = json.loads(f.read())

# Wrap the parsed records in a Hugging Face Dataset object.
ds = Dataset.from_list(data)

def to_text(ex):
    """Render one prompt/response record as a single chat-formatted string.

    Args:
        ex: A mapping with a "prompt" entry and a "response" entry. A
            response that is not already a string is serialised to JSON
            (non-ASCII characters are kept as-is, not escaped).

    Returns:
        A dict with one key, "text", holding the result of passing a
        two-message conversation (user prompt, assistant response) through
        the module-level tokenizer's chat template, untokenized and without
        a trailing generation prompt.
    """
    answer = ex["response"]
    # Strings pass through untouched; anything else becomes a JSON string.
    answer_text = answer if isinstance(answer, str) else json.dumps(answer, ensure_ascii=False)

    conversation = [
        dict(role="user", content=ex["prompt"]),         # the prompt is the user turn
        dict(role="assistant", content=answer_text),     # the response is the assistant turn
    ]

    # Wrap the conversation in the model's own chat markup.
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Run every row through to_text. The original columns are dropped, so the
# mapped dataset keeps only the "text" column that to_text returns.
source_columns = ds.column_names
dataset = ds.map(to_text, remove_columns=source_columns)

# Show the first record before and after the chat template is applied.
print("=== BEFORE (raw JSON row) ===")
raw_example = ds[0]  # still has "prompt" and "response"
print(raw_example)

print("\n=== AFTER (chat template applied) ===")
templated_example = dataset[0]["text"]
print(templated_example)

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

=== BEFORE (raw JSON row) ===
{'prompt': 'With joint pain, Rohit, aged 24 was admitted on 5 Sept whose BP was at 140/70.', 'response': {'age': '24', 'bp': '140/70', 'name': 'Rohit', 'symptom': 'joint pain'}}

=== AFTER (chat template applied) ===
<|user|>
With joint pain, Rohit, aged 24 was admitted on 5 Sept whose BP was at 140/70.<|end|>
<|assistant|>
{"age": "24", "bp": "140/70", "name": "Rohit", "symptom": "joint pain"}<|end|>
<|endoftext|>


## Load PEFT (QLoRA) Model

In [20]:
# LoRA settings, taken from the default configuration on GitHub.
lora_rank = 64  # rank of the low-rank LoRA matrices

# Projection layers that receive LoRA adapters.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r=lora_rank,
    target_modules=attention_projections + mlp_projections,
    lora_alpha=2 * lora_rank,  # scaling factor, set to twice the rank (128)
    lora_dropout=0,  # dropout disabled on the LoRA layers
    bias="none",  # bias terms stay frozen; only the low-rank matrices are learned
    use_gradient_checkpointing="unsloth",  # Unsloth's checkpointing scheme: more compute, less GPU memory during backprop
)

Unsloth 2025.9.1 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.
