In [None]:
!pip install --upgrade unsloth kaggle wandb transformers accelerate torch bitsandbytes
!pip install torch --extra-index-url https://download.pytorch.org/whl/cu118

!pip install bitsandbytes

!pip install --upgrade unsloth kaggle wandb transformers accelerate torch bitsandbytes

Collecting unsloth
  Downloading unsloth-2025.3.18-py3-none-any.whl.metadata (46 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting transformers
  Downloading transformers-4.50.0-py3-none-any.whl.metadata (39 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting unsloth_zoo>=2025.3.14 (from unsloth)
  Downloading unsloth_zoo-2025.3.16-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.17-py3-none-any.whl.metadata (9.5 kB)
Collecting datasets>=2.16.0 (from unsloth)
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting trl!=0.1

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118


In [None]:
import os
from getpass import getpass

from huggingface_hub import login
import wandb

# Secrets are read from the environment (or prompted for interactively) so they never
# end up in the notebook source or its history. Any token that was ever committed in
# this cell must be treated as compromised: revoke it and generate a new one.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
wb_token = os.environ.get("WANDB_API_KEY") or getpass("Weights & Biases API key: ")

login(token=hf_token)

wandb.login(key=wb_token)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mwillpowertostudy[0m ([33mwillpowertostudy-dayananda-sagar-institutions[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
import os
import torch
from datasets import load_dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments, AutoTokenizer

# --- Environment report: say which GPU (if any) the run will use -------------------
print("Checking CUDA availability...")
if not torch.cuda.is_available():
    print("No GPU detected! Check your Colab settings.")
else:
    print(f"CUDA Available: {torch.cuda.is_available()}")
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"PyTorch CUDA Version: {torch.version.cuda}")

# --- Base model: 4-bit Gemma 2 2B with a 2048-token context ------------------------
print("Loading model and tokenizer...")
base_model_options = {
    "model_name": "google/gemma-2-2b",
    "max_seq_length": 2048,
    "dtype": None,  # presumably lets the loader pick the dtype — confirm in Unsloth docs
    "load_in_4bit": True,
    "device_map": "auto",
}
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# Guard against a loader that returned something without a `config` attribute.
model_has_config = hasattr(model, 'config')
if not model_has_config:
    raise ValueError("Model not properly loaded - missing config attribute")

print("Loading and preparing dataset...")

# Training prompt template with two positional `{}` slots, filled in by
# `formatting_prompts_func`: the first receives the question, the second the
# chain-of-thought text that directly follows the opening `<think>` tag.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>{}"""

# End-of-sequence marker taken from the tokenizer; appended to each training example.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Build the training text for a batch of dataset rows.

    Args:
        examples: Batched mapping (as passed by ``dataset.map(..., batched=True)``)
            with parallel ``"Question"``, ``"Complex_CoT"`` and ``"Response"`` lists.

    Returns:
        dict: ``{"text": [...]}`` holding one string per row, laid out as
        ``<prompt with question><think>{cot}\\n</think>\\n{response}{EOS_TOKEN}``.
    """
    texts = []
    # `question` rather than `input`, so the builtin is not shadowed.
    for question, cot, answer in zip(
        examples["Question"], examples["Complex_CoT"], examples["Response"]
    ):
        # Close the reasoning block and start the final answer on its own line.
        # Without a delimiter the chain of thought runs straight into the answer,
        # so the model never sees where `<think>` ends.
        texts.append(
            prompt_style.format(question, cot) + "\n</think>\n" + answer + EOS_TOKEN
        )
    return {"text": texts}

# Load the first 500 rows of the English training split and add the formatted "text"
# column. `trust_remote_code` is deliberately not passed: the run log shows the data
# arriving as a plain JSON file (medical_o1_sft.json), so there is no reason to grant
# the dataset repository permission to execute code on this machine.
dataset = load_dataset(
    "FreedomIntelligence/medical-o1-reasoning-SFT",
    "en",
    split="train[0:500]",
)
dataset = dataset.map(formatting_prompts_func, batched=True)

print("Applying LoRA adapters...")
# Modules that receive adapters, grouped by their names (attention vs. MLP projections).
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]
lora_options = dict(
    r=16,
    target_modules=attention_projections + mlp_projections,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=False,
    random_state=3407,
)
model = FastLanguageModel.get_peft_model(model, **lora_options)

def force_convert_to_trainable(model):
    """Replace every trainable parameter with a fresh float32 leaf ``nn.Parameter``.

    Parameters with ``requires_grad=False`` are left untouched. Each trainable
    parameter is copied (on its current device), cast to float32, and re-registered
    under the same attribute name on its owning module.

    Args:
        model: A ``torch.nn.Module`` whose trainable parameters should be rebuilt.

    Returns:
        The same ``model`` object, modified in place.

    Note:
        Any optimizer created *before* this call still references the old parameter
        objects; build the optimizer/trainer afterwards.
    """
    # Snapshot first: the loop rebinds parameters on the very modules that
    # `named_parameters()` walks lazily, so do not mutate while iterating it.
    trainable = [
        (name, param) for name, param in model.named_parameters() if param.requires_grad
    ]
    for name, param in trainable:
        # "a.b.weight" -> owner module path "a.b", attribute "weight"; a top-level
        # parameter has an empty owner path, which resolves to `model` itself.
        owner_path, _, attr_name = name.rpartition(".")
        owner = model.get_submodule(owner_path)
        # Single copy + cast (copy=True guarantees new storage even if already fp32).
        fp32_values = param.detach().to(torch.float32, copy=True)
        setattr(owner, attr_name, torch.nn.Parameter(fp32_values, requires_grad=True))
    return model

# Rebuild the trainable weights as float32 parameters, then switch to training mode.
model = force_convert_to_trainable(model)
model.train()

print("Verifying parameter states...")
# Collect trainable parameters that are not leaf tensors, then warn about each in order.
non_leaf_trainable = [
    param_name
    for param_name, tensor in model.named_parameters()
    if tensor.requires_grad and not tensor.is_leaf
]
for name in non_leaf_trainable:
    print(f"Warning: Parameter {name} is not a leaf tensor!")

print("Setting up training...")
# Mixed precision: bf16 when the GPU supports it, fp16 otherwise (queried once).
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    warmup_steps=5,
    max_steps=60,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    optim="adamw_torch",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    # Log to W&B only if the login cell imported `wandb`. The fallback must be the
    # string "none": in transformers 4.x a `report_to` of None means "use the default",
    # which reports to every installed integration rather than disabling reporting.
    report_to="wandb" if "wandb" in globals() else "none",
)

# Supervised fine-tuning on the pre-formatted "text" column of `dataset`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=training_args,
)

print("Starting training")
try:
    trainer_stats = trainer.train()
except Exception as e:
    # Print a quick diagnostic snapshot of the model/GPU state...
    print(f"Training failed: {str(e)}")
    print("\nDebugging Info:")
    first_param = next(model.parameters())
    print(f"Model device: {first_param.device}")
    print(f"Model dtype: {first_param.dtype}")
    print(f"Trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
    print(f"CUDA memory: {torch.cuda.memory_allocated()/1024**2:.2f}MB allocated")
    # ...then re-raise so "Run All" stops here instead of carrying on to the upload
    # and inference steps with a model that was never trained.
    raise
print("Training completed successfully!")

# Upload happens outside the `try` so that a Hub/network error surfaces as itself
# (with its own traceback) rather than being reported as a training failure.
print("Saving model...")
# Local alternative to pushing to the Hub:
# model.save_pretrained("trained_model")
# tokenizer.save_pretrained("trained_model")
repo_name = "GVAmaresh/gemma-2b-medical-finetuned"
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

🔍 Checking CUDA availability...
✅ CUDA Available: True
🚀 GPU Name: Tesla T4
🔥 PyTorch CUDA Version: 12.4
📥 Loading model and tokenizer...
==((====))==  Unsloth 2025.3.18: Fast Gemma2 patching. Transformers: 4.50.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
📊 Loading and preparing dataset...


README.md:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

medical_o1_sft.json:   0%|          | 0.00/74.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25371 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Applying LoRA adapters...


Unsloth 2025.3.18 patched 26 layers with 26 QKV layers, 26 O layers and 26 MLP layers.


Verifying parameter states...
Setting up training...


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

🚀 Starting training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 500 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 1 x 1) = 1
 "-____-"     Trainable parameters = 20,766,720/2,000,000,000 (1.04% trained)


Step,Training Loss
10,1.77
20,1.5341
30,1.5126
40,1.366
50,1.444
60,1.4332


Unsloth: Will smartly offload gradients to save VRAM!
✅ Training completed successfully!
💾 Saving model...


adapter_model.safetensors:   0%|          | 0.00/83.1M [00:00<?, ?B/s]

Saved model to https://huggingface.co/GVAmaresh/gemma-2b-medical-finetuned


README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

In [None]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

# Hub repository the training cell pushed the fine-tuned adapter and tokenizer to.
finetuned_repo = "GVAmaresh/gemma-2b-medical-finetuned"
finetuned_model_options = dict(
    model_name=finetuned_repo,
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
    device_map="auto",
)
model, tokenizer = FastLanguageModel.from_pretrained(**finetuned_model_options)

# Prepare the loaded model for generation before any question is asked.
FastLanguageModel.for_inference(model)

def ask_medical_question(question, max_new_tokens=1024, temperature=0.7):
    """Generate the model's reasoning and answer for one medical question.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        question: The medical question, inserted verbatim into the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature (sampling is always enabled).

    Returns:
        str: ``"<think>"`` followed by the generated continuation, stripped of
        surrounding whitespace and with special tokens (e.g. end-of-sequence)
        removed.
    """
    prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{question}

### Response:
<think>"""

    # Send the inputs to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
    )

    # Drop the prompt by token count rather than splitting the decoded text on
    # "### Response:" — that marker could also occur in the question or in the
    # generated text, which would cut the answer at the wrong place.
    prompt_length = inputs.input_ids.shape[1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    # The prompt ends with the opening tag, so restore it in front of the completion.
    return ("<think>" + completion).strip()



==((====))==  Unsloth 2025.3.18: Fast Gemma2 patching. Transformers: 4.50.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


adapter_model.safetensors:   0%|          | 0.00/83.1M [00:00<?, ?B/s]

In [None]:

def evaluate_medical_response(question, response):
    """Keyword-based sanity check of a response to the migraine case.

    All keyword checks are case-insensitive substring matches on ``response``.

    Args:
        question: The question that was asked. Currently unused; kept so the
            signature mirrors the (question, response) pair.
        response: The model's response text.

    Returns:
        dict with keys:
            ``diagnosis_correct`` (bool): both "migraine" and "aura" are mentioned.
            ``treatment_correct`` (bool): a triptan is mentioned.
            ``reasoning_quality`` (str): ``"good"`` if both "unilateral" and
                "photophobia" are mentioned, otherwise ``"needs improvement"``.
            ``model_response`` (str): the unmodified ``response``.
    """
    # Lowercase once so every check is case-insensitive (the reasoning check used to
    # be case-sensitive and missed e.g. a sentence-initial "Unilateral").
    text = response.lower()

    diagnosis_correct = "migraine" in text and "aura" in text
    # "sumatriptan" contains "triptan", so a single substring test covers both.
    treatment_correct = "triptan" in text
    mentions_key_findings = "unilateral" in text and "photophobia" in text
    reasoning_quality = "good" if mentions_key_findings else "needs improvement"

    return {
        "diagnosis_correct": diagnosis_correct,
        "treatment_correct": treatment_correct,
        "reasoning_quality": reasoning_quality,
        "model_response": response
    }

# Single clinical vignette used to spot-check the fine-tuned model.
question = """A 45-year-old male presents with recurrent episodes of severe headache accompanied by
nausea, photophobia, and visual disturbances. The headaches are unilateral and throbbing in nature.
What is the most likely diagnosis and what first-line treatment would you recommend?"""

response = ask_medical_question(question)
evaluation = evaluate_medical_response(question, response)

# Assemble the report first, then print it line by line.
report_lines = [
    "🧪 Evaluation Results:",
    f"Diagnosis Correct: {evaluation['diagnosis_correct']}",
    f"Treatment Correct: {evaluation['treatment_correct']}",
    f"Reasoning Quality: {evaluation['reasoning_quality']}",
    "\n📝 Model Response:",
    evaluation['model_response'],
]
for report_line in report_lines:
    print(report_line)

🧪 Evaluation Results:
Diagnosis Correct: False
Treatment Correct: False
Reasoning Quality: needs improvement

📝 Model Response:
<think>Alright, so this guy has headaches that are really intense. They're throbbing and come and go. He gets them on one side of his head mostly, which is super weird. It's not like he's just feeling a bit achy all the time, these are really bad headaches. He even gets some nausea and blurry vision along with them! 

It's pretty clear something is going on with his nerves or blood vessels. He's had these headaches before, which is definitely concerning. It's not just a one-off, it's something that's happening over and over again.

Now, on to the diagnosis. I'm not ruling out any big disease. But when you're dealing with these really intense headaches, there's usually something going on with his blood vessels or nerves. 

Let me think. It's not usually just headaches, right? He's also got some nausea and blurred vision. That's definitely unusual. It's not like

In [None]:
# Extra free-form prompts to eyeball the model's answers; no automatic scoring here.
questions = [
    "Describe the mechanism of action of ACE inhibitors and their main side effects.",
    "What are the diagnostic criteria for type 2 diabetes mellitus?",
    "Explain the difference between ischemic and hemorrhagic stroke on a CT scan.",
]

# Rule printed after every answer: a blank line followed by 80 "=" characters.
answer_separator = "\n" + "="*80
for q in questions:
    print(f"\n❓ Question: {q}")
    answer = ask_medical_question(q)
    print(answer)
    print(answer_separator)


❓ Question: Describe the mechanism of action of ACE inhibitors and their main side effects.
<think>Okay, let's dive into this. So, ACE inhibitors, right? They're these little drugs that can really help with blood pressure control. But how do they do it? Let's see.

ACE stands for angiotensin-converting enzyme. This is a protein that converts angiotensin I into angiotensin II, which is crucial in blood vessel contraction. Basically, these drugs block ACE, so no angiotensin II is made.

But why does that help with blood pressure? Well, angiotensin II is like the conductor of the pressure orchestra, directing blood vessels to constrict. When ACE inhibitors block the action of ACE, there's less angiotensin II around, so blood vessels don't constrict as much. This effect is a boon for people with high blood pressure.

Now, let's talk about side effects. ACE inhibitors have some pretty common ones, like dry cough and angioedema. These are really unpleasant and can be a deterrent for some pe