In [6]:
# Install packages
!pip install unsloth --upgrade
!pip install wandb

# Torch imports
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Import wandb safely
import os
import wandb

# Disable wandb online mode (no login needed)
os.environ["WANDB_MODE"] = "disabled"

print("Wandb tracking is disabled. Continuing without wandb login.")



[0m[31mERROR: Could not find a version that satisfies the requirement unsloth (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for unsloth[0m[31m
Using device: cpu
Wandb tracking is disabled. Continuing without wandb login.


# PART 2: Dataset Preparation
Let’s load and process the FreedomIntelligence/Medical-CoT dataset.

In [None]:
from datasets import load_dataset

# Fetch the dataset from the Hugging Face Hub, then turn its "train" split
# into a pandas DataFrame for row-wise formatting below.
dataset = load_dataset("FreedomIntelligence/Medical-CoT")
train_split = dataset['train']
df = train_split.to_pandas()

# Format <think> and <response> tags
def format_sample(row):
    """Build the tagged training string for one dataset row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing the keys
            'rationale' and 'answer'.

    Returns:
        The rationale wrapped in <think> tags, a newline, then the answer
        wrapped in <response> tags.
    """
    rationale, answer = row['rationale'], row['answer']
    return "<think>{}</think>\n<response>{}</response>".format(rationale, answer)

# Build the tagged training string for every row.
df["formatted"] = df.apply(format_sample, axis=1)

# Train/Validation Split
# The first VAL_SIZE rows are held out for validation and the rest are used
# for training. The size lives in one constant so the two slices cannot
# drift apart when it is tuned.
VAL_SIZE = 100
val_data = df.iloc[:VAL_SIZE]
train_data = df.iloc[VAL_SIZE:]

# Sanity check: the two slices must partition the frame.
assert len(train_data) + len(val_data) == len(df), "train/val split must cover every row exactly once"

train_texts = train_data["formatted"].tolist()
val_texts = val_data["formatted"].tolist()


# Load LLaMA 3.2 (3B) Quantized & Set Up LoRA

In [None]:
from unsloth import FastLanguageModel

# Load the 4-bit (bitsandbytes) quantized Llama 3.2 3B base checkpoint.
# The 3B size only exists in the Llama 3.2 family, so the repo id must name
# that family explicitly.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-bnb-4bit",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
)

# Attach LoRA adapters to the quantized base model.
# `task_type` is intentionally not passed here.
# NOTE(review): unsloth's get_peft_model appears to set the causal-LM task
# type itself, and an explicit `task_type` can collide with it — confirm
# against the installed unsloth version.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
)


# Dataset to Dataloader Format

In [None]:
from transformers import Trainer, TrainingArguments

try:
    from unsloth.data import preprocess
except ImportError:
    # NOTE(review): `unsloth.data.preprocess` could not be found in the
    # unsloth documentation; when the import is unavailable, fall back to a
    # plain tokenizer-based implementation with the same call shape.
    from datasets import Dataset

    def preprocess(texts, tokenizer, max_length=2048):
        """Tokenize raw strings into a causal-LM training dataset.

        Args:
            texts: Iterable of already formatted training strings.
            tokenizer: Tokenizer returned alongside the model.
            max_length: Every row is truncated/padded to this many tokens.
                The default mirrors the `max_seq_length` used to load the
                model.

        Returns:
            A `datasets.Dataset` with `input_ids`, `attention_mask` and
            `labels` columns. All rows have the same length, so a `Trainer`
            built without an explicit data collator can batch them.
        """
        # Padding needs a pad token; reuse EOS when the tokenizer has none.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        eos = tokenizer.eos_token or ""

        def _encode(batch):
            # Append EOS so the model learns where a sample ends.
            encoded = tokenizer(
                [text + eos for text in batch["text"]],
                truncation=True,
                padding="max_length",
                max_length=max_length,
            )
            # Labels copy the inputs; padded positions are set to -100 so
            # they are ignored by the loss.
            encoded["labels"] = [
                [token if keep else -100 for token, keep in zip(ids, mask)]
                for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
            ]
            return encoded

        raw = Dataset.from_dict({"text": list(texts)})
        return raw.map(_encode, batched=True, remove_columns=["text"])

train_dataset = preprocess(train_texts, tokenizer)
val_dataset = preprocess(val_texts, tokenizer)


# Fine-Tune with wandb Tracking

In [None]:
import inspect
import os
os.environ["WANDB_PROJECT"] = "llama3-medical-finetune"

# Newer transformers releases name this argument `eval_strategy`; older ones
# only know `evaluation_strategy`. Pick whichever the installed
# TrainingArguments accepts so the cell runs on both.
_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir = "./llama3-cot-finetuned",
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    # Effective train batch per device = 2 * 4 = 8 samples per optimizer step.
    gradient_accumulation_steps = 4,
    logging_strategy = "steps",
    logging_steps = 20,
    num_train_epochs = 3,
    learning_rate = 2e-4,
    fp16 = True,
    report_to = "wandb",
    # No checkpoints are written during training.
    save_strategy = "no",
    # Run evaluation on the validation set once per epoch.
    **{_eval_strategy_key: "epoch"},
)

trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
)

trainer.train()


# Evaluation using ROUGE-L

In [None]:
from datasets import load_metric

# NOTE(review): `load_metric` has been removed from newer `datasets`
# releases in favour of the separate `evaluate` package — confirm that the
# installed version still provides it.
rouge = load_metric("rouge")

def compute_rouge(preds, targets):
    """Return the ROUGE-L F-measure of predictions against references.

    Args:
        preds: Generated texts.
        targets: Reference texts, in the same order as `preds`.

    Returns:
        The `mid.fmeasure` value of the "rougeL" entry computed by the
        module-level `rouge` metric.
    """
    rouge_l = rouge.compute(predictions=preds, references=targets)["rougeL"]
    return rouge_l.mid.fmeasure

# Example evaluation
# The prompt is cut right after the opening <response> tag so the gold
# answer is never shown to the model, and only the newly generated tokens
# are decoded. Otherwise the prediction would contain the prompt (including
# the reference) and inflate ROUGE.
sample_preds = []
for text in val_texts[:10]:
    prompt = text.split("<response>")[0] + "<response>"
    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    generated = model.generate(**encoded, max_new_tokens=100)
    # generate() returns prompt + continuation; drop the prompt tokens.
    prompt_length = encoded["input_ids"].shape[1]
    completion = tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)
    # Keep only the text before the closing tag, if the model emitted one.
    sample_preds.append(completion.split("</response>")[0])

sample_targets = [text.split("<response>")[1].replace("</response>", "") for text in val_texts[:10]]

rouge_l_score = compute_rouge(sample_preds, sample_targets)
print("ROUGE-L:", rouge_l_score)


# Save and Upload to Hugging Face

In [None]:
# Write the model and the tokenizer to their own local directories.
for artifact, out_dir in ((model, "finetuned-lora"), (tokenizer, "finetuned-tokenizer")):
    artifact.save_pretrained(out_dir)

# Then, in a cell:
# Login to Hugging Face CLI to upload
# !huggingface-cli login

# After login:
# !huggingface-cli repo create llama3-medical-cot --type model
# !git clone https://huggingface.co/username/llama3-medical-cot
# !cp -r finetuned-lora/* llama3-medical-cot/
# !cp -r finetuned-tokenizer/* llama3-medical-cot/
# !cd llama3-medical-cot && git add . && git commit -m "Upload finetuned model" && git push



# Inference Instructions

In [None]:
from unsloth import FastLanguageModel

# Reload the same 4-bit Llama 3.2 3B base checkpoint that was fine-tuned.
# The 3B size only exists in the Llama 3.2 family, so the repo id names it.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-bnb-4bit",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True,
)

# Load the LoRA weights from the directory written by
# `model.save_pretrained("finetuned-lora")` in the save step.
model.load_adapter("finetuned-lora")

# Use the training layout: the reasoning block and the opening <response>
# tag are separated by a newline, exactly as produced by format_sample.
prompt = "<think>Patient shows signs of...</think>\n<response>"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

output = model.generate(input_ids, max_new_tokens=100)
# The decoded text contains the prompt followed by the generated answer.
print(tokenizer.decode(output[0]))


end of task