# NurseSim-RL: Robust Training (Drive-Saved Edition)

**OpenEnv Challenge Entry - 2026**

**Changes:**
- **Saves DIRECTLY to Google Drive** (a checkpoint is written every 50 steps, so a disconnect loses at most the steps since the last checkpoint)
- Auto-resume from Drive checkpoints

## Setup
- **Paste your Hugging Face token** into the `HF_TOKEN` variable in the code cell of section 2 ("Load Llama-3.2-3B") before running that cell.

## 1. Install Dependencies & Mount Drive

In [None]:
%%capture
# Cell magic (must stay on the first line): captures this cell's stdout/stderr
# so the long pip logs are not shown.
# NOTE(review): pip failure messages are captured too — remove %%capture
# temporarily when debugging a broken install.
# Install
# Unsloth straight from GitHub, with its "colab-new" extra.
!pip install --upgrade "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps: install only these packages, without pulling in their own
# dependencies (presumably to avoid overriding what the Unsloth install
# already resolved — confirm).
!pip install --no-deps trl peft accelerate bitsandbytes xformers

# Mount Drive
# The training cell writes its checkpoints and the final model under
# /content/drive/MyDrive, so the mount has to happen before training.
from google.colab import drive
drive.mount('/content/drive')

## 2. Load Llama-3.2-3B

In [None]:
import os

from unsloth import FastLanguageModel
import torch

# 1. PASTE YOUR HF TOKEN HERE
#    (alternatively leave the placeholder untouched and provide the token
#    through the HF_TOKEN environment variable)
_HF_TOKEN_PLACEHOLDER = "YOUR_HF_TOKEN_HERE"
HF_TOKEN = "YOUR_HF_TOKEN_HERE"

# Never send the unedited placeholder (or an empty string) as a credential:
# an invalid token can make the Hub reject the request even for a public
# model. Fall back to the environment variable, then to anonymous access.
if not HF_TOKEN or HF_TOKEN.strip() in ("", _HF_TOKEN_PLACEHOLDER):
    HF_TOKEN = os.environ.get("HF_TOKEN") or None

# Load the instruct-tuned base model and its tokenizer, quantised to 4 bit.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=2048,  # same limit is passed to the trainer later on
    dtype=None,  # no explicit dtype; left to the library to choose
    load_in_4bit=True,
    token=HF_TOKEN,
)

# Wrap the base model with LoRA adapters on the attention projections
# (q/k/v/o) and the MLP projections (gate/up/down).
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,  # fixed seed for the adapter setup
)
print("Model & LoRA Ready!")

## 3. Prepare Dataset
**Upload `train_expanded.jsonl` to the Colab Files tab (left sidebar) before running the next cell.**

In [None]:
from datasets import load_dataset
import os

# Single source of truth for the training file name (used by the existence
# check, the error message and the loader).
DATA_FILE = "train_expanded.jsonl"

# Fail fast with an actionable message when the file has not been uploaded.
if not os.path.exists(DATA_FILE):
    raise FileNotFoundError(f"❌ Please upload '{DATA_FILE}' to the Files tab on the left!")

dataset = load_dataset("json", data_files=DATA_FILE, split="train")

# The prompt formatter below reads exactly these three columns; report a
# schema problem here rather than as a KeyError in the middle of dataset.map.
_missing_columns = {"instruction", "input", "output"} - set(dataset.column_names)
if _missing_columns:
    raise ValueError(
        f"'{DATA_FILE}' is missing required column(s): {sorted(_missing_columns)}"
    )

# Alpaca-style training template. It has three named placeholders —
# {instruction}, {input} and {output} — that are filled with str.format()
# for every record of the dataset.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
{output}"""

# End-of-sequence marker of the loaded tokenizer; it is appended to every
# formatted example (presumably so the fine-tuned model learns where a
# response ends — confirm).
EOS_TOKEN = tokenizer.eos_token
def format_prompts(examples):
    """Render a batch of records into Alpaca-formatted training strings.

    ``examples`` is a column-oriented batch providing parallel
    "instruction", "input" and "output" sequences. Each row is substituted
    into ``alpaca_prompt`` and terminated with ``EOS_TOKEN``.

    Returns a dict with a single "text" key holding one string per row.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    rendered = [
        alpaca_prompt.format(instruction=task, input=context, output=answer) + EOS_TOKEN
        for task, context, answer in rows
    ]
    return {"text": rendered}

# Run format_prompts over the dataset in batches; the "text" key it returns
# is the field the trainer is later told to read (dataset_text_field="text").
dataset = dataset.map(format_prompts, batched = True)
print(f"Dataset loaded: {len(dataset)} examples")

## 4. Train (Direct-to-Drive)
This will save checkpoints and the final model directly to your Google Drive.

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.trainer_utils import get_last_checkpoint

# SAVE PATHS IN DRIVE
import os  # makes this cell runnable on its own after a runtime restart

drive_root = "/content/drive/MyDrive"

# If Drive is not mounted, os.makedirs below would quietly create the same
# path on the VM's local disk and every checkpoint would vanish with the
# runtime. Stop here instead of training without persistent storage.
if not os.path.isdir(drive_root):
    raise RuntimeError(
        f"Google Drive is not mounted ({drive_root} not found). "
        "Run the 'Install Dependencies & Mount Drive' cell first."
    )

output_dir = "/content/drive/MyDrive/nursesim_checkpoints"
os.makedirs(output_dir, exist_ok=True)

# Choose the mixed-precision mode once: bf16 where the GPU supports it,
# fp16 otherwise (exactly one of the two flags ends up True).
use_bf16 = torch.cuda.is_bf16_supported()

# Optimisation and checkpointing settings, kept separate from the trainer
# wiring below so they can be read at a glance.
training_args = TrainingArguments(
    # Where checkpoints go
    output_dir=output_dir,  # Saves to Drive!
    save_strategy="steps",
    save_steps=50,
    save_total_limit=2,
    # Batch geometry: 8 sequences x 4 accumulation steps per optimizer update
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    # Schedule
    max_steps=300,
    warmup_steps=10,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    # Optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # Precision
    bf16=use_bf16,
    fp16=not use_bf16,
    # Logging / reproducibility
    logging_steps=1,
    seed=42,
)

# Supervised fine-tuning on the pre-rendered "text" field, one example per
# sequence (no packing).
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

# Check for resume
# get_last_checkpoint looks inside output_dir for saved checkpoints and
# yields the most recent one, or None when the folder holds none yet.
last_checkpoint = get_last_checkpoint(output_dir)
if last_checkpoint is not None:
    # Continue from the saved trainer state instead of restarting at step 0.
    print(f"🔄 Resuming from Drive checkpoint: {last_checkpoint}")
    trainer.train(resume_from_checkpoint=last_checkpoint)
else:
    print("🚀 Starting fresh training")
    trainer.train()

# SAVE FINAL MODEL TO DRIVE
# The tokenizer is saved next to the model so the folder can be loaded on
# its own later.
final_path = "/content/drive/MyDrive/nursesim_lora_llama3_robust"
print(f"Saving final model to {final_path}...")
model.save_pretrained(final_path)
tokenizer.save_pretrained(final_path)
print("✅ DONE! Saved to Google Drive.")