# Phase 3: Full Fine-Tuning

**Goal**: Train on the entire dataset (80/20 train/test split) for robust RPG generation

**Expected**: High valid JSON rate on test set

**Success Criteria**: 90%+ valid, complete JSON on unseen examples

In [None]:
# Setup (same as Phase 2)
!pip uninstall -y tensorflow  # Remove TF to prevent interference
!pip install --upgrade transformers torch accelerate bitsandbytes peft datasets trl -q

from google.colab import drive
drive.mount('/content/drive')

import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

print("✅ Setup complete!")

Found existing installation: tensorflow 2.19.0
Uninstalling tensorflow-2.19.0:
  Successfully uninstalled tensorflow-2.19.0
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m87.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m544.8/544.8 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m46.6 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
✅ Setup complete!


In [None]:
# Copy files
# Pure-Python copies are used instead of `!rm` / `!cp` so that a missing Drive
# mount or a wrong path raises an exception here, instead of printing the
# success message after a silently failed shell command.
import shutil

drive_project_dir = "/content/drive/MyDrive/fine_tuning"
dataset_filename = "rpg_training_dataset_gpt4_1_filtered.jsonl"

os.chdir('/content')

# Start from a clean local copy of the project directory.
shutil.rmtree('fine_tuning', ignore_errors=True)
shutil.copytree(drive_project_dir, 'fine_tuning')

# The dataset is also placed directly in /content, where the loading cell
# looks for it by bare filename.
shutil.copy(os.path.join(drive_project_dir, dataset_filename), '.')

print("✅ Files copied!")

✅ Files copied!


In [None]:
# Imports
# NOTE: the order of these imports is left untouched on purpose; importing the
# project module has visible side effects (it prints a WANDB status line).
import sys
# Make the project directory importable so `utils.data_utils` can be resolved.
sys.path.append('/content/fine_tuning')
# Project-local loader; this is NOT `datasets.load_dataset`.
from utils.data_utils import load_dataset
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType, PeftModel
from datasets import Dataset
import torch

print("✅ Imports complete!")

⚙️  Running in WANDB offline mode
✅ Imports complete!


In [None]:
# GPU Check
print("GPU Available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")
!nvidia-smi

GPU Available: True
GPU Name: Tesla T4
Mon Sep  1 05:56:58 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
         

In [None]:
# Load full dataset and format
dataset = load_dataset("rpg_training_dataset_gpt4_1_filtered.jsonl")
print(f"✅ Loaded {len(dataset)} examples")


def _to_chat_text(item):
    """Render one dataset record as a single chat-formatted training string.

    The record's 'response' is serialised to JSON and followed by the
    "</output>" marker, which acts as a stop token to teach the model where
    an answer ends.
    """
    question = item['prompt']
    answer = json.dumps(item['response']) + "</output>"
    return f"<|user|>\n{question}<|end|>\n<|assistant|>\n{answer}<|end|>"


# One {"text": ...} row per record, in the original dataset order.
formatted_data = [{"text": _to_chat_text(item)} for item in dataset]

print("✅ Formatted full dataset")

✅ Loaded 421 examples
✅ Formatted full dataset


In [None]:
# Load model (use overfitted as base for warm start)
# The base model is loaded in 4-bit with fp16 compute.
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
overfit_path = "/content/drive/MyDrive/overfit_tinyllama_rpg"

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map="auto",
    dtype=torch.float16  # 'dtype' is the current name of the former 'torch_dtype' argument
)

# LoRA config (assumed to be the same as Phase 2 — TODO confirm against the
# adapter_config.json stored in overfit_path; a mismatch in rank or target
# modules would make the adapter weights below fail to load).
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05
)

model = get_peft_model(model, lora_config)

# Warm start: overwrite the freshly initialised "default" adapter with the
# Phase 2 weights. is_trainable=True because these weights are trained further
# in this notebook (the default, False, is meant for inference-only loading).
model.load_adapter(overfit_path, adapter_name="default", is_trainable=True)

tokenizer = AutoTokenizer.from_pretrained(overfit_path)
# Only assign a pad token when the saved tokenizer has none. An existing
# token is reused so the vocabulary (and therefore the embedding matrix of
# the quantized base model) keeps its size. Previously "[PAD]" was assigned
# unconditionally, which made the `is None` check unreachable and discarded
# any pad token already configured in the saved tokenizer.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token or tokenizer.eos_token

print("✅ Model loaded with overfitted adapters!")

✅ Model loaded with overfitted adapters!


In [None]:
# Tokenize full dataset (split train/test)
# Ordered split, no shuffling: the first 80% of rows train, the remaining tail is held out.
split_point = int(0.8 * len(formatted_data))  # 80% train
train_data, test_data = formatted_data[:split_point], formatted_data[split_point:]

# Wrap both plain lists as `datasets.Dataset` objects (train first, then test).
train_dataset, test_dataset = (
    Dataset.from_list(rows) for rows in (train_data, test_data)
)

def tokenize_function(examples):
    """Tokenize chat-formatted text and build causal-LM labels.

    Args:
        examples: Mapping with a "text" entry holding either a list of strings
            (batched `Dataset.map`) or a single string.

    Returns:
        The tokenizer output with an added "labels" entry. Labels equal
        `input_ids` at real-token positions and -100 at padding positions.

    Every sequence is padded or truncated to 1024 tokens. Without masking,
    the loss would be computed on the padding too, which dominates these
    mostly-padding sequences and makes the reported loss look misleadingly
    low. -100 is the label value the loss function ignores.

    NOTE: texts longer than 1024 tokens are truncated, so the end of such
    examples is never seen during training.
    """
    def _mask_padding(ids, attention):
        # Keep the token id where the attention mask is set, ignore it elsewhere.
        return [token_id if keep else -100 for token_id, keep in zip(ids, attention)]

    tokenized = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=1024
    )

    input_ids = tokenized["input_ids"]
    attention_mask = tokenized["attention_mask"]

    if input_ids and not isinstance(input_ids[0], (list, tuple)):
        # Single, un-batched example: input_ids is a flat list of token ids.
        tokenized["labels"] = _mask_padding(input_ids, attention_mask)
    else:
        tokenized["labels"] = [
            _mask_padding(ids, attention)
            for ids, attention in zip(input_ids, attention_mask)
        ]
    return tokenized

# Run the same batched tokenization over both splits (train first, then test).
tokenized_train, tokenized_test = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, test_dataset)
)

train_count, test_count = len(tokenized_train), len(tokenized_test)
print(f"✅ Tokenized {train_count} train and {test_count} test examples")

Map:   0%|          | 0/336 [00:00<?, ? examples/s]

Map:   0%|          | 0/85 [00:00<?, ? examples/s]

✅ Tokenized 336 train and 85 test examples


In [None]:
# Train
training_args = TrainingArguments(
    output_dir="/content/full_model",  # checkpoints land on the local Colab disk
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # 1 x 4 => effective batch size of 4
    learning_rate=1e-4,
    fp16=True,
    save_steps=100,
    logging_steps=10,
    # The string "none" disables all logging integrations. Passing None does
    # NOT disable them: None falls back to the library's default set of
    # integrations, which is why the deprecated WANDB_DISABLED variable was
    # needed before.
    report_to="none"
)

# Only a training set is passed; the held-out split is evaluated separately
# by generating from the trained model.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
)

trainer.train()
print("✅ Full training complete!")

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss
10,0.0603
20,0.0126
30,0.0062
40,0.0056
50,0.0054
60,0.006
70,0.0057
80,0.0055
90,0.0057
100,0.0062


✅ Full training complete!


In [None]:
# Save
save_path = "/content/drive/MyDrive/full_tinyllama_rpg"

# Model and tokenizer are written to the same Drive folder, model first.
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_path)

print("💾 Model saved!")

💾 Model saved!


In [None]:
# Evaluate: generate a response for held-out prompts and check that each one
# parses as JSON containing every required key.
NUM_EVAL_EXAMPLES = 5        # number of held-out examples to try
MAX_NEW_TOKENS = 1536        # upper bound on generated tokens per example
STOP_MARKER = "</output>"    # end-of-answer marker appended to every training response
ASSISTANT_TAG = "<|assistant|>\n"
REQUIRED_KEYS = ('width', 'height', 'walls', 'enemies', 'player_pos')

# Prompt plus generated tokens must fit into the model's context window;
# asking for more (e.g. 4096 new tokens) makes generation run away.
context_limit = getattr(model.config, "max_position_embeddings", 2048)

model.eval()
test_results = []

print("🧪 Testing on held-out examples...")

for i, example in enumerate(test_data[:NUM_EVAL_EXAMPLES]):
    # Everything before the assistant tag is the user turn, which already ends
    # with "<|end|>\n". Appending only the assistant tag reproduces the exact
    # training format (adding another "<|end|>\n" here would duplicate it).
    user_turn = example['text'].split(ASSISTANT_TAG)[0]
    prompt = user_turn + ASSISTANT_TAG

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]
    token_budget = max(1, min(MAX_NEW_TOKENS, context_limit - prompt_len))

    # Inference only: no gradients needed. Generation stops as soon as the
    # stop marker has been produced (stop_strings requires the tokenizer).
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=token_budget,
            stop_strings=[STOP_MARKER],
            tokenizer=tokenizer,
        )

    # Decode only the newly generated tokens, then cut at the stop marker.
    generated = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    response = generated.split(STOP_MARKER)[0].strip()

    # Print FULL response for debugging
    print(f"Full Response {i+1}:\n{response}\n")

    # Valid means: parses as JSON, is an object, and has every required key.
    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        is_valid = False
    else:
        is_valid = isinstance(parsed, dict) and all(k in parsed for k in REQUIRED_KEYS)
    test_results.append(is_valid)

    print(f"Test {i+1}: {'✅' if test_results[-1] else '❌'}")
    print("-" * 80)  # Separator

# Guard against an empty held-out set to avoid dividing by zero.
success_rate = sum(test_results) / len(test_results) if test_results else 0.0
print(f"Success Rate: {success_rate:.1%}")

🧪 Testing on held-out examples...
Full Response 1:
{"width": 20, "height": 15, "walls": [{"x": 0, "y": 0}, {"x": 1, "y": 0}, {"x": 2, "y": 0}, {"x": 3, "y": 0}, {"x": 4, "y": 0}, {"x": 5, "y": 0}, {"x": 6, "y": 0}, {"x": 7, "y": 0}, {"x": 8, "y": 0}, {"x": 9, "y": 0}, {"x": 10, "y": 0}, {"x": 11, "y": 0}, {"x": 12, "y": 0}, {"x": 13, "y": 0}, {"x": 14, "y": 0}, {"x": 15, "y": 0}, {"x": 16, "y": 0}, {"x": 17, "y": 0}, {"x": 18, "y": 0}, {"x": 19, "y": 0}, {"x": 0, "y": 1}, {"x": 19, "y": 1}, {"x": 0, "y": 2}, {"x": 19, "y": 2}, {"x": 0, "y": 3}, {"x": 19, "y": 3}, {"x": 0, "y": 4}, {"x": 19, "y": 4}, {"x": 0, "y": 5}, {"x": 19, "y": 5}, {"x": 0, "y": 6}, {"x": 19, "y": 6}, {"x": 0, "y": 7}, {"x": 19, "y": 7}, {"x": 0, "y": 8}, {"x": 19, "y": 8}, {"x": 0, "y": 9}, {"x": 19, "y": 9}, {"x": 0, "y": 10}, {"x": 19, "y": 10}, {"x": 0, "y": 11}, {"x": 19, "y": 11}, {"x": 0, "y": 12}, {"x": 19, "y": 12}, {"x": 0, "y": 13}, {"x": 19, "y": 13}, {"x": 0, "y": 14}, {"x": 1, "y": 14}, {"x": 2, "y": 

KeyboardInterrupt: 