In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install unsloth
!pip install selectolax


Collecting unsloth
  Downloading unsloth-2025.11.1-py3-none-any.whl.metadata (61 kB)
Collecting unsloth_zoo>=2025.11.1 (from unsloth)
  Downloading unsloth_zoo-2025.11.1-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth)
  Using cached tyro-0.9.35-py3-none-any.whl.metadata (12 kB)
INFO: pip is looking at multiple versions of unsloth to determine which version is compatible with other requirements. This could take a while.
Collecting unsloth
  Using cached unsloth-2025.10.12-py3-none-any.whl.metadata (61 kB)
  Using cached unsloth-2025.10.11-py3-none-any.whl.metadata (61 kB)
  Using cached unsloth-2025.10.10-py3-none-any.whl.metadata (61 kB)
  Using cached unsloth-2025.10.9-py3-none-any.whl.metadata (59 kB)
  Using cached unsloth-2025.10.8-py3-none-any.whl.metadata (59 kB)
  Using cached unsloth-2025.10.7-py3-none-any.whl.metadata (59 kB)
  Using cached unsloth-2025.10.6-py3-none-any.whl.metadata (59 kB)
INFO: pip is still looking at multiple versions of unsloth to determi

In [None]:
#!/usr/bin/env python3
# """
# Main training script for fine-tuning with QLoRA.
# """

# def main():
#     print("Training script - TODO: Implement")
#     print("1. Load data from ../1_data_engineering/outputs/")
#     print("2. Convert to instruction format")
#     print("3. Setup QLoRA")
#     print("4. Train model")
#     print("5. Save adapters")

# if __name__ == "__main__":
#     main()
import torch
import yaml
import os
import json
import sys

# Add the directory containing data_loader.py to the Python path
sys.path.append('humanized-npc-llm/fine_tuning')


from datasets import load_dataset
from transformers import AutoTokenizer, TrainingArguments
from unsloth import FastLanguageModel
from trl import SFTTrainer

# Import our custom data loading logic
from data_loader import create_chat_messages

def load_config(config_path):
    """Loads the training config file."""
    print(f"Loading configuration from {config_path}...")
    # Load the configuration directly from the provided Google Drive path
    with open(config_path, 'r') as f:
        return yaml.safe_load(f)

def get_tokenizer(model_name):
    """Loads tokenizer and sets padding."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return tokenizer

def format_dataset(example, tokenizer):
    """
    Applies the chat message creation and tokenization
    for a single example.
    """
    # 1. Create the list of messages
    messages = create_chat_messages(example)
    if messages is None:
        return {"text": None} # This will be filtered out

    # 2. Apply the chat template
    # We set add_generation_prompt=False as we provide the full conversation
    # including the assistant's response.
    try:
        formatted_text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False,
        )
        # Add EOS token to signal end of turn
        return {"text": formatted_text + tokenizer.eos_token}
    except Exception as e:
        print(f"Error applying chat template: {e}")
        return {"text": None}


def main():
    # --- 1. Load Configuration ---
    # Use the provided path for the configuration file
    config = load_config("../data_engineering/config/training.yaml")


    # Set up W&B (Weights & Biases) for logging
    os.environ["WANDB_PROJECT"] = config['wandb_project']

    # --- 2. Load Model & Tokenizer (with Unsloth) ---
    print(f"Loading base model: {config['base_model_id']}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = config['base_model_id'],
        max_seq_length = config['max_seq_length'],
        dtype = None,           # Auto-detect (bf16 or fp16)
        load_in_4bit = True,    # Enable QLoRA
    )

    # --- 3. Configure PEFT (QLoRA) ---
    print("Configuring PEFT (QLoRA)...")
    model = FastLanguageModel.get_peft_model(
        model,
        r = config['lora_r'],
        lora_alpha = config['lora_alpha'],
        lora_dropout = config['lora_dropout'],
        target_modules = config['lora_target_modules'],
        bias = "none",
        use_gradient_checkpointing = config['gradient_checkpointing'],
        random_state = 42,
        max_seq_length = config['max_seq_length'],
    )

    # --- 4. Load and Process Datasets ---
    print(f"Loading and formatting datasets...")
    tokenizer = get_tokenizer(config['base_model_id'])

    # Create a partial function for mapping
    format_func = lambda ex: format_dataset(ex, tokenizer)

    # Adjust the data file paths to load from Google Drive
    train_data_file = "../data_engineering/outputs/train.jsonl"
    val_data_file = "../data_engineering/outputs/val.jsonl"
    # Load and process train dataset
    train_dataset = load_dataset("json", data_files=train_data_file, split="train")
    train_dataset = train_dataset.map(format_func, num_proc=os.cpu_count())
    train_dataset = train_dataset.filter(lambda ex: ex['text'] is not None)

    # Load and process validation dataset
    val_dataset = load_dataset("json", data_files=val_data_file, split="train") # HF loads jsonl as 'train' split
    val_dataset = val_dataset.map(format_func, num_proc=os.cpu_count())
    val_dataset = val_dataset.filter(lambda ex: ex['text'] is not None)

    print(f"Training samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}")

    # --- 5. Configure Trainer ---
    print("Setting up TrainingArguments and SFTTrainer...")

    training_args = TrainingArguments(
        output_dir = config['output_dir'], # This path might also need adjustment if saving to Drive
        per_device_train_batch_size = config['batch_size'],
        per_device_eval_batch_size = config['batch_size'],
        gradient_accumulation_steps = config['gradient_accumulation_steps'],

        num_train_epochs = config['num_train_epochs'],
        learning_rate = config['learning_rate'],

        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),

        logging_steps = config['logging_steps'],
        eval_strategy = "steps",
        eval_steps = config['eval_steps'],
        save_strategy = "steps",
        save_steps = config['save_steps'],

        save_total_limit = config['save_total_limit'],
        load_best_model_at_end = True,

        report_to = "wandb",
        run_name = config['wandb_run_name'],

        # Removed optimizer and related args as they are not direct args in this version
        # optimizer = config['optimizer'],
        # lr_scheduler_type = config['lr_scheduler_type'],
        # warmup_ratio = config['warmup_ratio'],
        # weight_decay = config['weight_decay'],
        # max_grad_norm = config['max_grad_norm'],

        seed = 42,
    )

    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = train_dataset,
        eval_dataset = val_dataset,
        dataset_text_field = "text", # Use the "text" column we create
        max_seq_length = config['max_seq_length'],
        args = training_args,
        packing = False, # Set to True for speedup if you have many short samples
    )

    # --- 6. Start Training ---
    print("--- Starting Model Training ---")
    trainer.train()
    print("--- Training Finished ---")

    # --- 7. Save Final Model ---
    final_model_path = f"{config['output_dir'].replace('../', '', 1)}/final_model"
    print(f"Saving final model adapters to {final_model_path}")
    model.save_pretrained(final_model_path)
    tokenizer.save_pretrained(final_model_path)

    # Save the training config for reference
    with open(os.path.join(final_model_path, "training_config_snapshot.yaml"), 'w') as f:
        yaml.dump(config, f)

    print("--- Task 2: Fine-Tuning Complete ---")

if __name__ == "__main__":
    main()


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!
Loading configuration from /content/drive/MyDrive/Colab Notebooks/SEM04/GenAI/project/v1/humanized-npc-llm/data_engineering/config/training.yaml...
Loading base model: Qwen/Qwen2.5-3B-Instruct
==((====))==  Unsloth 2025.11.1: Fast Qwen2 patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.36G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/271 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


Configuring PEFT (QLoRA)...


Unsloth 2025.11.1 patched 36 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Loading and formatting datasets...


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map (num_proc=2):   0%|          | 0/54878 [00:00<?, ? examples/s]

Filter:   0%|          | 0/54878 [00:00<?, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map (num_proc=2):   0%|          | 0/3048 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3048 [00:00<?, ? examples/s]

Training samples: 48833, Validation samples: 2683
Setting up TrainingArguments and SFTTrainer...


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/48833 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/2683 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


--- Starting Model Training ---


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 48,833 | Num Epochs = 3 | Total steps = 9,159
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 29,933,568 of 3,115,872,256 (0.96% trained)
  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 Â·Â·Â·Â·Â·Â·Â·Â·Â·Â·


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mm23csa531[0m ([33mm23csa531-prom-iit-rajasthan[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference, openai] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss


Unsloth: Not an error, but Qwen2ForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient
