In [None]:
# ==============================================================================
#  FOUR WHEELER GEMMA 2B - FINE TUNING SCRIPT (FIXED & FULL)
# ==============================================================================

# 1. MOUNT GOOGLE DRIVE
# ------------------------------------------------------------------------------
import os

from google.colab import drive

drive.mount('/content/drive')

# All artifacts produced by this notebook go under one Drive folder;
# GGUF exports get a dedicated sub-directory inside it.
drive_base_path = "/content/drive/MyDrive/Gemma2B_FourWheeler_Fixed"
gguf_path = os.path.join(drive_base_path, "gguf_versions")
for _output_dir in (drive_base_path, gguf_path):
    # exist_ok makes re-running this cell harmless.
    os.makedirs(_output_dir, exist_ok=True)

print(f"‚úÖ Drive Mounted. Output will be saved to: {drive_base_path}")

# 2. INSTALL LIBRARIES
# ------------------------------------------------------------------------------
# The lines starting with `!` are IPython shell escapes, so this step only
# works when the file is executed as a notebook cell (not as a plain .py file).
print("‚è≥ Installing Unsloth and dependencies (this takes about 2 mins)...")
# Note: Using the specific unsloth colab install to prevent dependency conflicts
# (installed straight from the project's GitHub repository).
!pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps: install these packages without letting pip pull in their own
# dependencies.
!pip install --no-deps trl peft accelerate bitsandbytes datasets

# 3. IMPORTS & SETUP
# ------------------------------------------------------------------------------
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig
from unsloth.chat_templates import get_chat_template

# 4. LOAD MODEL (GEMMA 2B INSTRUCT)
# ------------------------------------------------------------------------------
# Shared settings; max_seq_length is reused later when the trainer is built.
max_seq_length = 2048
dtype = None  # None lets the loader pick the dtype (author's note: FP16 on T4, BF16 on Ampere)
load_in_4bit = True

print("‚è≥ Loading Gemma-2-2b-it model...")
base_model_options = dict(
    model_name = "google/gemma-2-2b-it",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# Wrap the base model with LoRA adapters so there are weights to train.
# Adapters are attached to the attention projections and the MLP projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = attention_projections + mlp_projections,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

# 5. PREPARE DATASET (THE CRITICAL FIX)
# ------------------------------------------------------------------------------
# The cleaned JSONL file is expected in the current working directory.
dataset_file = "Four_Wheeler_Dataset_Cleaned.jsonl"

dataset_is_missing = not os.path.exists(dataset_file)
if dataset_is_missing:
    # Fail before any expensive work when the upload was forgotten.
    raise FileNotFoundError(f"‚ùå ERROR: Please upload '{dataset_file}' to the Colab Files tab!")

print("‚è≥ Loading and Formatting Dataset...")
dataset = load_dataset(
    "json",
    data_files = dataset_file,
    split = "train",
)

# Swap in the "gemma" chat template so prompts are rendered in the chat format
# (author's note: this is what fixed the stray </s> issue).
tokenizer = get_chat_template(tokenizer, chat_template = "gemma")

def formatting_prompts_func(examples):
    """Render a batch of question/answer pairs as chat-formatted text.

    Args:
        examples: a batch mapping with parallel "question" and "answer"
            columns (the function is applied with ``batched = True``).

    Returns:
        A dict with one key, "text", holding one rendered conversation per
        question/answer pair, in the same order as the input.
    """
    def render_pair(question, answer):
        # One user turn followed by one assistant turn; the tokenizer's chat
        # template supplies the turn markers. No generation prompt is added
        # because the assistant reply is already part of the conversation.
        return tokenizer.apply_chat_template(
            [
                {"role": "user", "content": question},
                {"role": "assistant", "content": answer},
            ],
            tokenize = False,
            add_generation_prompt = False
        )

    questions = examples["question"]
    answers = examples["answer"]
    return { "text" : [render_pair(q, a) for q, a in zip(questions, answers)] }

# Add the rendered "text" column to every row, processing rows in batches.
dataset = dataset.map(formatting_prompts_func, batched = True)

# Sanity check: show the first 100 characters of the first rendered example.
sample_preview = dataset[0]["text"][:100]
print(f"‚úÖ Dataset Loaded. Sample:\n{sample_preview}...")

# 6. TRAINING
# ------------------------------------------------------------------------------
print("üöÄ Starting Training...")

# Probe the GPU once: train in bfloat16 when supported, otherwise float16.
bf16_available = torch.cuda.is_bf16_supported()

training_args = SFTConfig(
    output_dir = "outputs",
    seed = 3407,
    # 2 sequences per step x 4 accumulation steps per device.
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    max_steps = 60, # Adjust based on dataset size (60 is good for small tests)
    warmup_steps = 5,
    learning_rate = 2e-4,
    lr_scheduler_type = "linear",
    optim = "adamw_8bit",
    weight_decay = 0.01,
    bf16 = bf16_available,
    fp16 = not bf16_available,
    logging_steps = 1,
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",  # column produced by formatting_prompts_func
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = training_args,
)

trainer_stats = trainer.train()
print("‚úÖ Training Complete.")

# 7. SAVE EVERYTHING TO GOOGLE DRIVE
# ------------------------------------------------------------------------------
print("üíæ Saving files to Google Drive... Do not close the tab!")

# A. Merged 16-bit model (for Hugging Face / server use), written directly
#    into the main Drive folder.
print(f"   -> Saving Standard Merged Model (16-bit) to: {drive_base_path}")
model.save_pretrained_merged(
    drive_base_path,
    tokenizer,
    save_method = "merged_16bit",
)

# B + C. GGUF exports, in the same order as before:
#   q4_k_m - small, for mobile / Ollama
#   f16    - large, high precision
for quantization in ("q4_k_m", "f16"):
    print(f"   -> Saving GGUF ({quantization}) to: {gguf_path}")
    model.save_pretrained_gguf(
        gguf_path,
        tokenizer,
        quantization_method = quantization
    )

summary_rule = "=" * 50
print("\n" + summary_rule)
print("üéâ ALL DONE!")
print(f"1. Standard Model folder: {drive_base_path}")
print(f"2. GGUF Files folder:     {gguf_path}")
print(summary_rule)

In [None]:
import os
# Pre-flight check: is the cleaned dataset in the working directory?
dataset_is_uploaded = os.path.exists("Four_Wheeler_Dataset_Cleaned.jsonl")
print(
    "‚úÖ Success! File found. You can run the training script now."
    if dataset_is_uploaded
    else "‚ùå Error: File not found. Please drag and drop it into the Files sidebar on the left."
)

In [None]:
import json
import os

# 1. Define input and output filenames
input_file = "Four_Wheeler_Dataset_JSON.jsonl"
output_file = "Four_Wheeler_Dataset_Cleaned.jsonl"


def _flatten_list_fields(record, fields=("answer", "question")):
    """Join list-valued text fields of *record* into single strings, in place.

    Args:
        record: one decoded JSON object (a dict).
        fields: names of the fields that must end up as plain strings.

    Returns:
        The number of fields that were converted.
    """
    converted = 0
    for field in fields:
        value = record.get(field)
        if isinstance(value, list):
            # str() every item so a non-string entry (number, None, ...)
            # cannot make join() raise TypeError and abort the whole run.
            record[field] = " ".join(str(item) for item in value)
            converted += 1
    return converted


print(f"üßπ Scanning {input_file} for errors...")

fixed_count = 0  # number of ROWS that needed at least one field repaired
with open(input_file, 'r', encoding='utf-8') as infile, \
     open(output_file, 'w', encoding='utf-8') as outfile:

    for row_number, line in enumerate(infile, start=1):
        # Blank lines (e.g. a trailing newline) are not errors; drop them quietly.
        if not line.strip():
            continue

        # Keep the try body minimal: only the parse can raise JSONDecodeError.
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            print(f"‚ùå Skipped invalid JSON at line {row_number}")
            continue

        # Every row must be a JSON object with question/answer keys; anything
        # else (a bare list, string, number) is skipped, not written out.
        if not isinstance(data, dict):
            print(f"   -> Skipped line {row_number}: not a JSON object.")
            continue

        if _flatten_list_fields(data):
            fixed_count += 1
            # Only the first repaired row is reported, to keep the output short.
            if fixed_count == 1:
                print(f"   -> Found error at Row {row_number}: Converted list to string.")

        # Write the clean line to the new file
        json.dump(data, outfile)
        outfile.write('\n')

print("-" * 30)
print(f"‚úÖ Done! Fixed {fixed_count} rows.")
print(f"üìÇ New file created: {output_file}")

In [None]:
# ==============================================================================
#  CORRECTED GGUF CONVERSION (For Merged Models)
# ==============================================================================
from google.colab import drive
drive.mount('/content/drive')

import glob
import os
from unsloth import FastLanguageModel

# 1. PATHS
drive_model_path = "/content/drive/MyDrive/Gemma2B_FourWheeler_Fixed"
gguf_path = os.path.join(drive_model_path, "gguf_versions")
os.makedirs(gguf_path, exist_ok=True)

# 2. CHECK IF MODEL EXISTS
# A saved checkpoint is not always a single "model.safetensors": larger models
# can be split into shards (model-00001-of-0000N.safetensors plus an index).
# Accept any *.safetensors file so a sharded checkpoint is not reported missing.
weight_files = glob.glob(os.path.join(drive_model_path, "*.safetensors"))

if not weight_files:
    print(f"‚ùå ERROR: No model found at {drive_model_path}")
    print("   You likely lost the data when restarting. Please run the RETRAIN script (Option B).")
else:
    print(f"‚úÖ Found merged model at: {drive_model_path}")
    print("‚è≥ Loading your fine-tuned model...")

    # 3. LOAD THE SAVED MERGED MODEL
    # We load YOUR model path, not the Google base path
    # NOTE(review): loading the merged weights in 4-bit before exporting may
    # cost some precision compared with loading them in 16-bit - confirm.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = drive_model_path,
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True, # Load efficiently to save RAM
    )

    # 4. CONVERT TO GGUF
    print("üöÄ Starting Conversion (q4_k_m)...")
    model.save_pretrained_gguf(
        gguf_path,
        tokenizer,
        quantization_method = "q4_k_m"
    )

    print("üéâ SUCCESS! GGUF is saved in your Drive inside 'gguf_versions'.")

In [None]:
# ==============================================================================
#  FINAL GGUF GENERATOR (Auto-Install + Save)
# ==============================================================================
print("‚è≥ Step 1: Installing Unsloth (This takes ~2 mins)...")
# We install the library again because Colab wiped it
!pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps trl peft accelerate bitsandbytes datasets

import os
from google.colab import drive
from unsloth import FastLanguageModel

# 2. MOUNT DRIVE (FORCE REMOUNT TO FIX ERRORS)
print("‚è≥ Step 2: Connecting to Google Drive...")
drive.mount('/content/drive', force_remount=True)

# 3. DEFINE PATHS
drive_model_path = "/content/drive/MyDrive/Gemma2B_FourWheeler_Fixed"
gguf_path = os.path.join(drive_model_path, "gguf_versions")
os.makedirs(gguf_path, exist_ok=True)

# 4. CHECK IF MODEL EXISTS
if not os.path.exists(os.path.join(drive_model_path, "model.safetensors")):
    print("‚ùå ERROR: The model file is missing from Drive.")
    print("   Please check your Drive folder manually.")
else:
    print(f"‚úÖ Found model at: {drive_model_path}")
    print("‚è≥ Step 3: Loading model (1 minute)...")

    try:
        # Load the saved model
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name = drive_model_path,
            max_seq_length = 2048,
            dtype = None,
            load_in_4bit = True,
        )

        # Convert to GGUF
        print("üöÄ Step 4: Converting to GGUF (q4_k_m)...")
        model.save_pretrained_gguf(
            gguf_path,
            tokenizer,
            quantization_method = "q4_k_m"
        )

        print("\n" + "="*40)
        print("üéâ DONE! GGUF SAVED SUCCESSFULLY.")
        print(f"Location: {gguf_path}")
        print("="*40)

    except Exception as e:
        print(f"‚ùå Error: {e}")

In [None]:
import os
import shutil
from google.colab import drive

# 1. Force unmount just in case
print("force unmounting...")
!umount /content/drive 2>/dev/null

# 2. Delete the blocked folder
if os.path.exists("/content/drive"):
    shutil.rmtree("/content/drive")
    print("Cleaned up old drive folder.")

# 3. Remount fresh
print("Mounting fresh...")
drive.mount('/content/drive')

In [None]:
# ==============================================================================
#  FINAL REPAIR: FRESH TOKENIZER FIX
# ==============================================================================
print("‚è≥ Installing Dependencies...")
!pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps trl peft accelerate bitsandbytes datasets

import os
import shutil
from google.colab import drive
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

# 1. MOUNT DRIVE
drive.mount('/content/drive', force_remount=True)

# 2. DEFINE PATHS
drive_model_path = "/content/drive/MyDrive/Gemma2B_FourWheeler_Fixed"
local_temp_path = "/content/temp_fix_build"
final_drive_path = os.path.join(drive_model_path, "gguf_versions")

os.makedirs(local_temp_path, exist_ok=True)
os.makedirs(final_drive_path, exist_ok=True)

# 3. LOAD YOUR FINE-TUNED MODEL (Weights Only)
print(f"‚è≥ Loading your trained weights from: {drive_model_path}...")
try:
    model, _ = FastLanguageModel.from_pretrained(
        model_name = drive_model_path,
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True,
    )

    # 4. LOAD A FRESH TOKENIZER (The Fix)
    # Instead of using your saved tokenizer, we grab a clean one from Google.
    print("‚ú® Downloading fresh Tokenizer from Google...")
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")

    # 5. CONVERT TO GGUF (Using the Clean Tokenizer)
    print("üöÄ Attempting Conversion (q4_k_m)...")

    # We save to local disk first to avoid Drive timeouts
    model.save_pretrained_gguf(
        local_temp_path,
        tokenizer,
        quantization_method = "q4_k_m"
    )
    print("‚úÖ Conversion Success!")

    # 6. MOVE TO DRIVE
    print("üöö Moving file to Google Drive...")
    found_file = None
    for file in os.listdir(local_temp_path):
        if file.endswith(".gguf"):
            found_file = file
            break

    if found_file:
        shutil.copy(
            os.path.join(local_temp_path, found_file),
            os.path.join(final_drive_path, found_file)
        )
        print("\n" + "="*40)
        print("üéâ SUCCESS! IT WORKED.")
        print(f"File: {found_file}")
        print(f"Location: {final_drive_path}")
        print("="*40)
    else:
        print("‚ùå Error: Script finished but file is missing.")

except Exception as e:
    print(f"‚ùå ERROR: {e}")

In [None]:
# ==============================================================================
#  CPU-ONLY RESCUE MISSION (No GPU Required)
# ==============================================================================
import os
from google.colab import drive

# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)

# 2. Setup Paths
model_path = "/content/drive/MyDrive/Gemma2B_FourWheeler_Fixed"
gguf_folder = os.path.join(model_path, "gguf_versions")
os.makedirs(gguf_folder, exist_ok=True)

# 3. Clone the Raw Conversion Tool (llama.cpp)
print("‚è≥ Downloading llama.cpp (The conversion tool)...")
!git clone https://github.com/ggerganov/llama.cpp
!cd llama.cpp && make  # Compile it for CPU

# 4. Install simple dependencies
!pip install -r llama.cpp/requirements.txt

# 5. Convert to GGUF (Phase 1: High Quality F16)
print("üöÄ Phase 1: Converting to GGUF F16 (This runs on CPU)...")
f16_filename = os.path.join(gguf_folder, "Gemma2B_FourWheeler.F16.gguf")

# We run the python conversion script directly
!python llama.cpp/convert_hf_to_gguf.py {model_path} --outfile {f16_filename} --outtype f16

# 6. Quantize to Q4_K_M (Phase 2: Compress it)
print("üî® Phase 2: Compressing to Q4_K_M...")
q4_filename = os.path.join(gguf_folder, "Gemma2B_FourWheeler.Q4_K_M.gguf")

if os.path.exists(f16_filename):
    # Run the quantize tool
    !./llama.cpp/llama-quantize {f16_filename} {q4_filename} q4_k_m
    print("\n" + "="*40)
    print("üéâ SUCCESS! Your GGUF is ready.")
    print(f"File: {q4_filename}")
    print("="*40)
else:
    print("‚ùå Phase 1 failed. The input model files might be corrupted/missing.")

In [None]:
# ==============================================================================
#  PHASE 2 REPAIR: BUILD QUANTIZE TOOL & COMPRESS
# ==============================================================================
import os
from google.colab import drive

# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)

# 2. Define Paths
model_path = "/content/drive/MyDrive/Gemma2B_FourWheeler_Fixed/gguf_versions"
f16_file = os.path.join(model_path, "Gemma2B_FourWheeler.F16.gguf")
q4_file = os.path.join(model_path, "Gemma2B_FourWheeler.Q4_K_M.gguf")

# 3. Check if we have the Big File
if not os.path.exists(f16_file):
    print("‚ùå Error: Could not find the F16 file. Did Phase 1 finish?")
else:
    print(f"‚úÖ Found F16 Master File: {f16_file}")

    # 4. Build llama.cpp correctly (using CMake)
    print("‚è≥ Building the compression tool (this takes 2-3 mins)...")
    !rm -rf llama.cpp # Clean up old broken version
    !git clone https://github.com/ggerganov/llama.cpp
    !cd llama.cpp && cmake -B build && cmake --build build --config Release

    # 5. Run Quantization (Compress F16 -> Q4)
    print("üî® Compressing model (High Quality -> Fast)...")

    # Note: The tool is now located at build/bin/llama-quantize
    quantize_cmd = f"./llama.cpp/build/bin/llama-quantize {f16_file} {q4_file} q4_k_m"
    exit_code = os.system(quantize_cmd)

    if exit_code == 0:
        print("\n" + "="*40)
        print("üéâ SUCCESS! MISSION COMPLETE.")
        print(f"Your final model is here: {q4_file}")
        print("You can download this file and use it!")
        print("="*40)
    else:
        print(f"‚ùå Compression failed with error code {exit_code}")

In [None]:
# ==============================================================================
#  UPLOAD EVERYTHING TO HUGGING FACE (Maintains Folder Structure)
# ==============================================================================
import os
from getpass import getpass
from google.colab import drive
from huggingface_hub import HfApi, login

# 1. MOUNT DRIVE
drive.mount('/content/drive', force_remount=True)

# 2. SETUP CONFIGURATION
# ------------------------------------------------------------------------------
# Never paste the access token into the notebook source: notebooks get saved
# and shared, and the token would leak with them. It is read from the HF_TOKEN
# environment variable, or typed at a hidden prompt when that is not set.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token (input hidden): ")

repo_id = "Prithwiraj731/FourWheeler-Gemma-2B"

# THE FOLDER TO UPLOAD (This is your main folder in Drive)
folder_path = "/content/drive/MyDrive/Gemma2B_FourWheeler_Fixed"
# ------------------------------------------------------------------------------

# 3. LOGIN & CREATE REPO
print("üîê Logging in to Hugging Face...")
login(token=hf_token)
api = HfApi()

print(f"üî® Creating repository: {repo_id} (if it doesn't exist)...")
# Best effort: a failure here is reported and the upload is still attempted.
try:
    api.create_repo(repo_id=repo_id, private=False, exist_ok=True)
    print("   ‚úÖ Repo is ready.")
except Exception as e:
    print(f"   ‚ùå Error creating repo: {e}")

# 4. UPLOAD FOLDER (This pushes the subfolders too!)
print(f"üöÄ Starting Upload from: {folder_path}")
print("   This might take a while depending on the file sizes...")

try:
    api.upload_folder(
        folder_path=folder_path,
        repo_id=repo_id,
        repo_type="model",
        commit_message="Upload fine-tuned model and GGUF versions"
    )

    print("\n" + "="*50)
    print("üéâ UPLOAD SUCCESSFUL!")
    print(f"View your model here: https://huggingface.co/{repo_id}")
    print("="*50)

except Exception as e:
    print(f"‚ùå Upload failed: {e}")

In [None]:
# ==============================================================================
#  UPLOAD EXISTING DATASET TO HUGGING FACE
# ==============================================================================
import os
from getpass import getpass
from google.colab import drive
from huggingface_hub import HfApi, login

# 1. MOUNT DRIVE
drive.mount('/content/drive', force_remount=True)

# 2. CONFIGURATION
# ------------------------------------------------------------------------------
# Never paste the access token into the notebook source: notebooks get saved
# and shared, and the token would leak with them. It is read from the HF_TOKEN
# environment variable; if that is unset, it is requested at a hidden prompt
# just before login.
hf_token = os.environ.get("HF_TOKEN")
dataset_repo_id = "Prithwiraj731/Four-Wheeler-Dataset"

# The original (un-cleaned) JSONL file
filename = "Four_Wheeler_Dataset_JSON.jsonl"
# We assume you uploaded it to the main Colab folder (root)
file_path = os.path.join("/content", filename)
# ------------------------------------------------------------------------------

# 3. VERIFY FILE EXISTS
if not os.path.exists(file_path):
    print(f"‚ùå Error: Could not find '{filename}' in the Colab folder.")
    print("   üëâ Please drag and drop the file into the Files sidebar on the left!")
else:
    # 4. LOGIN & UPLOAD
    if not hf_token:
        # Only ask for the token once we know there is something to upload.
        hf_token = getpass("Hugging Face token (input hidden): ")

    print("üîê Logging in...")
    login(token=hf_token)
    api = HfApi()

    print(f"üî® Creating/Connecting to Dataset Repo: {dataset_repo_id}...")
    # Top-level boundary of the cell: any failure is reported, not raised.
    try:
        api.create_repo(repo_id=dataset_repo_id, repo_type="dataset", private=False, exist_ok=True)

        print(f"üöÄ Uploading {filename}...")
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo="train.jsonl", # Renaming to standard 'train.jsonl'
            repo_id=dataset_repo_id,
            repo_type="dataset",
            commit_message="Upload original JSONL dataset"
        )

        print("\n" + "="*50)
        print("üéâ DATASET UPLOADED SUCCESSFULLY!")
        print(f"Name to copy for Model Card: {dataset_repo_id}")
        print("="*50)

    except Exception as e:
        print(f"‚ùå Error: {e}")