<a href="https://colab.research.google.com/github/Leads-DigiSaka-System/digisaka_gpt/blob/main/Fine_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
!pip install unsloth trl peft accelerate bitsandbytes



In [30]:
import torch

# Report whether PyTorch can see a CUDA device, and which one.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")

if not cuda_available:
    print("No GPU detected. Training will be significantly slower on CPU.")
else:
    # Device index 0 is the first visible GPU.
    device_name = torch.cuda.get_device_name(0)
    print(f"GPU Device: {device_name}")

CUDA available: True
GPU Device: Tesla T4


In [31]:
import json
import random
from datasets import Dataset

# 1. Helper functions
def random_image_id():
    """Return a synthetic image filename of the form ``img<5 digits>.png``."""
    # randrange's upper bound is exclusive, so 100000 keeps 99999 reachable.
    number = random.randrange(10000, 100000)
    return "img" + str(number) + ".png"

def create_payload_example(tool):
    """Build one training example that maps a user request to an API payload.

    Args:
        tool: Name of the tool to request; "tree_detection" or
            "crown_segmentation".

    Returns:
        dict with keys "instruction", "input" (the natural-language request)
        and "output" (the JSON payload serialized as a string).

    Raises:
        ValueError: If ``tool`` is not one of the supported tool names.
    """
    request_templates = {
        "tree_detection": "Detect trees in image {img_id}.",
        "crown_segmentation": "Segment tree crowns in image {img_id}.",
    }
    # Reject unknown tools up front; previously they fell through both branches
    # and surfaced as an UnboundLocalError on `user_input`.
    if tool not in request_templates:
        raise ValueError(
            f"Unsupported tool {tool!r}; expected one of {sorted(request_templates)}"
        )

    img_id = random_image_id()
    instruction = "Convert user request into API payload JSON only."
    user_input = request_templates[tool].format(img_id=img_id)
    output = json.dumps({"tool": tool, "params": {"image_id": img_id}})
    return {"instruction": instruction, "input": user_input, "output": output}

def create_response_example(tool, detections, segments=None):
    """Build one training example that maps an API result to a human answer.

    Args:
        tool: "tree_detection" or "crown_segmentation".
        detections: Number of detected trees reported by the API result.
        segments: Number of segmented crowns; required for
            "crown_segmentation", ignored for "tree_detection".

    Returns:
        dict with keys "instruction", "input" (the API result serialized as a
        JSON string) and "output" (the human-readable sentence).

    Raises:
        ValueError: If ``tool`` is unsupported, or if ``segments`` is missing
            for "crown_segmentation".
    """
    instruction = "Convert API JSON result into human readable answer."
    if tool == "tree_detection":
        api_result = {"success": True, "total_detections": detections}
        output = f"The tree detection tool successfully detected {detections} trees in the uploaded image."
    elif tool == "crown_segmentation":
        # Without this guard the example would contain "segmented None crowns".
        if segments is None:
            raise ValueError("segments is required when tool is 'crown_segmentation'")
        api_result = {"success": True, "total_detections": detections, "total_segments": segments}
        output = f"The crown segmentation tool successfully detected {detections} trees and segmented {segments} crowns in the uploaded image."
    else:
        # Previously an unknown tool surfaced as an UnboundLocalError on return.
        raise ValueError(
            f"Unsupported tool {tool!r}; expected 'tree_detection' or 'crown_segmentation'"
        )
    return {"instruction": instruction, "input": json.dumps(api_result), "output": output}

# 2. Generate and save dataset
# Seed the RNG so that re-running the notebook regenerates the same dataset
# (3407 is the seed value this notebook also uses for LoRA and training).
random.seed(3407)

num_payload_examples = 1000
num_response_examples = 20
# NOTE(review): response examples are ~2% of the dataset; consider rebalancing
# if the "API result -> human answer" task underperforms.
dataset_list = []

for _ in range(num_payload_examples):
    tool = random.choice(["tree_detection", "crown_segmentation"])
    dataset_list.append(create_payload_example(tool))

for _ in range(num_response_examples):
    detections = random.randint(100, 300)
    # Segment count is at most 5 below the detection count.
    segments = detections - random.randint(0, 5)
    tool = random.choice(["tree_detection", "crown_segmentation"])
    if tool == "tree_detection":
        dataset_list.append(create_response_example(tool, detections))
    else:
        dataset_list.append(create_response_example(tool, detections, segments))

with open("generated_dataset.json", "w", encoding="utf-8") as f:
    json.dump(dataset_list, f, indent=2)

# 3. Load dataset
# Round-trip through the file so later cells consume exactly what was saved.
# The file was written just above, so a missing file is a real error and is
# allowed to propagate instead of silently continuing with an empty dataset.
with open("generated_dataset.json", "r", encoding="utf-8") as f:
    loaded_data = json.load(f)
print(f"✓ Loaded {len(loaded_data)} examples from generated_dataset.json")

# 4. Format function
def format_prompt(example):
    """Render one dataset record as a single training string.

    Args:
        example: Mapping that may contain "instruction", "input" and "output";
            any missing key is rendered as an empty string.

    Returns:
        The three labelled sections joined by newlines, terminated by the
        literal "<|endoftext|>" marker.
    """
    sections = (
        ("### Instruction: ", "instruction"),
        ("### Input: ", "input"),
        ("### Output: ", "output"),
    )
    rendered = [f"{label}{example.get(key, '')}" for label, key in sections]
    return "\n".join(rendered) + "<|endoftext|>"

# 5. Convert to Hugging Face Dataset
# Render every loaded record to its prompt string and store the strings in a
# single "text" column.
formatted_texts = list(map(format_prompt, loaded_data))
hf_dataset = Dataset.from_dict({"text": formatted_texts})

# 6. Verification
# Show the record count and the first rendered prompt as a sanity check.
print(f"✓ Total examples in HF Dataset: {len(hf_dataset)}")
print("\nSample formatted prompt:")
print(hf_dataset[0]["text"])


✓ Loaded 1020 examples from generated_dataset.json
✓ Total examples in HF Dataset: 1020

Sample formatted prompt:
### Instruction: Convert user request into API payload JSON only.
### Input: Detect trees in image img64959.png.
### Output: {"tool": "tree_detection", "params": {"image_id": "img64959.png"}}<|endoftext|>


In [32]:
# Re-export the prepared data under the names `file` and `dataset`;
# `dataset` is what the trainer cell passes as `train_dataset`.
file, dataset = loaded_data, hf_dataset

for message in (
    f"Verified: 'file' contains {len(file)} examples.",
    f"Verified: 'dataset' is a Hugging Face Dataset with {len(dataset)} records.",
    "Dataset engineering subtask completed.",
):
    print(message)

Verified: 'file' contains 1020 examples.
Verified: 'dataset' is a Hugging Face Dataset with 1020 records.
Dataset engineering subtask completed.


In [33]:
from unsloth import FastLanguageModel
import torch

# 1. Load pre-quantized 4-bit model and tokenizer
model_name = "unsloth/Phi-3-mini-4k-instruct-bnb-4bit"
max_seq_length = 2048
dtype = None  # Auto detection
load_in_4bit = True

base_model_kwargs = dict(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# 2. Add LoRA adapters for fine-tuning
# Adapters are attached to the attention projections and the MLP projections.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
lora_kwargs = dict(
    r=64,
    target_modules=lora_target_modules,
    lora_alpha=128,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)
model = FastLanguageModel.get_peft_model(model, **lora_kwargs)

print(f"Model {model_name} loaded and LoRA adapters configured successfully.")

==((====))==  Unsloth 2026.1.4: Fast Mistral patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.34. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Model unsloth/Phi-3-mini-4k-instruct-bnb-4bit loaded and LoRA adapters configured successfully.


In [34]:
from trl import SFTTrainer
from transformers import TrainingArguments
import os

# 1. Setup Training Arguments
# Use bf16 when the GPU supports it, otherwise fall back to fp16.
bf16_supported = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    # Output / checkpointing
    output_dir="outputs",
    save_strategy="epoch",
    save_total_limit=2,
    # Logging
    logging_steps=25,
    report_to="none",
    # Batch size and schedule
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    warmup_steps=10,
    lr_scheduler_type="linear",
    # Optimizer
    learning_rate=2e-4,
    optim="adamw_8bit",
    weight_decay=0.01,
    # Precision
    fp16=not bf16_supported,
    bf16=bf16_supported,
    # Misc
    seed=3407,
    dataloader_pin_memory=False,
)

# 2. Initialize SFTTrainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
)

# 3. Execute Training
print("Starting training...")
trainer_stats = trainer.train()

# 4. Save the Model and Tokenizer
# Both are written to the same directory.
save_dir = "/content/tree_tool_model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)

print(f"Training complete. Model saved to: {save_dir}")

Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/1020 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


Starting training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,020 | Num Epochs = 3 | Total steps = 384
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 119,537,664 of 3,940,617,216 (3.03% trained)


Step,Training Loss
25,0.767


KeyboardInterrupt: 

In [35]:
import json
import torch
from unsloth import FastLanguageModel

def mock_api_call(payload_dict):
    """Simulates the geospatial API execution for testing the pipeline.

    Args:
        payload_dict: Parsed API payload, expected to be a dict with a "tool"
            name and an optional "params" dict that may carry "image_id".

    Returns:
        dict: For a supported tool, a success result with fixed counts
        (182 detections; plus 179 segments for "crown_segmentation") and the
        echoed ``image_id``. Otherwise ``{"success": False, "error": ...}``.
    """
    # The payload is model-generated, so do not assume it is a JSON object.
    if not isinstance(payload_dict, dict):
        return {"success": False, "error": "Payload must be a JSON object."}

    tool = payload_dict.get("tool")
    params = payload_dict.get("params")
    # `"params": null` (or any non-object) is treated the same as a missing key.
    if not isinstance(params, dict):
        params = {}
    image_id = params.get("image_id", "default_image.png")

    # Simulate responses based on the detected tool
    if tool == "tree_detection":
        return {
            "success": True,
            "total_detections": 182,
            "image_id": image_id
        }
    elif tool == "crown_segmentation":
        return {
            "success": True,
            "total_detections": 182,
            "total_segments": 179,
            "image_id": image_id
        }
    else:
        return {"success": False, "error": f"Tool '{tool}' not supported in mock."}

# Instruction strings must match the ones used to build the training set.
_PAYLOAD_INSTRUCTION = "Convert user request into API payload JSON only."
_SUMMARY_INSTRUCTION = "Convert API JSON result into human readable answer."


def _build_prompt(instruction, input_text):
    """Render an inference prompt in the same layout as the training texts."""
    # Mirrors format_prompt() up to the "### Output:" marker. No trailing space:
    # the model is left to produce the separator and the answer itself.
    return f"### Instruction: {instruction}\n### Input: {input_text}\n### Output:"


def _generate_completion(prompt, max_new_tokens=100):
    """Greedily decode a completion for ``prompt`` and return only the new text."""
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = encoded["input_ids"].shape[1]
    # do_sample=False requests greedy decoding explicitly (instead of temperature=0.0).
    generated = model.generate(**encoded, max_new_tokens=max_new_tokens, do_sample=False)
    # Decode only the newly generated tokens (skipping the input prompt).
    completion = tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
    # If the model runs past its answer and starts another "### ..." section,
    # keep only the first answer.
    return completion.split("\n###", 1)[0].strip()


def integrate_pipeline(user_prompt):
    """
    Integrated pipeline that runs the fine-tuned model twice:
    1. To create an API payload from natural language.
    2. To summarize the API results back into a human response.

    Args:
        user_prompt: Natural-language request from the user.

    Returns:
        str: The generated human-readable summary, or a message starting with
        "Pipeline Error:" when the payload cannot be parsed or the API call
        reports failure.
    """
    print("--- PIPELINE INITIATED ---")
    print(f"USER INPUT: {user_prompt}")

    # STEP 1: PREPARE MODEL FOR INFERENCE
    FastLanguageModel.for_inference(model)

    # STEP 2: PHASE 1 - CONVERT TEXT TO API PAYLOAD
    # The prompt uses the "### Instruction / ### Input / ### Output" layout the
    # model was fine-tuned on, not the tokenizer's chat template.
    generated_payload = _generate_completion(_build_prompt(_PAYLOAD_INSTRUCTION, user_prompt))
    print(f"\n[Step 1] Generated Payload: {generated_payload}")

    # STEP 3: PARSE THE MODEL OUTPUT
    # Take the outermost {...} span so stray text around the JSON is tolerated.
    start = generated_payload.find("{")
    end = generated_payload.rfind("}")
    if start == -1 or end < start:
        return "Pipeline Error: Failed to parse model payload: no JSON object found in model output"
    try:
        payload_dict = json.loads(generated_payload[start:end + 1])
    except json.JSONDecodeError as e:
        return f"Pipeline Error: Failed to parse model payload: {e}"

    # STEP 4: PHASE 2 - EXECUTE API CALL (MOCK)
    api_data = mock_api_call(payload_dict)
    print(f"[Step 2] API Result (Mock): {json.dumps(api_data)}")

    # Do not ask the model to summarize a failed call; it would otherwise be
    # reported to the user as "0 detections".
    if not api_data.get("success", False):
        return f"Pipeline Error: API call failed: {api_data.get('error', 'unknown error')}"

    # STEP 5: PHASE 3 - CONVERT API RESULT TO HUMAN SUMMARY
    # Keep only the fields that appear in the training examples for this task.
    summary_input = {
        "success": api_data.get("success", False),
        "total_detections": api_data.get("total_detections", 0)
    }
    if "total_segments" in api_data:
        summary_input["total_segments"] = api_data["total_segments"]

    generated_summary = _generate_completion(
        _build_prompt(_SUMMARY_INSTRUCTION, json.dumps(summary_input))
    )

    print(f"[Step 3] Model Response: {generated_summary}")
    print("--- PIPELINE COMPLETE ---\n")

    return generated_summary

# RUN TEST CASE
# Single end-to-end smoke test of the pipeline on a tree-detection request.
test_prompt = "Can you detect the trees in image aerial_0092.png?"
final_human_answer = integrate_pipeline(test_prompt)
print("FINAL RESULT:", final_human_answer, sep="\n")

--- PIPELINE INITIATED ---
USER INPUT: Can you detect the trees in image aerial_0092.png?

[Step 1] Generated Payload: {"tool": "tree_detection", "params": {"image_id": "aerial_0092.png"}}
[Step 2] API Result (Mock): {"success": true, "total_detections": 182, "image_id": "aerial_0092.png"}
[Step 3] Model Response: Detected trees in image img12277.png: 182.
--- PIPELINE COMPLETE ---

FINAL RESULT:
Detected trees in image img12277.png: 182.


In [28]:
# Exercise both supported tools end to end, one request per tool.
tree_detection_prompt = "Please detect all the trees in image survey_01.png"
crown_segmentation_prompt = "Segment the tree crowns for image drone_capture_55.png"

print("--- Testing Tree Detection Workflow ---")
tree_detection_result = integrate_pipeline(tree_detection_prompt)
# A blank line separates the two workflow reports.
print("Final Human Response:", tree_detection_result, end="\n\n")

print("--- Testing Crown Segmentation Workflow ---")
crown_segmentation_result = integrate_pipeline(crown_segmentation_prompt)
print("Final Human Response:", crown_segmentation_result)

--- Testing Tree Detection Workflow ---
--- PIPELINE INITIATED ---
USER INPUT: Please detect all the trees in image survey_01.png

[Step 1] Generated Payload: {"tool": "tree_detection", "params": {"image_id": "survey_01.png"}}
[Step 2] API Result (Mock): {"success": true, "total_detections": 182, "image_id": "survey_01.png"}
[Step 3] Model Response: The tree detection tool successfully detected 182 trees in the uploaded image.
--- PIPELINE COMPLETE ---

Final Human Response: The tree detection tool successfully detected 182 trees in the uploaded image.

--- Testing Crown Segmentation Workflow ---
--- PIPELINE INITIATED ---
USER INPUT: Segment the tree crowns for image drone_capture_55.png

[Step 1] Generated Payload: {"tool": "crown_segmentation", "params": {"image_id": "drone_capture_55.png"}}
[Step 2] API Result (Mock): {"success": true, "total_detections": 182, "total_segments": 179, "image_id": "drone_capture_55.png"}
[Step 3] Model Response: The crown segmentation tool successfull