### Installation

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
import gc
import re
import torch

# Install Unsloth and its companions. The branch is picked by checking whether
# the substring "COLAB_" occurs anywhere in the concatenated environment
# variable names.
# NOTE(review): `google.colab` is imported unconditionally earlier in this cell,
# so the non-Colab branch looks unreachable in practice — confirm.
if "COLAB_" not in "".join(os.environ.keys()):
  !pip install unsloth
else:
  # Do this only in Colab notebooks! Otherwise use pip install unsloth
  # `--no-deps` installs only the listed packages (with the pins given here)
  # without resolving their own dependencies.
  !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
  !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
  # Upgrade transformers to the required version
  !pip install --upgrade "transformers>=4.53.0"
  !pip install --no-deps unsloth
  # pynvml is used by the optional GPU-memory report after each training step.
  !pip install pynvml
  !pip install outlines

from google.colab import files
from datasets import load_dataset, DatasetDict
from unsloth.chat_templates import get_chat_template
from transformers import AutoTokenizer
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
from unsloth import FastLanguageModel
import json
from pprint import pprint

<a name="Data"></a>
### Data Prep


In [None]:
# =================================================================================
# I. MODEL AND TOKENIZER INITIALIZATION
# =================================================================================

# --- 1. Model Configuration ---
# Sequence-length budget (in tokens) handed to Unsloth at load time and reused
# by the trainers further down.
max_seq_length = 8192

# --- 2. Load Model and Tokenizer ---
# Fetch the bitsandbytes 4-bit Llama-3.2 3B Instruct checkpoint through Unsloth;
# loading in 4-bit keeps memory usage down.
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=True,
    max_seq_length=max_seq_length,
    model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
)

# --- 3. Configure PEFT with LoRA ---
# Wrap the base model with LoRA adapters so only a small set of parameters is
# trained instead of the full model.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
feed_forward_layers = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    target_modules=attention_projections + feed_forward_layers,
    r=256,                                  # LoRA rank
    lora_alpha=512,                         # LoRA scaling factor (2 x rank here)
    lora_dropout=0,                         # no dropout on the adapter path
    bias="none",                            # bias terms are not trained
    use_rslora=False,                       # Rank-Stabilized LoRA stays off
    loftq_config=None,                      # LoftQ initialisation stays off
    use_gradient_checkpointing="unsloth",   # memory-saving checkpointing mode
    random_state=3407,                      # seed for reproducibility
)


# =================================================================================
# II. DATA PREPARATION AND FORMATTING
# =================================================================================

# --- 1. Configure Chat Template ---
# Rebind the tokenizer to a version carrying the "llama-3.2" chat template so
# that apply_chat_template() renders conversations in that format.
print("Configuring Llama-3.2 chat template...")
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.2")
print("Template configuration complete.")

# --- 2. Define Formatting Function ---
# This function converts each dataset entry (instruction, input, output)
# into the Llama-3.2 chat format.
def formatting_prompts_func(examples):
    """Render a batch of instruction/input/output rows as chat-template text.

    Args:
        examples: Batched mapping with parallel "instruction", "input" and
            "output" sequences (the shape `Dataset.map(..., batched=True)`
            passes in).

    Returns:
        A dict with a single "text" key holding one rendered string per row.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])

    # Each row becomes a three-turn conversation: the instruction is the system
    # turn, the input is the user turn and the output is the assistant turn.
    # No generation prompt is appended because the assistant turn is present.
    texts = [
        tokenizer.apply_chat_template(
            [
                {"role": "system", "content": system_turn},
                {"role": "user", "content": user_turn},
                {"role": "assistant", "content": assistant_turn},
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        for system_turn, user_turn, assistant_turn in rows
    ]
    return {"text": texts}

# --- 3. Load Datasets ---
# Ask the user for the two JSONL files that drive the two fine-tuning steps.
from google.colab import files
uploaded = files.upload()

# Exactly two uploads are required: one per training step.
if len(uploaded) != 2:
    raise ValueError("Please upload exactly 2 JSONL files for the two-step fine-tuning.")

# Alphabetical order of the file names decides which file feeds which step.
file_names = sorted(uploaded)
file1, file2 = file_names
print(f"Files loaded:\nStep 1: {file1}\nStep 2: {file2}")

# Read each JSONL file as a single "train" split.
dataset1, dataset2 = [
    load_dataset("json", data_files={"train": name}, split="train")
    for name in file_names
]

# --- 4. Split Datasets ---
# Hold out 5% of each dataset for validation, with a fixed seed.
split1, split2 = [
    dataset.train_test_split(test_size=0.05, seed=42)
    for dataset in (dataset1, dataset2)
]

# --- 5. Apply Formatting to All Datasets ---
def _format_split(split):
    """Return (train, validation) datasets with the chat-formatted "text" column added."""
    return (
        split["train"].map(formatting_prompts_func, batched=True),
        split["test"].map(formatting_prompts_func, batched=True),
    )


print("\nFormatting datasets...")
train_dataset1, val_dataset1 = _format_split(split1)
train_dataset2, val_dataset2 = _format_split(split2)

print("\nAll datasets have been formatted.")
for step, (train_ds, val_ds) in enumerate(
    [(train_dataset1, val_dataset1), (train_dataset2, val_dataset2)], start=1
):
    print(f"Step {step} - Training samples: {len(train_ds)}, Validation samples: {len(val_ds)}")

# --- 6. Verify Formatted Output ---
# Print the first rendered training sample of each step for a visual check.
for step, train_ds in enumerate([train_dataset1, train_dataset2], start=1):
    print(f"\n--- Example of a formatted sample from Step {step} ---")
    print(train_ds[0]['text'])


<a name="Train"></a>
### Train the model

In [None]:
# Mixed precision: use BF16 when the GPU supports it, otherwise fall back to FP16.
use_bf16 = is_bfloat16_supported()

# Hyperparameters for the first fine-tuning step.
# NOTE(review): an eval_dataset is passed to the trainer below, but no
# evaluation schedule is configured here — confirm whether evaluation is
# expected to run during training.
step1_args = TrainingArguments(
    output_dir="outputs",             # where training outputs are written
    report_to="none",                 # no external logging integration
    seed=3407,                        # reproducibility seed
    num_train_epochs=2,               # full passes over the training data
    # max_steps = 100,                # optional hard cap on optimisation steps
    per_device_train_batch_size=4,    # samples per device per forward pass
    gradient_accumulation_steps=8,    # 4 x 8 = 32 samples per optimiser update
    learning_rate=1e-5,               # initial learning rate
    lr_scheduler_type="linear",       # learning-rate schedule
    warmup_steps=20,                  # warmup steps before the schedule applies
    weight_decay=0.01,                # weight-decay coefficient
    optim="adamw_8bit",               # 8-bit AdamW to save memory
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=1,                  # log metrics on every step
)

trainer_step1 = SFTTrainer(
    model=model,                      # LoRA-wrapped model to fine-tune
    tokenizer=tokenizer,
    train_dataset=train_dataset1,
    eval_dataset=val_dataset1,
    # dataset_text_field = "text",    # left commented out, as in the original setup
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    max_seq_length=max_seq_length,
    dataset_num_proc=2,               # worker processes for dataset preprocessing
    packing=False,                    # one sample per sequence, no packing
    args=step1_args,
)

# Run the first training step and keep the returned statistics.
trainer_stats = trainer_step1.train()

# Save the trained LoRA adapters and the tokenizer to Google Drive.
# Both must land in the SAME directory: the second fine-tuning step reloads the
# model from ".../CAD_history_model_v15.3", so the adapters have to be there
# (the model path previously ended in "v15." and did not match).
step1_output_dir = "/content/drive/MyDrive/CAD_history_model_v15.3"
model.save_pretrained(step1_output_dir)
tokenizer.save_pretrained(step1_output_dir)

# Alternative save path (commented out)
# model.save_pretrained("/content/drive/MyDrive/CAD_history_model_v15.5")
# tokenizer.save_pretrained("/content/drive/MyDrive/CAD_history_model_v15.5")

# ======================================================
#  Memory Release Section
# ======================================================

# 1. Delete large objects that are no longer needed
del model
del trainer_step1
# Optionally delete other large variables, such as tokenized datasets
# del train_dataset1, val_dataset1

# 2. Force Python's garbage collector to free up memory
gc.collect()

# 3. Clear the CUDA memory cache in PyTorch
torch.cuda.empty_cache()

# 4. (Optional) Print GPU memory usage to verify cleanup
# Requires 'pynvml' library: !pip install pynvml
# Best-effort only: any failure (missing package, no GPU, ...) is reported and
# otherwise ignored so the notebook can continue.
try:
    # Import only the names used, rather than `from pynvml import *`, so the
    # notebook namespace is not flooded with NVML symbols.
    from pynvml import (
        nvmlDeviceGetHandleByIndex,
        nvmlDeviceGetMemoryInfo,
        nvmlInit,
        nvmlShutdown,
    )

    nvmlInit()
    try:
        handle = nvmlDeviceGetHandleByIndex(0)
        info = nvmlDeviceGetMemoryInfo(handle)
    finally:
        # Pair every successful nvmlInit() with a shutdown.
        nvmlShutdown()
    print(f"✅ GPU memory released. Current memory usage: {info.used//1024**2} MB")
except Exception as e:
    print(f"Unable to check GPU memory, but cleanup executed. Error: {e}")


In [None]:
# =================================================================================
# SECOND-STEP FINE-TUNING
# =================================================================================

# --- 1. Load the Model from Step 1 ---
# Reload from the directory written after the first training step so that
# training continues from those LoRA adapters.
print("Loading the model fine-tuned in Step 1...")
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=True,
    max_seq_length=max_seq_length,
    model_name="/content/drive/MyDrive/CAD_history_model_v15.3",  # output of step 1
)
print("Model loaded successfully.")

# --- 2. Configure the SFTTrainer for the Second Dataset ---
# Same recipe as the first step, pointed at the second dataset pair.
use_bf16 = is_bfloat16_supported()

# --- Training Arguments (can be adjusted for the second step if needed) ---
step2_args = TrainingArguments(
    output_dir="outputs",
    report_to="none",
    seed=3407,
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    learning_rate=1e-5,
    lr_scheduler_type="linear",
    warmup_steps=20,
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=1,
)

trainer_step2 = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset2,
    eval_dataset=val_dataset2,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=step2_args,
)

# --- 3. Start the Second Training Job ---
print("\nStarting second training step...")
trainer_stats = trainer_step2.train()
print("Second training step completed.")

# --- 4. Save the Final Model and Tokenizer ---
# Adapters and tokenizer go to a new directory, separate from the step-1 output.
final_output_dir = "/content/drive/MyDrive/CAD_history_model_v15.4"
print("Saving final model and tokenizer...")
model.save_pretrained(final_output_dir)
tokenizer.save_pretrained(final_output_dir)
print("Final model and tokenizer saved successfully.")


# =================================================================================
# FINAL MEMORY CLEANUP
# =================================================================================
# Release GPU memory after the final training step is complete.

# --- 1. Delete Unused Objects ---
del model
del trainer_step2

# --- 2. Force Garbage Collection ---
gc.collect()

# --- 3. Empty PyTorch's CUDA Cache ---
torch.cuda.empty_cache()

# --- 4. (Optional) Check GPU Memory Usage ---
# Best-effort only: any failure (missing package, no GPU, ...) is reported and
# otherwise ignored.
try:
    # Import only the names used, rather than `from pynvml import *`, so the
    # notebook namespace is not flooded with NVML symbols.
    from pynvml import (
        nvmlDeviceGetHandleByIndex,
        nvmlDeviceGetMemoryInfo,
        nvmlInit,
        nvmlShutdown,
    )

    nvmlInit()
    try:
        handle = nvmlDeviceGetHandleByIndex(0)
        info = nvmlDeviceGetMemoryInfo(handle)
    finally:
        # Pair every successful nvmlInit() with a shutdown.
        nvmlShutdown()
    print(f"✅ Memory released. Current used memory: {info.used//1024**2} MB")
except Exception as e:
    print(f"Could not check memory, but release operations were executed. Error: {e}")


<a name="Inference"></a>
### Inference

test A

In [None]:
import json
import os
import re
from google.colab import drive
from unsloth import FastLanguageModel
from transformers import TextStreamer
from pprint import pprint

# ==============================================================================
# 1. ENVIRONMENT SETUP AND MODEL LOADING
# ==============================================================================

# --- Mount Google Drive ---
# Best-effort mount: a failure is reported but does not stop the cell.
try:
    drive.mount('/content/drive')
except Exception as mount_error:
    print(f"Drive already mounted or mounting failed: {mount_error}")
else:
    print("Google Drive mounted successfully.")

# --- Load the Fine-Tuned Model ---
print("\nLoading the fine-tuned model...")
# Directory holding the LoRA adapters and tokenizer saved by the second
# fine-tuning step. Make sure it points to your final, intended model version.
model_path = "/content/drive/MyDrive/CAD_history_model_v15.4"
# Sentinel: stays None unless the base model AND the adapters load successfully.
model = None

try:
    # --- Pre-load Sanity Checks ---
    print(f"Checking model path: {model_path}")
    if not os.path.exists(model_path):
        raise FileNotFoundError("Model directory does not exist! Please confirm the path is correct.")
    if not os.path.isdir(model_path):
        raise NotADirectoryError("The specified path is not a directory.")

    # Check for essential files to ensure the model was saved correctly.
    required_files = ["adapter_config.json", "tokenizer_config.json", "special_tokens_map.json"]
    missing_files = [f for f in required_files if not os.path.exists(os.path.join(model_path, f))]
    if missing_files:
        raise FileNotFoundError(
            f"The model directory '{os.path.basename(model_path)}' is missing key files: {', '.join(missing_files)}. "
            "This usually means the model was not saved correctly or files are corrupted."
        )
    print("✅ Model file check passed. Attempting to load...")

    # --- Load Model and Apply Adapters ---
    # Step 1: Load the original, un-tuned base model.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",  # Must point to the original base model.
        max_seq_length=8192,
        load_in_4bit=True,
    )
    # Step 2: Apply the saved LoRA adapters to the base model.
    print("Applying saved LoRA adapters...")
    model.load_adapter(model_path)
    print("Adapters loaded successfully!")

    # Enable Unsloth's fast inference mode.
    FastLanguageModel.for_inference(model)

except Exception as e:
    # Reset the sentinel. `model` may already hold the bare base model if a
    # later step (adapter loading, inference setup) failed; leaving it bound
    # would let the downstream `if model:` guard run inference without the
    # fine-tuned adapters.
    model = None
    print("\n❌ Model loading failed! Error details below:")
    print(f"Error Type: {type(e).__name__}")
    print(f"Error Message: {e}")
    print("\n--- Please check your model files and path based on the error message. ---")

# ==============================================================================
# 2. HELPER FUNCTIONS AND PROMPT DEFINITIONS
# ==============================================================================

def extract_json_block(raw_text: str):
    """Return the substring of *raw_text* that looks like a JSON object.

    A fenced Markdown block tagged ``json`` takes priority. Otherwise the span
    from the first "{" to the last "}" is returned. The result is NOT parsed or
    validated here; callers are expected to run it through ``json.loads``.

    Returns:
        The candidate JSON text, or None when *raw_text* is not a string or
        contains no brace-delimited span.
    """
    if not isinstance(raw_text, str):
        return None

    # Preferred source: a ```json ... ``` fenced block wrapping an object.
    fenced = re.search(r"```json\s*(\{.*?\})\s*```", raw_text, flags=re.DOTALL)
    if fenced is not None:
        return fenced.group(1)

    # Fallback: everything between the outermost pair of curly braces.
    start = raw_text.find("{")
    end = raw_text.rfind("}")
    if start == -1 or end <= start:
        return None
    return raw_text[start : end + 1]

def generate_response(instruction, user_input, max_tokens=8192):
    """Generate one assistant reply using the Llama-3.2 chat template.

    Relies on the notebook-level ``model`` and ``tokenizer`` and runs on CUDA.

    Args:
        instruction: Text placed in the system turn.
        user_input: Text placed in the user turn.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The decoded text of the generated tokens only (prompt tokens and
        special tokens are stripped).
    """
    messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_input},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # The rendered template already carries its own special tokens, so do not
    # let the tokenizer add them a second time (would duplicate the BOS token).
    inputs = tokenizer([prompt], return_tensors="pt", add_special_tokens=False).to("cuda")
    prompt_len_tokens = inputs["input_ids"].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        # temperature / top_p only take effect in sampling mode; request it
        # explicitly instead of depending on the checkpoint's generation config.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Slice off the prompt so only the newly generated continuation is decoded.
    return tokenizer.decode(outputs[0][prompt_len_tokens:], skip_special_tokens=True)

# --- User Inputs ---
# Sample part description and bounding box; both are substituted into the
# step-1 input template when the inference chain below runs.
brief_description_input = "A stepped cylindrical disc with a central through-hole."
bbox_input = "X=2.2,Y=0.7,Z=2.2"

# --- System Prompts for the Two-Step Process ---
# Each prompt dict has two parts:
#   "instruction"    -> sent as the system turn
#   "input_template" -> user turn, filled in with str.format()
# Step 1 turns a brief description + bounding box into a textual build plan.
# Its template expects the placeholders {brief_desc} and {bbox_info}.
# NOTE(review): these strings presumably mirror the prompts used in the
# training data — confirm before editing their wording.
prompt_step1 = {
    "instruction": (
        "As a CAD expert, create a procedural build plan from the provided part description and bounding box.\n\n"
        "### Requirements ###\n"
        "1.  **Logical Flow**: The plan must be geometrically sound, and the steps (`Sketch`, `Extrude`) must be in a logical sequence to build the part.\n"
        "2.  **Bounding Box Adherence**: The final geometry's external dimensions must strictly match the provided bounding box. Use the bounding box to define the primary feature dimensions.\n"
        "3.  **Natural Language**: Describe the geometric meaning of operations.\n"
        "4.  **Conciseness**: Be direct. Do not include any introductory or concluding text. Your response must start directly with the build plan.\n\n"
        "### Formatting Rules ###\n"
        "- **Top-Level Features**: For each feature like `Sketch` or `Extrude`, start a new line with its name in bold (e.g., `**Sketch1**`).\n"
        "- **Profile Description**: For a `Sketch`, create an indented bullet point for each `Profile` it contains (e.g., `- **Profile1**`). On the same line, describe its geometry, focusing on loop relationships (inner/outer), shapes, and key dimensions (e.g., \"side length X\", \"radius Y\").\n"
        "- **Extrude Description**: For an `Extrude` feature, add a colon and the description on the same line as its name. The description must specify the referenced profile, the operation (new body, join, cut), direction, and distance."
    ),
    "input_template": (
        "Task: Infer a detailed, precise, and procedural build plan from a brief part description and its bounding box. Provide only the plan itself, strictly following all rules.\n\n"
        "Brief description: {brief_desc}\n"
        "Bounding box: {bbox_info}\n"
    )
}

# Step 2 turns a build plan into the JSON modeling sequence.
# Its template expects the single placeholder {build_plan}.
prompt_step2 = {
    "instruction": (
        "Task: Generate a complete, precise, and parametric JSON modeling sequence based on the provided Build Plan. Your output must strictly adhere to the following rules:\n"
        "1. **Adherence to Plan**: Strictly and faithfully follow every step, operation, and sequence defined in the 'Build Plan'. Not only the profile itself, but also all the points and curves used within it must be defined. Do not add, omit, or alter the modeling process.\n"
        "2. **JSON Structure Integrity**: The root of each JSON file should be 'entities' ONLY. The 'entities' section is a library of all individual modeling features, such as 'Sketch' and 'Extrude'.\n"
        "3. **Parameter Integrity**: Provide correct values for all fields in the JSON structure (including numerical values, attributes, and references). No placeholders or missing parameters are permitted. Ensure all entity references are correct.\n"
        "4. **Geometric & Numerical Accuracy**: The geometric logic must be flawless: profiles must be closed, constituent curves must connect sequentially (head-to-tail), and the number and type of points/curves defined must match the Build Plan.\n"
        "5. **Structural & Syntactic Correctness**: Strictly adhere to the predefined JSON structure, naming conventions, and official terminology. Do not add or remove fields or invent terms. The final output must be a single, valid JSON object."
    ),
    "input_template": (
        "Task: Generate a complete, precise, and parametric JSON modeling sequence based on the provided Build Plan. Do not include any comments, explanations, or any other extraneous text.\n\n"
        "--- Build Plan ---\n"
        "{build_plan}"
    )
}


# ==============================================================================
# 3. EXECUTE THE TWO-STEP INFERENCE CHAIN
# ==============================================================================

# Guard: the chain only runs when the loading section left a usable model behind.
if not model:
    print("\nAborting execution because the model was not loaded successfully.")
else:
    banner = "="*50

    # --- Step 1: Generate the Modeling Build Plan ---
    print(banner)
    print("🚀 STEP 1: Generating Modeling Build Plan...")
    print(banner)
    step1_input = prompt_step1["input_template"].format(
        brief_desc=brief_description_input,
        bbox_info=bbox_input,
    )
    build_plan = generate_response(prompt_step1["instruction"], step1_input, max_tokens=1024)
    print(f"\n✅ Step 1 Generated Build Plan:\n---\n{build_plan}\n---")

    # --- Step 2: Convert the Build Plan to Full JSON ---
    # The plan produced above becomes the user input of the second prompt.
    print("\n" + banner)
    print("🚀 STEP 2: Converting Build Plan to Full JSON...")
    print(banner)

    step2_input = prompt_step2["input_template"].format(build_plan=build_plan)
    step2_raw_output = generate_response(prompt_step2["instruction"], step2_input, max_tokens=8192)
    print(f"\n✅ Step 2 Raw Output from Model:\n---\n{step2_raw_output}\n---")

    # --- Final Processing: Extract and Validate JSON ---
    final_json_str = extract_json_block(step2_raw_output)

    if final_json_str:
        print("\n✅ Step 2 Extracted Final JSON. Checking syntax...")
        try:
            # Parsing is the syntax check; the parsed object is only displayed.
            parsed_json = json.loads(final_json_str)
        except json.JSONDecodeError as e:
            print(f"\n❌ JSON Parsing Failed: The extracted block is not valid JSON. Error: {e}")
            print("\n--- Extracted String was: ---\n", final_json_str)
        else:
            print("---")
            pprint(parsed_json, sort_dicts=False, width=120)
            print("---")
            print("\n✅✅✅ SUCCESS! A syntactically valid JSON object was extracted and parsed. ✅✅✅")
    else:
        print("\n❌ Step 2 Failed: Could not extract a final JSON object from the raw output.")


In [None]:
import io
import os
from google.colab import files
from tqdm.notebook import tqdm

# ==============================================================================
# BATCH INFERENCE SCRIPT (with Resume Capability)
# ==============================================================================
# This script assumes the model, tokenizer, prompts, and helper functions
# from the previous cells have already been loaded and defined.

# --- 1. Define Output Path on Google Drive ---
gdrive_output_path = "/content/drive/MyDrive/batch_test_results_A.txt"

# --- 2. Resume Logic ---
# Collect the IDs already present in the results file so the main loop can
# skip them. Each result line starts with "<part id>;".
processed_ids = set()
if not os.path.exists(gdrive_output_path):
    print("No existing results file found. Starting a new run.")
else:
    print(f"✅ Found existing results file: {gdrive_output_path}")
    print("Scanning for completed parts to skip...")
    try:
        with open(gdrive_output_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Lines without a semicolon (including blank ones) carry no ID.
                if ';' not in line:
                    continue
                # The ID is everything before the first semicolon.
                part_id = line.partition(';')[0]
                processed_ids.add(part_id)
        print(f"Found {len(processed_ids)} completed parts. They will be skipped.")
    except Exception as e:
        print(f"⚠️ Warning: Could not read the existing results file. Starting a fresh run. Error: {e}")
        # Discard any IDs gathered before the failure.
        processed_ids = set()


# --- 3. Upload Test Data File ---
print("\n🚀 Please upload your test data file...")
uploaded = files.upload()

if not uploaded:
    print("\n❌ Operation cancelled, no file was uploaded.")
elif not model:
    print("\n❌ Model is not loaded. Cannot proceed with batch inference.")
else:
    # Only the first uploaded file is used.
    test_file_name = next(iter(uploaded))
    test_file_content = uploaded[test_file_name].decode('utf-8')
    lines = test_file_content.strip().split('\n')

    print(f"\nFile '{test_file_name}' uploaded successfully with {len(lines)} lines to process.")

    # --- 4. Main Loop: Process and Write Line-by-Line ---
    # Append mode keeps results from earlier (interrupted) runs intact.
    with open(gdrive_output_path, "a", encoding="utf-8") as output_file:
        for line in tqdm(lines, desc="Processing parts"):
            line = line.strip()
            if not line:
                continue

            # Reset for every line so the error handler below never reports an
            # ID left over from a previous iteration or from the resume scan.
            part_id = "Unknown"
            try:
                # --- 4.1 Parse Data from Each Line ---
                # Expected format: PartID;Description;BoundingBox;BuildPlan;InstructionSet
                # NOTE(review): a literal ';' inside any field would shift the
                # columns — confirm the input file never contains one.
                parts = line.split(';')
                if len(parts) < 5:
                    print(f"⚠️ Parsing Warning: Skipping malformed line (requires at least 4 semicolons): {line[:100]}...")
                    continue

                part_id = parts[0].strip()
                # The build plan is the 4th field (index 3).
                build_plan_from_file = parts[3].strip()

                # --- 4.2 Check if Part Should Be Skipped ---
                if part_id in processed_ids:
                    continue

                # --- 4.3 Prepare Model Input ---
                # Restore escaped newlines ('\\n') from the file to real
                # newlines ('\n') to match the model's training format.
                build_plan_for_model = build_plan_from_file.replace('\\n', '\n')
                # The prompt is identical for all invocations, so build it once.
                step2_input = prompt_step2["input_template"].format(build_plan=build_plan_for_model)

                print(f"\n⚙️  Processing Part ID: {part_id}...")
                responses = []
                # --- 4.4 Call the Model 3 Times for Each Build Plan ---
                for i in range(3):
                    print(f"    - Invocation {i+1}/3...")
                    raw_output = generate_response(prompt_step2["instruction"], step2_input, max_tokens=8192)
                    json_response = extract_json_block(raw_output)
                    # Fall back to the raw text when no JSON-looking block is found.
                    final_response = json_response if json_response else raw_output

                    # Escape newlines so each response fits on a single line.
                    processed_response = final_response.replace('\r\n', '\\n').replace('\n', '\\n')
                    responses.append(processed_response)

                    print(f"    - Invocation {i+1}/3 complete. JSON extracted: {'✅' if json_response else '❌'}")

                # --- 4.5 Combine Results and Write to File ---
                # Format: "id;build_plan;response1<|>response2<|>response3"
                # The build plan is written in its original (escaped) form.
                result_line = f"{part_id};{build_plan_from_file};{'<|>'.join(responses)}"

                output_file.write(result_line + "\n")
                # Flush after every part so an interrupted run loses nothing
                # that was already completed.
                output_file.flush()

                print("-" * 70)
                print(f"✅ Result for Part ID: {part_id} has been saved.")
                print("-" * 70)

            except Exception as e:
                # Best-effort batch: report the failure and move on to the next part.
                print(f"❌ A critical error occurred while processing Part ID {part_id}: {e}")
                continue

    # --- 5. Final Success Message ---
    print("\n" + "="*50)
    print("✅ All parts processed!")
    print("✅ Results have been progressively saved to your Google Drive.")
    print(f"   File Path: {gdrive_output_path}")
    print("="*50)


test B

In [None]:
import os
import re
from google.colab import drive
from unsloth import FastLanguageModel
from transformers import TextStreamer

# ==============================================================================
# 1. ENVIRONMENT SETUP AND MODEL LOADING
# ==============================================================================

# --- Mount Google Drive ---
# Best-effort mount: a failure is reported but does not stop the cell.
try:
    drive.mount('/content/drive')
except Exception as mount_error:
    print(f"Drive already mounted or mounting failed: {mount_error}")
else:
    print("Google Drive mounted successfully.")

# --- Load the Fine-Tuned Model ---
print("\nLoading the fine-tuned model...")
model_path = "/content/drive/MyDrive/CAD_history_model_v15.4"
# Sentinel: stays None unless the base model AND the adapters load successfully.
model = None

try:
    # --- Pre-load Sanity Checks ---
    print(f"Checking model path: {model_path}")
    if not os.path.exists(model_path):
        raise FileNotFoundError("Model directory does not exist! Please confirm the path is correct.")
    if not os.path.isdir(model_path):
        raise NotADirectoryError("The specified path is not a directory.")

    # Check for essential files.
    required_files = ["adapter_config.json", "tokenizer_config.json", "special_tokens_map.json"]
    missing_files = [f for f in required_files if not os.path.exists(os.path.join(model_path, f))]
    if missing_files:
        raise FileNotFoundError(
            f"The model directory '{os.path.basename(model_path)}' is missing key files: {', '.join(missing_files)}. "
            "This usually means the model was not saved correctly."
        )
    print("✅ Model file check passed. Attempting to load...")

    # --- Load Model and Apply Adapters ---
    # Step 1: Load the original base model.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
        max_seq_length=8192,
        load_in_4bit=True,
    )
    # Step 2: Apply the saved LoRA adapters.
    print("Applying saved LoRA adapters...")
    model.load_adapter(model_path)
    print("Adapters loaded successfully!")

    # Enable Unsloth's fast inference mode.
    FastLanguageModel.for_inference(model)

except Exception as e:
    # Reset the sentinel. `model` may already hold the bare base model if a
    # later step (adapter loading, inference setup) failed; leaving it bound
    # would let later `model` checks pass without the fine-tuned adapters.
    model = None
    print("\n❌ Model loading failed! Error details below:")
    print(f"Error Type: {type(e).__name__}")
    print(f"Error Message: {e}")
    print("\n--- Please check your model files and path based on the error message. ---")


# ==============================================================================
# 2. HELPER FUNCTIONS AND PROMPT DEFINITIONS
# ==============================================================================

def extract_json_block(raw_text: str):
    """Return the substring of *raw_text* that looks like a JSON object.

    A fenced Markdown block tagged ``json`` takes priority. Otherwise the span
    from the first "{" to the last "}" is returned. The result is NOT parsed or
    validated here; callers are expected to run it through ``json.loads``.

    Returns:
        The candidate JSON text, or None when *raw_text* is not a string or
        contains no brace-delimited span.
    """
    if not isinstance(raw_text, str):
        return None

    # Preferred source: a ```json ... ``` fenced block wrapping an object.
    fenced = re.search(r"```json\s*(\{.*?\})\s*```", raw_text, flags=re.DOTALL)
    if fenced is not None:
        return fenced.group(1)

    # Fallback: everything between the outermost pair of curly braces.
    start = raw_text.find("{")
    end = raw_text.rfind("}")
    if start == -1 or end <= start:
        return None
    return raw_text[start : end + 1]

def generate_response(instruction, user_input, max_tokens=1024):
    """Run one sampled chat completion and return only the newly generated text.

    Uses the module-level ``model`` and ``tokenizer`` created by the
    model-loading section of this notebook.

    Args:
        instruction: Text placed in the system message.
        user_input: Text placed in the user message.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded completion, with the prompt tokens sliced off and special
        tokens skipped.
    """
    messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_input},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    prompt_len_tokens = inputs['input_ids'].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        # temperature/top_p only apply when sampling is enabled; request it
        # explicitly instead of depending on the checkpoint's generation
        # config, since callers invoke this repeatedly expecting varied runs.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        pad_token_id=tokenizer.eos_token_id,
    )
    # The output sequence starts with the prompt tokens; decode only what follows.
    response_part = tokenizer.decode(outputs[0][prompt_len_tokens:], skip_special_tokens=True)
    return response_part

# --- User Inputs ---
# Sample part for the single-part demo run in this cell; both values are
# substituted into prompt_step1["input_template"].
brief_description_input = "A stepped cylindrical disc with a central through-hole."
# Bounding box, kept as a plain string and inserted into the prompt as-is.
bbox_input = "X=2.2,Y=0.7,Z=2.2"

# --- System Prompts for the Two-Step Process ---
# Step 1 prompt: brief description + bounding box -> natural-language build plan.
#   "instruction"    : passed as the first argument of generate_response(),
#                      which places it in the system message.
#   "input_template" : user message; fill with
#                      .format(brief_desc=..., bbox_info=...).
prompt_step1 = {
    "instruction": (
        "As a CAD expert, create a procedural build plan from the provided part description and bounding box.\n\n"
        "### Requirements ###\n"
        "1.  **Logical Flow**: The plan must be geometrically sound, and the steps (`Sketch`, `Extrude`) must be in a logical sequence to build the part.\n"
        "2.  **Bounding Box Adherence**: The final geometry's external dimensions must strictly match the provided bounding box. Use the bounding box to define the primary feature dimensions.\n"
        "3.  **Natural Language**: Describe the geometric meaning of operations.\n"
        "4.  **Conciseness**: Be direct. Do not include any introductory or concluding text. Your response must start directly with the build plan.\n\n"
        "### Formatting Rules ###\n"
        "- **Top-Level Features**: For each feature like `Sketch` or `Extrude`, start a new line with its name in bold (e.g., `**Sketch1**`).\n"
        "- **Profile Description**: For a `Sketch`, create an indented bullet point for each `Profile` it contains (e.g., `- **Profile1**`). On the same line, describe its geometry, focusing on loop relationships (inner/outer), shapes, and key dimensions (e.g., \"side length X\", \"radius Y\").\n"
        "- **Extrude Description**: For an `Extrude` feature, add a colon and the description on the same line as its name. The description must specify the referenced profile, the operation (new body, join, cut), direction, and distance."
    ),
    "input_template": (
        "Task: Infer a detailed, precise, and procedural build plan from a brief part description and its bounding box. Provide only the plan itself, strictly following all rules.\n\n"
        "Brief description: {brief_desc}\n"
        "Bounding box: {bbox_info}\n"
    )
}

# Step 2 prompt: natural-language build plan -> machine-readable command set.
#   "instruction"    : passed as the first argument of generate_response(),
#                      which places it in the system message. The triple-quoted
#                      literal is used unstripped, so it begins with a newline
#                      and ends with the indentation before the closing quotes.
#   "input_template" : user message; fill with .format(build_plan=...).
prompt_step2 = {
    "instruction": (
        """
Task: Convert the CAD Build Plan into a structured, machine-readable command set.

### Rules ###
1.  **Direct Translation**: Faithfully translate every step from the Build Plan into its corresponding command format.
2.  **Geometric Continuity**: The multiple curve segments that form a profile must be connected head-to-tail by their coordinates.
3.  **Strict Syntax**: Adhere strictly to the command syntax, parameters, and ordering defined in the reference below.
4.  **Completeness**: Ensure every detail from the Build Plan (dimensions, references, operations) is represented in the final instruction set.
5.  **Conciseness**: Your response must contain only the command set. Do not include any other text, comments, or explanations.

### Command Reference ###
- **`S (sketch_id)`**: Marks the beginning of a new sketch. `sketch_id` is a natural number (1, 2, ...).
- **`P (profile_id)`**: Marks the beginning of a new profile within a sketch. `profile_id` is a natural number.
- **`O (is_outer)`**: Defines the loop type for the current profile. `is_outer` is `true` (outer loop) or `false` (inner loop).
- **`L (x1, y1, x2, y2)`**: Defines a straight line using absolute start (x1, y1) and end (x2, y2) coordinates.
- **`A (x1, y1, x2, y2, cx, cy, rvx, rvy, r, sa, ea)`**: Defines an arc with start/end points, center, reference vector, radius, and start/end angles.
- **`C (cx, cy, r)`**: Defines a full circle with center (cx, cy) and radius (r).
- **`T (ox, oy, oz, xx, xy, xz, yx, yy, yz, zx, zy, zz)`**: Defines the 3D spatial pose (origin and axis vectors) of the current sketch.
- **`E (profile_id, sketch_id, operation, type, d1, d2)`**: Defines an extrusion feature.
        """
    ),
    "input_template": (
        "Task: Convert the CAD Build Plan into a structured, machine-readable command set.\n\n"
        "--- Build Plan ---\n"
        "{build_plan}"
    )
}


# ==============================================================================
# 3. EXECUTE THE TWO-STEP INFERENCE CHAIN
# ==============================================================================

# Inference requires a loaded model; report and stop otherwise.
if not model:
    print("\nAborting execution because the model was not loaded successfully.")
else:
    # --- Step 1: description + bounding box -> build plan ---
    print("="*50, "🚀 STEP 1: Generating Modeling Build Plan...", "="*50, sep="\n")
    step1_input = prompt_step1["input_template"].format(
        brief_desc=brief_description_input,
        bbox_info=bbox_input,
    )
    build_plan = generate_response(
        prompt_step1["instruction"],
        step1_input,
        max_tokens=1024,
    )
    print(f"\n✅ Step 1 Generated Build Plan:\n---\n{build_plan}\n---")

    # --- Step 2: build plan -> command set ---
    print("\n" + "="*50, "🚀 STEP 2: Converting Build Plan to Command Set...", "="*50, sep="\n")
    step2_input = prompt_step2["input_template"].format(build_plan=build_plan)
    step2_raw_output = generate_response(
        prompt_step2["instruction"],
        step2_input,
        max_tokens=8192,
    )

    # --- The raw model text is shown as the final result, without parsing ---
    print(f"\n✅ Step 2 Raw Output from Model (Command Set):\n---\n{step2_raw_output}\n---")
    print("\n✅✅✅ SUCCESS! The two-step process is complete. ✅✅✅")


In [None]:
import io
import os
from google.colab import files
from tqdm.notebook import tqdm

# ==============================================================================
# 3. BATCH INFERENCE SCRIPT (Full Two-Step Process)
# ==============================================================================

# --- 1. Results file location on Google Drive ---
gdrive_output_path = "/content/drive/MyDrive/batch_test_results_B.txt"

# --- 2. Resume support: collect the part IDs already present in the results file ---
processed_ids = set()
if not os.path.exists(gdrive_output_path):
    print("No existing results file found. Starting a new run.")
else:
    print(f"✅ Found existing results file: {gdrive_output_path}")
    print("Scanning for completed parts to skip...")
    try:
        with open(gdrive_output_path, "r", encoding="utf-8") as f:
            for line in map(str.strip, f):
                # The part ID is the text before the first ';'. Lines without
                # a separator (including blank ones) are ignored.
                if ';' in line:
                    processed_ids.add(line.partition(';')[0])
        print(f"Found {len(processed_ids)} completed parts. They will be skipped.")
    except Exception as e:
        # An unreadable file is treated as if nothing had been processed yet.
        print(f"⚠️ Warning: Could not read the existing results file. Starting a fresh run. Error: {e}")
        processed_ids = set()


# --- 3. Upload Test Data File ---
def _to_single_line(text):
    """Return *text* with every line break replaced by a literal "\\n".

    Each result record must occupy exactly one physical line of the output
    file, because the resume scan reads that file line by line.
    """
    return text.replace('\r\n', '\\n').replace('\n', '\\n').replace('\r', '\\n')


print("\n🚀 Please upload your test data file (Format: PartID;Description;BBox;BuildPlan;StandardCommandSet)")
uploaded = files.upload()

if not uploaded:
    print("\n❌ Operation cancelled, no file was uploaded.")
elif not model:
    print("\n❌ Model is not loaded. Cannot proceed with batch inference.")
else:
    # `uploaded` is used here as a mapping of file name -> raw bytes; only the
    # first uploaded file is processed.
    test_file_name = next(iter(uploaded))
    test_file_content = uploaded[test_file_name].decode('utf-8')
    lines = test_file_content.strip().split('\n')

    print(f"\nFile '{test_file_name}' uploaded successfully with {len(lines)} lines to process.")

    # --- 4. Main Loop: Process and Write Line-by-Line ---
    # Append mode keeps records from earlier runs so an interrupted batch can resume.
    with open(gdrive_output_path, "a", encoding="utf-8") as output_file:
        for line in tqdm(lines, desc="Processing parts"):
            line = line.strip()
            if not line:
                continue

            part_id = "Unknown"  # Initialize for error handling.
            try:
                # --- 4.1 Parse Data from Each Line ---
                parts = line.split(';')
                if len(parts) < 5:
                    print(f"⚠️ Parsing Warning: Skipping malformed line (requires 5 semicolon-separated parts): {line[:100]}...")
                    continue

                part_id = parts[0].strip()
                brief_description_input = parts[1].strip()
                bbox_input = parts[2].strip()
                # The standard command set from the file is the 5th part (index 4).
                # The build plan stored in the file (index 3) is not used; a
                # fresh plan is generated in step 1 below.
                standard_command_set_from_file = parts[4].strip()

                # --- 4.2 Check if Part Should Be Skipped ---
                if part_id in processed_ids:
                    continue

                print(f"\n⚙️  Processing Part ID: {part_id}...")

                # --- 4.3 [Step 1] Generate the Build Plan ---
                print("   - STEP 1: Generating Build Plan...")
                step1_input = prompt_step1["input_template"].format(
                    brief_desc=brief_description_input,
                    bbox_info=bbox_input
                )
                build_plan = generate_response(prompt_step1["instruction"], step1_input, max_tokens=1024)
                print("   - STEP 1: Plan Generated.")

                # --- 4.4 [Step 2] Convert Plan to Command Set (3 runs) ---
                print("   - STEP 2: Converting Plan to Command Set (3 runs)...")
                # The step-2 input is identical for all three runs, so build it once.
                step2_input = prompt_step2["input_template"].format(build_plan=build_plan)
                newly_generated_command_sets = []
                for i in range(3):
                    print(f"     - Run {i+1}/3...")
                    raw_output = generate_response(prompt_step2["instruction"], step2_input, max_tokens=8192)

                    # Escape line breaks for single-line storage.
                    processed_response = _to_single_line(raw_output)
                    newly_generated_command_sets.append(processed_response)
                    print(f"     - Run {i+1}/3 finished.")

                # --- 4.5 Combine Results and Write to File ---
                # Output format: "id;standard_command_set_from_file;new_set_1<|>new_set_2<|>new_set_3"
                all_new_command_set_results = "<|>".join(newly_generated_command_sets)
                result_line = f"{part_id};{standard_command_set_from_file};{all_new_command_set_results}"

                output_file.write(result_line + "\n")
                output_file.flush() # Ensure real-time saving.

                print("-" * 70)
                print(f"✅ Full two-step result for Part ID: {part_id} has been saved.")
                print("-" * 70)

            except Exception as e:
                print(f"❌ A critical error occurred while processing Part ID {part_id}: {e}")
                # Exception messages can span several lines; escape them (and
                # the ';' field separator) so the error record stays on one line.
                error_message = _to_single_line(str(e).replace(';', ','))
                error_line = f"{part_id};ERROR;{error_message}\n"
                output_file.write(error_line)
                output_file.flush()
                continue

    # --- 5. Final Success Message ---
    print("\n" + "="*50)
    print("✅ All parts processed!")
    print("✅ Results have been progressively saved to your Google Drive.")
    print(f"   File Path: {gdrive_output_path}")
    print("="*50)

test C

In [None]:
import json
import os
import re
from google.colab import drive
from unsloth import FastLanguageModel
from transformers import TextStreamer
from pprint import pprint

# ==============================================================================
# 1. ENVIRONMENT SETUP AND MODEL LOADING
# ==============================================================================

# --- Mount Google Drive ---
# Best-effort mount: any exception raised by drive.mount() is printed and the
# cell keeps running; the model-path checks further down report a missing
# directory if the drive is not actually available.
try:
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")
except Exception as e:
    print(f"Drive already mounted or mounting failed: {e}")

# --- Load the Fine-Tuned Model ---
print("\nLoading the fine-tuned model...")
model_path = "/content/drive/MyDrive/CAD_history_model_v15.5"
# `model` stays None unless the base model AND the LoRA adapters both load;
# the inference sections below gate on it.
model = None

try:
    # --- Pre-load Sanity Checks ---
    print(f"Checking model path: {model_path}")
    if not os.path.exists(model_path):
        raise FileNotFoundError("Model directory does not exist! Please confirm the path is correct.")
    if not os.path.isdir(model_path):
        raise NotADirectoryError("The specified path is not a directory.")

    # Check for essential files.
    required_files = ["adapter_config.json", "tokenizer_config.json", "special_tokens_map.json"]
    missing_files = [f for f in required_files if not os.path.exists(os.path.join(model_path, f))]
    if missing_files:
        raise FileNotFoundError(
            f"The model directory '{os.path.basename(model_path)}' is missing key files: {', '.join(missing_files)}. "
            "This usually means the model was not saved correctly."
        )
    print("✅ Model file check passed. Attempting to load...")

    # --- Load Model and Apply Adapters ---
    # Step 1: Load the original base model into temporaries. Binding `model`
    # here would let the `if model:` gate pass even if the adapter step below
    # fails, silently running inference on the un-fine-tuned base model.
    loaded_model, loaded_tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
        max_seq_length=8192,
        load_in_4bit=True,
    )
    # Step 2: Apply the saved LoRA adapters.
    print("Applying saved LoRA adapters...")
    loaded_model.load_adapter(model_path)
    print("Adapters loaded successfully!")

    # Enable Unsloth's fast inference mode.
    FastLanguageModel.for_inference(loaded_model)

    # Step 3: Publish the globals only after every step above succeeded.
    model, tokenizer = loaded_model, loaded_tokenizer

except Exception as e:
    # Drop any partially initialised model so it is neither used nor kept alive.
    loaded_model = loaded_tokenizer = None
    model = None
    print("\n❌ Model loading failed! Error details below:")
    print(f"Error Type: {type(e).__name__}")
    print(f"Error Message: {e}")
    print("\n--- Please check your model files and path based on the error message. ---")


# ==============================================================================
# 2. HELPER FUNCTIONS AND PROMPT DEFINITION
# ==============================================================================

def extract_json_block(raw_text: str):
    """Extract the first JSON object found in raw model output.

    Lookup order:
      1. A Markdown ```json fenced block; its brace-delimited content is
         returned as-is (it is not validated here).
      2. The first substring that starts at a "{" and parses as a complete
         JSON value, located with ``json.JSONDecoder.raw_decode``. This keeps
         trailing prose or a second object from being glued onto the result.
      3. As a last resort, the span from the first "{" to the last "}". That
         text did not parse, but returning it lets the caller report a
         precise JSON syntax error instead of "nothing found".

    Args:
        raw_text: Raw text produced by the model.

    Returns:
        The extracted text, or None if ``raw_text`` is not a string or
        contains no brace-delimited span.
    """
    import json  # Local import keeps this helper self-contained.

    if not isinstance(raw_text, str):
        return None

    # Prioritize matching a Markdown-formatted JSON code block.
    fenced = re.search(r"```json\s*(\{.*?\})\s*```", raw_text, re.DOTALL)
    if fenced:
        return fenced.group(1)

    # Try every "{" in order and return the first one that opens valid JSON.
    decoder = json.JSONDecoder()
    first_brace = raw_text.find("{")
    start = first_brace
    while start != -1:
        try:
            _, end = decoder.raw_decode(raw_text, start)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass.
            start = raw_text.find("{", start + 1)
        else:
            return raw_text[start:end]

    # Nothing parsed: fall back to the widest brace-delimited span.
    last_brace = raw_text.rfind("}")
    if first_brace != -1 and last_brace > first_brace:
        return raw_text[first_brace : last_brace + 1]
    return None

def generate_response(instruction, user_input, max_tokens=8192):
    """Run one sampled chat completion and return only the newly generated text.

    Uses the module-level ``model`` and ``tokenizer`` created by the
    model-loading section of this notebook.

    Args:
        instruction: Text placed in the system message.
        user_input: Text placed in the user message.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded completion, with the prompt tokens sliced off and special
        tokens skipped.
    """
    messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_input},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    prompt_len_tokens = inputs['input_ids'].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        # temperature/top_p only apply when sampling is enabled; request it
        # explicitly instead of depending on the checkpoint's generation
        # config, since callers invoke this repeatedly expecting varied runs.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.25,
        pad_token_id=tokenizer.eos_token_id,
    )
    # The output sequence starts with the prompt tokens; decode only what follows.
    response_part = tokenizer.decode(outputs[0][prompt_len_tokens:], skip_special_tokens=True)
    return response_part

# --- User Inputs ---
# Sample part for the single-part demo run in this cell; both values are
# substituted into prompt_single_step["input_template"].
brief_description_input = "A stepped cylindrical disc with a central through-hole."
# Bounding box, kept as a plain string and inserted into the prompt as-is.
bbox_input = "X=2.2,Y=0.7,Z=2.2"


# --- New Single-Step System Prompt ---
# Single-step prompt: brief description + bounding box -> command set, with no
# intermediate build plan.
#   "instruction"    : passed as the first argument of generate_response(),
#                      which places it in the system message. The triple-quoted
#                      literal is used unstripped, so it begins with a newline
#                      and ends with the indentation before the closing quotes.
#   "input_template" : user message; fill with
#                      .format(brief_desc=..., bbox_info=...).
prompt_single_step = {
    "instruction": ("""
Task: Generate a structured, machine-readable command set directly from a CAD part description and its bounding box.

### Requirements ###
1.  **Direct Generation**: Infer the geometric steps and translate them directly into the specified command format.
2.  **Bounding Box Adherence**: The commands must produce a model whose final dimensions strictly match the provided bounding box.
3.  **Geometric Continuity**: The multiple curve segments that form a profile must be connected head-to-tail by their coordinates.
4.  **Strict Syntax**: Adhere strictly to the command syntax, parameters, and ordering defined in the reference below.
5.  **Conciseness**: Your response must contain only the command set. Do not include any other text, comments, or explanations.

### Command Reference ###
- **`S (sketch_id)`**: Marks the beginning of a new sketch. `sketch_id` is a natural number (1, 2, ...).
- **`P (profile_id)`**: Marks the beginning of a new profile within a sketch. `profile_id` is a natural number.
- **`O (is_outer)`**: Defines the loop type for the current profile. `is_outer` is `true` (outer loop) or `false` (inner loop).
- **`L (x1, y1, x2, y2)`**: Defines a straight line using absolute start (x1, y1) and end (x2, y2) coordinates.
- **`A (x1, y1, x2, y2, cx, cy, rvx, rvy, r, sa, ea)`**: Defines an arc with start/end points, center, reference vector, radius, and start/end angles.
- **`C (cx, cy, r)`**: Defines a full circle with center (cx, cy) and radius (r).
- **`T (ox, oy, oz, xx, xy, xz, yx, yy, yz, zx, zy, zz)`**: Defines the 3D spatial pose (origin and axis vectors) of the current sketch.
- **`E (profile_id, sketch_id, operation, type, d1, d2)`**: Defines an extrusion feature.
        """
    ),
    "input_template": (
        "Task: Infer the geometric modeling steps and generate a structured, machine-readable command set from the following part description and bounding box. Provide only the command set, strictly following all rules.\n\n"
        "Brief description: {brief_desc}\n"
        "Bounding box: {bbox_info}\n"
    )
}

# ==============================================================================
# 3. EXECUTE THE COMBINED SINGLE-STEP INFERENCE
# ==============================================================================

# Inference requires a loaded model; report and stop otherwise.
if not model:
    print("\nAborting execution because the model was not loaded successfully.")
else:
    print("="*50, "🚀 EXECUTING SINGLE-STEP INFERENCE...", "="*50, sep="\n")

    # --- Fill the user template with the sample description and bounding box ---
    final_input = prompt_single_step["input_template"].format(
        brief_desc=brief_description_input,
        bbox_info=bbox_input,
    )
    raw_output = generate_response(
        prompt_single_step["instruction"],
        final_input,
        max_tokens=1024,
    )
    print(f"\n✅ Raw Output from Model:\n---\n{raw_output}\n---")

    # --- Post-processing: pull out a JSON object and check its syntax ---
    # NOTE: the prompt requests a "command set", yet this step looks for a JSON
    # object, so it can report a failure even when the model answers in the
    # requested format.
    final_json_str = extract_json_block(raw_output)

    if final_json_str:
        print("\n✅ Extracted Final JSON. Checking syntax...")
        try:
            parsed_json = json.loads(final_json_str)
        except json.JSONDecodeError as e:
            # The extracted text is not syntactically valid JSON.
            print(f"\n❌ JSON Parsing Failed: The extracted block is not valid JSON. Error: {e}")
            print("\n--- Extracted String was: ---\n", final_json_str)
        else:
            print("---")
            pprint(parsed_json, sort_dicts=False, width=120)
            print("---")
            print("\n✅✅✅ SUCCESS! A syntactically valid JSON object was extracted and parsed. ✅✅✅")
    else:
        print("\n❌ FAILED: Could not extract a final JSON object from the raw output.")


In [None]:
import io
import os
from google.colab import files
from tqdm.notebook import tqdm

# ==============================================================================
# 3. BATCH INFERENCE SCRIPT (Single-Step Process)
# ==============================================================================

# --- 1. Results file location on Google Drive ---
gdrive_output_path = "/content/drive/MyDrive/batch_test_results_C.txt"

# --- 2. Resume support: collect the part IDs already present in the results file ---
processed_ids = set()
if not os.path.exists(gdrive_output_path):
    print("No existing results file found. Starting a new run.")
else:
    print(f"✅ Found existing results file: {gdrive_output_path}")
    print("Scanning for completed parts to skip...")
    try:
        with open(gdrive_output_path, "r", encoding="utf-8") as f:
            for line in map(str.strip, f):
                # The part ID is the text before the first ';'. Lines without
                # a separator (including blank ones) are ignored.
                if ';' in line:
                    processed_ids.add(line.partition(';')[0])
        print(f"Found {len(processed_ids)} completed parts. They will be skipped.")
    except Exception as e:
        # An unreadable file is treated as if nothing had been processed yet.
        print(f"⚠️ Warning: Could not read the existing results file. Starting a fresh run. Error: {e}")
        processed_ids = set()


# --- 3. Ask the user for the test data file ---
print("\n🚀 Please upload your test data file...")
uploaded = files.upload()

if not uploaded:
    print("\n❌ Operation cancelled, no file was uploaded.")
elif not model:
    print("\n❌ Model is not loaded. Cannot proceed with batch inference.")
else:
    # Only the first uploaded file is processed.
    test_file_name = next(iter(uploaded))
    test_file_content = uploaded[test_file_name].decode('utf-8')
    lines = test_file_content.strip().split('\n')

    print(f"\nFile '{test_file_name}' uploaded successfully with {len(lines)} lines to process.")

    # --- 4. Main loop: one input line in, one result record appended ---
    with open(gdrive_output_path, "a", encoding="utf-8") as output_file:
        for line in tqdm(lines, desc="Processing parts"):
            line = line.strip()
            if not line:
                continue

            part_id = "Unknown"  # Placeholder used if parsing fails before the ID is known.
            try:
                # --- 4.1 Split the record ---
                # Layout: PartID;Description;BoundingBox;BuildPlan;CommandSet
                parts = line.split(';')
                if len(parts) < 5:
                    print(f"⚠️ Parsing Warning: Skipping malformed line (requires at least 5 parts): {line[:100]}...")
                    continue

                part_id, brief_desc_from_file, bbox_from_file = (
                    field.strip() for field in parts[:3]
                )
                command_from_file = parts[4].strip()  # 5th field; the build plan (4th) is unused here.

                # --- 4.2 Skip parts already present in the results file ---
                if part_id in processed_ids:
                    continue

                # --- 4.3 Build the model input once; it is the same for every invocation ---
                print(f"\n⚙️  Processing Part ID: {part_id}...")
                step_input = prompt_single_step["input_template"].format(
                    brief_desc=brief_desc_from_file,
                    bbox_info=bbox_from_file,
                )

                # --- 4.4 Query the model three times with that input ---
                responses = []
                for i in range(3):
                    print(f"    - Invocation {i+1}/3...")
                    raw_output = generate_response(
                        prompt_single_step["instruction"],
                        step_input,
                        max_tokens=1024,
                    )
                    # Line breaks are escaped so each response fits on one line.
                    responses.append(raw_output.replace('\r\n', '\\n').replace('\n', '\\n'))
                    print(f"    - Invocation {i+1}/3 complete.")

                # --- 4.5 Append the record ---
                # Layout: "id;command_set_from_file;response1<|>response2<|>response3"
                result_line = f"{part_id};{command_from_file};{'<|>'.join(responses)}"

                output_file.write(result_line + "\n")
                output_file.flush()  # Persist after every part.

                print("-" * 70)
                print(f"✅ Result for Part ID: {part_id} has been saved.")
                print("-" * 70)

            except Exception as e:
                # part_id is always bound at this point (placeholder or parsed value).
                print(f"❌ A critical error occurred while processing Part ID {part_id}: {e}")

    # --- 5. Closing summary ---
    print("\n" + "="*50)
    print("✅ All parts processed!")
    print("✅ Results have been progressively saved to your Google Drive.")
    print(f"   File Path: {gdrive_output_path}")
    print("="*50)