In [1]:
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm
Skipping import of cpp extensions due to incompatible torch version 2.12.0.dev20260221+cu128 for torchao version 0.16.0             Please see https://github.com/pytorch/ao/issues/2919 for more info
W0222 20:37:51.292000 29776 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
# --- Run configuration -------------------------------------------------------
# Base checkpoint to fine-tune, and the layer selector read by the smoke-test cell.
model_name = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
CURRENT_LAYER = 3

# All paths are built from the kernel's working directory, going one level up.
BASE_DIR = os.getcwd()
# NOTE(review): "dataser" looks like a typo for "dataset" — confirm against the
# file name on disk before changing it.
INPUT_PATH = os.path.join(BASE_DIR, os.pardir, "datasets", "L3", "L3_dataser_v1.jsonl")
ADAPTER_PATH = os.path.join(BASE_DIR, os.pardir, "adapters", "L3", "aegis_L3_v1")

In [3]:
# 4-bit quantization settings; passed to `from_pretrained` as `quantization_config`.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    # Compute in bfloat16 (per the original note: RTX 50-series supports native BF16).
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [4]:
# Load the tokenizer that matches the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fall back to EOS as the pad token only when the checkpoint ships without one.
# Overwriting an existing pad token makes pad and EOS indistinguishable, which
# is what prevents `generate()` from inferring an attention mask and can hide
# EOS from the loss in collators that ignore pad positions.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Pad on the right so real tokens keep their positions during training.
tokenizer.padding_side = "right"

In [5]:
# Load the base model in 4-bit (see `bnb_config`) with bfloat16 for the
# non-quantized parts, letting `device_map="auto"` place the weights.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    dtype=torch.bfloat16,  # `torch_dtype` is deprecated in the installed transformers
    device_map="auto",
    attn_implementation="sdpa",  # Standard SDPA for stability
)

`torch_dtype` is deprecated! Use `dtype` instead!
  torch._check_is_size(blocksize)


In [6]:
# LoRA adapter hyper-parameters: rank 128 with alpha 256 (alpha / r = 2).
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=128,
    lora_alpha=256,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        # projections listed under self_attn in the model printout
        "q_proj", "k_proj", "v_proj", "o_proj",
        # remaining projection layers
        "gate_proj", "up_proj", "down_proj",
    ],
)

# Rebinds `model` to the PEFT-wrapped version; run once per freshly loaded base
# model, since re-running would wrap the already-wrapped model again.
model = get_peft_model(model, lora_config)

In [7]:
# Read the JSON-lines file at INPUT_PATH with the "json" loader and take its "train" split.
dataset = load_dataset("json", data_files=INPUT_PATH, split="train")

In [8]:
def formatting_func(example):
    """Render one record's chat messages into a single training string.

    Args:
        example: A dataset row with a "messages" entry (the chat turns,
            including the system prompt stored in the file).

    Returns:
        A dict with one key, "text", holding the chat-template rendering of
        the messages. No generation prompt is appended and nothing is
        tokenized here.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        add_generation_prompt=False,
        tokenize=False,
    )
    return dict(text=rendered)

# Add a "text" column by applying formatting_func to every row.
# Note: this rebinds `dataset`, so the cell is not idempotent with respect to the raw load.
dataset = dataset.map(formatting_func)

In [9]:
# Training hyper-parameters, grouped by concern.
sft_config = SFTConfig(
    # -- output / checkpointing / logging --
    output_dir="outputs",
    save_strategy="epoch",
    logging_steps=1,
    # -- data --
    dataset_text_field="text",
    max_length=2048,
    # -- batching: 2 per device x 8 accumulation steps = effective batch of 16 --
    per_device_train_batch_size=2,   # Small batch for 12GB
    gradient_accumulation_steps=8,
    # -- optimisation --
    num_train_epochs=3,              # Higher epochs to solidify the strict JSON rules
    learning_rate=1e-4,              # Lower LR for better convergence on strict enums
    weight_decay=0.1,
    optim="paged_adamw_8bit",        # Paged optimizer prevents OOM spikes
    # -- precision / memory --
    bf16=True,
    gradient_checkpointing=True,     # Crucial for 12GB VRAM
)

# NOTE(review): no tokenizer / processing class is passed here, so the trainer
# presumably loads its own from the model name — confirm that is intended.
trainer = SFTTrainer(
    args=sft_config,
    model=model,
    train_dataset=dataset,
)

In [None]:
# Run fine-tuning with the trainer built above; loss is logged every step (logging_steps=1).
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


Step,Training Loss
1,1.3098
2,1.0293
3,0.8243
4,0.658
5,0.5602
6,0.424
7,0.3433
8,0.2746
9,0.2377
10,0.2457


In [14]:
# Write the trained model to ADAPTER_PATH via the trainer, then store the
# tokenizer files in the same directory so both can be loaded from one place.
trainer.save_model(ADAPTER_PATH)
tokenizer.save_pretrained(ADAPTER_PATH)
# Confirmation message including the resolved output path.
print(f"âœ… Training Complete. Adapter saved to {ADAPTER_PATH}")

âœ… Training Complete. Adapter saved to d:\Python\AegisFlow-\snippets\..\adapters\aegis_L2_v1


In [15]:
# Switch to evaluation mode before running generation (turns off the LoRA dropout layers).
# As the last expression in the cell, this also displays the full model repr.
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 1536)
        (layers): ModuleList(
          (0-27): 28 x Qwen2DecoderLayer(
            (self_attn): Qwen2Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=1536, out_features=1536, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1536, out_features=128, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=128, out_features=1536, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lo

In [None]:
def generate_intent_L1(user_prompt):
    """Run the L1 intent-extraction prompt through the loaded model.

    Uses the module-level `tokenizer` and `model`. Decoding is greedy
    (`do_sample=False`), so no sampling temperature is passed.

    Args:
        user_prompt: Free-text request from the user.

    Returns:
        The decoded completion only (prompt tokens removed, special tokens
        skipped), with surrounding whitespace stripped.
    """
    # This must match the system prompt in your L1_dataset_v3.jsonl exactly
    system_prompt = (
        "You are L1 of Mini Replit. Extract intent from user prompts as strict JSON only. "
        "No explanation. No markdown.\n"
        "Enums â€” project_type: landing_page|portfolio|blog  theme: dark_mode|light|minimal|vibrant  "
        "tone: modern|professional|playful|bold\n"
        "domain: snake_case string. audience: target audience string.\n"
        "explicit_sections: normalize user terms to: navbar,hero,features,about,services,pricing,"
        "testimonials,gallery,faq,blog,contact,call_to_action,footer,section_generic "
        "(menuâ†’navbar, reviewsâ†’testimonials, about meâ†’about, locationâ†’contact). Deduplicate.\n"
        "error: scope_violation if request needs backend/auth/payments/realtime/DB. Else null.\n"
        "On scope_violation: still fill all fields with best-effort values.\n"
        "Schema: {\"project_type\":\"...\",\"theme\":\"...\",\"domain\":\"...\",\"tone\":\"...\",\"audience\":\"...\",\"explicit_sections\":[...],\"error\":null}"
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    # add_generation_prompt=True makes the model start at the assistant turn.
    # return_dict=True is requested explicitly so the result always carries
    # both input_ids and attention_mask (same approach as generate_structure_L2),
    # rather than depending on the library's default return type.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)  # follow the model's device instead of hard-coding "cuda"

    # Greedy decoding: deterministic output, no temperature needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,  # passes input_ids and attention_mask together
            max_new_tokens=256,
            do_sample=False,    # Disable sampling to prevent hallucinations like "dog_deography"
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Drop the prompt tokens so only the newly generated text is decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    decoded_output = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return decoded_output.strip()

def generate_structure_L2(l1_intent_json):
    """Generate the L2 structural skeleton from an L1 intent JSON string.

    Uses the module-level `tokenizer` and `model`. The attention mask is
    passed to `generate()` together with the input ids, and decoding is greedy
    (`do_sample=False`), so no sampling temperature is passed.

    Args:
        l1_intent_json: The JSON text produced by L1, sent as the user turn.

    Returns:
        The decoded completion only (prompt tokens removed, special tokens
        skipped), with surrounding whitespace stripped.
    """
    # MUST match L2_dataset_v2.jsonl system prompt EXACTLY
    system_prompt = (
        "You are L2 of Mini Replit. Input: intent JSON. Output: ONLY strict JSON with exactly 3 keys: "
        "pages, constraints, file_tree. NEVER copy input fields into output. No explanation. No markdown.\n\n"
        "Schema: {\"pages\":{\"index.html\":{\"sections\":[{\"id\":\"<n>\",\"tag\":\"<t>\",\"class\":\"<n>\",\"layout\":\"<l>\"},...]}},"
        "\"constraints\":[\"semantic_html\",\"responsive\",\"external_css_only\",\"no_inline_styles\",\"no_script_tags\"],"
        "\"file_tree\":[\"index.html\",\"styles.css\"]}\n\n"
        "Tag rules (exact):\n  navbarâ†’header  footerâ†’footer  EVERYTHING ELSEâ†’section\n"
        "  (tag is NEVER 'grid' or 'block' â€” those are layouts, not tags)\n\n"
        "Layout rules:\n  flex:  navbar, hero, contact, call_to_action, footer\n"
        "  grid:  features, pricing, testimonials, gallery\n"
        "  block: about, services, faq, blog, section_generic\n\n"
        "Section order (always):\n  navbar(1st) â†’ hero(2nd,MANDATORY) â†’ core sections â†’ call_to_action â†’ contact â†’ footer(LAST,MANDATORY)\n\n"
        "Core section canonical order: featuresâ†’aboutâ†’servicesâ†’pricingâ†’testimonialsâ†’galleryâ†’faqâ†’blog\n"
        "Max 4 core sections. If input has more, keep first 4 by canonical order.\n\n"
        "Defaults when core is empty:\n  landing_pageâ†’features  portfolioâ†’gallery  blogâ†’blog\n"
        "portfolio: ALWAYS include gallery even if not in explicit_sections.\n\n"
        "class must ALWAYS equal id. No exceptions."
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": l1_intent_json}
    ]

    # return_dict=True so the encoding includes the attention_mask.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)  # follow the model's device instead of hard-coding "cuda"

    with torch.no_grad():
        outputs = model.generate(
            **inputs,  # passes input_ids and attention_mask together
            max_new_tokens=1536,
            do_sample=False,  # greedy decoding; a temperature would be ignored
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Drop the prompt tokens so only the newly generated text is decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    decoded_output = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return decoded_output.strip()

In [20]:
# Smoke tests for the layer selected by CURRENT_LAYER.
# Each case prints its banner first and then runs generation, so progress is
# visible while the model is still producing the next answer.
if CURRENT_LAYER == 1:
    # (banner, user prompt) pairs; banners are printed verbatim.
    l1_cases = [
        ("--- TEST 1: The Wedding Photographer (Standard) ---",
         "Create a dark mode portfolio for a wedding photographer with gallery and contact."),
        ("\n--- TEST 2: The Bubble Tea Shop (Messy/Normalization) ---",
         "vibrant site for bubble tea in Kovilpatti. playful tone. include menu and locations."),
        ("\n--- TEST 3: The E-commerce Request (Scope Violation) ---",
         "Build an e-commerce store with checkout and user login."),
        ("\n--- TEST 4: The Empty Prompt (Minimalist) ---",
         "make a site"),
    ]
    for banner, prompt in l1_cases:
        print(banner)
        print(generate_intent_L1(prompt))

elif CURRENT_LAYER == 2:
    # ==========================================
    # L2 FULL STRESS TEST BATTERY
    # ==========================================

    print("ðŸ”¥ STARTING L2 STRESS TEST ðŸ”¥\n")

    # STRESS 1: The Kitchen Sink (Over-limit & Canonical Order)
    # Input provides 8 core sections. L2 must only pick the first 4 based on canonical order.
    intent_s1 = '{"project_type":"landing_page","explicit_sections":["blog","faq","gallery","testimonials","pricing","services","about","features"]}'

    # STRESS 2: The Silent Portfolio (Defaulting Logic)
    # No sections requested. Model must inject 'gallery' because it's a portfolio.
    intent_s2 = '{"project_type":"portfolio","explicit_sections":[]}'

    # STRESS 3: The Scrambled Egg (Input Order vs. Canonical Order)
    # User asks for sections in the 'wrong' order. L2 must sort them correctly.
    intent_s3 = '{"project_type":"landing_page","explicit_sections":["contact","pricing","about","navbar"]}'

    # STRESS 4: Minimalist Request (Skeleton Minimums)
    # Testing if mandatory navbar, hero, and footer are always present even if unrequested.
    intent_s4 = '{"project_type":"landing_page","explicit_sections":[]}'

    # STRESS 5: The "Blog-Only" (Deep Defaults)
    intent_s5 = '{"project_type":"blog","explicit_sections":["contact"]}'

    # (banner, intent JSON) pairs, run in order.
    l2_cases = [
        ("--- STRESS 1: The Kitchen Sink (Limit & Order) ---", intent_s1),
        ("--- STRESS 2: The Silent Portfolio (Defaulting) ---", intent_s2),
        ("--- STRESS 3: The Scrambled Egg (Sorting) ---", intent_s3),
        ("--- STRESS 4: Minimalist Request (Minimums) ---", intent_s4),
        ("--- STRESS 5: The Blog-Only (Defaulting) ---", intent_s5),
    ]
    for banner, intent in l2_cases:
        print(banner)
        print(f"Input Intent: {intent}")
        print(f"Output:\n{generate_structure_L2(intent)}\n")

else:
    # Without this branch the cell produced no output at all for any other
    # layer value, which is easy to mistake for a successful run.
    print(f"No smoke tests are defined for CURRENT_LAYER={CURRENT_LAYER}; nothing was run.")

ðŸ”¥ STARTING L2 STRESS TEST ðŸ”¥

--- STRESS 1: The Kitchen Sink (Limit & Order) ---
Input Intent: {"project_type":"landing_page","explicit_sections":["blog","faq","gallery","testimonials","pricing","services","about","features"]}
Output:
{"pages":{"index.html":{"sections":[{"id":"navbar","tag":"header","class":"navbar","layout":"flex"},{"id":"hero","tag":"section","class":"hero","layout":"flex"},{"id":"features","tag":"section","class":"features","layout":"grid"},{"id":"pricing","tag":"section","class":"pricing","layout":"grid"},{"id":"testimonials","tag":"section","class":"testimonials","layout":"grid"},{"id":"gallery","tag":"section","class":"gallery","layout":"grid"},{"id":"footer","tag":"footer","class":"footer","layout":"flex"}]}},"constraints":["semantic_html","responsive","external_css_only","no_inline_styles","no_script_tags"],"file_tree":["index.html","styles.css"]}

--- STRESS 2: The Silent Portfolio (Defaulting) ---
Input Intent: {"project_type":"portfolio","explicit_secti