In [None]:
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
BASE_DIR = os.getcwd()
INPUT_PATH   = os.path.join(BASE_DIR, "..", "datasets", "L2_dataset_v1.jsonl")
ADAPTER_PATH = os.path.join(BASE_DIR, "..", "adapters", "aegis_L2_v1")
model_name = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
CURRENT_LAYER = 2

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16, # RTX 50-series supports native BF16
)

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [5]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="sdpa", # Standard SDPA for stability
)

`torch_dtype` is deprecated! Use `dtype` instead!
  torch._check_is_size(blocksize)


In [6]:
lora_config = LoraConfig(
    r=128, 
    lora_alpha=256,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

In [7]:
dataset = load_dataset("json", data_files=INPUT_PATH, split="train")

In [8]:
def formatting_func(example):
    # This uses the system prompt you already have in the file
    text = tokenizer.apply_chat_template(example["messages"], tokenize=False, add_generation_prompt=False)
    return {"text": text}

dataset = dataset.map(formatting_func)

Map: 100%|██████████| 804/804 [00:00<00:00, 8551.21 examples/s]


In [9]:
sft_config = SFTConfig(
    output_dir="outputs",
    dataset_text_field="text",
    per_device_train_batch_size=2,   # Small batch for 12GB
    gradient_accumulation_steps=8,  # High accumulation to keep effective batch size at 16
    num_train_epochs=3,             # Higher epochs to solidify the strict JSON rules
    learning_rate=1e-4,             # Lower LR for better convergence on strict enums
    bf16=True,
    optim="paged_adamw_8bit",       # Paged optimizer prevents OOM spikes
    logging_steps=1,
    gradient_checkpointing=True,    # Crucial for 12GB VRAM
    save_strategy="epoch",
    max_length=1024,
    weight_decay=0.1
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    args=sft_config,
)

Tokenizing train dataset: 100%|██████████| 804/804 [00:00<00:00, 1991.23 examples/s]
Truncating train dataset: 100%|██████████| 804/804 [00:00<00:00, 121758.39 examples/s]


In [10]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


Step,Training Loss
1,3.7525
2,2.8331
3,2.0391
4,1.3888
5,0.8979
6,0.5656
7,0.4455
8,0.2946
9,0.2661
10,0.2783


TrainOutput(global_step=153, training_loss=0.2092133745843289, metrics={'train_runtime': 562.4115, 'train_samples_per_second': 4.289, 'train_steps_per_second': 0.272, 'total_flos': 5983278230237184.0, 'train_loss': 0.2092133745843289})

In [11]:
trainer.save_model(ADAPTER_PATH)
tokenizer.save_pretrained(ADAPTER_PATH)
print(f"✅ Training Complete. Adapter saved to {ADAPTER_PATH}")

✅ Training Complete. Adapter saved to d:\Python\AegisFlow-\snippets\..\adapters\aegis_L1_v5


In [12]:
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 1536)
        (layers): ModuleList(
          (0-27): 28 x Qwen2DecoderLayer(
            (self_attn): Qwen2Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=1536, out_features=1536, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1536, out_features=128, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=128, out_features=1536, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lo

In [None]:
def generate_intent_L1(user_prompt):
    # This must match the system prompt in your L1_dataset_v3.jsonl exactly
    system_prompt = (
        "You are L1 of Mini Replit. Extract intent from user prompts as strict JSON only. "
        "No explanation. No markdown.\n"
        "Enums — project_type: landing_page|portfolio|blog  theme: dark_mode|light|minimal|vibrant  "
        "tone: modern|professional|playful|bold\n"
        "domain: snake_case string. audience: target audience string.\n"
        "explicit_sections: normalize user terms to: navbar,hero,features,about,services,pricing,"
        "testimonials,gallery,faq,blog,contact,call_to_action,footer,section_generic "
        "(menu→navbar, reviews→testimonials, about me→about, location→contact). Deduplicate.\n"
        "error: scope_violation if request needs backend/auth/payments/realtime/DB. Else null.\n"
        "On scope_violation: still fill all fields with best-effort values.\n"
        "Schema: {\"project_type\":\"...\",\"theme\":\"...\",\"domain\":\"...\",\"tone\":\"...\",\"audience\":\"...\",\"explicit_sections\":[...],\"error\":null}"
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    
    # Apply the chat template
    # add_generation_prompt=True ensures the model starts exactly at the assistant's JSON response
    inputs = tokenizer.apply_chat_template(
        messages, 
        tokenize=True, 
        add_generation_prompt=True, 
        return_tensors="pt"
    ).to("cuda")

    # Generate with high determinism
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=256, 
            temperature=0,      # Zero temperature for deterministic output
            do_sample=False,    # Disable sampling to prevent hallucinations like "dog_deography"
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    # Decode and remove the prompt tokens
    decoded_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
    return decoded_output.strip()

def generate_structure_L2(l1_intent_json):
    """
    Takes the JSON output from L1 and generates the L2 Structural Skeleton.
    """
    # Canonical L2 System Prompt [cite: 33, 34, 38]
    system_prompt = (
        "You are L2 of Mini Replit, the Structure Planner. Given an intent JSON from L1, "
        "output a structure JSON for index.html. Output ONLY valid JSON.\n"
        "Rules:\n"
        "- navbar: always include first. tag=header, class=navbar, layout=flex.\n"
        "- hero: mandatory second. tag=section, class=hero, layout=flex.\n"
        "- footer: mandatory last. tag=footer, class=footer, layout=flex.\n"
        "- Core sections: features(grid), about(block), services(block), pricing(grid), "
        "testimonials(grid), gallery(grid), faq(block), blog(block). Max 4 core sections.\n"
        "- Order: features->about->services->pricing->testimonials->gallery->faq->blog.\n"
        "- Class name must exactly equal ID. No exceptions.\n"
        "- Constraints: [semantic_html, responsive, external_css_only, no_inline_styles, no_script_tags]."
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": l1_intent_json}
    ]
    
    inputs = tokenizer.apply_chat_template(
        messages, 
        tokenize=True, 
        add_generation_prompt=True, 
        return_tensors="pt"
    ).to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=512, # L2 JSON is longer than L1
            temperature=0,      
            do_sample=False,    
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    return tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True).strip()

In [None]:
if CURRENT_LAYER == 1:
    print("--- TEST 1: The Wedding Photographer (Standard) ---")
    print(generate_intent_L1("Create a dark mode portfolio for a wedding photographer with gallery and contact."))

    print("\n--- TEST 2: The Bubble Tea Shop (Messy/Normalization) ---")
    print(generate_intent_L1("vibrant site for bubble tea in Kovilpatti. playful tone. include menu and locations."))

    print("\n--- TEST 3: The E-commerce Request (Scope Violation) ---")
    print(generate_intent_L1("Build an e-commerce store with checkout and user login."))

    print("\n--- TEST 4: The Empty Prompt (Minimalist) ---")
    print(generate_intent_L1("make a site"))
    
elif CURRENT_LAYER == 2:
    print("--- TEST 1: Wedding Photographer (Portfolio Intent) ---")
    # Simulating L1 output
    intent_1 = '{"project_type":"portfolio","theme":"dark_mode","domain":"wedding_photography","explicit_sections":["gallery","contact"]}'
    print(generate_structure_L2(intent_1))

    print("\n--- TEST 2: Bubble Tea (Landing Page Intent) ---")
    intent_2 = '{"project_type":"landing_page","theme":"vibrant","domain":"beverage","explicit_sections":["navbar","contact"]}'
    print(generate_structure_L2(intent_2))

    print("\n--- TEST 3: Max Section Stress (Too many sections) ---")
    # Testing if L2 correctly limits to 4-6 sections total
    intent_3 = '{"project_type":"landing_page","explicit_sections":["features","about","services","pricing","testimonials","gallery","faq"]}'
    print(generate_structure_L2(intent_3))

--- TEST 1: The Wedding Photographer (Standard) ---
{"project_type":"portfolio","theme":"dark_mode","domain":"wedding_photography","tone":"modern","audience":"engaged_couples","explicit_sections":["gallery","contact"],"error":null}

--- TEST 2: The Bubble Tea Shop (Messy/Normalization) ---
{"project_type":"landing_page","theme":"vibrant","domain":"beverage","tone":"playful","audience":"customers","explicit_sections":["navbar","contact"],"error":null}

--- TEST 3: The E-commerce Request (Scope Violation) ---
{"project_type":"landing_page","theme":"light","domain":"ecommerce","tone":"modern","audience":"general","explicit_sections":[],"error":"scope_violation"}

--- TEST 4: The Empty Prompt (Minimalist) ---
{"project_type":"landing_page","theme":"light","domain":"general","tone":"modern","audience":"general","explicit_sections":[],"error":null}
