# Lab 3 - DL

## 1) Instalación + carga EXACTA de Qwen3-8B

In [None]:
!pip -q install --upgrade transformers accelerate bitsandbytes einops

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m96.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.7 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import json, re, time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

In [None]:
# Hugging Face Hub repository id passed to the tokenizer/model `from_pretrained` calls.
MODEL_ID = "Qwen/Qwen3-8B"

In [None]:
# Reproducibility: fix the cuDNN flags, then seed the CUDA and default
# generators with one shared value.
SEED = 0
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.cuda.manual_seed_all(SEED)
torch.manual_seed(SEED)

In [None]:
# 4-bit NF4 weight quantization with float16 compute (bitsandbytes).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
)

# NOTE(review): trust_remote_code=True allows code shipped in the Hub repo to
# run locally; confirm it is actually needed for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    use_fast=True,
    trust_remote_code=True
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    # `torch_dtype` is deprecated in the installed transformers release
    # (the library itself warns "Use `dtype` instead").
    dtype=torch.float16,
    trust_remote_code=True
)
# Inference only. The trailing semicolon stops the notebook from echoing the
# full module repr returned by eval().
model.eval();

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/728 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/399 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 4096)
    (layers): ModuleList(
      (0-35): 36 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=12288, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=12288, bias=False)
          (down_proj): Linear4bit(in_features=12288, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen3RMSNorm((4096,), eps=1e-06

## 2) Utils: parsear los 2 [STATEMENT] (demo + query) del contexto

In [None]:
def extract_domain(scenario_context: str) -> str:
    """Return the planning domain of a scenario.

    The scenario is classified as ``"blocks"`` when its text contains the
    phrase ``"set of blocks"``; every other scenario is ``"objects"``.
    """
    if "set of blocks" in scenario_context:
        return "blocks"
    return "objects"

def parse_statements(scenario_context: str):
    """Split a scenario into its ``[STATEMENT]`` sections, in order.

    Text before the first ``[STATEMENT]`` marker is ignored. Each section
    becomes a dict with keys:

    - ``init``: text after "As initial conditions I have that," up to the end
      of that line ("" if absent).
    - ``goal``: text after "My goal is to have that" up to the end of that
      line ("" if absent).
    - ``plan_lines``: non-blank, stripped lines between ``[PLAN]`` and
      ``[PLAN END]`` (or the end of the section when there is no end marker).

    A demo statement has a non-empty ``plan_lines``; the final query ends at
    ``[PLAN]`` and therefore has an empty one.
    """
    def first_group(pattern, text, flags=0):
        # Stripped first capture group of the first match, or "" when no match.
        hit = re.search(pattern, text, flags=flags)
        return hit.group(1).strip() if hit else ""

    statements = []
    for section in scenario_context.split("[STATEMENT]")[1:]:
        plan_body = first_group(r"\[PLAN\](.*?)(?:\[PLAN END\]|$)", section, flags=re.S)
        stripped_lines = (raw.strip() for raw in plan_body.splitlines())
        statements.append({
            "init": first_group(r"As initial conditions I have that,(.*?)(?:\n|$)", section),
            "goal": first_group(r"My goal is to have that(.*?)(?:\n|$)", section),
            "plan_lines": [step for step in stripped_lines if step],
        })
    return statements

## 3) Mapear el plan demo (natural) -> acciones canónicas (como Examples.json)

In [None]:
def canonicalize_line(line: str, domain: str):
    """Translate one natural-language plan step into its canonical action.

    The line is stripped and lower-cased, then matched (anchored at its start)
    against the phrase patterns of the given domain.

    Args:
        line: A single plan step, e.g. ``"pick up the red block"``.
        domain: ``"blocks"`` selects the block phrases; any other value
            selects the object phrases.

    Returns:
        The canonical action string such as ``"(engage_payload red)"`` or
        ``"(feast a b)"``, or ``None`` when no pattern matches.
    """
    text = line.strip().lower()

    if domain == "blocks":
        rules = (
            (r"pick up the (\w+) block", "engage_payload"),
            (r"put down the (\w+) block", "release_payload"),
            (r"unmount_node the (\w+) block from on top of the (\w+) block", "unmount_node"),
            # "unstack" is accepted as an alias, mirroring the "stack" alias
            # below; re.match("stack ...") cannot match a line starting with
            # "unstack", so it needs its own rule.
            (r"unstack the (\w+) block from on top of the (\w+) block", "unmount_node"),
            (r"mount_node the (\w+) block on top of the (\w+) block", "mount_node"),
            # "stack" alias, in case the plain wording shows up.
            (r"stack the (\w+) block on top of the (\w+) block", "mount_node"),
        )
    else:
        rules = (
            (r"attack object (\w+)", "attack"),
            (r"succumb object (\w+)", "succumb"),
            (r"feast object (\w+) from object (\w+)", "feast"),
            (r"overcome object (\w+) from object (\w+)", "overcome"),
        )

    for pattern, action in rules:
        hit = re.match(pattern, text)
        if hit:
            return f"({action} {' '.join(hit.groups())})"
    return None

def canonicalize_plan(plan_lines, domain):
    """Canonicalize every step of a plan, preserving order.

    Each line goes through ``canonicalize_line``. A line that cannot be
    canonicalized is kept as its stripped original text instead of being
    dropped (the demo plans in the task file are expected to parse fully).
    """
    result = []
    for raw_step in plan_lines:
        mapped = canonicalize_line(raw_step, domain)
        result.append(raw_step.strip() if mapped is None else mapped)
    return result

## 4) Prompt (CoT/ToT/GoT interno) + salida estricta

In [None]:
# System message sent with every request: asks for hidden reasoning and an
# output made only of canonical actions, one per line.
SYSTEM = "\n".join([
    "You are a deterministic planner. Solve the final query using the rules shown.",
    "Use internal Chain-of-Thought, Tree-of-Thought (2 candidates), and Graph-of-Thought (dependency graph),",
    "BUT DO NOT reveal reasoning.",
    "Output ONLY the final plan as one canonical action per line, exactly like '(action arg1 arg2)'.",
    "No numbering, no extra text.",
])

In [None]:
def build_user_prompt(domain: str, demo_init: str, demo_goal: str, demo_plan_canon: list,
                      query_init: str, query_goal: str) -> str:
    """Assemble the user message for one task.

    The prompt has three parts, in order: the list of canonical actions for
    the domain (``"blocks"`` or, for any other value, objects), one solved
    example (initial state, goal and canonical plan), and the query to solve,
    ending with an open ``Plan:`` line.

    Args:
        domain: Domain name; only ``"blocks"`` selects the block actions.
        demo_init: Initial conditions of the solved example.
        demo_goal: Goal of the solved example.
        demo_plan_canon: Canonical actions of the solved example, one per item.
        query_init: Initial conditions of the query.
        query_goal: Goal of the query.

    Returns:
        The full prompt text.
    """
    blocks_spec = (
        "Domain = BLOCKS. Canonical actions:\n"
        "- (engage_payload X)\n"
        "- (release_payload X)\n"
        "- (unmount_node X Y)\n"
        "- (mount_node X Y)\n"
    )
    objects_spec = (
        "Domain = OBJECTS. Canonical actions:\n"
        "- (attack x)\n"
        "- (succumb x)\n"
        "- (feast x y)\n"
        "- (overcome x y)\n"
    )
    spec = blocks_spec if domain == "blocks" else objects_spec
    rendered_plan = "\n".join(demo_plan_canon)

    return (
        f"{spec}\n"
        "Solved example (for style + rules):\n"
        f"Initial: {demo_init}\n"
        f"Goal: {demo_goal}\n"
        f"Plan:\n{rendered_plan}\n"
        # Extra instruction: push the model to start with an action right away.
        "\nNow solve this query. Start directly with the first action in parentheses.\n"
        f"Initial: {query_init}\n"
        f"Goal: {query_goal}\n"
        "Plan:\n"
    )

## 5) Inferencia BATCH determinista (temperature=0.0, do_sample=False)

In [None]:
@torch.inference_mode()
def qwen_generate_batch(user_prompts, max_new_tokens=96, batch_size=8):
    """Generate one greedy completion per prompt, processing prompts in batches.

    Each prompt is wrapped in the chat template together with the ``SYSTEM``
    message (thinking disabled), tokenized with padding, and decoded greedily.

    Args:
        user_prompts: List of user-message strings.
        max_new_tokens: Maximum number of tokens generated per prompt.
        batch_size: Number of prompts sent to ``model.generate`` at once.

    Returns:
        List of decoded completions (special tokens removed, stripped), one
        per prompt, in the same order as ``user_prompts``.
    """
    outputs = []

    # Batched generation with a decoder-only model needs LEFT padding so that
    # every prompt ends right where generation starts. The tokenizer setting
    # is restored afterwards to avoid leaking the change to other callers.
    previous_padding_side = tokenizer.padding_side
    tokenizer.padding_side = "left"
    try:
        for i in range(0, len(user_prompts), batch_size):
            chunk = user_prompts[i:i+batch_size]

            # Apply the chat template to each prompt.
            texts = []
            for up in chunk:
                messages = [{"role": "system", "content": SYSTEM},
                            {"role": "user", "content": up}]
                text = tokenizer.apply_chat_template(
                    messages,
                    add_generation_prompt=True,
                    tokenize=False,
                    enable_thinking=False,   # avoids visible "thinking" tokens
                )
                texts.append(text)

            inputs = tokenizer(texts, return_tensors="pt", padding=True).to(model.device)
            # generate() returns [padded prompt | new tokens] for every row, so
            # the completion starts at the PADDED width, not at the per-row
            # count of non-pad tokens.
            prompt_width = inputs["input_ids"].shape[1]

            gen_kwargs = dict(
                max_new_tokens=max_new_tokens,
                do_sample=False,       # greedy => deterministic
                temperature=0.0,       # required by the assignment statement
                top_p=1.0,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
            )

            out = model.generate(**inputs, **gen_kwargs)

            for j in range(len(chunk)):
                gen_ids = out[j, prompt_width:]
                txt = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
                outputs.append(txt)
    finally:
        tokenizer.padding_side = previous_padding_side

    return outputs

## 6) Parsear salida -> target_action_sequence + complexity_level

In [None]:
def parse_action_lines(model_text: str):
    """Extract every parenthesized group from the model output, in order.

    Anything before, between or after the groups is ignored. Runs of
    whitespace inside a group are collapsed to a single space.

    Returns:
        List of strings such as ``"(mount_node red blue)"``; empty when the
        text contains no parenthesized group.
    """
    whitespace_run = re.compile(r'\s+')
    return [
        whitespace_run.sub(' ', hit.group(0)).strip()
        for hit in re.finditer(r'\(.*?\)', model_text)
    ]

## 7) Correr Task.json y escribir predictions.json

In [None]:
# Mount Google Drive at /content/drive; the task, examples and output paths
# used by this notebook all live under that mount point.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Every lab file lives in the same Drive folder; only the file name differs.
LAB_DIR = "/content/drive/MyDrive/Deep Learning/Laboratorios/laboratorio_3_planning"
TASK_PATH = f"{LAB_DIR}/Task.json"
EXAMPLES_PATH = f"{LAB_DIR}/Examples.json"
OUT_PATH = f"{LAB_DIR}/predictions.json"

In [None]:
# Build one prompt per task, run the model task by task (timing each call),
# and write the predictions to OUT_PATH.
with open(TASK_PATH, "r", encoding="utf-8") as f:
    task_data = json.load(f)

# (Optional) Examples.json is loaded only for reference/debugging;
# this solver does NOT need retrieval: every task already embeds one demo.
with open(EXAMPLES_PATH, "r", encoding="utf-8") as f:
    examples_data = json.load(f)

user_prompts = []
meta = []  # kept aligned with user_prompts to rebuild the output records

for item in task_data:
    sc = item["scenario_context"]
    domain = extract_domain(sc)
    statements = parse_statements(sc)

    # Expected layout: demo = second to last statement, query = last one.
    # Fail with the offending task id instead of a bare IndexError.
    if len(statements) < 2:
        raise ValueError(
            f"Task {item['assembly_task_id']!r}: expected a demo and a query "
            f"[STATEMENT], found {len(statements)}"
        )
    demo = statements[-2]
    query = statements[-1]

    demo_plan_canon = canonicalize_plan(demo["plan_lines"], domain)

    up = build_user_prompt(
        domain=domain,
        demo_init=demo["init"],
        demo_goal=demo["goal"],
        demo_plan_canon=demo_plan_canon,
        query_init=query["init"],
        query_goal=query["goal"],
    )

    user_prompts.append(up)
    meta.append({"assembly_task_id": item["assembly_task_id"]})

preds = []
print("Iniciando generación y medición de tiempo por tarea...")

# Tasks are run one at a time so that each one gets its own wall-clock time.
for m, up in zip(meta, user_prompts):
    t0 = time.time()

    # Single prompt, batch_size=1: the measured time belongs to this task only.
    raw_output = qwen_generate_batch([up], max_new_tokens=256, batch_size=1)[0]

    dt = time.time() - t0  # elapsed seconds for this task

    actions = parse_action_lines(raw_output)

    preds.append({
        "assembly_task_id": m["assembly_task_id"],
        "complexity_level": len(actions),
        "target_action_sequence": actions,
        "execution_time_seconds": round(dt, 2)  # rounded to 2 decimals
    })

with open(OUT_PATH, "w", encoding="utf-8") as f:
    json.dump(preds, f, ensure_ascii=False, indent=2)

print("Saved:", OUT_PATH)
# An empty task file yields no predictions; skip the preview instead of crashing.
if preds:
    print("Preview first item:", preds[0])

Iniciando generación y medición de tiempo por tarea...
Saved: /content/drive/MyDrive/Deep Learning/Laboratorios/laboratorio_3_planning/predictions.json
Preview first item: {'assembly_task_id': 'task_f6c3f52f55', 'complexity_level': 9, 'target_action_sequence': ['(unmount_node orange red)', '(release_payload orange)', '(unmount_node red blue)', '(release_payload red)', '(mount_node red orange)', '(engage_payload blue)', '(mount_node blue red)', '(engage_payload yellow)', '(mount_node yellow blue)'], 'execution_time_seconds': 5.27}
