# Test LoRA en Google Colab (estable)

Este notebook carga tu adapter LoRA y genera un ejercicio directamente en Colab.


## 1) Instalar dependencias (versiones compatibles)


In [None]:
# Install the libraries used by this notebook. `--upgrade` combined with `>=` lower bounds
# installs the newest matching release, so the resolved versions can differ between runs.
# NOTE(review): `trl` is pinned here but no cell visible in this notebook imports it —
# confirm it is still needed.
!pip install -q --upgrade "transformers>=4.48.0" "trl==0.11.4" "peft>=0.12.0" "datasets>=2.20.0" "accelerate>=0.31.0" "bitsandbytes>=0.43.0"


**Nota:** si acabas de instalar las dependencias, reinicia el runtime y vuelve a ejecutar desde aquí (sección 2 en adelante).


## 2) Subir tu adapter LoRA (zip)
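Sube `qwen3-jupyter-lora.zip`. El zip debe contener la carpeta `qwen3-jupyter-lora/` (con `adapter_config.json` dentro), porque las celdas siguientes esperan la ruta `model/qwen3-jupyter-lora`.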


In [None]:
# Ask Colab for a file upload. The result is kept in `uploaded`; the next cell reads its
# keys as file names and picks the one ending in `.zip`.
from google.colab import files
uploaded = files.upload()


In [None]:
import os, zipfile
from pathlib import Path

# Pick the first uploaded file whose name ends in .zip (case-insensitive). When nothing
# matches, fail with a readable message instead of a bare IndexError.
zip_name = next((name for name in uploaded.keys() if name.lower().endswith('.zip')), None)
if zip_name is None:
    raise FileNotFoundError(f'No .zip file among the uploaded files: {list(uploaded.keys())}')

# Unpack the archive into ./model and show what landed at the top level.
with zipfile.ZipFile(zip_name, 'r') as z:
    z.extractall('model')
print('model files:', os.listdir('model'))

# List every adapter_config.json under model/ so the adapter folder can be compared with
# the path hard-coded in the following cells.
for config_file in sorted(Path('model').rglob('adapter_config.json')):
    print(config_file)


## 3) Parche opcional (si falla por `alora_invocation_tokens`)


In [None]:
import json
import os  # imported here as well so this cell runs on its own (e.g. after a runtime restart)

# Optional workaround described by the section heading: remove the
# `alora_invocation_tokens` key from the adapter config and rewrite the file in place.
# NOTE(review): presumably the installed peft version rejects this key — confirm.
cfg_path = 'model/qwen3-jupyter-lora/adapter_config.json'
if os.path.exists(cfg_path):
    with open(cfg_path, encoding='utf-8') as f:
        cfg = json.load(f)
    if 'alora_invocation_tokens' in cfg:
        del cfg['alora_invocation_tokens']
        # Only rewrite the file when something was actually removed.
        with open(cfg_path, 'w', encoding='utf-8') as f:
            json.dump(cfg, f, indent=2)
        print('patched', cfg_path)
    else:
        print('no patch needed')
else:
    print('adapter_config.json not found, check LORA_PATH')


## 4) Cargar modelo base + LoRA y generar ejercicio


In [None]:
import json
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hub id of the base model and the local folder holding the LoRA adapter
# (the unzip cell extracts the uploaded archive under ./model).
BASE_MODEL = 'Qwen/Qwen3-4B-Instruct-2507'
LORA_PATH = './model/qwen3-jupyter-lora'
# JSON skeleton that build_prompt embeds verbatim in the prompt.
SCHEMA = r'''{"title":"...","instructions":"...","starterCode":"...","solutionCode":"...","expectedOutput":"...","hints":["...","..."],"files":[{"filename":"...","description":"...","columns":["..."]}],"steps":["..."],"acceptanceCriteria":["..."]}'''

# NOTE(review): trust_remote_code=True lets code shipped with the model repo run locally —
# confirm it is still required for this model.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, trust_remote_code=True)
# Reuse EOS as the padding token when the tokenizer defines none; generate() is later
# called with pad_token_id=tokenizer.pad_token_id.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization and float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load the quantized base model, letting device_map='auto' decide device placement.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map='auto',
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
# Attach the LoRA adapter on top of the base model and switch to evaluation mode.
model = PeftModel.from_pretrained(base_model, LORA_PATH)
model.eval()

def build_prompt(topic, difficulty, exercise_type, dataset_size):
    """Compose the newline-separated instruction prompt for one exercise request.

    The prompt consists of fixed instructions, the module-level ``SCHEMA`` string,
    and one ``- name: value`` bullet per argument.

    Args:
        topic: Value rendered on the ``- topic:`` line.
        difficulty: Value rendered on the ``- difficulty:`` line.
        exercise_type: Value rendered on the ``- exerciseType:`` line.
        dataset_size: Value rendered on the ``- datasetSize:`` line.

    Returns:
        The full prompt as a single string.
    """
    header = (
        'You are an expert instructor creating beginner-friendly Jupyter exercises.',
        'Return ONLY valid JSON, no markdown fences, no extra commentary.',
        'Use exactly this schema:',
        SCHEMA,
        'All text must be in Spanish. hints must be an array with 2 to 4 items.',
    )
    parameters = (
        ('topic', topic),
        ('difficulty', difficulty),
        ('exerciseType', exercise_type),
        ('datasetSize', dataset_size),
    )
    bullet_lines = [f'- {label}: {value}' for label, value in parameters]
    return '\n'.join([*header, *bullet_lines])

# Generation settings.
# The prompt asks for a JSON object with nine fields (including starter and solution code);
# the previous budget of 96 new tokens is too small for that, so the output was cut off
# before the closing brace and the validation cell had to fall back.
MAX_NEW_TOKENS = 1024
# Sampling is enabled below, so seed torch to make re-runs comparable.
GENERATION_SEED = 42

# NOTE(review): the prompt is tokenized as plain text, without the tokenizer's chat
# template — confirm this matches the format the adapter was trained on.
prompt = build_prompt('pandas','basica','completar_codigo','pequeno')
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

torch.manual_seed(GENERATION_SEED)
with torch.inference_mode():
    output = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=0.2,
        top_p=0.7,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

# Drop the prompt tokens so only the newly generated continuation is decoded.
prompt_length = inputs['input_ids'].shape[-1]
gen = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
print(gen[:600])


## 5) Validar JSON


In [None]:
# Trim surrounding whitespace from the generated text and preview its first 300 characters.
text = gen.strip()
print('raw preview:', text[:300])
# Candidate JSON span: first '{' to last '}'. Either index is -1 when that brace is absent.
start, end = text.find('{'), text.rfind('}')
def build_fallback(kv):
    """Return a canned pandas exercise used when the model output is not usable JSON.

    Args:
        kv: Mapping of scraped ``key: value`` pairs. Only ``datasetDescription`` is read;
            it overrides the default description of the practice file.

    Returns:
        A new dict with the keys title, instructions, starterCode, solutionCode,
        expectedOutput, hints, files, steps and acceptanceCriteria.
    """
    # The only dynamic piece: the dataset description, with a fixed default.
    practice_file = {
        'filename': 'datos_practica.csv',
        'description': kv.get('datasetDescription', 'Dataset de ventas de productos en una tienda local'),
        'columns': ['id','fecha','categoria','ventas','costo'],
    }

    exercise = {}
    exercise['title'] = 'Ejercicio de pandas (basica)'
    exercise['instructions'] = (
        'Practica pandas con dificultad basica.\n'
        'Tipo de actividad: completar_codigo.\n'
        'Trabaja con un dataset de tamano pequeno (~40 filas).'
    )
    exercise['starterCode'] = (
        "import pandas as pd\n\n"
        "df = pd.read_csv('datos_practica.csv')\n"
        "# TODO: completa la solucion\n"
        "df.head()\n"
    )
    exercise['solutionCode'] = (
        "import pandas as pd\n\n"
        "df = pd.read_csv('datos_practica.csv')\n"
        "resumen = df.groupby('categoria')['ventas'].mean().sort_values(ascending=False)\n"
        "print(resumen)\n"
    )
    exercise['expectedOutput'] = 'Serie con promedio de ventas por categoria ordenada de mayor a menor.'
    exercise['hints'] = [
        'Verifica que el archivo cargue sin columnas nulas inesperadas.',
        'Descompone el problema en pasos pequenos y validables.',
        'Compara tu salida con el criterio de aceptacion.',
    ]
    exercise['files'] = [practice_file]
    exercise['steps'] = [
        'Carga el archivo de datos y revisa columnas y tipos.',
        'Aplica la operacion solicitada segun el tema.',
        'Muestra la salida final y valida que sea consistente.',
    ]
    exercise['acceptanceCriteria'] = [
        'El codigo se ejecuta sin errores.',
        'La salida cumple el objetivo del ejercicio.',
        'El resultado usa correctamente las columnas esperadas.',
    ]
    return exercise

def _parse_key_values(raw):
    """Scrape ``key: value`` pairs from free-form text, one pair per line.

    Blank lines and lines without a colon are skipped. A single leading ``-`` is
    removed before splitting on the first colon. Later duplicates of a key overwrite
    earlier ones.

    Args:
        raw: The text to scan.

    Returns:
        Dict mapping stripped keys to stripped values.
    """
    pairs = {}
    for line in raw.splitlines():
        line = line.strip()
        if not line or ':' not in line:
            continue
        if line.startswith('-'):
            line = line[1:].strip()
        key, value = line.split(':', 1)
        pairs[key.strip()] = value.strip()
    return pairs


# Try to parse the brace-delimited span located earlier; `payload` stays None when there
# is no such span or when it is not valid JSON.
payload = None
if start == -1 or end == -1 or end <= start:
    print('No JSON object found, using fallback.')
else:
    try:
        payload = json.loads(text[start:end+1])
        print('JSON ok')
    except json.JSONDecodeError:
        print('JSON invalid, using fallback.')

# Single fallback path for both failure modes: scrape whatever key/value lines the model
# produced and build the canned exercise from them.
if payload is None:
    kv = _parse_key_values(text)
    payload = build_fallback(kv)

payload
