In [10]:
# 1.1 - Make sure requirements.txt is installed and training.py is in the working directory
import os
from pathlib import Path
import json

# Reduce the maximum size of allocator blocks to limit CUDA memory fragmentation.
# The variable is set here, ahead of the torch import below.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
import torch
import matplotlib.pyplot as plt

import sys
import warnings
from tqdm import TqdmWarning
# Silence every warning of category TqdmWarning.
warnings.filterwarnings("ignore", category=TqdmWarning)
# Silence warnings whose message matches the regular expression below.
warnings.filterwarnings(
    "ignore",
    message="You have not specified a value for the `type` parameter.*"
)

# Put the resolved current working directory on the import path so that the
# local `training` module can be imported below.
sys.path.append(str(Path().resolve()))

from training import run_lora_train

import gradio as gr
# NOTE(review): `chatbot` is not referenced by any cell visible in this notebook;
# confirm it is still needed.
chatbot = gr.Chatbot(type="messages")



In [11]:
# Report whether CUDA is usable and, if so, the name and memory usage of GPU 0.
cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)
if cuda_ok:
    bytes_per_mb = 1024**2
    # (label, value) pairs, printed in order; device 0 is queried explicitly.
    gpu_report = [
        ("Device name:", torch.cuda.get_device_name(0)),
        ("Current device index:", torch.cuda.current_device()),
        ("Allocated (MB):", torch.cuda.memory_allocated(0) / bytes_per_mb),
        ("Cached    (MB):", torch.cuda.memory_reserved(0) / bytes_per_mb),
    ]
    for label, value in gpu_report:
        print(label, value)

CUDA available: True
Device name: NVIDIA GeForce RTX 3070 Laptop GPU
Current device index: 0
Allocated (MB): 4996.25
Cached    (MB): 7748.0


In [12]:
# 2. Quick experiment: LoRA on a subset
# Build a small training subset so that `run_lora_train` can be smoke-tested quickly.

# Quick-experiment configuration
BASE_TRAIN = "data/splits/train.json"
# NOTE(review): the subset is written under checkpoints/ with an "_aug" name;
# confirm this does not overwrite an augmented training file produced elsewhere.
SUBSET_FILE = "checkpoints/train_aug.json"
SUBSET_SIZE = 500  # maximum number of examples kept for the quick run

with open(BASE_TRAIN, "r", encoding="utf-8") as f:
    records = json.load(f)

# Keep the first SUBSET_SIZE examples (assumes the JSON top level is a list;
# a shorter list simply yields fewer examples).
subset = records[:SUBSET_SIZE]

# Create the directory the subset is actually written to. The previous version
# created "data/splits" instead, so the write below failed whenever
# "checkpoints/" did not exist yet.
Path(SUBSET_FILE).parent.mkdir(parents=True, exist_ok=True)
with open(SUBSET_FILE, "w", encoding="utf-8") as f:
    json.dump(subset, f, ensure_ascii=False, indent=2)

print(f"Subset con {len(subset)} esempi salvato in {SUBSET_FILE}")


Subset con 500 esempi salvato in checkpoints/train_aug.json


In [14]:
# Make sure the experiment output directory exists
exp_dir = Path("experiments/lora_subset")
exp_dir.mkdir(parents=True, exist_ok=True)

# Hand cached-but-unused CUDA blocks back to the driver before the model is loaded.
# The diagnostics cell reported several GB already allocated/reserved in this kernel,
# and the recorded run of this cell failed with
# "Some modules are dispatched on the CPU or the disk".
# NOTE(review): empty_cache() cannot release memory still held by live tensors
# (e.g. a model left over from an earlier run). If loading still fails, restart
# the kernel and re-run the notebook from the top.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Launch LoRA on the subset with a small batch size
run_lora_train(
    train_file=SUBSET_FILE,
    output_dir=str(exp_dir),
    model_name="huggyllama/llama-7b",
    seed=123,
    use_4bit=True,
    use_bfloat16=False,
    batch_size=1
)


[LoRA] Esempi da checkpoints\train_aug.json: 500


loading file tokenizer.model from cache at C:\Users\jinet\.cache\huggingface\hub\models--huggyllama--llama-7b\snapshots\4782ad278652c7c71b72204d462d6d01eaaf7549\tokenizer.model
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at C:\Users\jinet\.cache\huggingface\hub\models--huggyllama--llama-7b\snapshots\4782ad278652c7c71b72204d462d6d01eaaf7549\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\jinet\.cache\huggingface\hub\models--huggyllama--llama-7b\snapshots\4782ad278652c7c71b72204d462d6d01eaaf7549\tokenizer_config.json
loading file tokenizer.json from cache at C:\Users\jinet\.cache\huggingface\hub\models--huggyllama--llama-7b\snapshots\4782ad278652c7c71b72204d462d6d01eaaf7549\tokenizer.json
loading file chat_template.jinja from cache at None


[LoRA] Train/Val sizes = 475/25
[LoRA] Dataset tokenization debug salvato in lora_dataset_debug.csv


Map:   0%|          | 0/475 [00:00<?, ? examples/s]

Map:   0%|          | 0/25 [00:00<?, ? examples/s]

[LoRA] Device: cuda
[LoRA] ‚öôÔ∏è  Preparazione caricamento modello‚Ä¶


loading configuration file config.json from cache at C:\Users\jinet\.cache\huggingface\hub\models--huggyllama--llama-7b\snapshots\4782ad278652c7c71b72204d462d6d01eaaf7549\config.json
Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 2048,
  "max_sequence_length": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.52.4",
  "use_cache": true,
  "vocab_size": 32000
}

CUDA backend validation successful.
loading weights file model

ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
# 3. Training vs. validation curves
# Display the `loss_curve.png` found in the experiment directory, if there is one.
# NOTE(review): the original note also mentions loading `lora_dataset_debug.csv`,
# but this cell only displays the image.

import matplotlib.image as mpimg

img_path = exp_dir / "loss_curve.png"
if not img_path.exists():
    # Nothing to show: report the path that was checked.
    print("Nessuna loss_curve.png trovata in", img_path)
else:
    # Read the saved plot and render it without axes.
    img = mpimg.imread(str(img_path))
    plt.figure(figsize=(6, 4))
    plt.imshow(img)
    plt.axis("off")


In [None]:
# 4. Parameter sweep: temperature & learning rate
# Quick loop over 2 temperatures x 2 learning rates, one output directory per combination.
#
# NOTE(review): `temp` and `lr` are only used to build the directory name; they are
# not passed to `run_lora_train`, so every iteration launches the same training
# configuration. The function needs to accept them before this sweep is meaningful.
# NOTE(review): `results` is created but never filled in this cell.

temps = [0.5, 0.8]
lrs   = [5e-5, 1e-4]
results = {}

# All (temperature, learning rate) pairs, temperature varying slowest.
sweep_grid = [(temp, lr) for temp in temps for lr in lrs]

for temp, lr in sweep_grid:
    tag = f"t{temp}_lr{lr}"
    out = Path(f"experiments/sweep_{tag}")
    out.mkdir(parents=True, exist_ok=True)
    print(f"‚Üí Sweep {tag} (1 epoca)")
    # Hypothetical run: lr and temp should become additional arguments here.
    run_lora_train(
        train_file=SUBSET_FILE,
        output_dir=str(out),
        model_name="huggyllama/llama-7b",
        seed=123,
        use_4bit=False,
        batch_size=1
    )
    # TODO: extract the final train/val loss from the logs and store it in results[tag]



## 📝 5. Conclusioni preliminari

- **Subset training**: time per epoca ~ X minuti
- **Curve**: trend di loss, over/underfitting
- **Param Sweeps**: quale combo (temp, lr) dà loss più bassa?

> *Qui annota direttamente nel notebook i tuoi commenti mentre guardi i grafici e i numeri.*
