# README

Files:
- config.json — architecture + basic metadata
- weights.pth — raw PyTorch state_dict (CPU tensors)
- model_package.pth — single-file package with arch_config + state_dict + meta
- {os.path.basename(ckpt_path)} — original Lightning checkpoint (copied for provenance)
{('- model_torchscript.pt — TorchScript' if DO_TORCHSCRIPT and 'torchscript_path' in results else '')}
{('- model.onnx — ONNX' if DO_ONNX and 'onnx_path' in results else '')}

Load from model_package.pth:
```python
import torch, json
from model_generation import GeneratedModel

pkg = torch.load("model_package.pth", map_location="cpu")
arch = pkg["arch_config"]
state = pkg["state_dict"]
meta = pkg["meta"]

# If you know input/output size:
model = GeneratedModel(input_size=INPUT_SIZE, output_size=OUTPUT_SIZE, architecture_config=arch)
# Strip 'model.' keys if present
state = {{ (k.split('model.',1)[1] if k.startswith('model.') else k): v for k,v in state.items() }}
model.load_state_dict(state, strict=False)
model.eval()
```

In [18]:

import os, re, json, shutil, argparse
from datetime import datetime
from typing import Dict, Any, Optional, List
import pandas as pd
import torch

# ----------------------
# Config (edit defaults or pass args)
# ----------------------
CSV_PATH = "/home/admindi/sbenites/WirelessLocation/validation/model_per_dataset_validation/wandb_export_2025-08-22T14_23_07.979+01_00.csv"
SEARCH_ROOTS = [
    "/home/admindi/sbenites/WirelessLocation",
    "/mnt/data",
]
EXPORT_ROOT = "/home/admindi/sbenites/WirelessLocation/validation/model_per_dataset_validation/full_models"

# What to produce per run
WRITE_PACKAGE = True            # model_package.pth containing {'arch_config','state_dict','meta'}
WRITE_RAW_STATE_DICT = True     # weights.pth (state_dict only, CPU)
COPY_CKPT = True                # copy original lightning .ckpt for provenance
DO_TORCHSCRIPT = False          # also export torchscript (requires model construction)
DO_ONNX = False                 # also export onnx (requires model construction)

# If you want TorchScript/ONNX, either set INPUT_SIZE/OUTPUT_SIZE, or enable INFER_DIMS_FROM_DB
INPUT_SIZE = None               # e.g., 256
OUTPUT_SIZE = None              # e.g., 2
INFER_DIMS_FROM_DB = False
DB_NAME = "wifi_fingerprinting_data"
SUBSET_FOR_DIMS = ["reto_grande_indoor"]  # minimal one collection to probe dims

# ----------------------
# CSV -> arch_config mapping
# ----------------------
CSV_KEY_MAP = {
    "architecture.num_conv_layers": "num_conv_layers",
    "architecture.filters_per_layer": "filters_per_layer",
    "architecture.kernel_size": "kernel_size",
    "architecture.stride": "stride",
    "architecture.padding": "padding",
    "architecture.activation": "activation",
    "architecture.batch_norm": "batch_norm",
    "architecture.dropout": "dropout",
    "architecture.pooling_type": "pooling_type",
    "architecture.pool_size": "pool_size",
    "architecture.residual_connections": "residual_connections",
    "architecture.learning_rate": "learning_rate",
    "architecture.weight_decay": "weight_decay",
    "architecture.optimizer": "optimizer",
    "architecture.batch_size": "batch_size",
    "architecture.normalization": "normalization",
    "architecture.initialization": "initialization",
}


In [19]:
def _coerce_bool(v):
    if isinstance(v, bool): return v
    if isinstance(v, (int, float)): return bool(v)
    if isinstance(v, str):
        vl = v.strip().lower()
        if vl in ("true","yes","1"): return True
        if vl in ("false","no","0",""): return False
    return False

def row_to_arch_config(row: pd.Series) -> Optional[Dict[str, Any]]:
    cfg = {}
    present = False
    for csv_key, cfg_key in CSV_KEY_MAP.items():
        if csv_key in row and pd.notnull(row[csv_key]):
            val = row[csv_key]; present = True
            if cfg_key in {"num_conv_layers","filters_per_layer","kernel_size","stride","pool_size","batch_size"}:
                try: val = int(float(val))
                except: pass
            elif cfg_key in {"dropout","learning_rate","weight_decay"}:
                try: val = float(val)
                except: pass
            elif cfg_key in {"batch_norm","residual_connections"}:
                val = _coerce_bool(val)
            elif cfg_key in {"activation","optimizer","padding","pooling_type","normalization","initialization"}:
                val = str(val).strip().lower()
            cfg[cfg_key] = val
    if not present: return None
    if "dropout" in cfg:
        cfg.setdefault("use_dropout", float(cfg["dropout"]) > 0.0)
    cfg.setdefault("normalization", "none")
    cfg.setdefault("initialization", "default")
    return cfg

def pick_model_name(row: pd.Series, cfg: Dict[str, Any]) -> str:
    for k in ["Name","name","run_name","id","Run"]:
        if k in row and pd.notnull(row[k]):
            return sanitize(str(row[k]))
    # fallback hash
    keys = ["num_conv_layers","filters_per_layer","kernel_size","stride","pooling_type","residual_connections"]
    sig = "-".join(str(cfg.get(k,"")) for k in keys)
    return sanitize(f"model_{abs(hash(sig))%100000}")

def sanitize(s: str) -> str:
    return re.sub(r"[^A-Za-z0-9._-]+", "_", s.strip())

def find_ckpt_for_name(name: str) -> Optional[str]:
    for root in SEARCH_ROOTS:
        pattern = os.path.join(root, "**", f"{name}.ckpt")
        matches = [p for p in glob(pattern) if os.path.isfile(p)]
        if matches:
            matches.sort(key=lambda p: os.path.getmtime(p), reverse=True)
            return matches[0]
    return None

def ensure_dir(path: str):
    os.makedirs(path, exist_ok=True)

def to_cpu_state_dict(sd: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    return {k: v.detach().cpu() for k, v in sd.items()}

def export_one(name: str, ckpt_path: str, arch_config: Dict[str, Any], out_dir: str, dims: Optional[tuple] = None) -> Dict[str, Any]:
    ensure_dir(out_dir)
    # Load lightning checkpoint
    ckpt = torch.load(ckpt_path, map_location="cpu")
    state_dict = ckpt.get("state_dict", ckpt)
    state_dict = to_cpu_state_dict(state_dict)

    # Metadata
    meta = {
        "run_name": name,
        "ckpt_path": ckpt_path,
        "exported_at": datetime.utcnow().isoformat() + "Z",
        "git_commit": os.getenv("GIT_COMMIT", ""),
        "pytorch_version": torch.__version__,
        "package_format_version": 1,
    }

    results = {"name": name, "out_dir": out_dir}

    # Write config.json
    with open(os.path.join(out_dir, "config.json"), "w", encoding="utf-8") as f:
        json.dump({"arch_config": arch_config, "meta": meta}, f, indent=2)

    # Write raw weights.pth
    if WRITE_RAW_STATE_DICT:
        weights_path = os.path.join(out_dir, "weights.pth")
        torch.save(state_dict, weights_path)
        results["weights_path"] = weights_path

    # Write single package with arch + weights + meta
    if WRITE_PACKAGE:
        package = {"arch_config": arch_config, "state_dict": state_dict, "meta": meta}
        pkg_path = os.path.join(out_dir, "model_package.pth")
        torch.save(package, pkg_path)
        results["package_path"] = pkg_path

    # Copy original ckpt
    if COPY_CKPT:
        dst = os.path.join(out_dir, os.path.basename(ckpt_path))
        try:
            shutil.copy2(ckpt_path, dst)
            results["ckpt_copy"] = dst
        except Exception as e:
            results["ckpt_copy_error"] = str(e)

    # Optional TorchScript/ONNX (requires model construction)
    if (DO_TORCHSCRIPT or DO_ONNX) and dims is not None:
        try:
            from model_generation import GeneratedModel
            input_size, output_size = dims
            model = GeneratedModel(input_size=input_size, output_size=output_size, architecture_config=arch_config)
            # The Lightning state_dict usually prefixes with 'model.' — strip if necessary
            cleaned = {}
            for k, v in state_dict.items():
                if k.startswith("model."):
                    cleaned[k[len("model."):]] = v
                else:
                    cleaned[k] = v
            missing, unexpected = model.load_state_dict(cleaned, strict=False)
            if missing:
                print(f"[{name}] Missing keys on model load: {missing[:8]}{'...' if len(missing)>8 else ''}")
            if unexpected:
                print(f"[{name}] Unexpected keys on model load: {unexpected[:8]}{'...' if len(unexpected)>8 else ''}")
            model.eval()

            dummy = torch.randn(1, input_size)
            if DO_TORCHSCRIPT:
                ts = torch.jit.trace(model, dummy)
                ts_path = os.path.join(out_dir, "model_torchscript.pt")
                ts.save(ts_path)
                results["torchscript_path"] = ts_path
            if DO_ONNX:
                onnx_path = os.path.join(out_dir, "model.onnx")
                torch.onnx.export(model, dummy, onnx_path, input_names=["input"], output_names=["output"], opset_version=17)
                results["onnx_path"] = onnx_path
        except Exception as e:
            results["jit_onnx_error"] = str(e)

            
    with open(os.path.join(out_dir, "README.txt"), "w", encoding="utf-8") as f:
        f.write(readme)

    return results

def glob(pattern: str) -> List[str]:
    import glob as _g
    return _g.glob(pattern, recursive=True)

def infer_dims_from_db() -> Optional[tuple]:
    try:
        from data_processing import get_dataset, combine_arrays, shuffle_array, split_combined_data
        datasets = [get_dataset(name, DB_NAME) for name in SUBSET_FOR_DIMS]
        combined = combine_arrays(datasets)
        X, y = split_combined_data(combined)
        return (X.shape[1], y.shape[1])
    except Exception as e:
        print("Failed to infer dims from DB:", e)
        return None

In [20]:



os.makedirs(EXPORT_ROOT, exist_ok=True)

df = pd.read_csv(CSV_PATH)
print("Rows in CSV:", len(df))

dims = None
if DO_TORCHSCRIPT or DO_ONNX:
    if INPUT_SIZE and OUTPUT_SIZE:
        dims = (int(INPUT_SIZE), int(OUTPUT_SIZE))
    elif INFER_DIMS_FROM_DB:
        dims = infer_dims_from_db()
    print("Dims for scripted exports:", dims)

overview = []
for _, row in df.iterrows():
    cfg = row_to_arch_config(row)
    if not isinstance(cfg, dict):
        continue
    name = pick_model_name(row, cfg)
    ckpt = find_ckpt_for_name(name)
    found = bool(ckpt)
    out_dir = os.path.join(EXPORT_ROOT, name)
    rec = {"name": name, "found_ckpt": found, "ckpt_path": ckpt, "out_dir": out_dir}
    if found:
        try:
            res = export_one(name, ckpt, cfg, out_dir, dims=dims)
            rec.update({k:v for k,v in res.items() if k not in {"name","out_dir"}})
            rec["status"] = "exported"
        except Exception as e:
            rec["status"] = f"error: {e}"
    else:
        rec["status"] = "missing_ckpt"
    overview.append(rec)
    print(f"{name}: {rec['status']}")

idx = pd.DataFrame(overview)
idx_path = os.path.join(EXPORT_ROOT, "export_index.csv")
idx.to_csv(idx_path, index=False)
print("Wrote index:", idx_path)

Rows in CSV: 105


  "exported_at": datetime.utcnow().isoformat() + "Z",


outdoor_indoor_and_garage_run3_depth7_model7: error: name 'readme' is not defined
all_data_run1_depth5_model1: error: name 'readme' is not defined
outdoor_indoor_and_garage_run0_depth0_model5: error: name 'readme' is not defined
all_data_run3_depth1_model7: error: name 'readme' is not defined
outdoor_indoor_and_garage_run1_depth3_model5: error: name 'readme' is not defined
all_data_run4_depth9_model9: error: name 'readme' is not defined
all_data_run7_depth2_model7: error: name 'readme' is not defined
all_data_run4_depth2_model5: error: name 'readme' is not defined
outdoor_indoor_and_garage_run2_depth0_model11: error: name 'readme' is not defined
all_data_run1_depth5_model7: error: name 'readme' is not defined
all_data_run3_depth2_model5: error: name 'readme' is not defined
all_data_run0_depth7_model5: error: name 'readme' is not defined
all_data_run0_depth8_model5: error: name 'readme' is not defined
all_data_run6_depth0_model2: error: name 'readme' is not defined
outdoor_indoor_and_ga