In [1]:
import json, numpy as np, torch, onnx
from pathlib import Path
from src.inference import load_torch_checkpoint
from src.models import TinyGlassNet
from src.config import NUM_CLASSES, SR, N_MELS, N_FFT, HOP_LENGTH
from src.data_utils import log_mel_spectrogram, load_audio
from src.datasets import load_index_df, subset_by_folds
from torch.utils.data import DataLoader, TensorDataset
import onnxruntime as ort
from src.config import SEED, LABEL_TO_ID, BACKGROUND_LABEL, NUM_CLASSES

# Notebook-wide configuration.
# Fold id held out as the evaluation split; every other fold is treated as training data.
VAL_FOLD = 5
# PyTorch checkpoint loaded by the ONNX export cell.
PT_PATH = Path("artifacts/tiny-audio-net.pt")
# Example audio clip; not referenced by any cell visible in this part of the notebook.
SAMPLE_PATH = Path("data/freesound/glass_ext_07.wav")
# Destination directory for the exported artifacts (ONNX, logs, .npy, .npz, JSON).
OUTPUT_DIR = Path("cache/exports")
# Create the export directory up front so later cells can write into it directly.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


In [2]:
# Load the trained checkpoint on CPU and switch the model to inference mode.
model, payload = load_torch_checkpoint(PT_PATH, device="cpu", num_classes=NUM_CLASSES)
model.eval()

# Export to ONNX with a fully static input shape:
# batch=1, 1 channel, 64 mel bins, 87 frames.
# NOTE(review): the window index printed later in this notebook reports mel
# windows of shape (64, 84); confirm 87 frames is the shape the exported model
# is really meant to receive.
static_input_shape = (1, 1, 64, 87)
example = torch.randn(*static_input_shape)
onnx_path = OUTPUT_DIR / "tiny-audio-net_static.onnx"

export_options = dict(
    input_names=["input"],
    output_names=["logits"],
    # dynamic_axes is deliberately left out so that every dimension stays static.
    opset_version=13,
    do_constant_folding=True,
)
torch.onnx.export(model, example, onnx_path.as_posix(), **export_options)
print(f"ONNX saved: {onnx_path}")


ONNX saved: cache/exports/tiny-audio-net_static.onnx


In [3]:
import numpy as np

# 1) Model architecture, as rendered by the PyTorch module repr.
struct_log = OUTPUT_DIR / "tiny-audio-net.structure.log"
struct_log.write_text(repr(model))
print(f"Structure saved to {struct_log}")

# 2) Every state_dict tensor written out in full (no "..." summarisation).
wb_log = OUTPUT_DIR / "tiny-audio-net.weights.log"
# threshold=np.inf disables numpy's summarisation of large arrays; the very
# large max_line_width keeps numpy from wrapping rows at the default width.
full_print_options = dict(
    separator=",",
    threshold=np.inf,
    max_line_width=1_000_000,
    suppress_small=False,
    floatmode="maxprec",
)
with wb_log.open("w") as log_file:
    for tensor_name, tensor in model.state_dict().items():
        values = tensor.detach().cpu().numpy().astype(np.float32)
        # One header line per tensor, then the values, then a blank separator line.
        header = f"{tensor_name} shape={values.shape}\n"
        body = np.array2string(values, **full_print_options)
        log_file.write(header + body + "\n\n")
print(f"Weights saved to {wb_log}")


Structure saved to cache/exports/tiny-audio-net.structure.log
Weights saved to cache/exports/tiny-audio-net.weights.log


In [4]:
import numpy as np

# Dump each state_dict tensor to its own .npy file (assumes `model` is already loaded).
weights_dir = OUTPUT_DIR / "weights_npy"
weights_dir.mkdir(parents=True, exist_ok=True)

state = model.state_dict()
for key in state:
    arr = state[key].detach().cpu().numpy()
    # Dots in the tensor name become underscores to form a flat file name.
    out_path = weights_dir / (key.replace(".", "_") + ".npy")
    np.save(out_path, arr)
    print("saved", out_path, arr.shape)


saved cache/exports/weights_npy/features_0_weight.npy (16, 1, 3, 3)
saved cache/exports/weights_npy/features_3_weight.npy (32, 16, 3, 3)
saved cache/exports/weights_npy/features_6_weight.npy (64, 32, 3, 3)
saved cache/exports/weights_npy/classifier_weight.npy (2, 64)


In [16]:
import ast
import numpy as np
import torch
from pathlib import Path

BACKGROUND_ID = -1  # Background marker; positive classes keep using the LABEL_TO_ID mapping

def _parse_list_cell(value):
    """Decode one CSV cell of a list-valued column.

    Strings are parsed with ``ast.literal_eval``. Missing cells become an
    empty list: that covers ``None`` as well as float NaN, which is what
    ``pandas.read_csv`` produces for an empty field. Any other value is
    returned unchanged.
    """
    if isinstance(value, str):
        return ast.literal_eval(value)
    if value is None:
        return []
    # NaN is the only float that does not compare equal to itself.
    if isinstance(value, float) and value != value:
        return []
    return value


def load_index_df(csv_path):
    """Read the window index CSV and decode its list-valued columns.

    NOTE(review): this definition shadows the ``load_index_df`` imported from
    ``src.datasets`` at the top of the notebook.

    Args:
        csv_path: Path of the CSV file, passed straight to ``pandas.read_csv``.

    Returns:
        A DataFrame in which the ``label_ids`` and ``labels`` columns (when
        present) hold Python objects instead of their string form; empty
        cells in those columns are turned into ``[]``.

    Raises:
        ValueError, SyntaxError: from ``ast.literal_eval`` when a string cell
            is not a valid Python literal.
    """
    import pandas as pd

    df = pd.read_csv(csv_path)
    # Both columns are stored as the string form of a list and share one decoder.
    for column in ("label_ids", "labels"):
        if column in df.columns:
            df[column] = df[column].apply(_parse_list_cell)
    return df

def subset_by_folds(df, folds):
    """Return the rows whose ``fold_id`` is contained in ``folds``, re-indexed from 0."""
    in_selected_fold = df["fold_id"].isin(folds)
    selected = df.loc[in_selected_fold]
    return selected.reset_index(drop=True)

def row_to_ids(row):
    """Resolve the class ids of one index row.

    Lookup order:
      1. ``label_ids`` when it is present and is a list or tuple (returned as is);
      2. ``labels`` (a single string or an iterable of names), keeping only the
         names found in ``LABEL_TO_ID``;
      3. ``label`` when it is a key of ``LABEL_TO_ID``.
    An empty list is returned when none of these yields an id.
    """
    explicit_ids = row["label_ids"] if "label_ids" in row else None
    if isinstance(explicit_ids, (list, tuple)):
        return explicit_ids

    if "labels" in row:
        names = row["labels"]
        # A bare string is treated as a single label name.
        names = [names] if isinstance(names, str) else names
        resolved = []
        for label_name in names:
            if label_name in LABEL_TO_ID:
                resolved.append(LABEL_TO_ID[label_name])
        return resolved

    if "label" not in row or row["label"] not in LABEL_TO_ID:
        return []
    return [LABEL_TO_ID[row["label"]]]

def df_to_tensor(df):
    """Build the input tensor and the multi-hot target matrix for an index frame.

    Every ``.npy`` file listed in ``df["path"]`` is loaded and stacked along a
    new leading axis, and a channel axis is inserted at position 1. The class
    ids returned by ``row_to_ids`` for each row are set to 1.0 in a
    ``(len(df), NUM_CLASSES)`` matrix; rows without any id stay all-zero.

    Returns:
        ``(x, y)``, both float32 torch tensors.
    """
    stacked = np.stack([np.load(path) for path in df["path"]])
    # Channel axis goes right after the batch axis: (B, H, W) -> (B, 1, H, W)
    # when each file holds a 2-D array.
    x = torch.from_numpy(stacked).unsqueeze(1).float()

    y = torch.zeros((len(df), NUM_CLASSES), dtype=torch.float32)
    rows = (row for _, row in df.iterrows())
    for position, row in enumerate(rows):
        for class_id in row_to_ids(row):
            y[position, class_id] = 1.0
    return x, y

def save_npz_for_pipeline(x_tensor, y_tensor, dst_path):
    """Write inputs and targets to a compressed ``.npz`` archive.

    The archive holds three arrays:
      * ``imgs``   - ``x_tensor`` as float32;
      * ``labels`` - one int64 per sample: ``BACKGROUND_ID`` when the target
        row sums to zero, otherwise the argmax of that row;
      * ``y``      - ``y_tensor`` converted to numpy unchanged.
    """
    imgs = x_tensor.numpy().astype(np.float32, copy=False)
    targets = y_tensor.numpy()
    # An all-zero target row means "no positive class", i.e. background.
    single_labels = np.array(
        [BACKGROUND_ID if target.sum() == 0 else int(target.argmax()) for target in targets],
        dtype=np.int64,
    )
    np.savez_compressed(dst_path, imgs=imgs, labels=single_labels, y=targets)

# Build the datasets from the window index.
index_df = load_index_df("cache/window_index.csv")
display(index_df.head())
unique_combos = index_df['label_ids'].apply(tuple).unique()
print("unique labels: ", unique_combos)

# Hold out VAL_FOLD for evaluation; all remaining folds form the training pool.
test_df = subset_by_folds(index_df, [VAL_FOLD])
train_df = subset_by_folds(index_df, [f for f in index_df["fold_id"].unique() if f != VAL_FOLD])

path_eval_data = OUTPUT_DIR / "test_fold.npz"
path_calib_data = OUTPUT_DIR / "calib_10pct.npz"

# Evaluation/test archive: every window of the held-out fold.
x_test, y_test = df_to_tensor(test_df)
save_npz_for_pipeline(x_test, y_test, path_eval_data)

# Calibration archive: a random 10% sample of the training pool (adjust the fraction as needed).
calib_df = train_df.sample(frac=0.1, random_state=42)
x_calib, y_calib = df_to_tensor(calib_df)
save_npz_for_pipeline(x_calib, y_calib, path_calib_data)

# Sanity check: reload each archive and print its keys, array shapes and label distribution.
for p in (path_calib_data, path_eval_data):
    # np.load on an .npz keeps the underlying file open until the NpzFile is
    # closed, so read it inside a context manager instead of leaking the handle.
    with np.load(p) as data:
        print("\n", p.name, list(data.keys()))
        print("imgs", data["imgs"].shape, "labels", data["labels"].shape, "y", data["y"].shape)
        unique, counts = np.unique(data["labels"], return_counts=True)
        print("label distribution:", dict(zip(unique, counts)))


Unnamed: 0,path,target_label,label,labels,label_ids,fold_id,pipeline,clip_id,window_id,source,shape,orig_label,length_sec
0,cache/mel64/glass/fold1/5-233607-A-39_5-233607...,glass,glass,[glass],[0],1,base,5-233607-A-39,5-233607-A-39_w0,esc50,"(64, 84)",glass,1.0
1,cache/mel64/glass/fold1/1-85184-A-39_1-85184-A...,glass,glass,[glass],[0],1,base,1-85184-A-39,1-85184-A-39_w0,esc50,"(64, 84)",glass,1.0
2,cache/mel64/glass/fold1/4-204119-A-39_4-204119...,glass,glass,[glass],[0],1,base,4-204119-A-39,4-204119-A-39_w0,esc50,"(64, 84)",glass,1.0
3,cache/mel64/glass/fold1/1-20133-A-39_1-20133-A...,glass,glass,[glass],[0],1,base,1-20133-A-39,1-20133-A-39_w0,esc50,"(64, 84)",glass,1.0
4,cache/mel64/glass/fold1/1-20133-A-39_1-20133-A...,glass,glass,[glass],[0],1,base,1-20133-A-39,1-20133-A-39_w1,esc50,"(64, 84)",glass,1.0


unique labels:  [(0,) (1,) ()]

 calib_10pct.npz ['imgs', 'labels', 'y']
imgs (360, 1, 64, 84) labels (360,) y (360, 2)
label distribution: {-1: 146, 0: 111, 1: 103}

 test_fold.npz ['imgs', 'labels', 'y']
imgs (900, 1, 64, 84) labels (900,) y (900, 2)
label distribution: {-1: 400, 0: 250, 1: 250}


In [7]:
import json, numpy as np, torch
from pathlib import Path
from src.inference import load_torch_checkpoint
from src.config import NUM_CLASSES
from src.models import TinyGlassNet

# Load the model whose layer structure is exported.
# NOTE(review): this checkpoint path differs from PT_PATH used by the ONNX export
# cell, and it rebinds the global `model`; confirm that this is intended.
model, _ = load_torch_checkpoint("cache/experiments/tinyglassnet_best.pt", device="cpu", num_classes=NUM_CLASSES)
model.eval()

# Read one mel window from the index to get the real input size
# (requires `index_df` to have been loaded by an earlier cell).
sample_mel = np.load(Path(index_df.iloc[0]["path"]))
if sample_mel.ndim == 2:
    n_mels, T = sample_mel.shape
elif sample_mel.ndim == 3:
    _, n_mels, T = sample_mel.shape
else:
    raise ValueError("unexpected mel shape")

example = torch.randn(1, 1, n_mels, T)  # batch=1; mel/frame sizes are taken from the data

# Only named leaf modules (modules without children) are described in the export.
leaf_modules = [
    (name, m)
    for name, m in model.named_modules()
    if name and not list(m.children())
]

# Run one forward pass and record each leaf module's input/output shape.
shapes = {}
def hook_fn(name):
    """Return a forward hook that stores the shapes seen by module `name`."""
    def _fn(module, inp, out):
        shapes[name] = {
            "input_shape": list(inp[0].shape)[1:],   # drop the batch dimension
            "output_shape": list(out.shape)[1:],
        }
    return _fn

handles = [m.register_forward_hook(hook_fn(name)) for name, m in leaf_modules]
try:
    with torch.no_grad():
        model(example)
finally:
    # Always detach the hooks, even when the forward pass raises, so that no
    # stale hook stays registered on the model.
    for h in handles:
        h.remove()

# Build the JSON description, one entry per leaf module, numbered from 1.
layers = []
for layer_id, (name, m) in enumerate(leaf_modules, start=1):
    info = {
        "layer_id": layer_id,
        "name": name,
        "type": m.__class__.__name__,
        "bias": getattr(m, "bias", None) is not None,
    }
    shp = shapes.get(name, {})
    if shp:
        info["input_shape"] = shp["input_shape"]
        info["output_shape"] = shp["output_shape"]
    if isinstance(m, torch.nn.Conv2d):
        # Only the first component of kernel_size and stride is recorded.
        # Padding is read as a per-dimension sequence: element 0 fills
        # top/bottom, element 1 (when present) fills left/right.
        info.update({
            "kernel_size": m.kernel_size[0],
            "conv_stride": m.stride[0],
            "padding_top": m.padding[0],
            "padding_bottom": m.padding[0],
            "padding_left": m.padding[1] if len(m.padding) > 1 else m.padding[0],
            "padding_right": m.padding[1] if len(m.padding) > 1 else m.padding[0],
            "num_channels": m.out_channels,
            "in_channels": m.in_channels,
        })
    if isinstance(m, torch.nn.Linear):
        info.update({
            "in_features": m.in_features,
            "out_features": m.out_features,
        })
    layers.append(info)

structure_json = {"layers": layers}
out_path = OUTPUT_DIR / "tinyglassnet_structure.json"
out_path.write_text(json.dumps(structure_json, indent=2))
print(f"Saved JSON structure to {out_path}")


Saved JSON structure to cache/exports/tinyglassnet_structure.json
