In [1]:
import sys, os

print("Python executable:", sys.executable)

# cài torch vào ĐÚNG kernel hiện tại (CPU cho chắc ăn)
!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu


Python executable: C:\ProgramData\anaconda3\python.exe
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting torchvision
  Using cached https://download.pytorch.org/whl/cpu/torchvision-0.24.1%2Bcpu-cp311-cp311-win_amd64.whl.metadata (6.1 kB)
Collecting torchaudio
  Using cached https://download.pytorch.org/whl/cpu/torchaudio-2.9.1%2Bcpu-cp311-cp311-win_amd64.whl.metadata (7.0 kB)
Using cached https://download.pytorch.org/whl/cpu/torchvision-0.24.1%2Bcpu-cp311-cp311-win_amd64.whl (4.0 MB)
Using cached https://download.pytorch.org/whl/cpu/torchaudio-2.9.1%2Bcpu-cp311-cp311-win_amd64.whl (662 kB)
Installing collected packages: torchvision, torchaudio

   ---------------------------------------- 0/2 [torchvision]
   ---------------------------------------- 0/2 [torchvision]
   ---------------------------------------- 0/2 [

In [2]:
import torch
print("torch version:", torch.__version__)
print("device ok:", "cpu")


torch version: 2.9.1+cpu
device ok: cpu


In [3]:
import os, sys, json
import numpy as np
import matplotlib.pyplot as plt

ROOT = r"C:\Users\ADMIN\Desktop\NDM_Project"
sys.path.append(os.path.join(ROOT, "src"))

from dataio import load_splits_json, iter_segments_from_record
from spectrogram import make_spectrogram

print("ROOT =", ROOT)


ROOT = C:\Users\ADMIN\Desktop\NDM_Project


In [4]:
cfg = {
    "seed": 42,

    # segment params (giống Phase 5/6 bạn đang dùng)
    "seg_len": 4096,
    "hop_len": 4096,
    "normalize_seg": False,

    # spectrogram params (đã ghi rõ theo checklist)
    "spectrogram": {
        "window_type": "hamming",
        "win_length": 512,
        "hop_length": 256,
        "n_fft": 512,
        "use_mel": True,
        "mel_bins": 64,
        "fmin": 0.0,
        "fmax": None,
        "power": 2.0,
        "log_eps": 1e-8,
        "to_db": False
    },

    # training
    "train": {
        "batch_size": 64,
        "epochs": 25,
        "lr": 1e-3,
        "weight_decay": 1e-4,
        "early_stop_patience": 5
    },

    # embedding dim (deep feature)
    "embedding_dim": 128
}

os.makedirs(os.path.join(ROOT, "configs"), exist_ok=True)
cfg_path = os.path.join(ROOT, "configs", "cnn.json")
with open(cfg_path, "w", encoding="utf-8") as f:
    json.dump(cfg, f, indent=2)

print("Saved:", cfg_path)
print(cfg)


Saved: C:\Users\ADMIN\Desktop\NDM_Project\configs\cnn.json
{'seed': 42, 'seg_len': 4096, 'hop_len': 4096, 'normalize_seg': False, 'spectrogram': {'window_type': 'hamming', 'win_length': 512, 'hop_length': 256, 'n_fft': 512, 'use_mel': True, 'mel_bins': 64, 'fmin': 0.0, 'fmax': None, 'power': 2.0, 'log_eps': 1e-08, 'to_db': False}, 'train': {'batch_size': 64, 'epochs': 25, 'lr': 0.001, 'weight_decay': 0.0001, 'early_stop_patience': 5}, 'embedding_dim': 128}


In [5]:
records_map, splits, meta = load_splits_json(ROOT, r"data\splits\cwru_splits.json")
print("Split sizes:", {k: len(v) for k, v in splits.items()})
print("Meta:", meta)

# label list
labels_sorted = sorted({records_map[rid]["label"] for rid in records_map.keys()})
label_to_idx = {lb:i for i, lb in enumerate(labels_sorted)}
idx_to_label = {i:lb for lb,i in label_to_idx.items()}
num_classes = len(labels_sorted)

print("Classes:", labels_sorted)

# build segment index: list of (record_id, seg_id_in_record, label_idx)
def build_index(split_name):
    seg_len = int(cfg["seg_len"])
    hop_len = int(cfg["hop_len"])
    normalize = bool(cfg["normalize_seg"])
    index = []
    for rid in splits[split_name]:
        rec = records_map[rid]
        seg_iter = iter_segments_from_record(ROOT, rec, seg_len=seg_len, hop_len=hop_len, normalize=normalize)
        s = 0
        for _seg in seg_iter:
            index.append((rid, s, label_to_idx[rec["label"]]))
            s += 1
    return index

train_index = build_index("train")
val_index   = build_index("val")
test_index  = build_index("test")

print("Num segments:", {"train": len(train_index), "val": len(val_index), "test": len(test_index)})


Split sizes: {'train': 80, 'val': 10, 'test': 10}
Meta: {'fs': 48000, 'chunk_seconds': 1.0, 'seed': 42, 'note': 'Split by non-overlapping chunks (anti-leak) before segmentation'}
Classes: ['BF_007', 'BF_014', 'BF_021', 'H', 'IRF_007', 'IRF_014', 'IRF_021', 'ORF_007', 'ORF_014', 'ORF_021']
Num segments: {'train': 880, 'val': 110, 'test': 110}


In [6]:
import torch
from torch.utils.data import Dataset, DataLoader

torch.manual_seed(cfg["seed"])
np.random.seed(cfg["seed"])

spec_cfg = cfg["spectrogram"]

def compute_spec(seg, fs):
    # seg: 1D np array
    S = make_spectrogram(
        seg, fs=fs,
        window_type=spec_cfg["window_type"],
        win_length=int(spec_cfg["win_length"]),
        hop_length=int(spec_cfg["hop_length"]),
        n_fft=int(spec_cfg["n_fft"]),
        use_mel=bool(spec_cfg["use_mel"]),
        mel_bins=int(spec_cfg["mel_bins"]),
        fmin=float(spec_cfg["fmin"]),
        fmax=spec_cfg["fmax"],
        power=float(spec_cfg["power"]),
        log_eps=float(spec_cfg["log_eps"]),
        to_db=bool(spec_cfg["to_db"]),
    )
    # per-sample normalize (không leak)
    S = (S - S.mean()) / (S.std() + 1e-6)
    return S.astype(np.float32)  # (mel_bins, frames)

# tiện: map record_id -> list segments để lấy seg theo seg_id nhanh (dataset nhỏ nên cache được)
FS = int(meta["fs"])
SEG_LEN = int(cfg["seg_len"])
HOP_LEN = int(cfg["hop_len"])
NORM_SEG = bool(cfg["normalize_seg"])

_record_cache = {}  # rid -> list(np.array seg)

def get_segments_of_record(rid):
    if rid in _record_cache:
        return _record_cache[rid]
    rec = records_map[rid]
    segs = []
    for seg in iter_segments_from_record(ROOT, rec, seg_len=SEG_LEN, hop_len=HOP_LEN, normalize=NORM_SEG):
        segs.append(seg.astype(np.float32))
    _record_cache[rid] = segs
    return segs

class CWRUSpecDataset(Dataset):
    def __init__(self, index_list):
        self.index = index_list

    def __len__(self):
        return len(self.index)

    def __getitem__(self, i):
        rid, seg_id, y = self.index[i]
        segs = get_segments_of_record(rid)
        seg = segs[seg_id]
        S = compute_spec(seg, FS)                 # (F, T)
        x = torch.from_numpy(S).unsqueeze(0)      # (1, F, T)
        y = torch.tensor(y, dtype=torch.long)
        return x, y

bs = int(cfg["train"]["batch_size"])
train_loader = DataLoader(CWRUSpecDataset(train_index), batch_size=bs, shuffle=True,  num_workers=0)
val_loader   = DataLoader(CWRUSpecDataset(val_index),   batch_size=bs, shuffle=False, num_workers=0)
test_loader  = DataLoader(CWRUSpecDataset(test_index),  batch_size=bs, shuffle=False, num_workers=0)

# xem shape 1 batch
xb, yb = next(iter(train_loader))
print("Batch x:", xb.shape, "Batch y:", yb.shape)


TypeError: make_spectrogram() got an unexpected keyword argument 'mel_bins'

In [None]:
import torch.nn as nn
import torch.nn.functional as F

EMB_DIM = int(cfg["embedding_dim"])

class CNNBaseline(nn.Module):
    def __init__(self, num_classes, emb_dim=128):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm2d(16)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm2d(32)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3   = nn.BatchNorm2d(64)

        self.pool  = nn.MaxPool2d(2)  # giảm F,T

        self.gap   = nn.AdaptiveAvgPool2d((1,1))
        self.fc_emb = nn.Linear(64, emb_dim)      # embedding layer gần cuối
        self.fc_out = nn.Linear(emb_dim, num_classes)

    def forward(self, x, return_emb=False):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = F.relu(self.bn3(self.conv3(x)))

        x = self.gap(x).squeeze(-1).squeeze(-1)   # (B, 64)
        emb = F.relu(self.fc_emb(x))              # (B, emb_dim)
        logits = self.fc_out(emb)

        if return_emb:
            return logits, emb
        return logits

device = torch.device("cpu")
model = CNNBaseline(num_classes=num_classes, emb_dim=EMB_DIM).to(device)
print(model)


In [None]:
import time
import pandas as pd

os.makedirs(os.path.join(ROOT, "results"), exist_ok=True)

def run_eval(model, loader):
    model.eval()
    total_loss, total, correct = 0.0, 0, 0
    all_y, all_p = [], []
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            logits = model(x)
            loss = F.cross_entropy(logits, y)
            total_loss += float(loss.item()) * y.size(0)

            pred = torch.argmax(logits, dim=1)
            correct += int((pred == y).sum().item())
            total += int(y.size(0))

            all_y.append(y.cpu().numpy())
            all_p.append(pred.cpu().numpy())

    avg_loss = total_loss / max(total, 1)
    acc = correct / max(total, 1)
    all_y = np.concatenate(all_y) if len(all_y) else np.array([])
    all_p = np.concatenate(all_p) if len(all_p) else np.array([])
    return avg_loss, acc, all_y, all_p

train_cfg = cfg["train"]
optimizer = torch.optim.Adam(model.parameters(), lr=float(train_cfg["lr"]), weight_decay=float(train_cfg["weight_decay"]))

best_val_acc = -1.0
best_path = os.path.join(ROOT, "results", "cnn_baseline.pt")
log_rows = []
pat = 0

epochs = int(train_cfg["epochs"])
for epoch in range(1, epochs+1):
    t0 = time.time()
    model.train()

    total_loss, total, correct = 0.0, 0, 0
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        logits = model(x)
        loss = F.cross_entropy(logits, y)
        loss.backward()
        optimizer.step()

        total_loss += float(loss.item()) * y.size(0)
        pred = torch.argmax(logits, dim=1)
        correct += int((pred == y).sum().item())
        total += int(y.size(0))

    train_loss = total_loss / max(total, 1)
    train_acc  = correct / max(total, 1)

    val_loss, val_acc, _, _ = run_eval(model, val_loader)

    row = {
        "epoch": epoch,
        "train_loss": train_loss, "train_acc": train_acc,
        "val_loss": val_loss, "val_acc": val_acc,
        "sec": time.time() - t0
    }
    log_rows.append(row)
    print(f"[{epoch:02d}] train loss {train_loss:.4f} acc {train_acc:.4f} | val loss {val_loss:.4f} acc {val_acc:.4f}")

    # early stop theo val_acc
    if val_acc > best_val_acc + 1e-6:
        best_val_acc = val_acc
        pat = 0
        torch.save({
            "model_state": model.state_dict(),
            "labels_sorted": labels_sorted,
            "cfg": cfg,
            "meta": meta
        }, best_path)
        print("  -> saved best:", best_path)
    else:
        pat += 1
        if pat >= int(train_cfg["early_stop_patience"]):
            print("Early stop.")
            break

log_path = os.path.join(ROOT, "results", "cnn_log.csv")
pd.DataFrame(log_rows).to_csv(log_path, index=False)
print("Saved log:", log_path)
print("Best val acc:", best_val_acc)


In [None]:
from sklearn.metrics import confusion_matrix

ckpt = torch.load(best_path, map_location="cpu")
model.load_state_dict(ckpt["model_state"])
model.eval()

test_loss, test_acc, y_true, y_pred = run_eval(model, test_loader)
print("TEST loss:", test_loss, "acc:", test_acc)

cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))

plt.figure()
plt.imshow(cm)
plt.title("CNN baseline confusion matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.xticks(range(num_classes), labels_sorted, rotation=45, ha="right")
plt.yticks(range(num_classes), labels_sorted)
for i in range(num_classes):
    for j in range(num_classes):
        plt.text(j, i, str(cm[i, j]), ha="center", va="center", fontsize=8)
plt.tight_layout()

cm_path = os.path.join(ROOT, "results", "cm_cnn.png")
plt.savefig(cm_path, dpi=200)
plt.show()
print("Saved:", cm_path)

metrics_path = os.path.join(ROOT, "results", "cnn_metrics.csv")
import pandas as pd
pd.DataFrame([{
    "best_val_acc": best_val_acc,
    "test_acc": test_acc,
    "test_loss": test_loss
}]).to_csv(metrics_path, index=False)
print("Saved:", metrics_path)


In [None]:
def extract_embeddings(model, loader):
    model.eval()
    X_list, y_list = [], []
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            logits, emb = model(x, return_emb=True)
            X_list.append(emb.cpu().numpy().astype(np.float32))
            y_list.append(y.numpy().astype(np.int64))
    X = np.vstack(X_list) if len(X_list) else np.zeros((0, EMB_DIM), np.float32)
    y = np.concatenate(y_list) if len(y_list) else np.zeros((0,), np.int64)
    return X, y

Xtr, ytr = extract_embeddings(model, train_loader)
Xva, yva = extract_embeddings(model, val_loader)
Xte, yte = extract_embeddings(model, test_loader)

out_dir = os.path.join(ROOT, "results")
np.save(os.path.join(out_dir, "X_train_deep.npy"), Xtr)
np.save(os.path.join(out_dir, "X_val_deep.npy"),   Xva)
np.save(os.path.join(out_dir, "X_test_deep.npy"),  Xte)

np.save(os.path.join(out_dir, "y_train.npy"), ytr)
np.save(os.path.join(out_dir, "y_val.npy"),   yva)
np.save(os.path.join(out_dir, "y_test.npy"),  yte)

print("Saved deep features:")
print("  X_train_deep:", Xtr.shape, "y_train:", ytr.shape)
print("  X_val_deep  :", Xva.shape, "y_val  :", yva.shape)
print("  X_test_deep :", Xte.shape, "y_test :", yte.shape)
