# ML Strategy + Error Analysis + CNN Playbook (PyTorch)

**Mục tiêu:** Notebook mẫu, gọn mà đủ xài, để bạn áp dụng nhanh 7 chủ đề:

1. Chiến lược ML (metric, split, baseline),
2. Error analysis (gom lỗi, phân loại, mismatch),
3. CNN cơ bản (Conv/Pool/Stride + CNN nhỏ),
4. Case studies (ResNet block, 1×1 conv, Inception-mini, Depthwise separable, MobileNet-style, Transfer Learning),
5. EfficientNet compound scaling (minh hoạ),
6. Chẩn đoán bias/variance & mismatch (hàm gợi ý),
7. Random Search HPO (demo nhỏ).

**Lưu ý chạy offline:** Notebook ưu tiên dùng `FakeData` nếu không có CIFAR10 cục bộ; Transfer Learning sẽ **fallback** khi không tải được trọng số pretrained.


## 0) Chuẩn bị: import, seed, tiện ích


In [None]:
import time
import math
import random
import os
import numpy as np
from collections import Counter, defaultdict

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, Subset
from torchvision import datasets, transforms, models

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from sklearn.model_selection import train_test_split

# reproducibility


def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    When CUDA is available, every CUDA generator is seeded as well.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)


# Seed the RNGs before any other cell runs.
set_seed(42)

# Use CUDA when PyTorch reports it as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device


'cpu'

## 1) ML Strategy — Single metric + Split đúng & Baseline nhanh


In [None]:
# 1A) Single metric (ví dụ F1) + ràng buộc latency (p95)
def evaluate_with_constraints(y_true, y_pred, latencies_ms, max_p95_latency=100):
    """Compute classification metrics together with a p95-latency check.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        latencies_ms: Sequence of latencies in milliseconds; may be empty.
        max_p95_latency: Upper bound (ms) the 95th-percentile latency must
            not exceed for ``latency_ok`` to be True.

    Returns:
        Dict with accuracy, macro-averaged precision/recall/F1, the p95
        latency (NaN when ``latencies_ms`` is empty) and ``latency_ok``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _support = precision_recall_fscore_support(
        y_true, y_pred, average='macro', zero_division=0)

    if len(latencies_ms) > 0:
        p95_latency = float(np.percentile(latencies_ms, 95))
    else:
        p95_latency = float('nan')

    # A NaN p95 (no measurements, or NaN inputs) never satisfies the budget.
    if math.isnan(p95_latency):
        within_budget = False
    else:
        within_budget = p95_latency <= max_p95_latency

    return {
        "acc": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "p95_latency_ms": p95_latency,
        "latency_ok": bool(within_budget),
    }


In [None]:
# 1B) Dataset loader: prefer a local CIFAR10 copy; fall back to FakeData (offline-friendly)
# Shared preprocessing: resize to 224, then convert to a tensor.
transform = transforms.Compose([transforms.Resize(224), transforms.ToTensor()])


def load_dataset():
    """Return the CIFAR10 training set from ``./data``, or FakeData on failure.

    ``download=False`` means CIFAR10 is only used when it is already on disk.
    If opening it raises, the error is printed and a FakeData set with 20000
    samples of size (3, 224, 224) and 10 classes is returned instead. Both
    datasets use the module-level ``transform``.
    """
    try:
        # No internet download: only use the dataset if it already exists.
        return datasets.CIFAR10(root="./data", train=True,
                                download=False, transform=transform)
    except Exception as e:
        print("CIFAR10 không sẵn, dùng FakeData thay thế:", e)
        return datasets.FakeData(size=20000, image_size=(3, 224, 224),
                                 num_classes=10, transform=transform)


ds = load_dataset()
# 80/10/10 split sizes; the test split takes whatever remains after truncation.
n = len(ds)
n_train = int(0.8*n)
n_dev = int(0.1*n)
n_test = n - n_train - n_dev
# The split uses its own generator seeded with 42.
train_ds, dev_ds, test_ds = random_split(
    ds, [n_train, n_dev, n_test], generator=torch.Generator().manual_seed(42))

# Only the training loader shuffles; pin_memory is enabled only when CUDA is available.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True,
                          num_workers=2, pin_memory=torch.cuda.is_available())
dev_loader = DataLoader(dev_ds,   batch_size=128, shuffle=False,
                        num_workers=2, pin_memory=torch.cuda.is_available())
test_loader = DataLoader(test_ds,  batch_size=128, shuffle=False,
                         num_workers=2, pin_memory=torch.cuda.is_available())

len(train_ds), len(dev_ds), len(test_ds)


CIFAR10 không sẵn, dùng FakeData thay thế: Dataset not found or corrupted. You can use download=True to download it


(16000, 2000, 2000)

In [None]:
# 1C) Baseline nhanh (linear head trên mean RGB) + vòng lặp train/eval
class BaselineLinear(nn.Module):
    """Baseline classifier: one linear layer on the per-channel image mean."""

    def __init__(self, num_classes=10):
        super().__init__()
        # Global average pooling reduces every channel to a single value.
        self.pool = nn.AdaptiveAvgPool2d(1)
        # 3 input features (RGB channels) -> num_classes logits.
        self.fc = nn.Linear(3, num_classes)

    def forward(self, x):
        pooled = self.pool(x)
        # Drop the two trailing size-1 spatial dims: [B,3,1,1] -> [B,3].
        channel_means = pooled[..., 0, 0]
        logits = self.fc(channel_means)
        return logits


def train_one_epoch(model, loader, opt, device):
    """Run one training pass over ``loader`` and return the training accuracy.

    Args:
        model: Module producing class logits of shape [B, num_classes].
        loader: Iterable yielding ``(inputs, labels)`` batches.
        opt: Optimizer over ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Fraction of samples whose argmax prediction matched the label, using
        the logits computed before each optimizer step. Returns NaN when the
        loader yields no samples, instead of raising ZeroDivisionError.
    """
    model.train()
    loss_fn = nn.CrossEntropyLoss()
    total = 0
    correct = 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad()
        logits = model(xb)
        loss = loss_fn(logits, yb)
        loss.backward()
        opt.step()
        total += yb.size(0)
        correct += (logits.argmax(1) == yb).sum().item()
    if total == 0:
        # Nothing was seen, so accuracy is undefined.
        return float("nan")
    return correct / total


def evaluate(model, loader, device):
    """Evaluate ``model`` on ``loader`` and report metrics plus latency.

    Args:
        model: Module producing class logits of shape [B, num_classes].
        loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device (string or ``torch.device``) batches are moved to.

    Returns:
        The dict produced by ``evaluate_with_constraints`` for the collected
        labels, predictions and latencies.

    The forward pass of each batch is timed with a monotonic clock, and every
    sample in the batch is assigned that whole-batch time in milliseconds.
    """
    model.eval()
    target = torch.device(device)
    on_cuda = target.type == "cuda"
    ys = []
    yh = []
    lat = []
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            # CUDA kernels launch asynchronously; without synchronizing before
            # and after, the timer would only capture launch overhead.
            if on_cuda:
                torch.cuda.synchronize(target)
            t0 = time.perf_counter()
            logits = model(xb)
            if on_cuda:
                torch.cuda.synchronize(target)
            t1 = time.perf_counter()
            ys.extend(yb.cpu().numpy())
            yh.extend(logits.argmax(1).cpu().numpy())
            lat.extend([(t1 - t0) * 1000] * yb.size(0))
    return evaluate_with_constraints(np.array(ys), np.array(yh), np.array(lat))


# Train the baseline for a single epoch with Adam, then score it on the dev split.
baseline = BaselineLinear(num_classes=10).to(device)
opt = torch.optim.Adam(baseline.parameters(), lr=1e-3)
for _ in range(1):  # the baseline trains very fast
    train_one_epoch(baseline, train_loader, opt, device)
dev_metrics = evaluate(baseline, dev_loader, device)
dev_metrics


{'acc': 0.1045,
 'precision': 0.01045,
 'recall': 0.1,
 'f1': 0.01892258940697148,
 'p95_latency_ms': 6.576061248779297,
 'latency_ok': True}

## 2) Error Analysis — Gom lỗi, phân loại nguyên nhân, kiểm tra mismatch


In [None]:
# 2A) Thu lỗi trên dev (giữ index để soi)
def collect_errors(model, dataset, device, max_items=200):
    """Return dataset indices of misclassified samples, in dataset order.

    Args:
        model: Module producing class logits of shape [B, num_classes].
        dataset: Indexable dataset yielding ``(input, label)`` pairs.
        device: Device the batches are moved to.
        max_items: Maximum number of indices to return; values <= 0 yield
            an empty list.

    Returns:
        List of plain Python ``int`` positions (not NumPy scalars), at most
        ``max_items`` long.
    """
    model.eval()
    wrong = []
    if max_items <= 0:
        return wrong
    loader = DataLoader(dataset, batch_size=128, shuffle=False)
    with torch.no_grad():
        offset = 0  # dataset position of the first sample in the current batch
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb).argmax(1)
            # tolist() yields built-in ints, keeping the result free of np.int64.
            mism = (pred != yb).nonzero(as_tuple=False).squeeze(-1).tolist()
            for i in mism:
                wrong.append(offset + i)
                if len(wrong) >= max_items:
                    return wrong
            offset += yb.size(0)
    return wrong


# Collect up to 120 misclassified dev indices and preview the first ten.
wrong_idx = collect_errors(baseline, dev_ds, device, max_items=120)
len(wrong_idx), wrong_idx[:10]


(120,
 [np.int64(0),
  np.int64(1),
  np.int64(2),
  np.int64(3),
  np.int64(4),
  np.int64(5),
  np.int64(6),
  np.int64(7),
  np.int64(8),
  np.int64(9)])

In [None]:
# 2B) Gán nhãn loại lỗi (demo heuristic: tối/sáng/quá giống) & breakdown %
def brightness(img_tensor):
    """Return the mean over all elements of ``img_tensor`` as a Python float.

    The original note describes the input as a [3,H,W] image in [0,1].
    """
    mean_value = img_tensor.mean()
    return float(mean_value)


def categorize_error(img, label, pred, dark_below=0.28, bright_above=0.75,
                     similar_classes=(3, 5)):
    """Assign a heuristic category to one misclassified sample.

    Args:
        img: Image tensor passed to ``brightness``.
        label: True class index.
        pred: Predicted class index.
        dark_below: Brightness strictly below this is "too_dark".
        bright_above: Brightness strictly above this is "too_bright".
        similar_classes: Class indices treated as mutually confusable.

    Returns:
        One of "too_dark", "too_bright", "confused_similar_classes", "other".
        Brightness checks take precedence over the class check. The defaults
        reproduce the previously hard-coded thresholds and class pair.
    """
    b = brightness(img)
    if b < dark_below:
        return "too_dark"
    if b > bright_above:
        return "too_bright"
    if label in similar_classes and pred in similar_classes:
        return "confused_similar_classes"
    return "other"


def error_breakdown(model, dataset, wrong_idx, device):
    """Count misclassified samples per heuristic category.

    Args:
        model: Module producing class logits.
        dataset: Dataset that ``wrong_idx`` indexes into.
        wrong_idx: Indices of the samples to categorize.
        device: Device each sample is moved to for the forward pass.

    Returns:
        ``(breakdown, total)`` where ``breakdown`` maps each category to
        ``(count, fraction_of_total)`` and ``total`` is the number of samples
        categorized. With no samples this is ``({}, 0)``.
    """
    cat = Counter()
    # batch_size=1 so each image can be categorized on its own.
    loader = DataLoader(Subset(dataset, wrong_idx),
                        batch_size=1, shuffle=False)
    model.eval()
    with torch.no_grad():
        for xb, yb in loader:
            img = xb[0]
            pred = model(xb.to(device)).argmax(1).item()
            cat[categorize_error(img, yb.item(), pred)] += 1
    # Report the true count; an empty Counter yields an empty dict below, so
    # no division by zero can occur.
    total = sum(cat.values())
    return {k: (v, v / total) for k, v in cat.items()}, total


# Break the collected dev errors down by heuristic category.
cats, total_err = error_breakdown(baseline, dev_ds, wrong_idx, device)
cats, total_err


({'other': (120, 1.0)}, 120)

In [None]:
# 2C) Mismatch check: training-dev (sampled from train) vs dev
# Draw up to 2000 distinct training indices to form the train-dev subset.
train_dev_ds = Subset(train_ds, np.random.choice(
    len(train_ds), size=min(2000, len(train_ds)), replace=False))


def quick_acc(model, dataset):
    """Return the accuracy of ``model`` over ``dataset``.

    The dataset is read in order in batches of 256; inputs are moved to the
    module-level ``device`` and the model is put in eval mode.
    """
    model.eval()
    batches = DataLoader(dataset, batch_size=256, shuffle=False)
    labels, predictions = [], []
    with torch.no_grad():
        for xb, yb in batches:
            labels.extend(yb.numpy())
            batch_pred = model(xb.to(device)).argmax(1)
            predictions.extend(batch_pred.cpu().numpy())
    return accuracy_score(labels, predictions)


# Compare accuracy on the train-dev subset against the dev split.
acc_train_dev = quick_acc(baseline, train_dev_ds)
acc_dev = quick_acc(baseline, dev_ds)
{"acc_train_dev": acc_train_dev, "acc_dev": acc_dev,
    "hint": "Nếu dev << train-dev ⇒ dev khác phân phối train (mismatch)"}


{'acc_train_dev': 0.1045,
 'acc_dev': 0.1045,
 'hint': 'Nếu dev << train-dev ⇒ dev khác phân phối train (mismatch)'}

## 3) CNN cơ bản — Conv/Stride/Padding/Pooling & CNN nhỏ


In [None]:
class ConvBlock(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and an in-place ReLU.

    Args:
        c_in: Input channels.
        c_out: Output channels.
        k: Kernel size.
        s: Stride.
        p: ``'same'`` pads by ``k // 2``; any other value uses no padding.
    """

    def __init__(self, c_in, c_out, k=3, s=1, p='same'):
        super().__init__()
        if p == 'same':
            pad = k // 2
        else:
            pad = 0
        self.conv = nn.Conv2d(
            c_in, c_out, kernel_size=k, stride=s, padding=pad, bias=False)
        self.bn = nn.BatchNorm2d(c_out)
        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        features = self.conv(x)
        normalized = self.bn(features)
        return self.act(normalized)


class TinyCNN(nn.Module):
    """Small CNN: five ConvBlocks, two 2x max-pools, global pooling, linear head."""

    def __init__(self, num_classes=10):
        super().__init__()
        stages = [
            ConvBlock(3, 32),
            ConvBlock(32, 32),
            nn.MaxPool2d(2),  # H/2
            ConvBlock(32, 64),
            ConvBlock(64, 64),
            nn.MaxPool2d(2),  # H/4
            ConvBlock(64, 128),
            nn.AdaptiveAvgPool2d(1),
        ]
        self.body = nn.Sequential(*stages)
        self.head = nn.Linear(128, num_classes)

    def forward(self, x):
        features = self.body(x)
        # [B,128,1,1] -> [B,128] before the classifier.
        flat = torch.flatten(features, 1)
        return self.head(flat)


# Train TinyCNN for a single epoch with Adam, then score it on the dev split.
cnn = TinyCNN().to(device)
opt = torch.optim.Adam(cnn.parameters(), lr=1e-3)
for _ in range(1):
    train_one_epoch(cnn, train_loader, opt, device)
evaluate(cnn, dev_loader, device)


{'acc': 0.094,
 'precision': 0.0094,
 'recall': 0.1,
 'f1': 0.017184643510054845,
 'p95_latency_ms': 754.4708251953125,
 'latency_ok': False}

## 4) Case studies — ResNet block, 1×1 conv, Inception-mini, Depthwise/MobileNet, Transfer Learning


In [None]:
# 4A) Residual Block (ResNet)
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv+BN layers with a skip added before the last ReLU.

    When ``stride != 1`` or the channel count changes, the skip path is a
    1x1 conv + BN projection; otherwise it is an empty ``nn.Sequential``.
    """

    def __init__(self, c_in, c_out, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            c_in, c_out, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(c_out)
        self.conv2 = nn.Conv2d(
            c_out, c_out, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(c_out)
        self.act = nn.ReLU(inplace=True)

        shape_preserved = stride == 1 and c_in == c_out
        if shape_preserved:
            self.skip = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.skip = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=stride, bias=False),
                nn.BatchNorm2d(c_out),
            )

    def forward(self, x):
        main = self.conv1(x)
        main = self.act(self.bn1(main))
        main = self.bn2(self.conv2(main))
        shortcut = self.skip(x)
        return self.act(main + shortcut)

# 4B) 1×1 Convolution


class Conv1x1(nn.Module):
    """Bias-free 1x1 convolution followed by BatchNorm2d and ReLU."""

    def __init__(self, c_in, c_out):
        super().__init__()
        self.conv = nn.Conv2d(c_in, c_out, kernel_size=1, bias=False)
        self.bn = nn.BatchNorm2d(c_out)

    def forward(self, x):
        normalized = self.bn(self.conv(x))
        return F.relu(normalized, inplace=True)

# 4C) Inception mini


class InceptionMini(nn.Module):
    """Four parallel branches concatenated along the channel dimension.

    Branches: 1x1 -> 3x3, 1x1 -> 5x5, 3x3 max-pool -> 1x1, and a plain 1x1.
    The first three branches each emit ``c_out // 4`` channels; the last one
    also takes the remainder so the output always has exactly ``c_out``
    channels, even when ``c_out`` is not a multiple of 4.

    Raises:
        ValueError: If ``c_out`` is below 4 (a branch would have no channels).
    """

    def __init__(self, c_in, c_out):
        super().__init__()
        if c_out < 4:
            raise ValueError(
                f"c_out must be at least 4 (one channel per branch), got {c_out}")
        c = c_out // 4
        c_last = c_out - 3 * c  # equals c when c_out is divisible by 4
        self.b1 = nn.Sequential(Conv1x1(c_in, c), nn.Conv2d(
            c, c, 3, padding=1), nn.ReLU(True))
        self.b2 = nn.Sequential(Conv1x1(c_in, c), nn.Conv2d(
            c, c, 5, padding=2), nn.ReLU(True))
        self.b3 = nn.Sequential(nn.MaxPool2d(
            3, stride=1, padding=1), Conv1x1(c_in, c))
        self.b4 = Conv1x1(c_in, c_last)

    def forward(self, x):
        branches = [self.b1(x), self.b2(x), self.b3(x), self.b4(x)]
        return torch.cat(branches, dim=1)

# 4D) Depthwise separable conv (MobileNet-style)


class DepthwiseSeparableConv(nn.Module):
    """Depthwise 3x3 conv then pointwise 1x1 conv, each followed by BN and ReLU."""

    def __init__(self, c_in, c_out, stride=1):
        super().__init__()
        # groups=c_in gives each input channel its own 3x3 filter.
        self.dw = nn.Conv2d(
            c_in, c_in, kernel_size=3, stride=stride,
            padding=1, groups=c_in, bias=False)
        self.pw = nn.Conv2d(c_in, c_out, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(c_in)
        self.bn2 = nn.BatchNorm2d(c_out)

    def forward(self, x):
        depthwise = F.relu(self.bn1(self.dw(x)), inplace=True)
        pointwise = F.relu(self.bn2(self.pw(depthwise)), inplace=True)
        return pointwise

# 4E) MobileNetV2 Inverted Residual (rút gọn)


class InvertedResidual(nn.Module):
    """Simplified inverted residual: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    The input is added to the output only when ``stride == 1`` and
    ``c_in == c_out``.
    """

    def __init__(self, c_in, c_out, stride=1, expand=6):
        super().__init__()
        hidden = c_in * expand
        self.use_skip = (stride == 1 and c_in == c_out)
        layers = [
            # Expansion to `hidden` channels.
            nn.Conv2d(c_in, hidden, 1, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(True),
            # Depthwise 3x3 (one filter per channel).
            nn.Conv2d(hidden, hidden, 3, stride=stride,
                      padding=1, groups=hidden, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(True),
            # Projection with no activation afterwards.
            nn.Conv2d(hidden, c_out, 1, bias=False),
            nn.BatchNorm2d(c_out),
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        if self.use_skip:
            return x + out
        return out

# 4F) Transfer learning với ResNet18 (fallback nếu không tải được pretrained)


def transfer_learning_resnet18(num_classes=10, freeze_backbone=True):
    """Build a ResNet18 with a fresh ``num_classes``-way linear head.

    Pretrained default weights are requested first; if that raises, the
    reason is printed and an untrained ResNet18 is used instead. With
    ``freeze_backbone`` all existing parameters have gradients disabled
    before the new ``fc`` layer (which stays trainable) is attached.
    """
    try:
        model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    except Exception as e:
        print("Không tải được trọng số pretrained, dùng weights=None. Lý do:", e)
        model = models.resnet18(weights=None)

    if freeze_backbone:
        model.requires_grad_(False)

    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model


# Demo init only (no long training, to save time)
ft_model = transfer_learning_resnet18(
    num_classes=10, freeze_backbone=True).to(device)
# Count the parameters that still require gradients.
sum(p.numel() for p in ft_model.parameters() if p.requires_grad)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\Admin/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 68.2MB/s]


5130

## 5) EfficientNet — Compound scaling (minh hoạ)


In [None]:
def compound_scale(base_channels=32, base_depth=4, base_res=128, phi=1, alpha=1.2, beta=1.1, gamma=1.15):
    """Scale width, depth and resolution together by the exponent ``phi``.

    Returns:
        ``(channels, depth, resolution)`` where channels use ``beta ** phi``
        and resolution uses ``gamma ** phi`` (both truncated to int), and
        depth uses ``alpha ** phi`` (rounded to the nearest int).
    """
    width_factor = beta ** phi
    depth_factor = alpha ** phi
    res_factor = gamma ** phi

    channels = int(base_channels * width_factor)
    depth = int(round(base_depth * depth_factor))
    resolution = int(base_res * res_factor)
    return channels, depth, resolution


# Print the scaled configuration for phi = 0..3.
for phi in range(4):
    # compound_scale returns three values, so the label names exactly three.
    print(f"phi={phi} -> (channels,depth,res)={compound_scale(phi=phi)}")


phi=0 -> (channels,width,depth,res)=(32, 4, 128)
phi=1 -> (channels,width,depth,res)=(35, 5, 147)
phi=2 -> (channels,width,depth,res)=(38, 6, 169)
phi=3 -> (channels,width,depth,res)=(42, 7, 194)


## 6) Chẩn đoán & khuyến nghị (bias/variance & mismatch)


In [None]:
def diagnose_and_recommend(train_acc, dev_acc, human_acc=None, train_dev_acc=None):
    """Return a list of tuning tips based on simple accuracy thresholds.

    Checks, in order:
      * bias: ``human_acc`` given and ``train_acc`` more than 0.05 below it;
      * variance: ``train_acc`` above 0.95 while ``dev_acc`` is below 0.85;
      * mismatch: ``train_dev_acc`` given and more than 0.1 above ``dev_acc``.

    If none triggers, a single "all good" tip is returned.
    """
    below_human = human_acc is not None and train_acc < human_acc - 0.05
    overfitting = train_acc > 0.95 and dev_acc < 0.85
    mismatched = train_dev_acc is not None and (train_dev_acc - dev_acc) > 0.1

    checks = (
        (below_human,
         "High bias so với mức người: tăng capacity (ResNet sâu hơn), train lâu hơn, LR schedule."),
        (overfitting,
         "High variance: thêm dữ liệu cùng phân phối dev, L2/Dropout/augment, giảm model."),
        (mismatched,
         "Data mismatch: dev khác train; thu thập thêm dữ liệu giống dev hoặc synth điều kiện dev."),
    )
    tips = [message for triggered, message in checks if triggered]
    if tips:
        return tips
    return ["Ổn: tiếp tục HPO (random search log-scale), tinh chỉnh threshold/latency."]


# Example call with hard-coded accuracies.
diagnose_and_recommend(train_acc=0.92, dev_acc=0.80,
                       human_acc=0.98, train_dev_acc=0.91)


['High bias so với mức người: tăng capacity (ResNet sâu hơn), train lâu hơn, LR schedule.',
 'Data mismatch: dev khác train; thu thập thêm dữ liệu giống dev hoặc synth điều kiện dev.']

## 7) Random Search HPO (log-scale LR, weight decay, dropout) — demo nhỏ


In [None]:
class MLPHead(nn.Module):
    """Flatten -> Linear -> ReLU -> Dropout -> Linear classifier.

    Args:
        d_in: Number of features after flattening (default 3*7*7).
        hidden: Width of the hidden layer.
        num_classes: Number of output logits.
        p_drop: Dropout probability.
    """

    def __init__(self, d_in=3*7*7, hidden=256, num_classes=10, p_drop=0.5):
        super().__init__()
        layers = [
            nn.Flatten(),
            nn.Linear(d_in, hidden),
            nn.ReLU(True),
            nn.Dropout(p_drop),
            nn.Linear(hidden, num_classes),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        logits = self.net(x)
        return logits


def sample_hparams():
    """Draw one random hyperparameter set using NumPy's global RNG.

    Returns:
        Dict with ``lr`` log-uniform in [1e-4, 1e-2], ``weight_decay``
        log-uniform in [1e-6, 1e-3] and ``p_drop`` uniform in [0.1, 0.6].
    """
    # Sample exponents uniformly so the values are uniform on a log scale.
    lr_exponent = np.random.uniform(-4, -2)
    wd_exponent = np.random.uniform(-6, -3)
    dropout = np.random.uniform(0.1, 0.6)
    return {
        "lr": 10 ** lr_exponent,
        "weight_decay": 10 ** wd_exponent,
        "p_drop": dropout,
    }


# Build small features from the images so HPO runs fast (lower cost):
# adaptive average pooling down to a 7x7 spatial grid.
shrink = nn.AdaptiveAvgPool2d(7)


def run_trial(model_ctor, train_loader, dev_loader, max_epochs=2):
    """Train one randomly configured model briefly and score it on dev.

    Args:
        model_ctor: Callable accepting ``p_drop`` and returning a module.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        dev_loader: Iterable of ``(inputs, labels)`` dev batches.
        max_epochs: Number of passes over ``train_loader``.

    Returns:
        ``(dev_accuracy, hyperparameters)`` for this trial.

    Inputs are pooled with the module-level ``shrink`` before being moved to
    the module-level ``device``.
    """
    hp = sample_hparams()
    model = model_ctor(p_drop=hp['p_drop']).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=hp['lr'], weight_decay=hp['weight_decay'])
    criterion = nn.CrossEntropyLoss()

    # Short training run.
    for _epoch in range(max_epochs):
        for inputs, targets in train_loader:
            inputs = shrink(inputs).to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()

    # Evaluation on the dev loader.
    model.eval()
    labels, predictions = [], []
    with torch.no_grad():
        for inputs, targets in dev_loader:
            labels.extend(targets.numpy())
            batch_pred = model(shrink(inputs).to(device)).argmax(1)
            predictions.extend(batch_pred.cpu().numpy())
    return accuracy_score(labels, predictions), hp


# Track the best (accuracy, hyperparameters) pair; -1 loses to any real accuracy.
best = (-1, None)
for t in range(3):  # demo: 3 trials
    acc, hp = run_trial(lambda p_drop: MLPHead(p_drop=p_drop),
                        train_loader, dev_loader, max_epochs=1)
    if acc > best[0]:
        best = (acc, hp)
best


(0.102,
 {'lr': 0.0011605429165440168,
  'weight_decay': 7.531309895034413e-06,
  'p_drop': 0.42788865089209005})