## 一、导入库

In [1]:
import numpy as np
from dataclasses import dataclass
from typing import List
import random
import scipy.optimize as opt
import scipy.linalg as sla

## 二、数据读取与处理

In [21]:
# Data loading (reuses the style of the code provided with the assignment)
@dataclass
class Dataset:
    """Container for the four arrays that make up one noisy-label dataset."""
    Xtr: np.ndarray  # features of the noisy train+val split
    Str: np.ndarray  # noisy labels of the train+val split ({0,1,2})
    Xts: np.ndarray  # features of the clean test split
    Yts: np.ndarray  # clean labels of the test split ({0,1,2})

def load_npz(path) -> "Dataset":
    """Load one dataset archive and wrap its arrays in a ``Dataset``.

    Args:
        path: Location of the ``.npz`` archive; forwarded to ``np.load``.

    Returns:
        A ``Dataset`` built from the archive entries ``'Xtr'``, ``'Str'``,
        ``'Xts'`` and ``'Yts'`` (in that order).

    Raises:
        KeyError: If one of the four expected entries is missing.
    """
    # np.load on an .npz returns an NpzFile that keeps the underlying file
    # open. Using it as a context manager closes the handle as soon as the
    # four arrays have been read; indexing materialises each array in memory,
    # so they stay valid after the archive is closed.
    with np.load(path) as archive:
        return Dataset(
            archive['Xtr'],
            archive['Str'],
            archive['Xts'],
            archive['Yts'],
        )

# Miscellaneous helper functions
def set_seed(seed: int):
    """Seed NumPy's global RNG and Python's ``random`` module with *seed*."""
    # NumPy first, then the stdlib generator.
    for seeder in (np.random.seed, random.seed):
        seeder(seed)

def one_hot(y: np.ndarray, C: int) -> np.ndarray:
    """Encode integer labels as one-hot rows.

    Args:
        y: 1-D array of integer class indices.
        C: Number of classes (width of the output).

    Returns:
        A ``float32`` array of shape ``(len(y), C)`` with a single 1.0 per row.
    """
    n_samples = y.shape[0]
    encoded = np.zeros((n_samples, C), dtype=np.float32)
    row_ids = np.arange(n_samples)
    encoded[row_ids, y] = 1.0
    return encoded

def softmax(z: np.ndarray) -> np.ndarray:
    """Row-wise softmax of a 2-D array of logits.

    The per-row maximum is subtracted before exponentiating for numerical
    stability, and a tiny constant is added to the denominator.
    """
    shifted = z - z.max(axis=1, keepdims=True)
    weights = np.exp(shifted)
    normaliser = weights.sum(axis=1, keepdims=True) + 1e-12
    return weights / normaliser

def accuracy(y, yhat):
    """Return the fraction of positions where *y* equals *yhat* as a float."""
    matches = (y == yhat)
    return float(matches.mean())


# Return the known transition matrix T selected by the file name.
def get_T(name: str) -> np.ndarray:
    """Look up the hard-coded 3x3 transition matrix for a dataset name.

    The name is lower-cased and checked for the substring ``"0.3"`` first,
    then ``"0.6"``.

    Args:
        name: Dataset file name or path.

    Returns:
        A fresh ``float32`` array of shape ``(3, 3)`` whose rows each sum to 1.

    Raises:
        ValueError: If the name contains neither ``"0.3"`` nor ``"0.6"``.
    """
    lowered = name.lower()
    # Ordered (tag, rows) pairs; the first tag found in the name wins.
    known_matrices = (
        ("0.3", ((0.7, 0.3, 0.0),
                 (0.0, 0.7, 0.3),
                 (0.3, 0.0, 0.7))),
        ("0.6", ((0.4, 0.3, 0.3),
                 (0.3, 0.4, 0.3),
                 (0.3, 0.3, 0.4))),
    )
    for tag, rows in known_matrices:
        if tag in lowered:
            return np.array(rows, dtype=np.float32)
    raise ValueError("Unknown dataset: only 0.3 and 0.6 are supported here.")

# Train / validation split
def split(X,y,ratio=0.2,seed=42):
    """Shuffle the samples and split them into two parts.

    The global RNGs are re-seeded with *seed* via ``set_seed`` before the
    permutation is drawn, so the split is reproducible for a given seed.

    Args:
        X: Array of samples, indexed along axis 0.
        y: Array of labels aligned with *X*.
        ratio: Fraction of samples placed in the second (held-out) part.
        seed: Seed used for the shuffle.

    Returns:
        ``(X_first, y_first, X_second, y_second)`` where the first part holds
        ``int(n * (1 - ratio))`` samples and the second part holds the rest.
    """
    set_seed(seed)
    n_samples = X.shape[0]
    order = np.random.permutation(n_samples)
    cut = int(n_samples * (1 - ratio))
    first_idx, second_idx = order[:cut], order[cut:]
    return X[first_idx], y[first_idx], X[second_idx], y[second_idx]

## 三、ccn算法实现

### preprocess

In [22]:
@dataclass
class Standardizer:
    """Feature standardiser that also appends a constant bias column."""
    mean: np.ndarray  # value subtracted from the features
    std: np.ndarray   # value the centred features are divided by

    def transform(self, Xf):
        """Return ``(Xf - mean) / std`` with a trailing column of ones."""
        scaled = (Xf - self.mean) / self.std
        n_rows = scaled.shape[0]
        ones_column = np.ones((n_rows, 1), dtype=np.float64)
        return np.hstack([scaled, ones_column])

def flatten(X):
    """Collapse every axis after the first and cast the result to float64."""
    n_samples = X.shape[0]
    as_rows = X.reshape(n_samples, -1)
    return as_rows.astype(np.float64)
def fit_std(Xtrf):
    """Build a ``Standardizer`` from the per-column statistics of *Xtrf*.

    The mean and standard deviation are taken along axis 0 with the axis
    kept, and 1e-5 is added to the standard deviation so that constant
    columns do not cause a division by zero.
    """
    column_mean = Xtrf.mean(0, keepdims=True)
    column_std = Xtrf.std(0, keepdims=True) + 1e-5
    return Standardizer(column_mean, column_std)

### forward方法

Forward 方法：把「干净预测分布」p_clean 乘以 T^T，得到 noisy 标签的预测分布：p_tilde = T^T · p_clean（此处 p_clean 视为列向量；代码中每一行是一个样本的分布，因此等价地写作 p_clean @ T），其中 T[i, j] = P(noisy = j | clean = i)。然后对 noisy 标签做交叉熵。

In [28]:
def forward_loss(p_clean, y, T):
    """Mean negative log-likelihood of labels *y* under ``p_clean @ T``.

    Args:
        p_clean: 2-D array with one predicted distribution per row.
        y: 1-D integer array of labels, one per row of *p_clean*.
        T: Transition matrix applied on the right of *p_clean*.

    Returns:
        The loss as a Python float. Probabilities are clipped to
        ``[1e-12, 1.0]`` before the logarithm is taken.
    """
    noisy_probs = np.clip(p_clean @ T, 1e-12, 1.0)
    row_ids = np.arange(y.shape[0])
    label_probs = noisy_probs[row_ids, y]
    return float(-np.log(label_probs).mean())

返回 dL/dz（对logits的梯度）。便于与线性层合成 dL/dW = X^T @ dL/dz

In [29]:
def dLdz_forward(p_clean, y, T):
    """Gradient of the forward loss with respect to the logits.

    Chains three steps: the derivative of the mean negative log-likelihood
    with respect to the noisy distribution ``p_clean @ T``, the
    back-projection through ``T``, and the softmax Jacobian.

    Args:
        p_clean: 2-D array of softmax outputs, one row per sample.
        y: 1-D integer array of labels.
        T: Transition matrix applied on the right of *p_clean*.

    Returns:
        Array with the same shape as *p_clean* holding dL/dz.
    """
    n_samples, n_classes = p_clean.shape
    targets = one_hot(y, n_classes)
    noisy_probs = np.clip(p_clean @ T, 1e-12, 1.0)

    # d(mean NLL)/d(p_tilde): non-zero only at the observed label.
    grad_noisy = -(targets / noisy_probs) / n_samples
    # Back through p_tilde = p_clean @ T.
    grad_clean = grad_noisy @ T.T
    # Softmax Jacobian-vector product: p * (g - <g, p>) per row.
    weighted_sum = (grad_clean * p_clean).sum(axis=1, keepdims=True)
    return p_clean * (grad_clean - weighted_sum)

### 多类逻辑回归模型

In [25]:
@dataclass
class FwdCfg:
    """Hyper-parameters for one forward-loss training run."""
    wd: float = 1e-4      # L2 weight-decay coefficient
    max_iter: int = 300   # iteration cap handed to the optimiser
    seed: int = 42        # seed used for weight initialisation
class SoftmaxFwd:
    """Linear softmax classifier trained with the forward loss and L2 penalty.

    The weight matrix ``W`` has shape ``(D, C)`` and is fitted with SciPy's
    L-BFGS-B using an analytic gradient.
    """

    def __init__(self, D, C, T, cfg:FwdCfg):
        """Store the dimensions, transition matrix and config; initialise W.

        The RNGs are re-seeded with ``cfg.seed`` so that the small Gaussian
        initialisation (scale 0.01) is reproducible.
        """
        self.D = D
        self.C = C
        self.T = T
        self.cfg = cfg
        set_seed(cfg.seed)
        self.W = (0.01*np.random.randn(D,C)).astype(np.float64)

    def _fun(self,w,X,y):
        """Objective for the optimiser: return ``(loss, flat_gradient)``.

        *w* is the flattened weight matrix; the loss is the forward loss plus
        ``0.5 * wd * ||W||_F^2``.
        """
        weights = w.reshape(self.D,self.C)
        probs = softmax(X@weights)
        data_term = forward_loss(probs,y,self.T)
        penalty = 0.5*self.cfg.wd*(sla.norm(weights,'fro')**2)
        grad_logits = dLdz_forward(probs,y,self.T)
        # Chain rule through the linear layer, plus the penalty's gradient.
        grad_weights = X.T@grad_logits + self.cfg.wd*weights
        return data_term+penalty, grad_weights.reshape(-1)

    def fit(self,X,y):
        """Minimise the objective from the current W; return the SciPy result."""
        start = self.W.reshape(-1)
        # `disp` is intentionally not passed, to avoid a warning.
        res = opt.minimize(
            self._fun,
            start,
            args=(X,y),
            method="L-BFGS-B",
            jac=True,
            options={"maxiter":self.cfg.max_iter},
        )
        self.W = res.x.reshape(self.D,self.C)
        return res

    def predict_proba(self,X):
        """Return the softmax of ``X @ W`` (one distribution per row)."""
        return softmax(X@self.W)

    def predict(self,X):
        """Return the index of the most probable class for each row of X."""
        probs = self.predict_proba(X)
        return probs.argmax(1)
       

### 调参

In [26]:
@dataclass
class Result:
    """Outcome of training and evaluating one configuration."""
    cfg: FwdCfg        # configuration that produced this result
    val_loss: float    # validation criterion used for model selection
    test_acc: float    # accuracy on the clean test set, in [0, 1]
def search_with_direction(Xtr, Str, Xts, Yts, T, seed=42):
    """Grid-search the weight decay of ``SoftmaxFwd`` and pick the best run.

    The noisy data is flattened and split 80/20 into train and validation
    parts. Standardisation statistics are fitted on the train part only and
    applied to all three sets. One model is trained per grid entry.

    Args:
        Xtr: Features of the noisy train+val set.
        Str: Noisy labels aligned with *Xtr*.
        Xts: Features of the clean test set.
        Yts: Clean labels aligned with *Xts*.
        T: Transition matrix passed to the model and the loss.
        seed: Seed used for the split and for every configuration.

    Returns:
        ``(best, results)`` where *results* lists one ``Result`` per grid
        entry in grid order and *best* is the one with the lowest ``val_loss``.
    """
    # Preprocessing (statistics come from the train part only).
    train_val_flat = flatten(Xtr)
    test_flat = flatten(Xts)
    X_tr_raw, y_tr, X_val_raw, y_val = split(train_val_flat, Str, ratio=0.2, seed=seed)
    scaler = fit_std(X_tr_raw)
    X_tr, X_val, X_te = (
        scaler.transform(part) for part in (X_tr_raw, X_val_raw, test_flat)
    )
    D = X_tr.shape[1]
    C = T.shape[0]

    # Grid: only the weight decay varies; the iteration cap is fixed at 500.
    weight_decays = (1e-4, 1e-3, 5e-2, 1e-2, 5e-1, 1e-1)
    grid = [FwdCfg(wd=wd, max_iter=500, seed=seed) for wd in weight_decays]

    results: List[Result] = []
    for cfg in grid:
        model = SoftmaxFwd(D, C, T, cfg)
        model.fit(X_tr, y_tr)

        # Selection criterion: forward loss on the validation part plus the
        # L2 penalty of the fitted weights.
        # NOTE(review): the penalty scales with cfg.wd, so criteria are not
        # directly comparable across configurations; confirm this is intended.
        val_probs = model.predict_proba(X_val)
        penalty = 0.5*cfg.wd*(sla.norm(model.W,'fro')**2)
        vloss = forward_loss(val_probs, y_val, T) + penalty

        # Accuracy is reported on the clean test set.
        test_acc = accuracy(Yts, model.predict(X_te))
        results.append(Result(cfg, vloss, test_acc))

    # Lowest validation criterion wins.
    best = min(results, key=lambda r: r.val_loss)
    return best, results

## 四、main部分

In [30]:
def main():
    """Run the weight-decay search on each dataset and print the results."""
    set_seed(42)

    def fmt(r:Result):
        # One-line summary of a single configuration's outcome.
        return f"[p@T] wd={r.cfg.wd}, it={r.cfg.max_iter} | val_loss={r.val_loss:.4f}, test_acc={r.test_acc*100:.2f}%"

    dataset_paths = (
        "datasets/FashionMNIST0.3.npz",
        "datasets/FashionMNIST0.6.npz",
    )
    for path in dataset_paths:
        print(f"\n==== Dataset: {path} ====")
        T = get_T(path)
        data = load_npz(path)
        best, results = search_with_direction(
            data.Xtr, data.Str, data.Xts, data.Yts, T, seed=42
        )

        print("Tuned configs (both orientations):")
        for entry in results:
            print(" ", fmt(entry))
        print("** Best:", fmt(best))


if __name__ == "__main__":
    main()


==== Dataset: datasets/FashionMNIST0.3.npz ====
Tuned configs (both orientations):
  [p@T] wd=0.0001, it=500 | val_loss=0.7299, test_acc=95.47%
  [p@T] wd=0.001, it=500 | val_loss=0.6817, test_acc=96.23%
  [p@T] wd=0.05, it=500 | val_loss=0.6749, test_acc=97.07%
  [p@T] wd=0.01, it=500 | val_loss=0.6633, test_acc=97.47%
  [p@T] wd=0.5, it=500 | val_loss=0.7295, test_acc=96.30%
  [p@T] wd=0.1, it=500 | val_loss=0.6854, test_acc=96.93%
** Best: [p@T] wd=0.01, it=500 | val_loss=0.6633, test_acc=97.47%

==== Dataset: datasets/FashionMNIST0.6.npz ====
Tuned configs (both orientations):
  [p@T] wd=0.0001, it=500 | val_loss=1.1009, test_acc=72.17%
  [p@T] wd=0.001, it=500 | val_loss=1.0996, test_acc=72.77%
  [p@T] wd=0.05, it=500 | val_loss=1.0935, test_acc=95.13%
  [p@T] wd=0.01, it=500 | val_loss=1.0943, test_acc=92.10%
  [p@T] wd=0.5, it=500 | val_loss=1.0964, test_acc=92.83%
  [p@T] wd=0.1, it=500 | val_loss=1.0941, test_acc=94.97%
** Best: [p@T] wd=0.05, it=500 | val_loss=1.0935, test_a