In [1]:
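# Run this cell first: without it the kernel restarts automatically when the softmax model runs (cause unknown). The settings below force MKL / OpenMP / NumExpr to run single-threaded.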
%env MKL_THREADING_LAYER=SEQUENTIAL
%env OMP_NUM_THREADS=1
%env MKL_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1


env: MKL_THREADING_LAYER=SEQUENTIAL
env: OMP_NUM_THREADS=1
env: MKL_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1


In [2]:
import numpy as np
from dataclasses import dataclass
from typing import List
import random
import scipy.optimize as opt
import scipy.linalg as sla


In [3]:
# Data loading (reuses the style of the loader supplied with the assignment)
@dataclass
class Dataset:
    """Bundle of the four arrays stored in one dataset archive."""
    Xtr: np.ndarray  # features of the noisy train+val set
    Str: np.ndarray  # noisy labels of the train+val set (values in {0,1,2})
    Xts: np.ndarray  # features of the clean test set
    Yts: np.ndarray  # clean labels of the test set (values in {0,1,2})

def load_npz(path):
    """Read a dataset archive from disk and wrap its arrays in a ``Dataset``.

    Args:
        path: Location of an ``.npz`` archive that contains the arrays
            ``Xtr``, ``Str``, ``Xts`` and ``Yts``.

    Returns:
        A ``Dataset`` holding the four arrays, fully loaded into memory.

    Raises:
        KeyError: If one of the four expected arrays is missing.
    """
    # np.load on an .npz returns a lazily-reading NpzFile that keeps the
    # archive open; the context manager closes it once the arrays have been
    # materialised by the key lookups below.
    with np.load(path) as archive:
        return Dataset(archive['Xtr'], archive['Str'], archive['Xts'], archive['Yts'])

# Miscellaneous helper functions
def set_seed(seed: int):
    """Seed NumPy's global RNG and Python's ``random`` module with ``seed``."""
    for seeder in (np.random.seed, random.seed):
        seeder(seed)

def one_hot(y: np.ndarray, C: int) -> np.ndarray:
    """Encode integer class labels as one-hot rows.

    Args:
        y: Integer labels, one per sample.
        C: Number of classes (width of the encoding).

    Returns:
        A float32 array of shape ``(len(y), C)`` with a single 1.0 per row.
    """
    rows = np.arange(y.shape[0])
    encoded = np.zeros((rows.size, C), dtype=np.float32)
    # One (row, label) pair per sample marks the active class.
    encoded[rows, y] = 1.0
    return encoded

def softmax(z: np.ndarray) -> np.ndarray:
    """Row-wise softmax of a 2-D array of logits.

    The per-row maximum is subtracted before exponentiation for numerical
    stability, and 1e-12 is added to the denominator.
    """
    exps = np.exp(z - z.max(axis=1, keepdims=True))
    denom = exps.sum(axis=1, keepdims=True) + 1e-12
    return exps / denom

def accuracy(y_true, y_pred):
    """Return the fraction of positions where the two label arrays agree, as a Python float."""
    hits = y_true == y_pred
    return float(hits.mean())

def split(X, y, ratio=0.2, seed=42):
    """Shuffle the samples and cut them into two parts.

    Args:
        X: Feature array indexed by sample along axis 0.
        y: Label array aligned with ``X``.
        ratio: Fraction of the samples placed in the second part.
        seed: Seed passed to ``set_seed`` before shuffling; this reseeds the
            global NumPy and ``random`` generators as a side effect.

    Returns:
        ``(X_first, y_first, X_second, y_second)`` where the first part holds
        ``int(n * (1 - ratio))`` samples and the second part holds the rest.
    """
    set_seed(seed)
    total = X.shape[0]
    order = np.random.permutation(total)
    cut = int(total * (1 - ratio))
    first, second = order[:cut], order[cut:]
    return X[first], y[first], X[second], y[second]

In [4]:
def standardize_train_val(Xtr, Xva):
    """Standardise both splits using statistics of the training split only.

    Both inputs are converted to float32 copies. If the training data's
    maximum exceeds 1.5, both splits are divided by 255. Each feature is then
    centred and scaled with the training mean and standard deviation
    (plus 1e-6 to avoid division by zero).

    Returns:
        ``(Xtr_standardised, Xva_standardised)``.
    """
    train = Xtr.astype(np.float32)
    val = Xva.astype(np.float32)

    # Values above 1.5 are taken to mean a 0-255 range; rescale both splits.
    if train.max() > 1.5:
        train = train / 255.0
        val = val / 255.0

    center = train.mean(axis=0, keepdims=True)
    scale = train.std(axis=0, keepdims=True) + 1e-6
    return (train - center) / scale, (val - center) / scale

# 1 Warmup Model 初步分类器

## 1.1 Softmax

In [5]:
class SoftmaxWarmup:
    """Simple multi-class softmax regression implemented with NumPy.

    The model is a single linear layer followed by a softmax. It is trained
    with mini-batch gradient descent on the cross-entropy loss, with optional
    L2 weight decay on the weight matrix (not on the bias).
    """
    def __init__(self, in_dim: int, num_classes: int, lr: float = 0.05, weight_decay: float = 5e-4):
        """Create the model.

        Args:
            in_dim: Number of input features.
            num_classes: Number of output classes.
            lr: Step size of each gradient update.
            weight_decay: L2 penalty coefficient applied to ``W``.
        """
        self.C = num_classes
        # Small Gaussian initialisation drawn from NumPy's global RNG, so the
        # starting weights depend on the RNG state at construction time
        # (fit() reseeds only afterwards).
        self.W = 0.01 * np.random.randn(in_dim, num_classes).astype(np.float32)
        self.b = np.zeros((num_classes,), dtype=np.float32)
        self.lr = lr
        self.wd = weight_decay

    def _forward(self, X):
        """Return the softmax class probabilities for the rows of ``X``."""
        logits = X @ self.W + self.b
        return softmax(logits)

    def _loss_and_grads(self, X, y):
        """Compute the regularised cross-entropy loss and its gradients.

        Args:
            X: Batch of features, one sample per row.
            y: Integer class labels for the batch.

        Returns:
            ``(loss, dW, db)``: the mean cross-entropy (plus
            ``0.5 * wd * ||W||^2`` when weight decay is enabled) and the
            gradients with respect to ``W`` and ``b``.
        """
        N = X.shape[0]
        P = self._forward(X)
        # 1e-12 guards against log(0).
        loss = -np.log(P[np.arange(N), y] + 1e-12).mean()
        if self.wd > 0:
            loss += 0.5 * self.wd * np.sum(self.W * self.W)
        # Gradient w.r.t. the logits is (P - one_hot(y)) / N. P is not needed
        # after the loss, so its buffer is reused in place.
        G = P
        G[np.arange(N), y] -= 1.0
        G /= N
        dW = X.T @ G + self.wd * self.W
        db = G.sum(axis=0)
        return loss, dW, db

    def fit(self, X, y, Xval=None, yval=None, epochs=10, batch_size=256, verbose=True, seed=0):
        """Train with shuffled mini-batch gradient descent.

        Args:
            X: Training features, one sample per row.
            y: Integer training labels.
            Xval: Optional validation features, reported each epoch.
            yval: Validation labels; required whenever ``Xval`` is given.
            epochs: Number of passes over the training data.
            batch_size: Number of samples per gradient step.
            verbose: Whether to print train (and validation) accuracy after
                every epoch.
            seed: Seed passed to ``set_seed`` before training; this reseeds
                the global NumPy and ``random`` generators.

        Raises:
            ValueError: If ``Xval`` is given without ``yval``.
        """
        # Comparing None against the predictions is elementwise False, so a
        # missing yval would otherwise be reported silently as val_acc=0.000.
        if Xval is not None and yval is None:
            raise ValueError("yval must be provided when Xval is given.")

        set_seed(seed)
        N = X.shape[0]
        for ep in range(1, epochs + 1):
            idx = np.random.permutation(N)
            Xs, ys = X[idx], y[idx]
            for st in range(0, N, batch_size):
                ed = min(st + batch_size, N)
                # The loss value is not used during training.
                _, dW, db = self._loss_and_grads(Xs[st:ed], ys[st:ed])
                self.W -= self.lr * dW
                self.b -= self.lr * db
            # Report training and validation accuracy once per epoch.
            if verbose:
                tr_acc = self.score(X, y)
                msg = f"[Softmax Warmup] epoch {ep:02d} | train_acc={tr_acc:.3f}"
                if Xval is not None:
                    va_acc = self.score(Xval, yval)
                    msg += f", val_acc={va_acc:.3f}"
                print(msg)

    def predict_proba(self, X):
        """Return the class-probability matrix for ``X``."""
        return self._forward(X)

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        return np.argmax(self._forward(X), axis=1)

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the labels ``y``."""
        return accuracy(y, self.predict(X))

## 1.2 CNN（没有用，可删）

In [6]:
# # ===== CNN =====
# import numpy as np
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# from torch.utils.data import TensorDataset, DataLoader

# def _to_float01(x: np.ndarray) -> np.ndarray:
#     """把数据转 float32，并在需要时从 0-255 归一到 0-1。"""
#     x = x.astype(np.float32)
#     if x.max() > 1.5:
#         x = x / 255.0
#     return x

# def _reshape_to_nchw(X: np.ndarray, image_shape=(28, 28), channels=1) -> np.ndarray:
#     """
#     把 (N, D) 或 (N, H*W*C) 的扁平数组重塑成 (N, C, H, W)。
#     image_shape: (H, W); channels: 1(灰度) 或 3(RGB)
#     """
#     N = X.shape[0]
#     H, W = image_shape
#     if X.ndim == 2:
#         X = X.reshape(N, H, W, channels)  # NHWC
#     # NHWC -> NCHW
#     if X.shape[-1] == channels:
#         X = np.transpose(X, (0, 3, 1, 2))
#     return X

# def _compute_channel_stats(X_nchw: np.ndarray):
#     """按通道计算训练集均值/方差（用于标准化）"""
#     # X: (N, C, H, W)
#     mu = X_nchw.mean(axis=(0, 2, 3), keepdims=True)
#     sg = X_nchw.std(axis=(0, 2, 3), keepdims=True) + 1e-6
#     return mu, sg

# class _SimpleCNN(nn.Module):
#     """
#     极简 CNN：
#       Conv( C->32 ) -> ReLU -> MaxPool
#       Conv(32->64 ) -> ReLU -> MaxPool
#       Flatten -> FC -> Softmax(通过CE)
#     适合 28x28 或 32x32 小图。
#     """
#     def __init__(self, in_ch: int, num_classes: int, img_hw: int):
#         super().__init__()
#         self.conv1 = nn.Conv2d(in_ch, 32, kernel_size=3, padding=1)
#         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
#         self.pool  = nn.MaxPool2d(2, 2)
#         # 下采样两次：H,W 各 /4
#         feat_hw = img_hw // 4
#         self.fc   = nn.Linear(64 * feat_hw * feat_hw, num_classes)

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))  # (N,32,H/2,W/2)
#         x = self.pool(F.relu(self.conv2(x)))  # (N,64,H/4,W/4)
#         x = torch.flatten(x, 1)
#         x = self.fc(x)
#         return x  # logits

# class CNNWarmup:
#     """
#     用小型 CNN 做 warm-up（拟合 noisy 标签），暴露 fit / predict_proba / predict / score。
#     会：
#       - 自动把 numpy 扁平数组重塑为 (N,C,H,W)
#       - 用训练集通道均值/方差做标准化（验证集复用同统计量）
#     """
#     def __init__(self, image_shape=(28, 28), channels=1, num_classes=3,
#                  lr=1e-3, weight_decay=5e-4, batch_size=256, epochs=10, seed=0, device=None):
#         self.H, self.W = image_shape
#         self.C = channels
#         self.K = num_classes
#         self.lr = lr
#         self.wd = weight_decay
#         self.batch_size = batch_size
#         self.epochs = epochs
#         self.seed = seed
#         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
#         torch.manual_seed(seed)
#         np.random.seed(seed)

#         self.model = _SimpleCNN(self.C, self.K, img_hw=self.H).to(self.device)
#         self.mu = None
#         self.sg = None  # 训练集通道均值/方差（NCHW 维度下 C×1×1）
#         self.tr_acc = None
#         self.va_acc = None

#     def _prep_xy(self, X: np.ndarray, y: np.ndarray, is_train=False):
#         """把 numpy X,y 转成标准化后的 torch Tensor"""
#         X = _to_float01(X)
#         X = _reshape_to_nchw(X, (self.H, self.W), self.C)
#         if is_train:
#             self.mu, self.sg = _compute_channel_stats(X)
#         assert self.mu is not None and self.sg is not None, "Must fit on train before val."
#         X = (X - self.mu) / self.sg
#         X = torch.from_numpy(X).float()
#         y = torch.from_numpy(y.astype(np.int64))
#         return X, y

#     def fit(self, Xtr: np.ndarray, ytr: np.ndarray, Xva: np.ndarray=None, yva: np.ndarray=None, verbose=True):
#         """训练 CNN 并在每个 epoch 打印 train/val acc。"""
#         # 预处理 & DataLoader
#         Xtr_t, ytr_t = self._prep_xy(Xtr, ytr, is_train=True)
#         tr_loader = DataLoader(TensorDataset(Xtr_t, ytr_t), batch_size=self.batch_size, shuffle=True)

#         if Xva is not None:
#             Xva_t, yva_t = self._prep_xy(Xva, yva, is_train=False)
#             va_loader = DataLoader(TensorDataset(Xva_t, yva_t), batch_size=self.batch_size, shuffle=False)
#         else:
#             va_loader = None

#         opt = torch.optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.wd)
#         for ep in range(1, self.epochs + 1):
#             self.model.train()
#             for xb, yb in tr_loader:
#                 xb, yb = xb.to(self.device), yb.to(self.device)
#                 opt.zero_grad()
#                 logits = self.model(xb)
#                 loss = F.cross_entropy(logits, yb)
#                 loss.backward()
#                 opt.step()

#             # ---- 监控：train / val acc ----
#             self.model.eval()
#             with torch.no_grad():
#                 tr_acc = self._accuracy_loader(tr_loader)
#                 msg = f"[CNN Warmup] epoch {ep:02d} | train_acc={tr_acc:.3f}"
#                 if va_loader is not None:
#                     va_acc = self._accuracy_loader(va_loader)
#                     msg += f", val_acc={va_acc:.3f}"
#                     self.va_acc = va_acc
#                 self.tr_acc = tr_acc
#                 if verbose: print(msg)

#     @torch.no_grad()
#     def _accuracy_loader(self, loader):
#         total = correct = 0
#         for xb, yb in loader:
#             xb, yb = xb.to(self.device), yb.to(self.device)
#             logits = self.model(xb)
#             pred = logits.argmax(1)
#             correct += (pred == yb).sum().item()
#             total += yb.numel()
#         return correct / total

#     # === 下面三个方法保持和 SoftmaxWarmup 同名，以便无缝替换 ===
#     @torch.no_grad()
#     def predict_proba(self, X: np.ndarray) -> np.ndarray:
#         """返回 softmax 概率 (N, K)"""
#         self.model.eval()
#         X = _to_float01(X)
#         X = _reshape_to_nchw(X, (self.H, self.W), self.C)
#         X = (X - self.mu) / self.sg
#         X_t = torch.from_numpy(X).float().to(self.device)
#         probs = F.softmax(self.model(X_t), dim=1).cpu().numpy()
#         return probs

#     @torch.no_grad()
#     def predict(self, X: np.ndarray) -> np.ndarray:
#         return np.argmax(self.predict_proba(X), axis=1)

#     def score(self, X: np.ndarray, y: np.ndarray) -> float:
#         yhat = self.predict(X)
#         return float((yhat == y).mean())


# 2 T估计器

## 2.1 选前百分之多少的样本作为锚点

In [7]:
def select_anchors_topk(probs: np.ndarray, top_pct: float = 0.01):
    """Pick, for every class, the samples with the highest predicted probability.

    Args:
        probs: ``(N, C)`` matrix of class probabilities.
        top_pct: Fraction of the ``N`` samples kept per class; at least one
            sample is always requested.

    Returns:
        A list of ``C`` index arrays. Each holds the ``k = max(1, ceil(top_pct * N))``
        sample indices with the largest probability for that class, ordered by
        ascending probability.
    """
    n_samples, n_classes = probs.shape
    k = max(1, int(np.ceil(top_pct * n_samples)))
    # argsort is ascending, so the last k positions are the k largest values.
    return [np.argsort(probs[:, cls])[-k:] for cls in range(n_classes)]


## 2.2 估计 T 的每一行（两种常见法：概率平均 / 计数直方图）

In [8]:
def estimate_T_anchor(probs: np.ndarray,
                      noisy_labels: np.ndarray,
                      anchors: list,
                      use_counts: bool = False,
                      eps: float = 1e-6) -> np.ndarray:
    """Estimate the transition matrix T from per-class anchor samples.

    Row ``i`` of the estimate is built from the anchor samples of class ``i``:

    - probability averaging (``use_counts=False``): the mean of the anchors'
      probability vectors;
    - counting (``use_counts=True``): the relative frequency of the anchors'
      noisy labels.

    A class without anchors falls back to the identity row. Every row is
    clipped to ``[eps, 1]`` and renormalised to sum to one.

    Args:
        probs: ``(N, C)`` matrix of class probabilities.
        noisy_labels: Noisy integer labels aligned with ``probs``.
        anchors: One collection of sample indices per class.
        use_counts: Selects the counting method instead of averaging.
        eps: Clipping floor and denominator guard.

    Returns:
        A ``(C, C)`` float32 matrix.
    """
    _, C = probs.shape
    estimate = np.zeros((C, C), dtype=np.float32)

    for cls, members in enumerate(anchors[:C] if isinstance(anchors, list) else [anchors[j] for j in range(C)]):
        if len(members) == 0:
            # No anchors for this class: assume it is never flipped.
            dist = np.zeros(C, dtype=np.float32)
            dist[cls] = 1.0
        elif use_counts:
            counts = np.bincount(noisy_labels[members], minlength=C).astype(np.float32)
            dist = counts / (counts.sum() + eps)
        else:
            dist = probs[members].mean(axis=0).astype(np.float32)
            dist = dist / (dist.sum() + eps)

        # Keep every entry strictly positive, then renormalise the row.
        dist = np.clip(dist, eps, 1.0)
        estimate[cls] = dist / dist.sum()

    return estimate

def estimate_T_from_probs(probs: np.ndarray,
                          noisy_labels: np.ndarray,
                          top_pct: float = 0.01,
                          use_counts: bool = False,
                          verbose: bool = True):
    """Estimate T directly from predicted probabilities.

    Selects the top ``top_pct`` most confident samples per class as anchors
    and estimates one row of T from each anchor set.

    Args:
        probs: ``(N, C)`` softmax probabilities produced by the warm-up model,
            aligned one-to-one with ``noisy_labels``.
        noisy_labels: ``(N,)`` noisy labels.
        top_pct: Fraction of samples kept as anchors for each class.
        use_counts: Forwarded to ``estimate_T_anchor``.
        verbose: Whether to print the anchor counts, row sums and the matrix.

    Returns:
        ``(T_hat, anchors)``: the estimated matrix and the per-class anchor
        index arrays.
    """
    assert probs.shape[0] == noisy_labels.shape[0], "probs 与 noisy_labels 数量必须一致。"

    def smart_round(x):
        # Four decimals with trailing zeros (and a dangling dot) stripped.
        text = f"{x:.4f}".rstrip('0').rstrip('.')
        return text or '0'

    # Step 1: choose the anchors.
    anchors = select_anchors_topk(probs, top_pct=top_pct)
    if verbose:
        cnts = list(map(len, anchors))
        print(f"[Anchor] top_pct={top_pct:.4f}, per-class counts={cnts}")

    # Step 2: estimate T from the anchors.
    T_hat = estimate_T_anchor(probs, noisy_labels, anchors, use_counts=use_counts)

    # Step 3: optional sanity report.
    if not verbose:
        return T_hat, anchors

    print("[T_hat] row sums:", np.round(T_hat.sum(axis=1), 6))
    print("T_hat:\n",
          np.array2string(T_hat, formatter={'float_kind': smart_round}, separator=' '))
    return T_hat, anchors


# 3 看初步分类器在数据集上的准确率

In [9]:
# Load the three dataset archives.
data1 = load_npz("datasets/FashionMNIST0.3.npz")
data2 = load_npz("datasets/FashionMNIST0.6.npz")
data3 = load_npz("datasets/CIFAR.npz")

# For each dataset: 80/20 train/validation split of the noisy data (seed 0),
# then standardisation with training-split statistics.
# NOTE: every split() call reseeds the global RNGs, so the order of these
# calls determines the RNG state seen by the cells that follow.
Xtr_1, ytr_1, Xva_1, yva_1 = split(data1.Xtr, data1.Str, ratio=0.2, seed=0)
Xtr_1_std, Xva_1_std = standardize_train_val(Xtr_1, Xva_1)

Xtr_2, ytr_2, Xva_2, yva_2 = split(data2.Xtr, data2.Str, ratio=0.2, seed=0)
Xtr_2_std, Xva_2_std = standardize_train_val(Xtr_2, Xva_2)

Xtr_3, ytr_3, Xva_3, yva_3 = split(data3.Xtr, data3.Str, ratio=0.2, seed=0)
# Flatten each CIFAR sample to a 1-D feature vector
# (presumably the arrays are stored image-shaped -- TODO confirm).
Xtr_3 = Xtr_3.reshape(len(Xtr_3), -1)
Xva_3 = Xva_3.reshape(len(Xva_3), -1)

Xtr_3_std, Xva_3_std = standardize_train_val(Xtr_3, Xva_3)

# Input dimensionality of each dataset, used to size the softmax models.
in_dim_1=Xtr_1_std.shape[1]
in_dim_2=Xtr_2_std.shape[1]
in_dim_3=Xtr_3_std.shape[1]

In [10]:
# Warm-up classifier for FashionMNIST0.3, trained and scored on the noisy labels.
# The initial weights are drawn from the global RNG at construction time;
# fit() reseeds the RNG (seed=0) only before shuffling.
softmax_warm_1 = SoftmaxWarmup(in_dim_1, num_classes=3, lr=0.02, weight_decay=5e-4)
softmax_warm_1.fit(Xtr_1_std, ytr_1, Xva_1_std, yva_1,
                 epochs=10, batch_size=512, verbose=True, seed=0)

# Final accuracies, measured against the noisy labels of each split.
train_acc_1 = softmax_warm_1.score(Xtr_1_std, ytr_1)
val_acc_1 = softmax_warm_1.score(Xva_1_std, yva_1)
print()
print("FashionMNIST0.3:")
print(f"\nFinal train_acc: {train_acc_1:.4f}")
print(f"Final val_acc: {val_acc_1:.4f}")

[Softmax Warmup] epoch 01 | train_acc=0.663, val_acc=0.667
[Softmax Warmup] epoch 02 | train_acc=0.669, val_acc=0.678
[Softmax Warmup] epoch 03 | train_acc=0.613, val_acc=0.609
[Softmax Warmup] epoch 04 | train_acc=0.665, val_acc=0.667
[Softmax Warmup] epoch 05 | train_acc=0.667, val_acc=0.674
[Softmax Warmup] epoch 06 | train_acc=0.675, val_acc=0.681
[Softmax Warmup] epoch 07 | train_acc=0.676, val_acc=0.686
[Softmax Warmup] epoch 08 | train_acc=0.640, val_acc=0.636
[Softmax Warmup] epoch 09 | train_acc=0.669, val_acc=0.673
[Softmax Warmup] epoch 10 | train_acc=0.674, val_acc=0.672

FashionMNIST0.3:

Final train_acc: 0.6736
Final val_acc: 0.6717


In [11]:
# Warm-up classifier for FashionMNIST0.6, trained and scored on the noisy labels.
# The initial weights are drawn from the global RNG at construction time;
# fit() reseeds the RNG (seed=0) only before shuffling.
softmax_warm_2 = SoftmaxWarmup(in_dim_2, num_classes=3, lr=0.02, weight_decay=5e-4)
softmax_warm_2.fit(Xtr_2_std, ytr_2, Xva_2_std, yva_2,
                 epochs=10, batch_size=512, verbose=True, seed=0)

# Final accuracies, measured against the noisy labels of each split.
train_acc_2 = softmax_warm_2.score(Xtr_2_std, ytr_2)
val_acc_2 = softmax_warm_2.score(Xva_2_std, yva_2)
print()
print("FashionMNIST0.6:")
print(f"\nFinal train_acc: {train_acc_2:.4f}")
print(f"Final val_acc: {val_acc_2:.4f}")

[Softmax Warmup] epoch 01 | train_acc=0.377, val_acc=0.356
[Softmax Warmup] epoch 02 | train_acc=0.338, val_acc=0.340
[Softmax Warmup] epoch 03 | train_acc=0.347, val_acc=0.321
[Softmax Warmup] epoch 04 | train_acc=0.381, val_acc=0.363
[Softmax Warmup] epoch 05 | train_acc=0.384, val_acc=0.351
[Softmax Warmup] epoch 06 | train_acc=0.390, val_acc=0.353
[Softmax Warmup] epoch 07 | train_acc=0.372, val_acc=0.337
[Softmax Warmup] epoch 08 | train_acc=0.391, val_acc=0.367
[Softmax Warmup] epoch 09 | train_acc=0.352, val_acc=0.344
[Softmax Warmup] epoch 10 | train_acc=0.366, val_acc=0.336

FashionMNIST0.6:

Final train_acc: 0.3661
Final val_acc: 0.3358


In [12]:
# Warm-up classifier for CIFAR (flattened features), trained and scored on the noisy labels.
# The initial weights are drawn from the global RNG at construction time;
# fit() reseeds the RNG (seed=0) only before shuffling.
softmax_warm_3 = SoftmaxWarmup(in_dim_3, num_classes=3, lr=0.02, weight_decay=5e-4)
softmax_warm_3.fit(Xtr_3_std, ytr_3, Xva_3_std, yva_3,
                 epochs=10, batch_size=512, verbose=True, seed=0)

# Final accuracies, measured against the noisy labels of each split.
train_acc_3 = softmax_warm_3.score(Xtr_3_std, ytr_3)
val_acc_3 = softmax_warm_3.score(Xva_3_std, yva_3)
print()
print("CIFAR:")
print(f"\nFinal train_acc: {train_acc_3:.4f}")
print(f"Final val_acc: {val_acc_3:.4f}")

[Softmax Warmup] epoch 01 | train_acc=0.350, val_acc=0.332
[Softmax Warmup] epoch 02 | train_acc=0.351, val_acc=0.335
[Softmax Warmup] epoch 03 | train_acc=0.358, val_acc=0.341
[Softmax Warmup] epoch 04 | train_acc=0.364, val_acc=0.340
[Softmax Warmup] epoch 05 | train_acc=0.368, val_acc=0.347
[Softmax Warmup] epoch 06 | train_acc=0.366, val_acc=0.346
[Softmax Warmup] epoch 07 | train_acc=0.366, val_acc=0.347
[Softmax Warmup] epoch 08 | train_acc=0.371, val_acc=0.342
[Softmax Warmup] epoch 09 | train_acc=0.365, val_acc=0.337
[Softmax Warmup] epoch 10 | train_acc=0.367, val_acc=0.335

CIFAR:

Final train_acc: 0.3675
Final val_acc: 0.3347


In [13]:
# cnn_warm_1 = CNNWarmup(image_shape=(28,28), channels=1, num_classes=3,
#                      lr=1e-3, weight_decay=5e-4, batch_size=256, epochs=10, seed=0)
# cnn_warm_1.fit(Xtr_1, ytr_1, Xva_1, yva_1, verbose=True)
# print("Final train_acc:", cnn_warm_1.score(Xtr_1, ytr_1))
# print("Final val_acc:",   cnn_warm_1.score(Xva_1, yva_1))


# cnn_warm_cifar = CNNWarmup(image_shape=(32,32), channels=3, num_classes=3,
#                            lr=1e-3, weight_decay=5e-4, batch_size=256, epochs=15, seed=0)
# cnn_warm_cifar.fit(Xtr3, ytr3, Xva3, yva3, verbose=True)


# 4 估计前2个数据集的T，比对

In [14]:
import numpy as np

def evaluate_T(T_hat: np.ndarray, T_true: np.ndarray, verbose: bool = True):
    """Compare an estimated transition matrix with the reference one.

    Args:
        T_hat: Estimated transition matrix.
        T_true: Reference transition matrix of the same shape.
        verbose: Whether to print both matrices and the error metrics.

    Returns:
        A dict with three entries:
        ``"per_row_L1"`` (L1 error of every row), ``"frobenius"`` (Frobenius
        norm of the difference) and ``"max_abs"`` (largest absolute entry-wise
        error).
    """
    assert T_hat.shape == T_true.shape, "T_hat.shape != T_true"

    delta = T_hat - T_true
    abs_delta = np.abs(delta)
    metrics = {
        "per_row_L1": abs_delta.sum(axis=1),
        "frobenius": np.linalg.norm(delta, 'fro'),
        "max_abs": abs_delta.max(),
    }

    if verbose:
        print("T_hat:\n", np.round(T_hat, 4))
        print("T_true:\n", np.round(T_true, 4))
        print("-" * 30)
        print("per row L1:", np.round(metrics["per_row_L1"], 4))
        print("Frobenius:", round(float(metrics["frobenius"]), 4))
        print("max|Δ|:", round(float(metrics["max_abs"]), 4))

    return metrics


In [15]:
import numpy as np

# ==== The two true transition matrices, hard-coded as constants (taken from the assignment description) ====
# Reference T for FashionMNIST0.3.
T_true_03 = np.array([[0.7, 0.3, 0.0],
                      [0.0, 0.7, 0.3],
                      [0.3, 0.0, 0.7]], dtype=np.float32)

# Reference T for FashionMNIST0.6.
T_true_06 = np.array([[0.4, 0.3, 0.3],
                      [0.3, 0.4, 0.3],
                      [0.3, 0.3, 0.4]], dtype=np.float32)



In [16]:
print("!!(  FashionMNIST0.3  )!!\n")
# Warm-up model probabilities on the training split, shape (N, C).
P_1 = softmax_warm_1.predict_proba(Xtr_1_std)  # (N, C)

# Sweep the anchor fraction and compare each count-based estimate with the true T.
for a in [0.05, 0.1, 0.2, 0.22, 0.25, 0.28, 0.3]:
    T_hat_1, _ = estimate_T_from_probs(P_1, ytr_1, top_pct=a, use_counts=True, verbose=False)
    print(f"---------Softmax+Counts α={a}--------------- ")
    # evaluate_T prints T_hat itself, so no separate print of the estimate is needed here.
    evaluate_T(T_hat_1, T_true_03)
    print()


!!(  FashionMNIST0.3  )!!

---------Softmax+Counts α=0.05--------------- 
T_hat:
 [[0.7306 0.2153 0.0542]
 [0.0042 0.7361 0.2597]
 [0.2444 0.0042 0.7514]]
T_true:
 [[0.7 0.3 0. ]
 [0.  0.7 0.3]
 [0.3 0.  0.7]]
------------------------------
per row L1: [0.1694 0.0806 0.1111]
Frobenius: 0.1405
max|Δ|: 0.0847

---------Softmax+Counts α=0.1--------------- 
T_hat:
 [[0.7306 0.2271 0.0424]
 [0.0042 0.7125 0.2833]
 [0.266  0.0028 0.7312]]
T_true:
 [[0.7 0.3 0. ]
 [0.  0.7 0.3]
 [0.3 0.  0.7]]
------------------------------
per row L1: [0.1458 0.0333 0.0681]
Frobenius: 0.1031
max|Δ|: 0.0729

---------Softmax+Counts α=0.2--------------- 
T_hat:
 [[0.7104 0.2483 0.0413]
 [0.0062 0.7139 0.2799]
 [0.2774 0.0038 0.7188]]
T_true:
 [[0.7 0.3 0. ]
 [0.  0.7 0.3]
 [0.3 0.  0.7]]
------------------------------
per row L1: [0.1035 0.0403 0.0451]
Frobenius: 0.0775
max|Δ|: 0.0517

---------Softmax+Counts α=0.22--------------- 
T_hat:
 [[0.708  0.2503 0.0417]
 [0.0063 0.708  0.2857]
 [0.2828 0.0066 0.7105]

In [17]:
print("!!(  FashionMNIST0.6  )!!\n")
# Warm-up model probabilities on the training split, shape (N, C).
P_2 = softmax_warm_2.predict_proba(Xtr_2_std)  # (N, C)

# Sweep the anchor fraction and compare each count-based estimate with the true T.
for a in [0.05, 0.08, 0.1, 0.15, 0.2, 0.25, 0.3]:
    T_hat_2, _ = estimate_T_from_probs(P_2, ytr_2, top_pct=a, use_counts=True, verbose=False)
    print(f"---------Softmax+Counts α={a}--------------- ")
    # evaluate_T prints T_hat itself, so no separate print of the estimate is needed here.
    evaluate_T(T_hat_2, T_true_06)
    print()

!!(  FashionMNIST0.6  )!!

---------Softmax+Counts α=0.05--------------- 
T_hat:
 [[0.4819 0.2667 0.2514]
 [0.2694 0.4444 0.2861]
 [0.2722 0.3569 0.3708]]
T_true:
 [[0.4 0.3 0.3]
 [0.3 0.4 0.3]
 [0.3 0.3 0.4]]
------------------------------
per row L1: [0.1639 0.0889 0.1139]
Frobenius: 0.1347
max|Δ|: 0.0819

---------Softmax+Counts α=0.08--------------- 
T_hat:
 [[0.4531 0.283  0.2639]
 [0.2865 0.4297 0.2839]
 [0.2856 0.3628 0.3516]]
T_true:
 [[0.4 0.3 0.3]
 [0.3 0.4 0.3]
 [0.3 0.3 0.4]]
------------------------------
per row L1: [0.1063 0.0594 0.1257]
Frobenius: 0.1107
max|Δ|: 0.0628

---------Softmax+Counts α=0.1--------------- 
T_hat:
 [[0.4403 0.2917 0.2681]
 [0.2993 0.416  0.2847]
 [0.2743 0.3653 0.3604]]
T_true:
 [[0.4 0.3 0.3]
 [0.3 0.4 0.3]
 [0.3 0.3 0.4]]
------------------------------
per row L1: [0.0806 0.0319 0.1306]
Frobenius: 0.0984
max|Δ|: 0.0653

---------Softmax+Counts α=0.15--------------- 
T_hat:
 [[0.4116 0.2991 0.2894]
 [0.306  0.4069 0.287 ]
 [0.2815 0.3704 0.3481

# 5 估计CIFAR的T

In [18]:
print("!!(  CIFAR  )!!\n")
# Warm-up model probabilities on the training split, shape (N, C).
P_3 = softmax_warm_3.predict_proba(Xtr_3_std)  # (N, C)
# Anchor fractions chosen with reference to the FashionMNIST0.6 results.
for a in [0.05, 0.08, 0.10, 0.12, 0.15]:
    T_hat_3, _ = estimate_T_from_probs(P_3, ytr_3, top_pct=a, use_counts=True, verbose=False)
    # Only the estimate is printed; no reference T for CIFAR is defined in this
    # notebook, so evaluate_T is not called here.
    print(f"Softmax+Counts α={a}: \n", np.round(T_hat_3,4))
    print()

!!(  CIFAR  )!!

Softmax+Counts α=0.05: 
 [[0.415  0.3317 0.2533]
 [0.3033 0.375  0.3217]
 [0.26   0.3033 0.4367]]

Softmax+Counts α=0.08: 
 [[0.4188 0.3302 0.251 ]
 [0.2948 0.3604 0.3448]
 [0.2802 0.299  0.4208]]

Softmax+Counts α=0.1: 
 [[0.4008 0.3392 0.26  ]
 [0.2942 0.3775 0.3283]
 [0.2833 0.3017 0.415 ]]

Softmax+Counts α=0.12: 
 [[0.4014 0.3368 0.2618]
 [0.2986 0.3729 0.3285]
 [0.2847 0.2972 0.4181]]

Softmax+Counts α=0.15: 
 [[0.405  0.335  0.26  ]
 [0.2956 0.3706 0.3339]
 [0.29   0.2956 0.4144]]

