## Import Libs

In [1]:
import os
import warnings # 避免一些可以忽略的报错
warnings.filterwarnings('ignore')
import random
import gc
import copy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm # 进度条
import time
import math

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch.optim import lr_scheduler # 学习率调度器
from torch.optim.lr_scheduler import _LRScheduler, CosineAnnealingLR

import timm # 预训练神经网络库，可直接调用预训练好的模型
from PIL import Image
import albumentations as A # 数据增强库
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import StratifiedKFold # 贴 kfold, 部分任务可能需要用到 StratifiedGroupKFold 将同一个group分到同一个fold中以防止数据泄露
from sklearn.preprocessing import OneHotEncoder # 独热编码

from collections import defaultdict # 记录 loss lr 等相关参数的变化
# 改变 终端颜色 方便观察
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

## CONFIG

In [8]:
is_debug = False  # set to True for a quick smoke run with fewer epochs

class CONFIG:
    """Hyper-parameters, model selection and file paths shared by the whole notebook."""

    seed = 308
    n_folds = 5

    epochs = 10 if not is_debug else 2
    now_cv = 0  # starting value for the "best validation accuracy so far"

    is_DataParallel = True
    img_size = [28, 28]        # (H, W) of the raw digit images stored in the CSV
    new_size2train = [32, 32]  # (H, W) the images are resized to before entering the network
    train_batch_size = 256
    valid_batch_size = 512
    labelsmooth_threshold = 0.09  # probability mass moved from the true class to the other classes

    n_classes = 10

    # Use half of the CPU cores for DataLoader workers.
    # os.cpu_count() may return None when the count cannot be determined, so fall back to 2.
    n_workers = (os.cpu_count() or 2) // 2

    learning_rate = 1e-3
    weight_decay = 1e-6
    scheduler = 'CosineAnnealingWithWarmupLR'  # cosine annealing preceded by a linear warm-up
    # T_max: number of optimizer steps (one step == one batch) until the LR reaches its minimum.
    # 42000 rows in train.csv -> (n_folds - 1) / n_folds of them are used for training in each fold,
    # i.e. 33600 rows -> 33600 // train_batch_size batches per epoch, times the number of epochs.
    T_max = 42000 // n_folds * (n_folds - 1) // train_batch_size * epochs
    min_lr = 1e-6

    # Backbones that the model class knows how to adapt:
    #   tf_efficientnet_b0.ns_jft_in1k
    #   tf_efficientnetv2_s.in21k_ft_in1k
    #
    #   convnext_tiny.fb_in22k_ft_in1k_384
    #   convnext_atto.d2_in1k
    #
    #   tiny_vit_21m_512.dist_in22k_ft_in1k
    # Note from the author: ordinary ViT models require a fixed input size (224, 384, 512, ...),
    # whereas tiny_vit does not restrict the input size (recommended, at the cost of more GPU memory).
    model_name = "convnext_atto.d2_in1k"
    pool_name = ""          # empty string keeps the backbone's default global pooling layer
    timm_pretrained = True  # load pretrained weights through timm

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    train_csv = "/kaggle/input/digit-recognizer/train.csv"
    # Same content as train.csv plus one extra `kfold` column.
    my_train_csv = "/kaggle/input/my-digit-recognizer-5skf-train-csv/my_digit_recognizer_5skf.csv"
    img_path = "/kaggle/input/308-digit-recognizer-img/output"
    ckpt_save_path = "/kaggle/working/output"

## Set Random Seed

In [3]:
def set_seed(seed=308):
    """Seed every random number generator the notebook relies on.

    Args:
        seed: Integer seed applied to `random`, NumPy and PyTorch (CPU and all GPUs).
    """
    random.seed(seed)
    # Exported for any subprocess started later; it does not change hashing in the running interpreter.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Training may run on several GPUs, so seed all of them rather than only the current device.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels between runs; keep it off for reproducibility.
    torch.backends.cudnn.benchmark = False
    
set_seed(CONFIG.seed) # Fix the random seeds so that results can be reproduced

## Data Processing

In [4]:
train = pd.read_csv(CONFIG.train_csv) # Load the raw training .csv
train

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# Every training run should share one fold assignment: reusing the same `kfold` column
# avoids leakage between folds and makes later model ensembling straightforward.
if os.path.exists(CONFIG.my_train_csv):
    # A CSV with the fold column already exists -> just load it.
    print("my_train_csv exists... \nreading...")
    train = pd.read_csv(CONFIG.my_train_csv)
else:
    # No fold file yet -> tag each row of train.csv with a stratified fold index and save it.
    print("my_train_csv not exists... \nnew_train_csv_with_fold generating...")

    # -1 marks "not assigned yet"; every row is overwritten by the loop below.
    train['kfold'] = -1

    skf = StratifiedKFold(n_splits=CONFIG.n_folds, shuffle=True, random_state=CONFIG.seed)
    for fold, (train_idx, val_idx) in enumerate(skf.split(X=train, y=train['label'])):
        # A row belongs to the fold in which it is part of the validation split.
        train.loc[val_idx, 'kfold'] = fold

    train.to_csv("my_digit_recognizer_5skf.csv", index=False)

train

my_train_csv exists... 
reading...


Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,kfold
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4


## Dataset and DataLoader

In [None]:
def transform_train(img):
    """Apply the training-time augmentation pipeline to one image.

    Args:
        img: Image array passed to albumentations as `image` (the dataset supplies an
            (H, W, 3) uint8 array).

    Returns:
        The value stored under "image" after blur/noise augmentation, resizing to
        CONFIG.new_size2train, normalisation with A.Normalize() defaults, and ToTensorV2.
    """
    # Resize takes (height, width); read both entries so non-square targets are honoured.
    target_h, target_w = CONFIG.new_size2train[0], CONFIG.new_size2train[1]
    composition = A.Compose([
        # With probability 0.7 apply exactly one of the blur / noise transforms.
        A.OneOf([
            A.MotionBlur(blur_limit=5),
            A.MedianBlur(blur_limit=5),
            A.GaussianBlur(blur_limit=5),
            A.GaussNoise(var_limit=(5.0, 30.0)),
        ], p=0.7),
        # More albumentations transforms are described at:
        # https://blog.csdn.net/qq_27039891/article/details/100795846

        A.Resize(target_h, target_w),
        A.Normalize(),
        ToTensorV2(),
    ])
    return composition(image=img)["image"]

def transform_valid(img):
    """Apply the deterministic validation-time preprocessing to one image.

    Args:
        img: Image array passed to albumentations as `image` (the dataset supplies an
            (H, W, 3) uint8 array).

    Returns:
        The value stored under "image" after resizing to CONFIG.new_size2train,
        normalisation with A.Normalize() defaults, and ToTensorV2.
    """
    # Resize takes (height, width); read both entries so non-square targets are honoured.
    target_h, target_w = CONFIG.new_size2train[0], CONFIG.new_size2train[1]
    composition = A.Compose([
        A.Resize(target_h, target_w),
        A.Normalize(),
        ToTensorV2(),
    ])
    return composition(image=img)["image"]

In [None]:
# 标签数据，示例为 0 到 9 的标签
# All class labels (0 .. n_classes - 1) as a column vector, used to fit the encoder
labels = np.arange(CONFIG.n_classes).reshape(-1, 1)

# Build a one-hot encoder that returns dense arrays.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and later removed the old name,
# so try the new keyword first and fall back for older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
one_hot = encoder.fit_transform(labels)

In [None]:
class MyDataset(Dataset):
    """Dataset that turns rows of the digit DataFrame into (image, one-hot label) pairs.

    Args:
        df: DataFrame with a `label` column and pixel columns `pixel0` .. `pixel783`.
        transform: Optional callable applied to the (H, W, 3) uint8 image.
        mode: "train" (labels are smoothed) or "valid" (plain one-hot labels).

    Raises:
        ValueError: If `mode` is neither "train" nor "valid".
    """

    _MODES = ("train", "valid")

    def __init__(self, df, transform=None, mode="train"):
        super().__init__()
        # Fail fast on a bad mode instead of on the first item fetched inside a worker.
        if mode not in self._MODES:
            raise ValueError(f"mode must be 'train' or 'valid', got {mode!r}")
        self.df = df
        self.transform = transform
        self.mode = mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx, :]  # the idx-th row of the frame
        label = row.label

        # Convert the integer label into a one-hot vector.
        label_onehot = encoder.transform(np.array(label).reshape(-1, 1))[0]
        if self.mode == "train":
            # Label smoothing is applied during training only:
            # take the threshold away from the true class ...
            label_onehot[label] = label_onehot[label] - CONFIG.labelsmooth_threshold
            # ... and spread it evenly over the remaining classes.
            label_onehot[label_onehot == 0] = CONFIG.labelsmooth_threshold / (CONFIG.n_classes - 1)
        elif self.mode != "valid":
            # `mode` may have been reassigned after construction.
            raise ValueError(f"mode must be 'train' or 'valid', got {self.mode!r}")
        label = label_onehot

        img = row["pixel0": "pixel783"].values
        img = img.reshape(CONFIG.img_size[0], CONFIG.img_size[1], -1)
        # Replicate the single channel three times; some albumentations transforms need uint8 input.
        img = np.concatenate([img] * 3, axis=-1).astype(np.uint8)

        if self.transform is not None:
            # The notebook's transforms end with ToTensorV2, which moves channels first: (H, W, C) -> (C, H, W).
            img = self.transform(img)

        return img, label

In [None]:
def prepare_loaders(df, fold=0):
    """Build the training and validation DataLoaders for one cross-validation fold.

    Args:
        df: DataFrame containing a `kfold` column.
        fold: Rows whose `kfold` equals this value form the validation set;
            all other rows form the training set.

    Returns:
        Tuple `(train_loader, valid_loader)`.
    """
    in_valid_fold = df["kfold"] == fold

    train_datasets = MyDataset(df=df[~in_valid_fold], transform=transform_train, mode="train")
    valid_datasets = MyDataset(df=df[in_valid_fold], transform=transform_valid, mode="valid")

    shared_kwargs = dict(num_workers=CONFIG.n_workers, pin_memory=True)
    train_loader = DataLoader(train_datasets, batch_size=CONFIG.train_batch_size, shuffle=True, **shared_kwargs)
    # Validation data is not shuffled.
    valid_loader = DataLoader(valid_datasets, batch_size=CONFIG.valid_batch_size, shuffle=False, **shared_kwargs)

    return train_loader, valid_loader

In [None]:
# 以下代码可检查Dataset，DataLoader是否实现基本功能
# Smoke test: pull one batch from each loader to confirm Dataset / DataLoader work end to end
train_loader, valid_loader = prepare_loaders(train, 0)
x_train, y_train = next(iter(train_loader))
x_valid, y_valid = next(iter(valid_loader))
# Image batches are expected as (batch_size, channels, H, W)
for _name, _batch in (("X_train", x_train), ("y_train", y_train), ("x_valid", x_valid), ("y_valid", y_valid)):
    print(f"{_name} shape : {_batch.shape}")

# Drop every reference to the loaders and batches, then collect garbage
del _name, _batch
del train_loader, valid_loader, x_train, y_train, x_valid, y_valid
gc.collect()

## Evaluation

In [None]:
def cal_ACC(y_true, y_preds):
    """Return the fraction of positions where `y_true` and `y_preds` agree.

    Args:
        y_true: Sequence or array of ground-truth class ids.
        y_preds: Sequence or array of predicted class ids, same length as `y_true`.

    Returns:
        Accuracy in [0, 1]; NaN when both inputs are empty.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    # Accept plain Python lists as well as arrays.
    y_true = np.asarray(y_true)
    y_preds = np.asarray(y_preds)
    if len(y_true) != len(y_preds):
        # Raising a bare string is itself a TypeError; raise a real exception instead.
        raise ValueError("len(y_true) != len(y_preds)")
    length = len(y_true)
    if length == 0:
        # Nothing to score; avoid a 0 / 0 division.
        return float("nan")
    acc = (y_true == y_preds).sum() / length

    return acc

## Model

In [None]:
class GeMPool(nn.Module):
    """Generalised-mean (GeM) pooling over the last two dimensions.

    Args:
        p: Initial value of the learnable pooling exponent.
        eps: Lower clamp applied to the input before it is raised to the power `p`.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        # Stored as a parameter so the exponent is trained with the rest of the network.
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Clamp to `eps`, raise to `p`, average over dims (-2, -1), then take the p-th root."""
        powered = x.clamp(min=eps).pow(p)
        averaged = torch.mean(powered, dim=(-2, -1))
        return averaged.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return type(self).__name__ + f'(p={exponent:.4f}, eps={self.eps})'

In [None]:
class DigitRecognizerModel(nn.Module):
    """timm backbone with its classifier removed, followed by a fresh linear classification head.

    The backbone is chosen by CONFIG.model_name; the output size of the head is CONFIG.n_classes.

    Raises:
        ValueError: If CONFIG.model_name does not belong to a supported architecture family.
    """

    def __init__(self):
        super().__init__()
        # timm.create_model builds the network by name; `pretrained` controls loading of pretrained weights.
        self.backbone = timm.create_model(model_name=CONFIG.model_name, pretrained=CONFIG.timm_pretrained)

        # Locate the backbone's final classification layer, remember its input width, and
        # replace it with nn.Identity() so that the backbone outputs features instead of logits.
        if "efficientnet" in CONFIG.model_name:
            if CONFIG.pool_name == "GeMPool":
                self.backbone.global_pool = GeMPool()  # swap the global pooling layer for GeM pooling
            in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()

        elif "vit" in CONFIG.model_name or "convnext" in CONFIG.model_name:
            # Both families are accessed through `head.fc` here.
            in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()

        else:
            # Without this branch `in_features` would be undefined and fail later with a NameError.
            raise ValueError(
                f"Unsupported model_name {CONFIG.model_name!r}: expected an efficientnet, vit or convnext model"
            )

        # Custom head that replaces the original classifier.
        self.head = nn.Sequential(
            nn.Linear(in_features, CONFIG.n_classes)
        )

    def forward(self, x):
        output = self.backbone(x)   # backbone features
        output = self.head(output)  # class logits
        return output

In [None]:
model = DigitRecognizerModel() # Instantiate the model
model.to(CONFIG.device)

## Train and Valid Function

In [None]:
criterion = nn.CrossEntropyLoss() # Loss function: cross-entropy, a common choice for multi-class classification

In [None]:
def train_one_epoch(model, optimizer, scheduler, train_loader, epoch):
    """Train `model` for one pass over `train_loader`.

    Args:
        model: Network to optimise.
        optimizer: Optimizer stepped once per batch.
        scheduler: Optional LR scheduler stepped once per batch; may be None.
        train_loader: Iterable yielding `(images, labels)` batches with one-hot style labels.
        epoch: Epoch number, shown in the progress bar only.

    Returns:
        Tuple `(epoch_loss, train_cv)`: sample-weighted mean loss and accuracy over the epoch.
        Both are NaN if the loader yields no batches.
    """
    model.train()

    # Running totals; accuracy is derived from counts so nothing is re-concatenated each step.
    correct_count = 0
    dataset_size = 0
    running_loss = 0.0
    epoch_loss = float("nan")
    train_cv = float("nan")

    bar = tqdm(enumerate(train_loader), total=len(train_loader))  # progress bar
    for step, (images, labels) in bar:
        optimizer.zero_grad()

        batch_size = images.size(0)
        if CONFIG.is_DataParallel:  # multi-GPU: place the batch on the current CUDA device
            images = images.cuda().float()
            labels = labels.cuda().float()  # labels are (smoothed) one-hot vectors, hence float32
        else:  # single device
            images = images.to(CONFIG.device, dtype=torch.float)
            labels = labels.to(CONFIG.device, dtype=torch.float)

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        # The largest entry of the label vector marks the true class.
        correct_count += (outputs.argmax(1) == labels.argmax(1)).sum().item()
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size
        train_cv = correct_count / dataset_size

        # Values displayed next to the progress bar
        bar.set_postfix(Epoch=epoch,
                        Train_Loss=epoch_loss,
                        Train_ACC=train_cv,
                        LR=optimizer.param_groups[0]['lr'])

    return epoch_loss, train_cv

In [None]:
@torch.inference_mode()
def valid_one_epoch(model, valid_loader, epoch, optimizer=None):
    """Evaluate `model` on `valid_loader` without computing gradients.

    Args:
        model: Network to evaluate; switched to eval mode.
        valid_loader: Iterable yielding `(images, labels)` batches with one-hot labels.
        epoch: Epoch number, shown in the progress bar only.
        optimizer: Optional optimizer whose current LR is shown in the progress bar.
            Previously this function read a global variable named `optimizer`, which only
            worked when such a global happened to exist; the LR is now shown only when
            the optimizer is passed in explicitly.

    Returns:
        Tuple `(epoch_loss, valid_cv)`: sample-weighted mean loss and accuracy.
        Both are NaN if the loader yields no batches.
    """
    model.eval()

    # Running totals; accuracy is derived from counts so nothing is re-concatenated each step.
    correct_count = 0
    dataset_size = 0
    running_loss = 0.0
    epoch_loss = float("nan")
    valid_cv = float("nan")

    bar = tqdm(enumerate(valid_loader), total=len(valid_loader))
    # The inference_mode decorator already disables autograd, so no extra no_grad block is needed.
    for step, (images, labels) in bar:
        batch_size = images.size(0)

        if CONFIG.is_DataParallel:
            images = images.cuda().float()
            labels = labels.cuda().float()
        else:
            images = images.to(CONFIG.device, dtype=torch.float)
            labels = labels.to(CONFIG.device, dtype=torch.float)

        outputs = model(images)
        loss = criterion(outputs, labels)

        correct_count += (outputs.argmax(1) == labels.argmax(1)).sum().item()
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size
        valid_cv = correct_count / dataset_size

        postfix = dict(Epoch=epoch, Valid_Loss=epoch_loss, Valid_ACC=valid_cv)
        if optimizer is not None:
            postfix["LR"] = optimizer.param_groups[0]['lr']
        bar.set_postfix(**postfix)

    return epoch_loss, valid_cv

In [None]:
def run_training(fold, model, optimizer, scheduler, train_loader, valid_loader, num_epochs=CONFIG.epochs, now_cv=CONFIG.now_cv):
    """Train and validate for `num_epochs`, checkpointing whenever validation accuracy does not drop.

    Args:
        fold: Fold index, used only in the checkpoint file name.
        model: Network to train.
        optimizer: Optimizer handed to `train_one_epoch`.
        scheduler: LR scheduler handed to `train_one_epoch` (may be None).
        train_loader: Training DataLoader.
        valid_loader: Validation DataLoader.
        num_epochs: Number of epochs to run.
        now_cv: Initial best accuracy; an epoch is saved when its accuracy is >= the best so far.

    Returns:
        Tuple `(model, history, best_model_path)`: the model with the best weights loaded,
        a dict of per-epoch metric lists, and the path of the last saved checkpoint
        (None if nothing was saved).
    """
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {} x {}\n".format(torch.cuda.get_device_name(), torch.cuda.device_count()))

    start = time.time()
    history = defaultdict(list)
    best_epoch_cv = now_cv
    best_model_path = None  # path of the checkpoint holding the best weights
    best_model_wts = copy.deepcopy(model.state_dict())  # in-memory copy of the best weights

    for epoch in range(1, num_epochs + 1):
        gc.collect()  # reclaim garbage before each epoch
        train_epoch_loss, train_epoch_cv = train_one_epoch(model, optimizer, scheduler, train_loader, epoch)
        valid_epoch_loss, valid_epoch_cv = valid_one_epoch(model, valid_loader, epoch)
        print(f"epoch: {epoch}, LOSS = {valid_epoch_loss}, CV(Acc) = {valid_epoch_cv}")

        epoch_metrics = (
            ('Train Loss', train_epoch_loss),
            ('Valid Loss', valid_epoch_loss),
            ('Train CV(Acc)', train_epoch_cv),
            ('Valid CV(Acc)', valid_epoch_cv),
            ('lr', optimizer.param_groups[0]['lr']),
        )
        for metric_name, metric_value in epoch_metrics:
            history[metric_name].append(metric_value)

        # Keep a copy of the weights and write a checkpoint when accuracy matches or beats the best.
        if valid_epoch_cv >= best_epoch_cv:
            print(f"{b_}epoch: {epoch}, Validation CV(Acc) Improved ({best_epoch_cv} ---> {valid_epoch_cv}))")
            best_epoch_cv = valid_epoch_cv
            best_model_wts = copy.deepcopy(model.state_dict())
            if not os.path.exists(CONFIG.ckpt_save_path):  # create the output directory on first use
                os.makedirs(CONFIG.ckpt_save_path)

            best_model_path = "{}/Fold_{}_CV_{:.4f}_Loss{:.4f}_epoch{:.0f}.bin".format(
                CONFIG.ckpt_save_path, fold, best_epoch_cv, valid_epoch_loss, epoch)
            torch.save(model.state_dict(), best_model_path)  # only the weights are stored
            print(f"Model Saved{sr_}")

        print()

    # Report the total wall-clock time.
    time_elapsed = time.time() - start
    hours, remainder = divmod(time_elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(hours, minutes, seconds))
    print("Best CV(Acc): {:.4f}".format(best_epoch_cv))

    # Restore the best weights before handing the model back.
    model.load_state_dict(best_model_wts)

    return model, history, best_model_path

## Scheduler and Optimizer

In [None]:
class CosineAnnealingWithWarmupLR(_LRScheduler):
    """Linear warm-up followed by cosine annealing down to `eta_min`.

    "Epoch" follows the `_LRScheduler` naming: one unit is one call to `step()`.

    Args:
        optimizer: Wrapped optimizer.
        T_max: Total number of steps (warm-up included) after which the LR reaches `eta_min`.
        eta_min: Final learning rate.
        warmup_epochs: Number of warm-up steps during which the LR rises linearly to the base LR.
        last_epoch: Index of the last step, -1 to start fresh.

    After `T_max` steps the LR is held at `eta_min` instead of following the cosine back upwards.
    """

    def __init__(self, optimizer, T_max, eta_min=0, warmup_epochs=10, last_epoch=-1):
        self.T_max = T_max
        self.eta_min = eta_min
        self.warmup_epochs = warmup_epochs
        # Keep at least one cosine step so the division in get_lr can never be by zero.
        self.cosine_epochs = max(1, T_max - warmup_epochs)
        super(CosineAnnealingWithWarmupLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch < self.warmup_epochs:
            # Linear warm-up
            return [(base_lr * (self.last_epoch + 1) / self.warmup_epochs) for base_lr in self.base_lrs]

        # Cosine annealing; clamp the position so that stepping past T_max keeps the LR at eta_min
        # (the unclamped cosine is periodic and would raise the LR again).
        cosine_epoch = min(self.last_epoch - self.warmup_epochs, self.cosine_epochs)
        cosine_factor = (1 + math.cos(math.pi * cosine_epoch / self.cosine_epochs)) / 2
        return [self.eta_min + (base_lr - self.eta_min) * cosine_factor for base_lr in self.base_lrs]

In [None]:
# lr scheduler
def fetch_scheduler(optimizer, T_max, min_lr):
    """Create the LR scheduler selected by CONFIG.scheduler.

    Args:
        optimizer: Optimizer the scheduler controls.
        T_max: Number of scheduler steps until the LR reaches `min_lr`.
        min_lr: Minimum learning rate.

    Returns:
        The scheduler instance, or None when CONFIG.scheduler is None.

    Raises:
        ValueError: If CONFIG.scheduler names an unknown scheduler.
    """
    if CONFIG.scheduler is None:
        return None

    if CONFIG.scheduler == 'CosineAnnealingLR':
        # The LR follows a cosine curve downwards (visible in the LR plot in the logs section).
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_max, eta_min=min_lr)

    if CONFIG.scheduler == 'CosineAnnealingWithWarmupLR':
        # Warm up for the number of steps that corresponds to one epoch.
        return CosineAnnealingWithWarmupLR(optimizer, T_max=T_max, eta_min=min_lr,
                                           warmup_epochs=T_max // CONFIG.epochs)

    # Previously an unknown name fell through and failed with an UnboundLocalError.
    raise ValueError(f"Unknown scheduler: {CONFIG.scheduler!r}")

In [None]:
# 指定优化器为 AdamW
# optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG.learning_rate, 
#                              weight_decay=CONFIG.weight_decay)
# scheduler = fetch_scheduler(optimizer, T_max=CONFIG.T_max, min_lr=CONFIG.min_lr)

## Start Training

In [None]:
oof = []       # out-of-fold predictions, appended fold by fold
true = []      # matching ground-truth labels, in the same order as `oof`
historys = []  # one training history dict per fold

for fold in range(CONFIG.n_folds):
    print(f"================================ Fold {fold} start training ================================")
    del model  # drop the previous model (from the cell above or the previous fold) before building a new one
    torch.cuda.empty_cache()
    model = DigitRecognizerModel()
    if CONFIG.is_DataParallel:
        # Replicate over every visible GPU rather than assuming exactly two cards.
        device_ids = list(range(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model, device_ids=device_ids)
        model = model.cuda()
    else:
        model = model.to(CONFIG.device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG.learning_rate,
                                  weight_decay=CONFIG.weight_decay)
    scheduler = fetch_scheduler(optimizer, T_max=CONFIG.T_max, min_lr=CONFIG.min_lr)

    train_loader, valid_loader = prepare_loaders(train, fold)
    model, history, best_model_path = run_training(fold, model, optimizer, scheduler, train_loader, valid_loader,
                                                   num_epochs=CONFIG.epochs, now_cv=CONFIG.now_cv)
    historys.append(history)

    # With the best weights restored, predict this fold's validation set for the out-of-fold score.
    model.eval()  # make inference mode explicit instead of relying on the last validation pass
    bar = tqdm(enumerate(valid_loader), total=len(valid_loader))
    with torch.no_grad():
        for step, (images, labels) in bar:
            batch_size = images.size(0)
            if CONFIG.is_DataParallel:
                images = images.cuda().float()
                labels = labels.cuda().float()
            else:
                images = images.to(CONFIG.device, dtype=torch.float)
                labels = labels.to(CONFIG.device, dtype=torch.float)

            # argmax over logits equals argmax over softmax probabilities, so softmax is skipped
            # (the former call also omitted `dim`, which is deprecated).
            outputs = model(images).argmax(1)

            oof.append(outputs.flatten().detach().cpu().numpy())
            true.append(labels.detach().cpu().numpy().argmax(1))
        print()  # line break after the progress bar

## Local CV

In [None]:
# Merge the per-batch arrays collected in the training loop into one flat array each.
# NOTE(review): this rebinds `oof` / `true` from lists to arrays, so the cell is meant to run once per training run.
oof = np.concatenate(oof)
true = np.concatenate(true)

In [None]:
# Accuracy of the out-of-fold predictions against the true labels
local_cv = cal_ACC(true, oof)
print("Local CV : ", local_cv)

## Logs

In [None]:
# Choose which fold's training history the plots below display
fold = 0
history = historys[fold]

In [None]:
# Training vs. validation loss per epoch for the selected fold
for curve_name in ("Train Loss", "Valid Loss"):
    curve = history[curve_name]
    plt.plot(range(len(curve)), curve, label=curve_name)
plt.xlabel("epochs")
plt.ylabel("Loss")
plt.grid()
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for the selected fold
for curve_name in ("Train CV(Acc)", "Valid CV(Acc)"):
    curve = history[curve_name]
    plt.plot(range(len(curve)), curve, label=curve_name)
plt.xlabel("epochs")
plt.ylabel("CV(Acc)")
plt.grid()
plt.legend()
plt.show()

In [None]:
# Learning rate recorded at the end of each epoch for the selected fold
lr_curve = history["lr"]
plt.plot(range(len(lr_curve)), lr_curve, label="lr")
plt.xlabel("epochs")
plt.ylabel("lr")
plt.grid()
plt.legend()
plt.show()