## Import Libs

In [1]:
import os
import warnings # 避免一些可以忽略的报错
warnings.filterwarnings('ignore')
import random
import gc
import copy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm # 进度条
import time

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import timm

import albumentations as A # 数据增强库
from albumentations.pytorch import ToTensorV2

## CONFIG

In [2]:
is_debug = False  # Debug switch; not referenced by any other cell visible in this notebook.


class CONFIG:
    """Static settings for this inference notebook, read as class attributes."""

    seed = 308  # Passed to set_seed() so runs are reproducible.

    is_DataParallel = True  # When True, each fold model is wrapped in torch.nn.DataParallel before loading weights.
    test_batch_size = 512  # Batch size of the test DataLoader.
    img_size = [28, 28]  # Height/width used to reshape each flat pixel row into an image.
    new_size2train = [32, 32]  # Height/width the images are resized to before entering the model.

    n_folds = 5  # Number of fold checkpoints that are loaded and ensembled.
    n_classes = 10  # Output size of the classification head.

    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so the integer division cannot raise TypeError.
    n_workers = (os.cpu_count() or 1) // 2

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Candidate backbone names kept for reference:
    #   tf_efficientnet_b0.ns_jft_in1k
    #   tf_efficientnetv2_s.in21k_ft_in1k
    #
    #   tiny_vit_21m_512.dist_in22k_ft_in1k
    model_name = "tf_efficientnet_b0.ns_jft_in1k"
    pool_name = ""  # Set to "GeMPool" to replace the efficientnet global pooling layer with GeMPool.
    # Kaggle submission notebooks have no internet access, so pretrained weights
    # cannot be downloaded; keep this False (True fails with a network error).
    timm_pretrained = False

    test_csv = "/kaggle/input/digit-recognizer/test.csv"  # CSV holding the test images as flat pixel columns.
    ckpt_path = "/kaggle/input/308-digitrecognizer-base3"  # Directory listed to find the per-fold checkpoints.

## Set Random Seed

In [3]:
def set_seed(seed=308):
    """Seed every random number generator the notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU and every CUDA device). It is also exported as
            ``PYTHONHASHSEED``.
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # The notebook can run on more than one GPU (DataParallel), so seed all
    # CUDA devices explicitly rather than only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning may pick different kernels between runs; keep it off so
    # the deterministic flag above is not undermined.
    torch.backends.cudnn.benchmark = False
    
set_seed(CONFIG.seed) # Fix the random seed so that results can be reproduced

## Data Processing

In [4]:
# Load the test set from the CSV path configured in CONFIG.test_csv.
test = pd.read_csv(CONFIG.test_csv) # Read the test-set data
# Bare last expression: the notebook renders the frame as the cell output.
test

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Dataset and DataLoader

In [5]:
def transform(img): # Intended to match the validation transform used during training
    """Resize, normalize and tensor-convert a single image for inference.

    Args:
        img: Image array handed to the Albumentations pipeline as ``image=``.

    Returns:
        The value stored under ``"image"`` in the pipeline result, i.e. the
        output of the final ``ToTensorV2`` step.
    """
    composition = A.Compose([
        # Height and width are read from separate entries so that a non-square
        # new_size2train is honoured (identical result for the square default).
        # NOTE(review): if training resized with new_size2train[0] for both
        # sides, keep the two entries equal to stay consistent with it.
        A.Resize(CONFIG.new_size2train[0], CONFIG.new_size2train[1]),
        A.Normalize(),
        ToTensorV2(),
    ])
    return composition(image=img)["image"]

In [6]:
class MyDataset(Dataset):
    """Dataset that turns rows of a flat-pixel DataFrame into 3-channel images.

    Every column from ``"pixel0"`` to the last column is treated as pixel
    data. Each row is reshaped to ``CONFIG.img_size`` and replicated to three
    channels before the optional transform is applied.
    """

    def __init__(self, df, transform=None):
        """Store the frame and pre-extract its pixel columns.

        Args:
            df: DataFrame containing a ``"pixel0"`` column; that column and
                all columns after it are used as pixel values.
            transform: Optional callable applied to each ``uint8`` image array.
        """
        super().__init__()
        self.df = df
        self.transform = transform
        # Slice the pixel columns once instead of doing a pandas row lookup and
        # label slice for every item; values are cast to uint8 exactly as the
        # per-item code path did.
        self.pixels = df.loc[:, "pixel0":].to_numpy().astype(np.uint8)

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, str(idx))`` for row ``idx``.

        The test set has no labels, so the row position (as a string) is
        returned in place of one.
        """
        img = self.pixels[idx]
        img = img.reshape(CONFIG.img_size[0], CONFIG.img_size[1], -1)
        # Replicate along the last axis three times (grayscale -> 3 channels).
        img = np.concatenate([img] * 3, axis=-1)

        if self.transform is not None:  # e.g. resize / normalize / to-tensor
            img = self.transform(img)

        return img, str(idx)

In [7]:
def prepare_loaders():
    """Build the DataLoader used for inference.

    Reads the module-level ``test`` frame and ``transform`` function.

    Returns:
        A DataLoader over ``MyDataset(test)`` that iterates in order.
    """
    dataset = MyDataset(df=test, transform=transform)
    loader_options = dict(
        batch_size=CONFIG.test_batch_size,
        num_workers=CONFIG.n_workers,
        # Inference walks the data in order, so no shuffling.
        shuffle=False,
        pin_memory=True,
    )
    return DataLoader(dataset, **loader_options)

## Model

In [8]:
class GeMPool(nn.Module):
    """Generalized-mean pooling over the last two dimensions with a learnable exponent."""

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        # One-element learnable exponent, initialised to `p`.
        self.p = nn.Parameter(p * torch.ones(1))
        self.eps = eps

    def forward(self, x):
        """Pool `x` using the module's current exponent and epsilon."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Clamp `x` at `eps`, raise to `p`, average over dims (-2, -1), then take the 1/p power."""
        clamped = torch.clamp(x, min=eps)
        powered_mean = torch.mean(clamped ** p, dim=(-2, -1))
        return powered_mean ** (1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

In [9]:
class DigitRecognizerModel(nn.Module):
    """timm backbone with its own classifier removed, followed by a linear head.

    The backbone is created from ``CONFIG.model_name``. Its final classifier
    is replaced by ``nn.Identity`` and a new ``nn.Linear`` head maps the
    backbone features to ``CONFIG.n_classes`` outputs.

    Raises:
        ValueError: If ``CONFIG.model_name`` contains neither ``"efficientnet"``
            nor ``"vit"``, because the classifier layout is then unknown.
    """

    def __init__(self):
        super().__init__()
        # In the inference notebook `pretrained` is False (see CONFIG.timm_pretrained).
        self.backbone = timm.create_model(model_name=CONFIG.model_name, pretrained=CONFIG.timm_pretrained)

        # Remove the backbone's final classification layer and remember its input width.
        if "efficientnet" in CONFIG.model_name:
            if CONFIG.pool_name == "GeMPool":
                self.backbone.global_pool = GeMPool()  # Replace the global pooling layer with GeM pooling.
            in_features = self.backbone.classifier.in_features
            # nn.Identity returns its input unchanged, which effectively removes the classifier.
            self.backbone.classifier = nn.Identity()
        elif "vit" in CONFIG.model_name:
            in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
        else:
            # Without this branch `in_features` would be unbound below and the
            # failure would surface as an unrelated-looking UnboundLocalError.
            raise ValueError(
                f"Unsupported model_name {CONFIG.model_name!r}: expected a name containing "
                "'efficientnet' or 'vit'"
            )

        # Custom head that replaces the classifier removed above.
        self.head = nn.Sequential(
            nn.Linear(in_features, CONFIG.n_classes)
        )

    def forward(self, x):
        """Run the backbone, then the classification head, and return the head output."""
        features = self.backbone(x)
        return self.head(features)

## Load Model

In [10]:
# Load one trained checkpoint per fold; the list is ensembled at inference time.
models = []

# Sorted so that paths[i] corresponds to fold i (the file names begin with "Fold_<i>_").
paths = sorted(os.listdir(CONFIG.ckpt_path))
if len(paths) < CONFIG.n_folds:
    # Fail with a clear message instead of an IndexError halfway through loading.
    raise FileNotFoundError(
        f"Expected at least {CONFIG.n_folds} checkpoint files in {CONFIG.ckpt_path}, found {len(paths)}"
    )

if CONFIG.is_DataParallel:
    # GPUs 0 and 1 are the intended devices; only list the ones that actually
    # exist so that a single-GPU machine does not fail on a missing device id.
    device_ids = [gpu for gpu in (0, 1) if gpu < torch.cuda.device_count()]

for i in range(CONFIG.n_folds):
    model = DigitRecognizerModel()
    if CONFIG.is_DataParallel:
        # The state dict is loaded into the wrapped model, so the checkpoint
        # keys must match the wrapper's naming.
        # NOTE(review): presumably the checkpoints were saved from a
        # DataParallel-wrapped model - confirm against the training notebook.
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    model = model.to(CONFIG.device)
    # map_location keeps loading independent of the device the checkpoint was saved from.
    # NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
    state_dict = torch.load(os.path.join(CONFIG.ckpt_path, paths[i]), map_location=CONFIG.device)
    model.load_state_dict(state_dict)
    print(f"fold : {i} --------- path : {paths[i]}")
    model.eval()
    models.append(model)

fold : 0 --------- path : Fold_0_CV_0.9954_Loss0.1183_epoch10.bin
fold : 1 --------- path : Fold_1_CV_0.9944_Loss0.1183_epoch9.bin
fold : 2 --------- path : Fold_2_CV_0.9939_Loss0.1251_epoch10.bin
fold : 3 --------- path : Fold_3_CV_0.9930_Loss0.1292_epoch8.bin
fold : 4 --------- path : Fold_4_CV_0.9939_Loss0.1206_epoch8.bin


## Infer Function

In [11]:
def Infer(model, test_loader):
    """Run `model` over every batch of `test_loader` and collect its outputs.

    Args:
        model: Module to evaluate; it is switched to eval mode first.
        test_loader: Iterable of ``(img, img_id)`` batches with a length.

    Returns:
        NumPy array of the model outputs for all batches, concatenated in
        loader order. The values are the raw outputs - neither softmax nor
        argmax is applied - because the per-fold results are fused afterwards.
    """
    model.eval()

    collected = []
    progress = tqdm(test_loader, total=len(test_loader))
    with torch.no_grad():
        for batch_imgs, _batch_ids in progress:
            batch_imgs = batch_imgs.to(CONFIG.device, dtype=torch.float)
            batch_out = model(batch_imgs)
            collected.append(batch_out.detach().cpu().numpy())

    return np.concatenate(collected)

## Start Infer

In [12]:
# test preds
test_loader = prepare_loaders()

# Collect the raw outputs of every fold model on the same loader.
fold_preds = []
for fold in range(0, CONFIG.n_folds):
    preds = Infer(models[fold], test_loader)
    fold_preds.append(preds)

# Fuse the folds by averaging their outputs element-wise.
total_test_pred = np.mean(fold_preds, axis=0)
# Convert the fused outputs to probabilities. `dim=1` is given explicitly:
# relying on softmax's implicit dimension is deprecated and emits a warning.
total_test_pred = F.softmax(torch.from_numpy(total_test_pred), dim=1).numpy()
# The class with the highest probability is the prediction.
y_preds = total_test_pred.argmax(1)

100%|██████████| 55/55 [00:14<00:00,  3.80it/s]
100%|██████████| 55/55 [00:13<00:00,  3.96it/s]
100%|██████████| 55/55 [00:14<00:00,  3.84it/s]
100%|██████████| 55/55 [00:14<00:00,  3.92it/s]
100%|██████████| 55/55 [00:13<00:00,  3.99it/s]


## Make Submission

In [13]:
# Assemble the submission table: ImageId is the test row index shifted by one,
# Label is the predicted class for that row.
sub = pd.DataFrame({
    "ImageId": test.index + 1,
    "Label": y_preds,
})
sub

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [14]:
# Write the submission without the DataFrame index column, then read the
# file back so the cell output shows exactly what was saved.
sub.to_csv('submission.csv', index=False)
pd.read_csv('submission.csv')

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [17]:
# NOTE: this whole cell is disabled (commented-out) test-time augmentation (TTA) code.
# It redefines transform() and prepare_loaders() with random augmentations, runs the
# fold ensemble n_TTA times, averages the probabilities and rewrites submission.csv.
# NOTE(review): the progress-bar output stored below this cell (four groups of five
# bars separated by '#' lines) looks like it came from a run with this code enabled.

# # TTA reference: https://blog.csdn.net/Together_CZ/article/details/132759753

# n_TTA = 4 # If TTA is adopted, this parameter can be moved into CONFIG

# def transform(img): # Kept consistent with transform_train used during training
#     composition = A.Compose([
#         A.OneOf([
#             A.MotionBlur(blur_limit=5),
#             A.MedianBlur(blur_limit=5),
#             A.GaussianBlur(blur_limit=5),
#             A.GaussNoise(var_limit=(5.0, 30.0)),
#         ], p=0.7),
#         A.Resize(CONFIG.new_size2train[0], CONFIG.new_size2train[0]),
#         A.Normalize(),
#         ToTensorV2(),
#     ])
#     return composition(image=img)["image"]

# def prepare_loaders(): # Note: the transform used here is the one redefined above
#     test_datasets = MyDataset(df=test, transform=transform)
    
#     test_loader = DataLoader(test_datasets, batch_size=CONFIG.test_batch_size, num_workers=CONFIG.n_workers, shuffle=False, pin_memory=True)
    
#     return test_loader

# # Run TTA
# y_preds = []
# for _ in range(n_TTA):
#     total_test_pred = []
#     test_loader = prepare_loaders()

#     for fold in range(0, CONFIG.n_folds):
#         preds = Infer(models[fold], test_loader)
#         total_test_pred.append(preds)

#     total_test_pred = np.mean(total_test_pred, axis=0)
#     total_test_pred = F.softmax(torch.from_numpy(total_test_pred)).numpy()
#     y_preds.append(total_test_pred) # Note: these are probabilities, not yet the argmax labels
#     print("#" * 100)
# y_preds = np.mean(y_preds, axis=0) # Average the results of the n_TTA passes
# y_preds = y_preds.argmax(1) # Take the class with the highest probability as the label

# # Make Submission
# sub = pd.DataFrame()
# sub["ImageId"] = test.index + 1
# sub["Label"] = y_preds
# sub.to_csv('submission.csv', index=False)
# pd.read_csv('submission.csv')

100%|██████████| 55/55 [00:37<00:00,  1.46it/s]
100%|██████████| 55/55 [00:38<00:00,  1.43it/s]
100%|██████████| 55/55 [00:39<00:00,  1.40it/s]
100%|██████████| 55/55 [00:38<00:00,  1.42it/s]
100%|██████████| 55/55 [00:38<00:00,  1.44it/s]

####################################################################################################



100%|██████████| 55/55 [00:38<00:00,  1.44it/s]
100%|██████████| 55/55 [00:38<00:00,  1.42it/s]
100%|██████████| 55/55 [00:38<00:00,  1.43it/s]
100%|██████████| 55/55 [00:37<00:00,  1.48it/s]
100%|██████████| 55/55 [00:37<00:00,  1.45it/s]

####################################################################################################



100%|██████████| 55/55 [00:37<00:00,  1.46it/s]
100%|██████████| 55/55 [00:39<00:00,  1.38it/s]
100%|██████████| 55/55 [00:40<00:00,  1.36it/s]
100%|██████████| 55/55 [00:38<00:00,  1.44it/s]
100%|██████████| 55/55 [00:38<00:00,  1.43it/s]

####################################################################################################



100%|██████████| 55/55 [00:40<00:00,  1.37it/s]
100%|██████████| 55/55 [00:39<00:00,  1.41it/s]
100%|██████████| 55/55 [00:40<00:00,  1.36it/s]
100%|██████████| 55/55 [00:43<00:00,  1.26it/s]
100%|██████████| 55/55 [00:40<00:00,  1.34it/s]

####################################################################################################



