# 📒 Notebooks
[UWMGI: Unet [Train] [PyTorch]](https://www.kaggle.com/code/awsaf49/uwmgi-unet-train-pytorch/)
<br/>
本人主要对该 $notebook$ 做了较详细的中文注释, 并根据个人需要适当修改、添加了部分代码(实验测试结果对比)。

# 🛠 Install Libraries
$Python$ 中以 $!$ 开头的代码是一种特殊的语法，称为 $shell$ 命令（$shell commands$）。$shell$ 命令可以在 $Python$ 代码中执行系统命令，比如 $ls$, $pwd$, $cd$ 等。$shell$ 命令通常用在 $Jupyter\ Notebook$ 或 $IPython$ 等交互式环境中，以方便用户操作文件系统或其他外部程序。

$-q$ 参数是用来指定 $pip$ 安装时的日志级别的，它表示 $quiet$ 模式，即安静模式。当使用 $-q$ 参数时，$pip$ 安装时只会输出错误和警告信息，不会输出正常的进度信息。这样可以减少输出的噪音，提高安装的效率。如果使用两个 $-q$ 参数，即 $-qq$，那么 $pip$ 安装时连错误和警告信息也不会输出，只有在发生异常时才会输出信息。

$-qU$ 参数是用来指定 $pip$ 安装时的日志级别和升级模式的，它相当于 $-q$ $-U$ 的缩写。$-U$ 参数是用来指定 $pip$ 安装时是否升级已经安装的包的，它表示 $upgrade$ 模式，即升级模式。当使用 $-U$ 参数时，$pip$ 安装时会检查已经安装的包是否有更新的版本，如果有，则会自动升级到最新版本。这样可以保证安装的包是最新的，避免出现版本不兼容的问题。

In [1]:
!pip install -qq segmentation_models_pytorch
!pip install -qq -U wandb
!pip install -qq scikit-learn==1.0
!pip install -qq torchsummary

# 📚 Import Libraries 
$Python$ 中 % 开头的代码的用法是一种特殊的语法，称为魔法命令（$magic\ commands$）。魔法命令是一些可以在 $IPython$ 或 $Jupyter\ Notebook$ 等交互式环境中执行的命令，它们可以提供一些方便的功能，比如控制输出格式、执行系统命令、加载扩展模块等

In [2]:
#导入 autoreload 扩展
%load_ext autoreload 
# 导入的模块发生变化时，会自动重新加载
%autoreload 2 

In [3]:
import numpy as np

# 主要用来 excel 表格的操作, 实现数据集的读取
import pandas as pd
# pd.options.plotting.backend = "plotly"

import random
from glob import glob
import os, shutil
# tqdm 库的功能是提供一个快速，可扩展的 Python 进度条，可以在 Python 长循环中添加一个进度提示信息
from tqdm.notebook import tqdm
tqdm.pandas ()
import time
import copy
from collections import defaultdict

# 强制回收垃圾, 释放空间
import gc

# 结合 wandb 打印训练过程中一些指标的变化 
from IPython import display as ipd

# 图像可视化
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# 用来 k 折交叉检验
from sklearn.model_selection import StratifiedGroupKFold

# Pytorch 常用库
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# 用于数据增强
import albumentations as A
from albumentations.pytorch import ToTensorV2

from joblib import Parallel, delayed

# 终端输出是可以输出不同颜色的文字
from colorama import Fore, Back, Style
c_  = Fore.GREEN
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings ("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# ⭐ WandB

In [4]:
import wandb
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient ()
# api_key = user_secrets.get_secret ("WANDB")
wandb.login (key = "f89bb5c977a0726d4d4727415427dd5eb8359124")
anonymous = None

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcharming[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\pc/.netrc


# ⚙️ Configuration 
把该项目所用到的参数都包含到 $CFG$ 类中

In [5]:
class CFG:
    seed          = 101
    debug         = False # set debug=False for Full Training
    exp_name      = 'Unet-For-UWMGI'
    comment       = 'Unet-resnet18-224x224-UWMGI'
    model_name    = 'Unet'
#     backbone      = 'efficientnet-b1' # 原 backbone
    backbone      = 'resnet18'
    train_bs      = 64
    valid_bs      = train_bs * 2
    img_size      = [224, 224]
    epochs        = 15
    lr            = 1e-3
    scheduler     = 'CosineAnnealingLR'
    min_lr        = 1e-6
    T_max         = int (30000 / train_bs * epochs) + 50
    T_0           = 25
    warmup_epochs = 0
    wd            = 1e-6
    n_accumulate  = max (1, 32 // train_bs)
    n_fold        = 5
    num_classes   = 3
    # device        = torch.device ("cuda:0" if torch.cuda.is_available () else "cpu")
    device = "cpu"


# ❗ Reproducibility

In [6]:
def set_seed (seed = 42):
    '''
        初始化各个随机种子为同一值, 
        保证每次执行程序的运行情况都相同
    '''
    np.random.seed (seed)
    random.seed (seed)
    torch.manual_seed (seed)
    torch.cuda.manual_seed (seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str (seed)
    print ('> SEEDING DONE')
    
set_seed (CFG.seed)

> SEEDING DONE


# 📖 Meta Data
数据集中有部分数据是只有图像没有掩码的 (即没有标签)。
由于类别共三种, 所以表格中每个样本均有三项, 每一项之间只有 $class$ 不同, 对应的图像地址和掩码图像地址都一样, 且一张掩码图像中包含了全部三种器官的掩码。

In [7]:
df = pd.read_csv('./uwmgi-mask-dataset/train.csv')
# 填充表格中空着的 segmentation 项
df['segmentation'] = df.segmentation.fillna('') 
# 把掩码对应的图片地址修改成对应的 npy 地址
df['mask_path'] = df.mask_path.str.replace('/png/','/np').str.replace('.png','.npy') 
# 计算每一个样本的掩码的长度, 用来判断该样本是否有掩码
df['rle_len'] = df.segmentation.map (len) 
# 计算每个样本对应的掩码长度之和, 只要有一个及以上类别存在掩码, 即判断该图像有掩码, 同时把样本对应的三项归为一项了
df2 = df.groupby (['id'])['rle_len'].agg (sum).to_frame ().reset_index () 
# 添加 empty 列, 说明该样本的状态 (即是否有掩码)
df2['empty'] = (df2.rle_len == 0) 
df2 = df2.drop (columns = ['rle_len'])
# 删除无用信息
df = df.drop (columns = ['class', 'segmentation', 'day', 'slice', 'height', 'width', 'rle_len']) 
# 三项归为一项
df = df.drop_duplicates (subset = ['id'], keep = 'first') 
# 获取 empty 状态
df = df.merge (df2, on = ['id']) 

In [8]:
# 删除脏数据
Case138_Day0 = [i for i in range (76, 145)]
Case85_Day23 = [119,120,121,122,123,124]
Case90_Day29 = [115,116,117,118,119]
Case133_Day25 = [111,112,113]
Case7 = []
Case43 = []
Case81 = []
Case85 = []
Case90 = []
Case133 = []
Case138 = []
for i,row in tqdm (df.iterrows (), total = len (df)) :
    if row.id.rsplit ("_",2)[0] == 'case7_day0':
        Case7.append (i)
    elif row.id.rsplit ("_",2)[0] == 'case43_day18' or row.id.rsplit ("_",2)[0] == 'case43_day26' :
        Case43.append (i)
    elif row.id.rsplit ("_",2)[0] == 'case81_day30' :
        Case81.append (i)
    elif row.id.rsplit ("_",2)[0] == 'case138_day0' :
        if int (row.id.rsplit ("_",1)[-1]) in Case138_Day0 :
            Case138.append (i)
df.drop (index = Case7 + Case43 + Case81 + Case138 ,inplace = True)
df = df.reset_index (drop = True)

print (df.head ())
print (len (df))
print (df['empty'].value_counts())

  0%|          | 0/38496 [00:00<?, ?it/s]

                         id  case  \
0  case123_day20_slice_0001   123   
1  case123_day20_slice_0002   123   
2  case123_day20_slice_0003   123   
3  case123_day20_slice_0004   123   
4  case123_day20_slice_0005   123   

                                          image_path  \
0  /kaggle/input/uw-madison-gi-tract-image-segmen...   
1  /kaggle/input/uw-madison-gi-tract-image-segmen...   
2  /kaggle/input/uw-madison-gi-tract-image-segmen...   
3  /kaggle/input/uw-madison-gi-tract-image-segmen...   
4  /kaggle/input/uw-madison-gi-tract-image-segmen...   

                                           mask_path  empty  
0  /kaggle/input/uwmgi-mask-dataset/np/uw-madison...   True  
1  /kaggle/input/uwmgi-mask-dataset/np/uw-madison...   True  
2  /kaggle/input/uwmgi-mask-dataset/np/uw-madison...   True  
3  /kaggle/input/uwmgi-mask-dataset/np/uw-madison...   True  
4  /kaggle/input/uwmgi-mask-dataset/np/uw-madison...   True  
37851
empty
True     21553
False    16298
Name: count, dtype: int64


# 🔨 Utility
这里主要实现的功能是根据文件路径加载 $np$ 格式的样本和标签、输入 $np$ 格式的样本和标签(可选)并展示

In [9]:
def load_img (path):
    '''
        根据文件路径读取图像, 并复制成三份, 成三通道图像
    '''
    img = cv2.imread (path, cv2.IMREAD_UNCHANGED)
    img = np.tile (img[...,None], [1, 1, 3]) # gray to rgb
    img = img.astype ('float32') # original is uint16
    mx = np.max (img)
    if mx:
        img /= mx # scale image to [0, 1]
    return img

def load_msk (path):
    '''
        根据文件路径读取掩码图像 (已经处理成 numpy 格式了)
    '''
    msk = np.load (path)
    msk = msk.astype('float32')
    msk /= 255.0
    return msk
    
def png2tensor (img_path, msk_path = None) :
    '''
        给定图片路径、掩码路径, 转为 tensor 类型
    '''
    img = load_img (img_path)
    if msk_path == None :
        data = data_transforms['valid'] (image = img)
        img = data['image']
        img = torch.tensor (np.transpose (img, (2, 0, 1)))
        return img
    else :
        msk = load_msk (msk_path)
        data = data_transforms['valid'] (image = img, mask = msk)
        img = data['image']
        msk = data['mask']
        img = torch.tensor (np.transpose (img, (2, 0, 1)))
        msk = torch.tensor (np.transpose (msk, (2, 0, 1)))
        return img, msk

def show_img (img, mask = None):
    ''' 
        展示图像, 如果输入了掩码图像则一起展示
        输入的图像要提前处理成 numpy 格式
    '''
#     用于医学影像增强
#     clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
#     img = clahe.apply(img)
#     plt.figure(figsize=(10,10))
    plt.imshow (img, cmap = 'bone')
    
    if mask is not None:
        plt.imshow (mask, alpha = 0.5) # alpha 参数, 透明度
        handles = [Rectangle ((0,0),1,1, color=_c) for _c in [(0.667,0.0,0.0), (0.0,0.667,0.0), (0.0,0.0,0.667)]]
        labels = ["Large Bowel", "Small Bowel", "Stomach"]
        plt.legend (handles, labels) # 标签与掩码颜色相对应
    plt.axis('off')

# 📁 Create Folds
$K$ 折交叉检验, 把整个数据集分成 $n \_ fold$ 份, 训练时选择其中一份作为测试集, 其余的作为训练集

In [10]:
skf = StratifiedGroupKFold (n_splits = CFG.n_fold, shuffle = True, random_state = CFG.seed)
for fold, (train_idx, val_idx) in enumerate (skf.split (df, df['empty'], groups = df["case"])):
    df.loc[val_idx, 'fold'] = fold
    print (train_idx.shape, val_idx.shape)
display (df.groupby(['fold','empty'])['id'].count())

(30144,) (7707,)
(30107,) (7744,)
(29483,) (8368,)
(29323,) (8528,)
(32347,) (5504,)


fold  empty
0.0   False    3366
      True     4341
1.0   False    3317
      True     4427
2.0   False    3557
      True     4811
3.0   False    3754
      True     4774
4.0   False    2304
      True     3200
Name: id, dtype: int64

# 🍚 Dataset
$pytorch$ 的 $Dataset$ 类可自定义, 需要实现 $\_ \_ len \_ \_$ 和 $\_ \_ getitem \_ \_$ 方法, 前者用来获取数据及大小, 后者用于指定下标, 返回对应的样本

In [11]:
class BuildDataset(torch.utils.data.Dataset):
    '''
        数据集总体可以分为两类, 有标签和无标签
        对于训练集, 需要进行一系列数据增强的操作, 包括图片的旋转、水平翻转
        对于测试集和验证集, 则不需要数据增强
        所以需要设计两种 transforms
        训练集、测试集、验证集均需要固定图片尺寸 (224, 224)
    '''
    def __init__(self, df, label = True, transforms = None) :
        self.df         = df
        self.label      = label
        self.img_paths  = df['image_path'].tolist()
        self.msk_paths  = df['mask_path'].tolist()
        self.transforms = transforms
        
    def __len__(self) :
        return len (self.df)
    
    def __getitem__(self, index) :
        img_path  = self.img_paths[index]
        img = []
        img = load_img (img_path)
        
        if self.label :
            msk_path = self.msk_paths[index]
            msk = load_msk (msk_path)
            if self.transforms:
                data = self.transforms (image = img, mask = msk)
                img  = data['image']
                msk  = data['mask']
            img = np.transpose (img, (2, 0, 1))
            msk = np.transpose (msk, (2, 0, 1))
            return torch.tensor (img), torch.tensor (msk)
        else :
            if self.transforms :
                data = self.transforms (image = img)
                img  = data['image']
            img = np.transpose (img, (2, 0, 1))
            return torch.tensor (img)

# 🌈 Augmentations

In [12]:
data_transforms = {
    "train": A.Compose ([
        # 固定图像尺寸
        A.Resize (*CFG.img_size, interpolation = cv2.INTER_NEAREST),
        # 随机水平翻转
        A.HorizontalFlip (p = 0.5), 
#         A.VerticalFlip(p=0.5),
        # 随机平移、缩放、旋转
        A.ShiftScaleRotate (shift_limit = 0.0625, scale_limit = 0.05, rotate_limit = 10, p = 0.5), 
        A.OneOf ([
            # 以下两种操作以 p 的概率随机选择其中一种
            # 网格畸变
            A.GridDistortion (num_steps = 5, distort_limit = 0.05, p = 1.0), 
#             A.OpticalDistortion(distort_limit=0.05, shift_limit=0.05, p=1.0),
            # 弹性变换, 扭曲图像的同时保持图像的连续性
            A.ElasticTransform (alpha = 1, sigma = 50, alpha_affine = 50, p = 1.0)
#             A.ElasticTransform (alpha = 20, sigma = 5, alpha_affine = 20, p = 1.0)
        ], p = 0.25),
        # 对图片进行随机遮挡, 遮挡区域用固定值或者随机值填充
        A.CoarseDropout (max_holes = 8, max_height = CFG.img_size[0] // 20, 
                        max_width = CFG.img_size[1] // 20, min_holes = 5, 
                        fill_value = 0, mask_fill_value = 0, p = 0.5),
        ], p = 1.0),
    
    "valid": A.Compose ([
        A.Resize (*CFG.img_size, interpolation = cv2.INTER_NEAREST),
        ], p = 1.0)
}

# 🍰 DataLoader

In [13]:
def DataLoad (fold) :
    '''
        K 折交叉验证, 指定验证集为哪一折, 划分训练集和验证集
    '''
    # drop 参数表示是否删除原来的索引列
    train_df = df.query ("fold!=@fold & empty==0").reset_index (drop = True)
    valid_df = df.query ("fold==@fold & empty==0").reset_index (drop = True)
    train_dataset = BuildDataset (train_df, transforms = data_transforms['train'])
    valid_dataset = BuildDataset (valid_df, transforms = data_transforms['valid'])

    # pin_memory 参数表示是否将加载的数据常驻内存
    # drop_last 参数表示是否丢弃最后一个批次 (可能不满 batch_size 个样本)
    train_loader = DataLoader (train_dataset, batch_size = CFG.train_bs, num_workers = 4, 
                               shuffle = True, pin_memory = True, drop_last = False)
    valid_loader = DataLoader (valid_dataset, batch_size = CFG.valid_bs, num_workers = 4, 
                               shuffle = False, pin_memory = True)
    
    return train_loader, valid_loader

train_loader, valid_loader = DataLoad (fold = 0)
imgs, msks = next (iter (train_loader))
imgs.size (), msks.size ()

# 📈 Visualization
展示一个批次内部分图像及其掩码

In [None]:
def plot_batch (imgs, msks, size = 3) :
    '''
        输入的 tensor 类型的数据, 要先把通道维度换到最后一维, 然后转成 numpy 类型
    '''
    plt.figure (figsize = (5 * size, 5))
    for idx in range (size) :
        plt.subplot (1, size, idx + 1)
        img = imgs[idx,].permute ((1, 2, 0)).numpy () * 255.0
        img = img.astype ('uint8')
        msk = msks[idx,].permute ((1, 2, 0)).numpy () * 255.0
        show_img (img, msk)
    plt.tight_layout ()
    plt.show ()

plot_batch (imgs, msks, size = 5)

In [None]:
import gc
# 强制执行一次垃圾回收, 检测并释放不再被引用的对象所占用的内存空间
gc.collect ()

45

# 📦 Model
模型的搭建, 直接使用了 $smp$ 库中的 $unet$ 模型。
可以选择模型的编码器、模型的预训练，设置输入通道数、分类数

In [None]:
import segmentation_models_pytorch as smp

def build_model (backbone = CFG.backbone):
#     model = smp.Unet(
#         encoder_name = CFG.backbone,      # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
#         encoder_weights = "imagenet",     # use `imagenet` pre-trained weights for encoder initialization
#         in_channels = 3,                  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
#         classes = CFG.num_classes,        # model output channels (number of classes in your dataset)
#         activation = None,
#     )
#     segmentation_head = dict (
#         in_channels = 512,
#         out_channels = 3,
#         activation = None,
#         kernel_size = 3,
#         upsampling = "upsample",
#         upsample_scale = 2,
#     )
    model = smp.Unet (
        # 更换成 resnet50
        encoder_name = backbone, 
        # 定义编码器网络深度, 设为 4，加速训练过程
#         encoder_depth = 4,
        # 不设预训练权重, 从零开始训练!
        encoder_weights = None,
        # 没有 decoder_name, 由模型框架 Unet 决定
        # 给定图片是单通道的, 读入时叠加成三通道
        in_channels = 3, 
        # 分类类别参数, 对于 UWMGI 数据集是 3
        classes = CFG.num_classes,
        activation = None,
#         segmentation_head = segmentation_head,
    )
    model.to (CFG.device)
    return model

def load_model (backbone, path) :
    '''
        用于测试模型前加载训练过程中表现最好的模型
    '''
    model = build_model (backbone)
    model.load_state_dict (torch.load (path, map_location = torch.device('cpu')))
    model.eval ()
    return model

# 🔧 Loss Function
$Tversky$ 指数 $=$ $S(P, G, \alpha, \beta) = \frac{|P \cap G|}{|P \cap G| + \alpha |P \setminus G| + \beta |G \setminus P|}$, <br/> 当 $\alpha = \beta = 0.5$ 时, $Tversky$ 系数等价于 $Dice$ 系数;

$Jaccard$ 系数与 $IOU$ 系数的计算公式是相同的, 只是应用场景不尽相同, 才有不同的名字。<br/>
$Jaccard$ 用于比较样本集的相似性与多样性，是一种统计量；<br/>
$IOU$ 系数通常用于比较图像分割等任务中预测框与真实框的接近程度，是一种评价指标。<br/>
$J(P, G) = IOU(P, G) = \frac{|P \cap G|}{|P \cup G|}$

$LovaszLoss$ 系数就是 $1 - Tversky$...

In [None]:
BCELoss     = smp.losses.SoftBCEWithLogitsLoss ()
TverskyLoss = smp.losses.TverskyLoss (mode = 'multilabel', log_loss = False)

def dice_coef (y_true, y_pred, thr = 0.5, dim = (2,3), epsilon = 0.001) :
    '''
        图像 A, B 的 dice 系数等于 A 和 B 的掩码区域的交集的面积大小乘以 2
        再除以 A 的掩码区域面积与 B 的掩码区域面积的和
        可以用来衡量两个图像的相似程度
    '''
    y_true = y_true.to (torch.float32)
    y_pred = (y_pred > thr).to (torch.float32)
    inter = (y_true * y_pred).sum (dim = dim)
    den = y_true.sum (dim = dim) + y_pred.sum (dim = dim)
    dice = ((2 * inter + epsilon) / (den+epsilon)).mean (dim = (1,0))
    return dice

def iou_coef (y_true, y_pred, thr = 0.5, dim = (2, 3), epsilon = 0.001) :
    '''
        iou 系数与 dice 系数类似, 计算公式中分子均为两图像的交集, 
        dice 系数的计算公式的分母为两图像面积和
        iou 系数的计算公式的分母为两图像并集
    '''
    y_true = y_true.to (torch.float32)
    y_pred = (y_pred > thr).to (torch.float32)
    inter = (y_true * y_pred).sum (dim = dim)
    union = (y_true + y_pred - y_true * y_pred).sum (dim = dim)
    iou = ((inter + epsilon) / (union + epsilon)).mean (dim = (1,0))
    return iou

def criterion (y_pred, y_true) :
    return 0.5 * BCELoss (y_pred, y_true) + 0.5 * TverskyLoss (y_pred, y_true)

# 🚄 Training Function
用到了自动混合精度训练和损失放大<br/>
$tqdm$ 库用于展示模型训练的进度条

In [None]:
def train_one_epoch (model, optimizer, scheduler, dataloader, device, epoch) :
    # model.train () 与 model.eval () 主要影响网络中 BatchNorm 层和 Dropout 层
    # 前者启用, 后者不启用; model.eval () 模式下不会进行反向传播, 但是梯度的计算照常进行
    # model.eval () 配合 torch.no_grad () 使用, 加速计算过程、节省显存空间
    model.train ()
    # 创建一个 GradScaler 对象, 可以在迭代过程中动态估计损失放大的倍数
    scaler = amp.GradScaler ()
    
    dataset_size = 0
    running_loss = 0.0
    
    # 将一个可迭代对象作为参数传入，然后返回一个包装后的可迭代对象，
    # 可以像平常一样对其进行迭代，每次请求一个值时，都会打印一个进度条。
    pbar = tqdm (enumerate (dataloader), total = len (dataloader), desc = 'Train ')
    for step, (images, masks) in pbar:         
        images = images.to (device, dtype = torch.float)
        masks  = masks.to (device, dtype = torch.float)
        
        batch_size = images.size (0)
        
        # 前向传播过程中自动混合精度训练
        with amp.autocast (enabled = True):
            y_pred = model (images)
            loss   = criterion (y_pred, masks)
            # n_accumulate 参数的含义是每若干个批次后进行一次梯度更新
            loss   = loss / CFG.n_accumulate
        # 放大损失、反向传播
        scaler.scale (loss).backward ()
    
        if (step + 1) % CFG.n_accumulate == 0 :
            # 根据原放大倍数，梯度更新时缩小相应的倍数
            scaler.step (optimizer)
            # 更新损失放大的倍数
            scaler.update ()

            optimizer.zero_grad ()

            if scheduler is not None :
                # 更新学习率
                scheduler.step ()
                
        running_loss += (loss.item () * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        
        mem = torch.cuda.memory_reserved () / 1E9 if torch.cuda.is_available () else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix (train_loss = f'{epoch_loss : 0.4f}',
                        lr = f'{current_lr : 0.5f}',
                        gpu_mem = f'{mem : 0.2f} GB')
    torch.cuda.empty_cache ()
    gc.collect ()
    
    return epoch_loss

# 👀 Validation Function

In [None]:
@torch.no_grad ()
def valid_one_epoch (model, dataloader, device, epoch):
    model.eval ()
    
    dataset_size = 0
    running_loss = 0.0
    
    val_scores = []
    
    pbar = tqdm (enumerate (dataloader), total = len (dataloader), desc = 'Valid ')
    for step, (images, masks) in pbar :
        images  = images.to (device, dtype = torch.float)
        masks   = masks.to (device, dtype = torch.float)
        
        batch_size = images.size (0)
        
        y_pred  = model (images)
        loss    = criterion (y_pred, masks)
        
        running_loss += (loss.item () * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        
        y_pred = nn.Sigmoid () (y_pred)
        val_dice = dice_coef (masks, y_pred).cpu ().detach ().numpy ()
        val_jaccard = iou_coef (masks, y_pred).cpu ().detach ().numpy ()
        val_scores.append ([val_dice, val_jaccard])
        
        mem = torch.cuda.memory_reserved () / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix (valid_loss = f'{epoch_loss : 0.4f}',
                        lr = f'{current_lr : 0.5f}',
                        gpu_memory = f'{mem : 0.2f} GB')
    val_scores  = np.mean (val_scores, axis = 0)
    torch.cuda.empty_cache ()
    gc.collect()
    
    return epoch_loss, val_scores

# 🏃 Run Training

In [None]:
def run_training (model, optimizer, scheduler, device, num_epochs) :
    '''
        这里主要做一些记录训练日志、保存最优模型等工作
    '''
    # wandb 自动记录 PyTorch 模型的权重、偏置和梯度, log_freq 设置记录的频率 (每 log_freq 批次记录一次)
    wandb.watch (model, log_freq = 100)
    
    # 打印 GPU 的名字
    if torch.cuda.is_available ():
        print("cuda: {}\n".format(torch.cuda.get_device_name ()))
    
    start = time.time ()
    best_model_wts = copy.deepcopy (model.state_dict())
    best_dice      = -np.inf
    best_epoch     = -1
    history = defaultdict (list)
    
    for epoch in range (1, num_epochs + 1): 
        gc.collect ()
        print (f'Epoch {epoch} / {num_epochs}', end = '')
        train_loss = train_one_epoch (model, optimizer, scheduler, 
                                           dataloader = train_loader, 
                                           device = CFG.device, epoch = epoch)
        
        val_loss, val_scores = valid_one_epoch (model, valid_loader, 
                                                 device = CFG.device, 
                                                 epoch = epoch)
        val_dice, val_jaccard = val_scores
    
        history['Train Loss'].append (train_loss)
        history['Valid Loss'].append (val_loss)
        history['Valid Dice'].append (val_dice)
        history['Valid Jaccard'].append (val_jaccard)
        
        # Log the metrics
        wandb.log ({"Train Loss" : train_loss, 
                   "Valid Loss" : val_loss,
                   "Valid Dice" : val_dice,
                   "Valid Jaccard" : val_jaccard,
                   "LR" : scheduler.get_last_lr ()[0]})
        
        print(f'Valid Dice: {val_dice : 0.4f} | Valid Jaccard: {val_jaccard : 0.4f}')
        
        # deep copy the model
        if val_dice >= best_dice:
            print(f"{c_}Valid Score Improved ({best_dice:0.4f} ---> {val_dice:0.4f})")
            best_dice    = val_dice
            best_jaccard = val_jaccard
            best_epoch   = epoch
            run.summary["Best Dice"]    = best_dice
            run.summary["Best Jaccard"] = best_jaccard
            run.summary["Best Epoch"]   = best_epoch
            best_model_wts = copy.deepcopy (model.state_dict ())
            PATH = f"best_epoch-{fold:02d}.bin"
            torch.save (model.state_dict (), PATH)
            # Save a model file from the current directory
            wandb.save (PATH)
            print (f"Model Saved{sr_}")
            
        last_model_wts = copy.deepcopy (model.state_dict ())
        PATH = f"last_epoch-{fold : 02d}.bin"
        torch.save (model.state_dict (), PATH)
            
        print (); print ()
    
    end = time.time()
    time_elapsed = end - start
    print ('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format (
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print ("Best Score: {:.4f}".format (best_jaccard))
    
    # load best model weights
    model.load_state_dict (best_model_wts)
    
    return model, history

# 🔍 Optimizer
$torch.optim.CosineAnnealingLR ()$ 方法是一个用于设置学习率的调度器，它可以根据一个余弦退火的策略，动态地调整优化器的学习率。它的原理是在每个周期内，将学习率从一个最大值降低到一个最小值，然后在下一个周期内重复这个过程。这样可以避免学习率过大或过小导致的收敛困难或局部最优;

$torch.optim.CosineAnnealingWarmRestarts ()$ 与 $torch.optim.CosineAnnealingLR ()$ 类似, 主要区别是前者的每个周期长度是动态调整的, 后者是固定的;

$torch.optim.ReduceLROnPlateau ()$ 方法是一个用于设置学习率的调度器，它可以根据一个指标（如损失函数或准确率）是否停止改善，来动态地调整优化器的学习率。可以指定学习率的衰减因子，耐心值，阈值，冷却时间，最小学习率等选项;

$torch.optim.ExponentialLR ()$ 方法是一个用于设置学习率的调度器，它可以根据一个指数衰减的策略，动态地调整优化器的学习率。

In [None]:
def fetch_scheduler (optimizer):
    if CFG.scheduler == 'CosineAnnealingLR' :
        scheduler = lr_scheduler.CosineAnnealingLR (optimizer, T_max = CFG.T_max, 
                                                   eta_min = CFG.min_lr)
    elif CFG.scheduler == 'CosineAnnealingWarmRestarts' :
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts (optimizer, T_0 = CFG.T_0, 
                                                             eta_min = CFG.min_lr)
    elif CFG.scheduler == 'ReduceLROnPlateau' :
        scheduler = lr_scheduler.ReduceLROnPlateau (optimizer,
                                                   mode = 'min',
                                                   factor = 0.1,
                                                   patience = 7,
                                                   threshold = 0.0001,
                                                   min_lr = CFG.min_lr,)
    elif CFG.scheduer == 'ExponentialLR' :
        scheduler = lr_scheduler.ExponentialLR (optimizer, gamma = 0.85)
    elif CFG.scheduler == None:
        return None
        
    return scheduler

In [None]:
# model = build_model ()
# optimizer = optim.Adam (model.parameters (), lr = CFG.lr, weight_decay = CFG.wd)
# scheduler = fetch_scheduler (optimizer)


# 🚅 Training

In [None]:
# for fold in range (1):
#     print (f'#'*15)
#     print (f'### Fold: {fold}')
#     print (f'#'*15)
#     run = wandb.init (project ='Unet-For-UWMGI', 
#                      config = {k:v for k, v in dict(vars(CFG)).items() if '__' not in k},
#                      anonymous = anonymous,
#                      name = f"fold-{fold}|dim-{CFG.img_size[0]}x{CFG.img_size[1]}|model-{CFG.model_name}",
#                      group = CFG.comment,
#                     )
#     train_loader, valid_loader = DataLoad (fold = fold)
#     model     = build_model ()
#     optimizer = optim.Adam (model.parameters(), lr = CFG.lr, weight_decay = CFG.wd)
#     scheduler = fetch_scheduler (optimizer)
#     model, history = run_training (model, optimizer, scheduler,
#                                   device = CFG.device,
#                                   num_epochs = CFG.epochs)
#     run.finish ()
#     display (ipd.IFrame (run.url, width = 1000, height = 720))

# 🔭 Prediction
对训练好的模型进行预测, 选了若干张没有标签的样本, 输入到模型中, 再和输出的掩码重叠一起打印

In [None]:
# def Predict (model, imgs, msks, size, plot_img = True) :
#     '''
#         输入模型, tensor 类型的图像, 标签, 即个数,
#         输出三行(或两行)图像, 
#         第一行只有图像, 
#         第二行为图像和标签的重叠,
#         第三行为图像和预测标签的重叠
#     '''
#     imgs = imgs.to (CFG.device, dtype = torch.float)
#     msks = msks.cpu ().detach () if not msks == None else None
#     with torch.no_grad () :
#         preds = model (imgs)
#         # 事实上这一步仅仅是用来把所有大于 0 的元素都置 1 了
#         # 因为类别仅有三种, 刚好可以分别用三个通道单独表示, 标签也是这么做的
#         # preds = (nn.Sigmoid ()(preds) > 0.5).double ().cpu ().detach ()
#         preds = (nn.Sigmoid ()(preds)).double ().cpu ().detach ()
#         for idx, pred in enumerate (preds) :
#             pred = pred.permute (1, 2, 0)
#             label = torch.argmax (pred, dim = 2)
#             pred = (pred > 0.5)
#             npred = F.one_hot (label, CFG.num_classes)
#             pred = pred * npred
#             pred = pred.permute (2, 0, 1)
#             preds[idx] = pred
#     imgs = imgs.cpu ().detach ()
#     if not plot_img : return preds
#     plot_batch (imgs, torch.zeros_like (imgs).cpu (), size = size)
#     if not msks == None : plot_batch (imgs, msks, size = size)
#     plot_batch (imgs, preds, size = size)

# def GetPartImageAndMask (img, msk, lef_top = (32, 96), rig_bot = (128, 192)) :
#     '''
#         截取一对样本、标签的一个矩形区域
#         输入 3D tensor, shape = (通道数, 长, 宽)
#         lef_tor, tuple, 左上角坐标
#         rig_bot, typle, 右下角坐标
#     '''
#     x0, y0 = lef_top
#     x1, y1 = rig_bot
#     img = img[: , x0 : x1, y0 : y1]
#     msk = msk[: , x0 : x1, y0 : y1]
#     lef_pad = rig_pad = (CFG.img_size[1] - (y1 - y0)) // 2
#     top_pad = bot_pad = (CFG.img_size[0] - (x1 - x0)) // 2
#     pad = (lef_pad, rig_pad, top_pad, bot_pad)
#     img = F.pad (img, pad, mode = "constant", value = 0)
#     msk = F.pad (msk, pad, mode = "constant", value = 0)
#     return img, msk

# def RandomSave (fold = 0, save_num = 5) :
#     '''
#         从数据集里随机找 save_num 个图像和标签, 保存在 output 里
#     '''
#     df_notEmpty = df.query("fold==@fold & empty==0").reset_index (drop = True)
#     dataset = BuildDataset (df_notEmpty, transforms = data_transforms['valid'])
#     for i in range (save_num) :
#         img_PATH_new = f"slice_{i}.png"
#         msk_PATH_new = f"slice_{i}.npy"
#         id = random.randint (0, dataset.__len__ ())
#         img_PATH_old = dataset.img_paths[id]
#         msk_PATH_old = dataset.msk_paths[id]
# #         print (img_PATH_new, img_PATH_old)
#         shutil.copy (img_PATH_old, img_PATH_new)
#         shutil.copy (msk_PATH_old, msk_PATH_new)
        
# # test_fold = 0
# # model = load_model ("resnet50", f"best_epoch-{test_fold:02d}.bin")
# model = load_model ("resnet50", "/kaggle/input/models-for-unet/model_unet_resnet50_2.bin")  

In [None]:
# def PredictWithLabel (fold, model, df) :
#     '''
#         使用有标签的图像进行测试
#     '''
#     # 选择第一折的有标签的数据作为测试集, 不进行数据增强
#     test_dataset = BuildDataset (df.query ("fold==@fold & empty==0").sample (frac = 1.0), label = True, 
#                             transforms = data_transforms['valid'])
#     test_loader  = DataLoader (test_dataset, batch_size = 5, 
#                             num_workers = 4, shuffle = False, pin_memory = True)
#     # 获取一个批次大小 (5个) 的测试集
#     imgs, msks = next (iter (test_loader))
#     Predict (model, imgs, msks, 5)

# PredictWithLabel (0, model, df)

In [None]:
# def PredictWithNoLabel (model, df) :
#     '''
#         使用无标签的图像进行测试
#     '''
#     test_dataset = BuildDataset (df.query ("empty!=0").sample (frac = 1.0), label = False, 
#                             transforms = data_transforms['valid'])
#     test_loader  = DataLoader (test_dataset, batch_size = 5, 
#                             num_workers = 4, shuffle = False, pin_memory = True)
#     imgs = next (iter (test_loader)).to (CFG.device, dtype = torch.float)
#     Predict (model, imgs, None, 5)

# PredictWithNoLabel (model, df)

In [None]:
# def PredictWithPart (model, size = 5) :
#     '''
#         随机截取若干张图像的局部图像, 并进行预测
#     '''
#     RandomSave (save_num = size)
#     imgs, msks = [], []
#     for i in range (size) :
#         img_PATH = f"/kaggle/working/slice_{i}.png"
#         msk_PATH = f"/kaggle/working/slice_{i}.npy"
#         img, msk = png2tensor (img_PATH, msk_PATH)
#         img, msk = GetPartImageAndMask (img, msk)
#         imgs.append (img); msks.append (msk)
#     imgs = torch.stack (imgs, dim = 0)
#     msks = torch.stack (msks, dim = 0)
#     Predict (model, imgs, msks, size)
    
# PredictWithPart (model)

In [None]:
# def PredictionCompare (img, msk, models, save_path = None, pred_medsam = None) :
#     '''
#         输入图像和标签均为 tensor 类型
#         对比朴素 unet、resnet50_unet、medsam 三种模型的语义分割效果
#     '''
#     figure_cnt = len (models) + 2
#     plt.rcParams['figure.figsize'] = (5 * figure_cnt, 6)
# #     titles = ["image", "mask", "unet", " unet-resnet50", "medsam"]
# #     imgs = [img, msk, pred_unet, pred_unet_resnet50, pred_medsam]
#     preds, titles = [], ['image', 'mask']
#     img = img.unsqueeze (0)
#     msk = msk.unsqueeze (0)
#     for title, model in models.items () :
#         preds.append (Predict (model, img, msk, 1, plot_img = False).squeeze ())
#         titles.append (title)
#     img = img.squeeze ()
#     msk = msk.squeeze ()
#     imgs = [img, msk] + preds
#     if not pred_medsam == None :
#         imgs.append (pred_medsam)
#         titles.append ("medsam")
# #     for idx, image in e
# #         imgs[idx] = imgs[idx,].permute ((1, 2, 0)).numpy () * 255.
# #     imgs[0] = imgs[0].astype ('uint8')
    
#     for i, image in enumerate (imgs) :
#         image = image.permute ((1, 2, 0)).numpy () * 255.0
#         if i == 0 : image = image.astype ('uint8')
#         imgs[i] = image
#         plt.subplot (1, figure_cnt, i + 1)
#         plt.title (titles[i], fontsize = "32")
#         if i == 0 : show_img (img = imgs[0], mask = None)
#         else : show_img (img = imgs[0], mask = image)
#     plt.tight_layout ()
#     if not save_path == None : plt.savefig (save_PATH); print (save_PATH)
#     plt.show ()

# 📃 Save Files
通过上述训练过程，得到了若干种使用不同编码器的 $Unet$ 模型，枚举他们的子集，并选取十张图片进行测试结果对比，保存测试结果图片，并通过 $wandb$ 将测试结果图片打包成压缩包后上传到云端。

In [None]:
model_unet_densenet161 = load_model ("densenet161", "models-for-unet/model_unet_densenet161.bin")
model_unet_efficientnet_b4 = load_model ("efficientnet-b4", "models-for-unet/model_unet_efficientnet_b4.bin")
model_unet_efficientnet_b5 = load_model ("efficientnet-b5", "models-for-unet/model_unet_efficientnet_b5.bin")
model_unet_mit_b2 = load_model ("mit_b2", "models-for-unet/model_unet_mit_b2.bin")
model_unet_mobilenet_v2 = load_model ("mobilenet_v2", "models-for-unet/model_unet_mobilenet_v2.bin")
model_unet_resnet101 = load_model ("resnet101", "models-for-unet/model_unet_resnet101.bin")
model_unet_resnet50 = load_model ("resnet50", "models-for-unet/model_unet_resnet50_2.bin")
model_unet_se_resnet50 = load_model ("se_resnet50", "models-for-unet/model_unet_se_resnet50.bin")
models = {
        #   "densenet161"     : model_unet_densenet161,
        #   "efficientnet-b4" : model_unet_efficientnet_b4,
        #   "efficientnet-b5" : model_unet_efficientnet_b5, 
          "mit-b2"          : model_unet_mit_b2,
        #   "mobilenet-v2"    : model_unet_mobilenet_v2,
        #   "resnet101"       : model_unet_resnet101,
        #   "resnet50"        : model_unet_resnet50,
        #   "se-resnet50"     : model_unet_se_resnet50}
}

def bindigits(n, bits):
    s = bin(n & int("1"*bits, 2))[2:]
    return ("{0:0>%s}" % (bits)).format(s)

# 取所有模型的集合的一个子集, 对同一张图片进行预测结果的可视化对比, 对比十次
# %mkdir -p "figures"
# model_cnt = len (models)
# key_value = list (models.items ())
# RandomSave (fold = random.randint (0, 4), save_num = 10)
# for i in range (1, 1 << model_cnt) :
#     dir_PATH = "figures/" + str (bindigits (i, model_cnt))
#     sub_models = dict ()
#     base_PATH = str ("/kaggle/working/") + str (dir_PATH) + str ("/")
#     cnt = 0
#     for j in range (model_cnt) :
#         if (i >> j) & 1 : 
#             sub_models[key_value[j][0]] = key_value[j][1]
#             if cnt > 0 : base_PATH += "|"
#             cnt += 1
#             base_PATH += str (key_value[j][0])
#     if cnt < 4 : continue
#     isExists = os.path.exists (dir_PATH)
#     if not isExists:
#         os.makedirs (dir_PATH)
#         print("%s 目录创建成功" % dir_PATH)
#     else:
#         print("目录已经存在")
#     for k in range (10) :
#         img_PATH = f"/kaggle/working/slice_{k}.png"
#         msk_PATH = f"/kaggle/working/slice_{k}.npy"
#         img, msk = png2tensor (img_PATH, msk_PATH)
#         img, msk = GetPartImageAndMask (img, msk)
#         save_PATH = base_PATH + f"|{k:02d}.png"
#         PredictionCompare (img, msk, sub_models, save_PATH)
#     gc.collect ()

打包成压缩包并上传到 $wandb$。

In [None]:
# import zipfile, os


# def zipDir (dirpath, outFullName) :
#     """
#     压缩指定文件夹
#     :param dirpath: 目标文件夹路径
#     :param outFullName: 压缩文件保存路径+xxxx.zip
#     :return: 无
#     """
#     zip = zipfile.ZipFile (outFullName, "w", zipfile.ZIP_DEFLATED)
#     for path, dirnames, filenames in os.walk (dirpath):
#         # 去掉目标跟路径，只对目标文件夹下边的文件及文件夹进行压缩
#         fpath = path.replace (dirpath, '')
 
#         for filename in filenames:
#             zip.write (os.path.join (path, filename), os.path.join (fpath, filename))
#     zip.close ()

# zip_PATH = "/kaggle/working/figures"
# out_PATH = "/kaggle/working/results.zip"
# zipDir (zip_PATH, out_PATH)
# run = wandb.init (project ='Unet-For-UWMGI-Analysis', 
#                  anonymous = anonymous,
#                  name = "Comparison for complete scans between all models"
#                 )
# wandb.save ("/kaggle/working/results.zip")

In [None]:
# images  = images.to (device, dtype = torch.float)
# masks   = masks.to (device, dtype = torch.float)

# batch_size = images.size (0)

# y_pred  = model (images)
# loss    = criterion (y_pred, masks)

# running_loss += (loss.item () * batch_size)
# dataset_size += batch_size

# epoch_loss = running_loss / dataset_size

# y_pred = nn.Sigmoid () (y_pred)
# val_dice = dice_coef (masks, y_pred).cpu ().detach ().numpy ()
# val_jaccard = iou_coef (masks, y_pred).cpu ().detach ().numpy ()
# val_scores.append ([val_dice, val_jaccard])
    
# RandomSave (fold = random.randint (0, 4), save_num = 1000)
# for name, test_model in models.items () :
#     val_dice, val_jaccard = 0, 0
#     for k in tqdm (range (1000), total = 1000) :
#         img_PATH     = f"/kaggle/working/slice_{k}.png"
#         msk_PATH     = f"/kaggle/working/slice_{k}.npy"
#         img, msk     = png2tensor (img_PATH, msk_PATH)
#         img, msk     = GetPartImageAndMask (img, msk)
#         images       = img.unsqueeze (0).to (CFG.device, dtype = torch.float)
#         masks        = msk.unsqueeze (0).to (CFG.device, dtype = torch.float)
#         y_pred       = test_model (images)
#         y_pred       = nn.Sigmoid () (y_pred)
#         val_dice    += dice_coef (masks, y_pred).cpu ().detach ().numpy ()
#         val_jaccard += iou_coef (masks, y_pred).cpu ().detach ().numpy ()
#     val_dice /= 1000.0; val_jaccard /= 1000.0
#     print (name, f"dice :{val_dice}", f"iou : {val_jaccard}")

In [None]:
import sys
from torchsummary import summary
from contextlib import contextmanager
 
@contextmanager
def stdout_redirected (to = None) :
    """
    上下文管理器, 用于临时将stdout重定向到文件或控制台。
    使用方法：`with stdout_redirected to='output.txt'):`
    """
    if to is None:
        yield
    else:
        sys.stdout.flush ()
        original_stdout = sys.stdout
        with open (to, 'w') as file:
            sys.stdout = file
            try:
                yield file
            finally :
                sys.stdout = original_stdout

with torch.no_grad () :
    for name, test_model in models.items () :
        output_file = "model_summary_" + name + ".txt"
        print (output_file)
        with stdout_redirected (to = output_file) :
            summary (test_model, input_size = (3, 224, 224), batch_size = 1, device = "cpu")

model_summary_mit-b2.txt


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

# ✂️ Remove Files

In [None]:
# !rm -r ./wandb