# GAMMA 挑战赛任务二
## 任务简要介绍
给定图片数据，以及对应的黄斑中央坐标，构造回归模型预测新的图片中黄斑位置。
## 赛题链接
MICCAI2021 Contest - GAMMA: https://aistudio.baidu.com/aistudio/competition/detail/90
## 项目说明
本项目完全是基线中展示的内容，**版本1**为纯基线内容，只是做了一些不伤大雅的修改，让原本没法一键运行的基线可以运行，这个版本对应了2021年10月第1名的成绩。

**版本2**将基线中的Resnet更替为层数更深的Resnet152，这部分是我在11月的头几天运行出来的结果，也提供给大家了~

本项目提供基线模型对应的运行一次后的参数，也提将基线中Resnet更替后的模型参数。具体说明参考下面的表格中的测试记录~

# 测试记录

如果你喜欢我的测试记录，并认为这个项目和记录对你有帮助，欢迎在之后的版本中也更新并公开你的测试记录~

|编号| 版本 | score	|ED | 备注 |
| -------- | -------- | -------- | -------- |-------- |
|0| 版本1     |7.40255	|0.03509    |对基线进行了一点修改，跑通程序,保存为best_model_0.0358，由于本程序只生成了一个best_model可见模型在前次迭代就已经收敛了     |
|1| 版本2     |8.18647		|0.02215	   |使用Resnet152,保存为best_model_0.0194，本程序生成了5个best_model，可见远没有达到收敛     |
|2| 版本2     |8.59959	|0.01628	|在1的基础下，增加读取之前的训练好模型的code模块，接续之前的训练结果进行训练，保存为best_model_0.0145     |
|2| 版本2     |8.69833	|0.01496	|在2的基础下，接续之前的训练结果进行训练，保存为best_model_0.0138。<font color=#ff0000>现在开始收敛速度下降了，并且只出现了四个新文件，也许已经彻底收敛了，无法再寸进了</font>     |


# 正式开始
## 获取数据

In [1]:
! wget https://dataset-bj.cdn.bcebos.com/%E5%8C%BB%E7%96%97%E6%AF%94%E8%B5%9B/task2_Fovea_localization.zip

In [2]:
! unzip -oq /home/aistudio/task2_Fovea_localization.zip

## 导入包

In [18]:
### 导入包

import sys 
sys.path.append('/home/aistudio/external-libraries')
import os
import cv2
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import euclidean_distances 
import matplotlib.pylab as plt

import paddle
import paddle.nn as nn
import paddle.vision.models
from paddle.vision.models import resnet50
from paddle.io import Dataset


## 训练前准备

### 配置

In [1]:
# 统一一些命名
try:
    os.rename('training/fundus color images','training/fundus_image')
    os.rename('training/fovea_localization_training_GT.xlsx','training/Fovea_Location_train.xlsx')
    os.rename('testing/fundus color images','testing/fundus_image')
except:
    print("nothing to run")

nothing to run


In [3]:
### 设置参数

path = 'training/'  # 训练数据路径
images_file = path + 'fundus_image/'
gt_file = path + 'Fovea_Location_train.xlsx'
test_file = 'testing/fundus_image/'  # 测试数据路径
image_size = 256 # 输入图像统一尺寸 (image_size, image_size, 3)
val_ratio = 0.2 # 训练/验证数据划分比例
BATCH_SIZE = 32  # 批大小
iters = 500 # 迭代次数
optimizer_type = 'adam' # 优化器，选手可选用其他优化器，如SGD, RMSprop,...
num_workers = 4 # 加载数据机器数
init_lr = 1e-4 # 初始学习率

### 训练/验证集划分

In [4]:
### 从训练数据中划分训练集和验证集 

filelists = os.listdir(images_file)
train_filelists, val_filelists = train_test_split(filelists, test_size = val_ratio,random_state = 42)
print("Total Nums: {}, train: {}, val: {}".format(len(filelists), len(train_filelists), len(val_filelists)))

Total Nums: 100, train: 80, val: 20


### 数据加载

In [5]:
### 从训练集文件夹中加载每个图像和对应的金标准

class FundusDataset(Dataset):
    def __init__(self, image_file, gt_file=None, filelists=None,  mode='train'):
        super(FundusDataset, self).__init__()
        self.mode = mode
        self.image_path = image_file
        image_idxs = os.listdir(self.image_path)
        self.gt_file = gt_file

        if self.mode == 'train':
            label = {str(int(row['data'])).zfill(4)+'.jpg': row[1:].values 
                        for _, row in pd.read_excel(gt_file).iterrows()}
            self.file_list = [[image_idxs[i], label[image_idxs[i]]] for i in range(len(image_idxs))]
        
        elif self.mode == 'test':
            self.file_list = [[image_idxs[i], None] for i in range(len(image_idxs))]
        
        if filelists is not None:
            self.file_list = [item for item in self.file_list if item[0] in filelists] 
   
    def __getitem__(self, idx):
        real_index, label = self.file_list[idx]
        fundus_img_path = os.path.join(self.image_path, real_index)
        fundus_img = cv2.imread(fundus_img_path)[:, :, ::-1] # BGR -> RGB        
        h,w,c = fundus_img.shape
        if self.mode == 'train':
            label_nor = (float(label[0])/w, float(label[1])/h)
            label_nor = np.array(label_nor).astype('float32').reshape(2)
        fundus_re = cv2.resize(fundus_img,(image_size, image_size))
        img = fundus_re.transpose(2, 0, 1) # H, W, C -> C, H, W
        # print(img.shape)
        # img = fundus_re.astype(np.float32)
        
        if self.mode == 'test':
            return img, real_index, h, w
        if self.mode == 'train':
            return img, label_nor

    def __len__(self):
        return len(self.file_list)


### 网络模型

基线使用ResNet50从眼底图像中提取特征。关于ResNet50的详细介绍可以在https://arxiv.org/pdf/1512.03385.pdf上找到。

ResNet50在PaddlePaddle框架中的代码可以在https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/vision/models/resnet50_cn.html#resnet50 上找到。

**这里我已经把网络格式换为Resnet152了，只想跑通基线可以对版本1进行运行**

In [22]:
class Network(paddle.nn.Layer):
    def __init__(self):
        super(Network, self).__init__()
        # self.resnet = resnet50(pretrained=True, num_classes=0) # remove final fc 输出为[?, 2048, 1, 1]
        self.resnet = paddle.vision.models.resnet152(pretrained=True, num_classes=0)
        self.flatten = paddle.nn.Flatten()
        self.linear_1 = paddle.nn.Linear(2048, 512)
        self.linear_2 = paddle.nn.Linear(512, 256)
        self.linear_3 = paddle.nn.Linear(256, 2)
        self.relu = paddle.nn.ReLU()
        self.dropout = paddle.nn.Dropout(0.2)
    
    def forward(self, inputs):
        # print('input', inputs)
        y = self.resnet(inputs)
        y = self.flatten(y)
        y = self.linear_1(y)
        y = self.linear_2(y)
        y = self.relu(y)
        y = self.dropout(y)
        y = self.linear_3(y)
        y = paddle.nn.functional.sigmoid(y)

        return y

### 功能函数

In [7]:
# 计算欧式距离
def cal_ed(logit, label):
    ed_loss = []
    for i in range(logit.shape[0]):
        logit_tmp = logit[i,:].numpy()
        label_tmp = label[i,:].numpy()
        # print('cal_coordinate_loss_ed', logit_tmp, label_tmp)        
        ed_tmp = euclidean_distances([logit_tmp], [label_tmp])
        # print('ed_tmp:', ed_tmp[0][0])
        ed_loss.append(ed_tmp)
    
    ed_l = sum(ed_loss)/len(ed_loss)
    return ed_l

In [8]:
# 验证过程中计算欧式距离
def cal_ed_val(logit, label):
    ed_loss = []
    for i in range(logit.shape[0]):
        logit_tmp = logit[i,:]
        label_tmp = label[i,:]
        ed_tmp = euclidean_distances([logit_tmp], [label_tmp])
        ed_loss.append(ed_tmp)
    
    ed_l = sum(ed_loss)/len(ed_loss)
    
    return ed_l

In [9]:
# 损失函数
def cal_coordinate_Loss(logit, label, alpha = 0.5):
    """
    logit: shape [batch, ndim]
    label: shape [batch, ndim]
    ndim = 2 represents coordinate_x and coordinaate_y
    alpha: weight for MSELoss and 1-alpha for ED loss
    return: combine MSELoss and ED Loss for x and y, shape [batch, 1]
    """
    alpha = alpha
    mse_loss = nn.MSELoss(reduction='mean')

    mse_x = mse_loss(logit[:,0],label[:,0])
    mse_y = mse_loss(logit[:,1],label[:,1])
    mse_l = 0.5*(mse_x + mse_y)
    # print('mse_l', mse_l)

    ed_loss = []
    # print(logit.shape[0])
    for i in range(logit.shape[0]):
        logit_tmp = logit[i,:].numpy()
        label_tmp = label[i,:].numpy()
        # print('cal_coordinate_loss_ed', logit_tmp, label_tmp)        
        ed_tmp = euclidean_distances([logit_tmp], [label_tmp])
        # print('ed_tmp:', ed_tmp[0][0])
        ed_loss.append(ed_tmp)
    
    ed_l = sum(ed_loss)/len(ed_loss)
    # print('ed_l', ed_l)
    # print('alpha', alpha)
    loss = alpha * mse_l + (1-alpha) * ed_l
    # print('loss in function', loss)
    return loss

In [10]:
### 训练函数

def train(model, iters, train_dataloader, val_dataloader, optimizer, log_interval, evl_interval):
    iter = 0
    model.train()
    avg_loss_list = []
    avg_ED_list = []
    best_ED = sys.float_info.max
    while iter < iters:
        for img, lab in train_dataloader:
            iter += 1
            if iter > iters:
                break
            fundus_imgs = (img / 255.).astype('float32')
            label = lab.astype("float32")

            logits = model(fundus_imgs)
            loss = cal_coordinate_Loss(logits, label)
            # print('loss in train',loss)

            for p,l in zip(logits.numpy(), label.numpy()):
                avg_ED_list.append([p,l])
            
            # print('avg_ED_list', avg_ED_list)
            loss.backward()
            optimizer.step()
            model.clear_gradients()
            avg_loss_list.append(loss.numpy()[0])
            
            if iter % log_interval == 0:
                avg_loss = np.array(avg_loss_list).mean()
                # print(avg_loss)
                avg_ED_list = np.array(avg_ED_list)
                avg_ED = cal_ed_val(avg_ED_list[:, 0], avg_ED_list[:, 1]) # cal_ED
                # print('ed in training', avg_ED)
                avg_loss_list = []
                avg_ED_list = []
                
                print("[TRAIN] iter={}/{} avg_loss={:.4f} avg_ED={:.4f}".format(iter, iters, avg_loss, avg_ED[0][0]))

            if iter % evl_interval == 0:
                avg_loss, avg_ED = val(model, val_dataloader)
                print("[EVAL] iter={}/{} avg_loss={:.4f} ED={:.4f}".format(iter, iters, avg_loss, avg_ED[0][0]))
                if avg_ED <= best_ED:
                    best_ED = avg_ED[0][0]
                    paddle.save(model.state_dict(),
                            os.path.join("best_model_{:.4f}".format(best_ED), 'model.pdparams'))
                model.train()

### 验证函数

def val(model, val_dataloader):
    model.eval()
    avg_loss_list = []
    cache = []
    with paddle.no_grad():
        for data in val_dataloader:
            fundus_imgs = (data[0] / 255.).astype("float32")
            labels = data[1].astype('float32')
            
            logits = model(fundus_imgs)
            for p, l in zip(logits.numpy(), labels.numpy()):
                cache.append([p, l])

            loss = cal_coordinate_Loss(logits, labels)
            avg_loss_list.append(loss.numpy()[0])

    cache = np.array(cache)
    ED = cal_ed_val(cache[:, 0], cache[:, 1])
    avg_loss = np.array(avg_loss_list).mean()

    return avg_loss, ED

## 训练阶段

In [15]:
### 生成训练集和验证集 

train_dataset = FundusDataset(image_file = images_file, 
                       gt_file=gt_file,
                       filelists=train_filelists)

val_dataset = FundusDataset(image_file = images_file, 
                       gt_file=gt_file,
                       filelists=val_filelists)

In [16]:
### 加载数据

train_loader = paddle.io.DataLoader(
    train_dataset,
    batch_sampler=paddle.io.DistributedBatchSampler(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=False),
    # num_workers=num_workers,
    return_list=True,
    use_shared_memory=False
)

val_loader = paddle.io.DataLoader(
    val_dataset,
    batch_sampler=paddle.io.DistributedBatchSampler(val_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=False),
    # num_workers=num_workers,
    return_list=True,
    use_shared_memory=False
)

In [23]:

model = Network()

if optimizer_type == "adam":
    optimizer = paddle.optimizer.Adam(init_lr, parameters=model.parameters())

# criterion = cal_coordinate_Loss()

W1102 17:24:26.792856   104 device_context.cc:362] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 10.1, Runtime API Version: 10.1
W1102 17:24:26.798398   104 device_context.cc:372] device: 0, cuDNN Version: 7.6.
2021-11-02 17:24:42,151 - INFO - unique_endpoints {''}
2021-11-02 17:24:42,152 - INFO - Downloading resnet152.pdparams from https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams
100%|██████████| 355826/355826 [00:05<00:00, 67440.43it/s]
2021-11-02 17:24:47,570 - INFO - File /home/aistudio/.cache/paddle/hapi/weights/resnet152.pdparams md5 checking...


In [30]:
# 接续上次的运行结果
best_model_path = "./best_model_0.0145/model.pdparams"
para_state_dict = paddle.load(best_model_path)
model.set_state_dict(para_state_dict)

In [25]:
### 训练过程
iters = 2000
evl_interval = iters//5

train(model, iters, train_loader, val_loader, optimizer, log_interval=10, evl_interval=100)

## 测试结果

In [32]:
### 测试过程，加载模型参数

best_model_path = "./best_model_0.0138/model.pdparams"
model = Network()
para_state_dict = paddle.load(best_model_path)
model.set_state_dict(para_state_dict)
model.eval()

2021-11-02 22:24:52,818 - INFO - unique_endpoints {''}
2021-11-02 22:24:52,819 - INFO - File /home/aistudio/.cache/paddle/hapi/weights/resnet152.pdparams md5 checking...
2021-11-02 22:24:53,611 - INFO - Found /home/aistudio/.cache/paddle/hapi/weights/resnet152.pdparams


In [27]:
### 生成测试集

test_dataset = FundusDataset(image_file = test_file, mode='test')

In [28]:
### 一张一张测试样本

cache = []
for fundus_img, idx, h, w in test_dataset:
    fundus_img = fundus_img[np.newaxis, ...]    
    fundus_img = paddle.to_tensor((fundus_img / 255.).astype("float32"))    
    logits = model(fundus_img)
    pred_coor = logits.numpy()
    # print(pred_coor)
    x = pred_coor[0][0] * w
    y = pred_coor[0][1] * h
    cache.append([idx.split('.')[0], x, y])

In [29]:
### 将所有测试集中预测结果存到.csv中

submission_result = pd.DataFrame(cache, columns=['data', 'Fovea_X', 'Fovea_Y'])
submission_result[['data', 'Fovea_X', 'Fovea_Y']].to_csv("./Localization_Results.csv", index=False)

# 总结

目前在Resnet152上的运行效果仍不够好，可以有以下几个方向修改：

1. 更替更深层的网络，比如Resnet200
2. 更替loss的设定