# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)
## Angle closure classification Baseline

## Training

- Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`
- Assume `weights` are stored @ `./AGE_challenge Baseline/weights/`
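- In the training phase, we use a standard ResNet34 whose final fully connected layer has a single output followed by a sigmoid, i.e. `sigmoid(fc(1))`
- Each image is split into a left part and a right part; every part is classified on its own, using its own label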

### Download ImageNet pretrained weights

In [1]:
# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification
!rm ../weights/ResNet34_pretrained.tar 
!rm -rf ../weights/ResNet34_pretrained

!wget -P ../weights/ https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar 
!tar xvf ../weights/ResNet34_pretrained.tar -C ../weights/ > /dev/null # silent
!rm ../weights/ResNet34_pretrained/fc*

--2019-08-06 13:36:07--  https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar
Resolving paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)... 220.181.33.44, 220.181.33.43
Connecting to paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)|220.181.33.44|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87470080 (83M) [application/x-tar]
Saving to: ‘../weights/ResNet34_pretrained.tar’


2019-08-06 13:37:13 (1.27 MB/s) - ‘../weights/ResNet34_pretrained.tar’ saved [87470080/87470080]



### Main Code

In [2]:
import os, random, functools, math
import cv2
import numpy as np
import time
from sklearn.metrics import roc_auc_score, confusion_matrix, roc_curve

In [3]:
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as FL
import paddle.fluid.optimizer as FO
fluid.install_check.run_check()

Running Verify Fluid Program ... 
Your Paddle Fluid works well on SINGLE GPU or CPU.
Your Paddle Fluid works well on MUTIPLE GPU or CPU.
Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now


In [4]:
from resnet import *

In [5]:
# Dataset layout: the images live in <root>/ASOCT_Image and the two split
# CSV files sit directly in <root>.
data_root_path = "../datasets/Training100/"
image_path = os.path.join(data_root_path, "ASOCT_Image")

train_file_path, val_file_path = (
    os.path.join(data_root_path, split_csv)
    for split_csv in ("cls_train_split.csv", "cls_val_split.csv")
)

In [6]:
# Images per batch. Every image is split into two samples, so 16 images
# produce 32 samples per step.
BATCH_SIZE = 16
# Worker count and buffer size handed to paddle.reader.xmap_readers.
THREAD = 8
BUF_SIZE = 32

### Define Data Loader

In [7]:
# Real time data augmentation in training

def rotate_image(image, angle=90, scale=1.0):
    '''
    Rotate an image around its center; the output keeps the input's width and height.
    :param image: image to be processed, indexed as (height, width, ...)
    :param angle: Rotation angle in degrees. Positive values mean counter-clockwise rotation (the coordinate origin is assumed to be the top-left corner).
    :param scale: Isotropic scale factor.
    :return: the rotated image
    '''
    height, width = image.shape[0], image.shape[1]
    center = (width / 2, height / 2)
    # 2x3 affine matrix describing the rotation about the image center
    rotation = cv2.getRotationMatrix2D(center, angle, scale)
    return cv2.warpAffine(image, rotation, (width, height))

def vflip_image(image):
    """Flip the image with ``cv2.flip(..., flipCode=1)`` and return the result.

    NOTE(review): in OpenCV a positive flipCode flips around the y-axis,
    i.e. this is a left-right mirror despite the ``vflip`` name -- confirm
    that this is the intended augmentation.
    """
    mirrored = cv2.flip(image, flipCode=1)
    return mirrored

def crop_image(img, target_size, center):
    """Crop a square patch of side ``target_size`` out of ``img``.

    :param img: array indexed as (height, width) or (height, width, channels)
    :param target_size: side length of the square crop, in pixels
    :param center: if true, take the centered crop; otherwise pick the
        top-left corner at random with ``np.random.randint``
    :return: the cropped region (a slice of ``img``, not a copy)
    :raises ValueError: if ``target_size`` exceeds the image height or width
    """
    height, width = img.shape[:2]
    size = target_size
    if size > height or size > width:
        # Without this check a centered crop would compute negative start
        # offsets and silently return a wrong, smaller region.
        raise ValueError(
            "target_size %d exceeds image size (height=%d, width=%d)"
            % (size, height, width))

    if center:
        w_start = (width - size) // 2
        h_start = (height - size) // 2
    else:
        # +1 because the upper bound of randint is exclusive
        w_start = np.random.randint(0, width - size + 1)
        h_start = np.random.randint(0, height - size + 1)

    # Slicing only the first two axes keeps any trailing channel axis intact
    # and also works for 2-D images.
    return img[h_start:h_start + size, w_start:w_start + size]

def split_image(img):
    """Return the left-most and right-most square parts of a 3-D image.

    Both parts are ``height`` columns wide, so they overlap when the image
    is narrower than twice its height.

    :param img: array indexed as (height, width, channels)
    :return: list ``[left, right]`` of slices of ``img``
    """
    height, _width, _channels = img.shape
    left_part = img[:, :height, :]
    right_part = img[:, -height:, :]
    return [left_part, right_part]

In [8]:
# Data reader plus an xmap_readers wrapper so that batches are decoded and augmented by several worker threads

def reader(img_path, file_list, batch_size=32, shuffle=True, shuffle_seed=42):
    """Create a generator factory that yields batches of parsed file entries.

    :param img_path: unused; kept so existing callers keep working
    :param file_list: iterable of ``"<image path>,<left label>,<right label>"``
        strings. It is never modified.
    :param batch_size: number of entries per yielded batch; the last batch
        may be smaller
    :param shuffle: when true, entries are visited in a new random order on
        every call of the returned function; when false, in ``file_list`` order
    :param shuffle_seed: seed of the private random generator used for
        shuffling (``None`` lets NumPy pick a seed)
    :return: a zero-argument function returning a generator of batches, each
        batch being a list of ``[image path, left label, right label]``
    """
    # A private generator makes the shuffling reproducible and independent of
    # the global NumPy random state used by the augmentation code.
    rng = np.random.RandomState(shuffle_seed) if shuffle else None

    def read_file_list():
        # Work on a copy so the caller's list keeps its order.
        lines = list(file_list)
        if shuffle:
            rng.shuffle(lines)

        batch_data = []
        for line in lines:
            single_img_path, l_label, r_label = line.split(",")
            batch_data.append([single_img_path, int(l_label), int(r_label)])
            if len(batch_data) == batch_size:
                yield batch_data
                batch_data = []
        # Emit the incomplete final batch, if any.
        if batch_data:
            yield batch_data
    return read_file_list

def process_batch_data(input_data, mode, rotate=True, flip=True):
    """Load, preprocess and (in training) augment one batch of images.

    Every image is read from disk, scaled to [0, 1], split into its left and
    right parts, and both parts are stacked along the channel axis so that
    they receive identical geometric transforms. The stack is resized to
    256x256 and cropped to 224x224 (random crop in training, center crop
    otherwise).

    :param input_data: iterable of ``(image path, left label, right label)``
    :param mode: ``'train'`` enables random crop / rotation / flip; any other
        value uses a deterministic center crop
    :param rotate: in training, rotate by a random angle of 1..29 degrees
    :param flip: in training, apply ``vflip_image`` with probability 0.5
    :return: list with two ``(CHW float32 image, label)`` tuples per input
        image: the left part with the left label, then the right part with
        the right label
    :raises IOError: if an image cannot be read
    """
    batch_data = []
    for sample in input_data:
        img_file, l_label, r_label = sample

        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Failed to read image: %s" % img_file)
        # Reverse the channel order and scale pixel values to [0, 1].
        img = img[:, :, ::-1].astype('float32') / 255

        img = np.concatenate(split_image(img), axis=-1) # concat at channel dim
        img = cv2.resize(img, (256, 256))

        if mode == 'train':
            img = crop_image(img, target_size=224, center=False)
            if rotate:
                # A plain Python int: the angle must be a scalar.
                angle = int(np.random.randint(1, 30))
                img = rotate_image(img, angle)
            if flip and np.random.randint(0, 2):
                img = vflip_image(img)
        else:
            img = crop_image(img, target_size=224, center=True)

        # HWC -> CHW
        img = img.transpose((2, 0, 1))

        # The first three channels are the left part, the rest the right part.
        batch_data.append((img[:3, :, :], l_label))
        batch_data.append((img[3:, :, :], r_label))

    return batch_data

In [9]:
def data_loader(img_list, img_path, batch_size, order=False, mode='train'):
    """Build a batch reader that preprocesses data with several workers.

    :param img_list: list of ``"<image path>,<left label>,<right label>"`` strings
    :param img_path: forwarded to ``reader``
    :param batch_size: number of images per batch
    :param order: forwarded to ``paddle.reader.xmap_readers`` as ``order``
    :param mode: ``'train'`` enables shuffling and augmentation; any other
        value requests unshuffled, un-augmented data
    :return: the reader created by ``paddle.reader.xmap_readers``
    """
    is_train = (mode == 'train')
    # Number of batches held by paddle.reader.shuffle's shuffle buffer.
    shuffle_buffer_size = 32

    data_reader = reader(img_path, img_list, batch_size, shuffle=is_train)
    mapper = functools.partial(process_batch_data, mode=mode)

    # Shuffling only helps training; evaluation gains nothing from it.
    if is_train:
        data_reader = paddle.reader.shuffle(data_reader, shuffle_buffer_size)

    return paddle.reader.xmap_readers(
        mapper, data_reader, THREAD, BUF_SIZE, order=order)

In [10]:
# Every non-empty line of a split file becomes one entry, prefixed with the
# image directory. Blank lines (e.g. a trailing newline) are skipped because
# they would otherwise produce entries that cannot be parsed later.
with open(train_file_path) as flist:
    train_file_list = [os.path.join(image_path, line.strip())
                       for line in flist if line.strip()]

with open(val_file_path) as flist:
    val_file_list = [os.path.join(image_path, line.strip())
                     for line in flist if line.strip()]

In [11]:
# Sanity check: size of each split, then one raw training entry.
for split_list in (train_file_list, val_file_list):
    print(len(split_list))

print(train_file_list[0])

1296
304
../datasets/Training100/ASOCT_Image/T0047-06.jpg,1,1


In [12]:
# Shuffle the training entries in place once, using NumPy's global random
# state (no seed is set in this cell).
np.random.shuffle(train_file_list)

In [13]:
# Class imbalance: number of training images per class.
# Only the left-part label of each image is counted here.
classes_collaction = [0, 0]
for entry in train_file_list:
    _path, left_label, _right_label = entry.split(",")
    classes_collaction[int(left_label)] += 1

print(classes_collaction)

[1072, 224]


In [14]:
# The fourth argument is the `order` flag that data_loader forwards to
# paddle.reader.xmap_readers.
train_dataloader = data_loader(
    train_file_list, image_path, BATCH_SIZE, order=False, mode='train')
val_dataloader = data_loader(
    val_file_list, image_path, BATCH_SIZE, order=True, mode='val')

### Define model (compute graph)

In [15]:
def network():
    """Build the classification graph in the current default program.

    A ResNet34 with a single output unit is applied to the 'pixel' input, and
    a sigmoid turns the logit into a probability. The loss is the mean log
    loss against the 'label' input; accuracy compares the rounded
    probability with the label.

    :return: list ``[loss, accuracy, predict, reader]`` where ``reader`` is
        the ``fluid.io.PyReader`` feeding 'pixel' and 'label'
    """
    data_shape = [3, 224, 224]

    backbone = ResNet34()

    pixel = FL.data(name='pixel', shape=data_shape, dtype='float32')
    label = FL.data(name='label', shape=[1], dtype='int64')

    logit = backbone.net(pixel, class_dim=1)
    predict = FL.sigmoid(logit)

    py_reader = fluid.io.PyReader(
        feed_list=[pixel, label],
        capacity=64,
        iterable=True,
        use_double_buffer=True)

    # log_loss takes float labels
    float_label = FL.cast(label, "float32")
    loss = FL.mean(FL.log_loss(predict, float_label, epsilon=1e-7))

    # Rounded probability vs. label
    hard_predict = FL.cast(FL.round(predict), "int64")
    is_correct = FL.cast(FL.equal(hard_predict, label), "float32")
    accuracy = FL.mean(is_correct)

    return [loss, accuracy, predict, py_reader]

In [16]:
def calc_auc_numpy(y_pred, y_true):
    """Compute the ROC AUC and print a short diagnostic report.

    Printed: sensitivity, specificity and threshold at the ROC point that
    maximizes ``tpr - fpr``, followed by the confusion matrix obtained with a
    fixed 0.5 threshold.

    :param y_pred: NumPy array of predicted scores
    :param y_true: ground-truth binary labels
    :return: the ROC AUC score
    """
    auc = roc_auc_score(y_true, y_pred)

    fpr, tpr, thresh = roc_curve(y_true, y_pred)
    # Operating point with the largest gap between TPR and FPR
    best = np.argmax(tpr - fpr)

    report = (
        ("Best Sensi: %1.4f", tpr[best]),
        ("Best Speci: %1.4f", 1 - fpr[best]),
        ("Best Thresh: %1.4f", thresh[best]),
    )
    for template, value in report:
        print(template % (value))

    hard_pred = (y_pred > 0.5).astype(np.int_)
    print(confusion_matrix(y_true, hard_pred))

    return auc

In [17]:
def train(use_cuda, params_dirname_prefix, pretrained_model=False, EPOCH_NUM=10):
    """Build the train/validation programs and run the training loop.

    Data comes from the module-level ``train_dataloader`` and
    ``val_dataloader``. After every epoch the validation set is evaluated and
    the persistable variables are saved to
    ``params_dirname_prefix + "_checkpoint/"``. Whenever the validation AUC is
    at least as good as the best one so far they are also saved to
    ``params_dirname_prefix + "_best/"``.

    :param use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU
    :param params_dirname_prefix: path prefix of the output directories
    :param pretrained_model: directory holding one file per pretrained
        variable; only variables with a matching file are loaded. A false
        value skips loading.
    :param EPOCH_NUM: number of training epochs
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    val_prog = fluid.Program()

    with fluid.program_guard(train_prog, startup_prog):
        # fluid.unique_name.guard() to share parameters with test network
        with fluid.unique_name.guard():
            train_loss, train_acc, train_output, train_reader = network()

            optimizer = fluid.optimizer.Adam(learning_rate=1e-4)
            optimizer.minimize(train_loss)

    # Define the prediction (validation) network
    with fluid.program_guard(val_prog, startup_prog):
        # Use fluid.unique_name.guard() to share parameters with train network
        with fluid.unique_name.guard():
            val_loss, val_acc, val_output, val_reader = network()

    val_prog = val_prog.clone(for_test=True)

    train_loss.persistable = True
    train_acc.persistable = True
    val_loss.persistable = True
    val_acc.persistable = True
    val_output.persistable = True

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if pretrained_model:
        def if_exist(var):
            # Load only the variables that have a file in the pretrained dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    train_reader.decorate_sample_list_generator( train_dataloader, places=place )
    val_reader.decorate_sample_list_generator( val_dataloader, places=place )

    def save_params(suffix):
        # Save the persistables of the train program to <prefix><suffix>.
        target_dir = params_dirname_prefix + suffix
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        fluid.io.save_persistables(exe, target_dir, main_program=train_prog)

    # For training test cost
    def train_test(val_prog, val_reader):
        # Returns ([mean loss, mean accuracy] over batches, AUC over all samples).
        count = 0
        accumulated = [0,0]

        prediction = []
        label_values = []

        for tid, val_data in enumerate(val_reader()):
            avg_cost_np = exe.run(
                program=val_prog,
                feed=val_data,
                fetch_list=[val_loss, val_acc, val_output],
                use_program_cache=True)
            # zip stops after loss and accuracy; the third fetch (the
            # predictions) is collected separately below.
            accumulated = [
                x[0] + x[1][0] for x in zip(accumulated, avg_cost_np)
            ]
            prediction.append(avg_cost_np[2])
            label_values.append( np.array(val_data[0]['label']) )
            count += 1

        prediction = np.concatenate(prediction, 0)
        label_values = np.concatenate(label_values, 0)

        auc = calc_auc_numpy(prediction, label_values)

        return [x / count for x in accumulated], auc

    # main train loop.
    def train_loop():
        best_auc = 0.

        for pass_id in range(EPOCH_NUM):
            data_load_time = time.time()
            for step_id, data_train in enumerate(train_reader()):
                # Time spent waiting for the data pipeline
                data_load_costtime = time.time() - data_load_time
                start_time = time.time()
                avg_loss_value = exe.run(
                    train_prog,
                    feed=data_train,
                    fetch_list=[train_loss, train_acc],
                    use_program_cache=True)
                cost_time = time.time() - start_time
                if step_id % 50 == 0:
                    print("Epoch %d, Step %d, Cost %f, Acc %f, Time %f, LoadTime %f" % (
                        pass_id, step_id, avg_loss_value[0], avg_loss_value[1], cost_time, data_load_costtime))
                data_load_time = time.time()

            metrics, auc = train_test(val_prog, val_reader)
            avg_cost_test, accuracy_test = metrics

            print('Test with Epoch {0}, Loss {1:2.4}, Acc {2:2.4}, Auc {3:2.4}'.format(
                pass_id, avg_cost_test, accuracy_test, auc))

            # ">=" means a tie overwrites the earlier best with the later epoch.
            if auc >= best_auc:
                best_auc = auc
                print("\nBest AUC, Checkpoint Saved!\n")
                save_params("_best/")

            save_params("_checkpoint/")
    train_loop()

In [None]:
# The ImageNet-pretrained weights come from:
# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification
# (the ResNet34_pretrained/fc* files have to be removed beforehand)
train(
    use_cuda=True,
    params_dirname_prefix="../weights/classify_weights",
    pretrained_model="../weights/ResNet34_pretrained",
    EPOCH_NUM=20,
)