In [80]:
import os
import cv2
import time
import warnings
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset, sampler

from torchvision import transforms, models

from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook as tqdm

warnings.filterwarnings("ignore")

NUM_WORKERS = 4
BATCH_SIZE = 4
VALIDATION_SPLIT = .2
IMG_SIZE = 256

In [2]:
dataset_path = '../../Downloads/aptos2019-blindness-detection/'
train_imgs_path = os.path.join(dataset_path, 'train_images/')
test_imgs_path = os.path.join(dataset_path, 'test_images/')

train_df_path = os.path.join(dataset_path, 'train.csv')
test_df_path = os.path.join(dataset_path, 'test.csv')

### Transform functions

In [3]:
class SegmentBloodVessels(object):       
    def __call__(self, image):
        # 1. Extract green channel
        img_green = image[:, :, 1]

        # 2. Use median 3x3 filter
        img_median_filtered = cv2.medianBlur(img_green, 3)
        # 3. Adaptive thresholding
        img_thresholded_adaptive = \
            cv2.adaptiveThreshold(img_median_filtered, 127, \
                                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C, \
                                  cv2.THRESH_BINARY, 97, 1)

        # 4. Use contrast limited adaptive histogram equalisation
        clahe = cv2.createCLAHE(clipLimit=5, tileGridSize=(3, 3))
        img_clahed = clahe.apply(img_thresholded_adaptive)

        # 5. Denoising by median blur
        img_denoised_blur = cv2.medianBlur(img_clahed, 11)
        # 5. Denoising by Wiener filter
        # psf = np.ones((5, 5)) / 25
        # # img_clahed = cv2.filter2D(img_clahed, psf)
        # img_denoised = np.uint8(restoration.wiener(img_clahed, psf, 200))

        # 6. Otsu thresholding
        ret2, img_otsu_thresholded = cv2.threshold(img_denoised_blur, 0, 255, \
                                                   cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # 7. Suppress noise by using morphological opening by
        #    circular SE with radius 8 px
        kernel_circ = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
        img_opened = cv2.morphologyEx(img_otsu_thresholded, cv2.MORPH_OPEN, kernel_circ)
        
        return img_opened

In [4]:
class RandomCrop(object):
    """Crop randomly the grayscaled image in a sample.
    Args:
        output_size (tuple or int): Desired output size. If int, square crop
            is made.
    """
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, image):
        h, w = image.shape
        new_h, new_w = self.output_size

        top = np.random.randint(0, h - new_h)
        left = np.random.randint(0, w - new_w)

        image = image[top: top + new_h,
                      left: left + new_w]

        return image

In [5]:
class ReduceRadius(object):
    """ Reduce radius of RGB image
    """
    def __call__(self, image):       
        h, w, c = image.shape
        frame0 = np.zeros((h, w, c), dtype = np.uint8)

        cv2.circle(frame0, (int(np.floor(w / 2)), int(np.floor(h / 2))),
                           int(np.floor((h * 96) / float(2 * 100))), (255, 255, 255), -1)

        frame1 = cv2.cvtColor(frame0, cv2.COLOR_BGR2GRAY)
        res = cv2.bitwise_and(image, image, mask=frame1)
        return res

In [6]:
class ResizeFundus(object):
    def __init__(self, dim, interpolation=cv2.INTER_AREA):
        self.dim = dim
        self.interpolation = interpolation
        
    def __call__(self, image):
        ret, thresh = cv2.threshold(image, 0, 255, cv2.THRESH_OTSU)
        contours, hierarchy = cv2.findContours(thresh, 1, 2)
        cnt = max(contours, key=cv2.contourArea)

        # resized = resize_img_adaptively(img_segmented, cnt, (256, 256))

        (x,y), radius = cv2.minEnclosingCircle(cnt)

        x = int(x); y = int(y); radius = int(radius)

        if x < radius:
            radius = x
        if y < radius:
            radius = y

        crop_img = image[y - radius:y + radius, x - radius:x + radius]

        resized = cv2.resize(crop_img, self.dim, interpolation=self.interpolation)

        return resized

In [7]:
class RemoveBoundigCircle(object):
    def __init__(self, shift):
        self.shift = shift
        
    def __call__(self, image):        
        dim = image.shape
        mask = np.zeros(dim, np.uint8)

        circle = cv2.circle(mask, (dim[0]//2, dim[1]//2), dim[1]//2 - self.shift, 1, thickness=-1)

        return cv2.bitwise_and(image, circle)

In [8]:
class ToTensor(object):
    def __call__(self, image):
        return torch.from_numpy(image)

In [9]:
def info_image(im):
    # Compute the center (cx, cy) and radius of the eye
    cy = np.uint8(im.shape[0] // 2)
    midline = im[cy,:]
    midline = np.where(midline>midline.mean() / 3)[0]

    if len(midline)>im.shape[1] // 2:
        x_start, x_end = np.min(midline), np.max(midline)
    else: # This actually rarely happens p~1/10000
        x_start, x_end = im.shape[1] // 10, 9 * im.shape[1] // 10
    cx = np.uint8((x_start + x_end) / 2)
    r = np.uint8((x_end - x_start) / 2)
    return cx, cy, r

In [10]:
class DRDataset(Dataset):
    def __init__(self, df, root_dir, phase, transform=None):
        self.df = df
        self.root_dir = root_dir
        self.transform = transform
        self.phase = phase
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):            
        current_img_path = os.path.join(self.root_dir,
                                self.df.iloc[idx, 0])
        
        img = cv2.imread(current_img_path  + '.png')
        
        label = self.df.iloc[idx, 1]                
        
        if self.transform:
            if self.phase == 'train':
                img = self.transform['train'](img)                
            elif self.phase == 'val' or self.phase == 'test':
                img = self.transform['val'](img)
            
        return img, label        

In [81]:
data_transforms = {
    'train': transforms.Compose([
        ReduceRadius(),
        SegmentBloodVessels(),
        ResizeFundus((IMG_SIZE, IMG_SIZE)),
        RemoveBoundigCircle(10),
        RandomCrop(224),        
#         transforms.RandomHorizontalFlip(),
        ToTensor(),
    ]),
    'val': transforms.Compose([
        ReduceRadius(),
        SegmentBloodVessels(),
        ResizeFundus((IMG_SIZE, IMG_SIZE)),
        RemoveBoundigCircle(10),
        ToTensor(),
    ])
}

In [82]:
df = pd.read_csv(train_df_path)[0:50]
data_size = df.shape[0]
train_df, val_df = train_test_split(df, test_size=VALIDATION_SPLIT, 
                                    stratify=df['diagnosis'])

train_dataset = DRDataset(df=train_df, root_dir=train_imgs_path,
                          phase='train', transform=data_transforms)

val_dataset = DRDataset(df=val_df, root_dir=train_imgs_path,
                        phase='val', transform=data_transforms)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, \
                         num_workers=1)

In [83]:
df = pd.read_csv(train_df_path)[0:50]
data_size = df.shape[0]
indices = list(range(data_size))
split = int(np.floor(VALIDATION_SPLIT * data_size))

train_indices, valid_indices = indices[split:], indices[:split]
train_sampler = sampler.SubsetRandomSampler(train_indices)
valid_sampler = sampler.SubsetRandomSampler(valid_indices)

train_dataset = DRDataset(df=train_df, root_dir=train_imgs_path,
                          phase='train', transform=data_transforms)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, 
                         sampler=train_sampler)

In [102]:
indexes = np.arange(data_size)
dataset_length_by_batches = np.uint16(np.ceil(data_size / BATCH_SIZE))

for idx in range(dataset_length_by_batches):
    current_batch = np.zeros(4)
    a = 0
    for i in indexes[idx*BATCH_SIZE : (idx+1)*BATCH_SIZE]:
        print(train_dataset[i][0].shape)
#         current_batch[a] = train_dataset[i]
        a += 1
    print(len(current_batch))

torch.Size([224, 224])
torch.Size([224, 224])
torch.Size([224, 224])
torch.Size([224, 224])


NameError: name 'length' is not defined

In [98]:
arr = np.zeros(4)
arr[1] = 1