# Fine-tuning a CNN model with EyePACS data

In [4]:
import torch
import numpy as np
import pandas as pd
from skimage import io, transform
import matplotlib.pyplot as plt
import os
import sys
import cv2
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

In [5]:
# Report the Python version, the PyTorch version and whether CUDA is available.
print(f'Version info:\n Py: {sys.version_info},\n Torch: {torch.__version__},\n Cuda: {torch.cuda.is_available()}')


Version info:
 Py: sys.version_info(major=3, minor=6, micro=8, releaselevel='final', serial=0),
 Torch: 1.3.0,
 Cuda: True


In [3]:
class RetinaDataset(Dataset):
    """Dataset of retina images with a binarised severity label.

    The annotation CSV is read once at construction time. Its first column
    holds the image file name without extension ('.jpeg' is appended when
    loading) and its second column holds the severity grade.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                to the image of each sample.
        """
        self.transform = transform
        self.root_dir = root_dir
        self.labels_df = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of annotated rows."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Load one sample as ``{'image': ..., 'label': 0 or 1}``.

        The label is 1 when the severity grade is greater than 1, else 0.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_name = self.labels_df.iloc[idx, 0] + '.jpeg'
        picture = io.imread(os.path.join(self.root_dir, file_name))

        # Collapse the severity grade into a binary target.
        grade = self.labels_df.iloc[idx, 1]
        label = int(grade > 1)

        if self.transform:
            picture = self.transform(picture)
        return {'image': picture, 'label': label}
    
class CenterCrop(object):
    """Crop the central window of a fixed size out of an image.

    Args:
        output_size (int or tuple): Desired (height, width) of the crop.
            An int produces a square crop.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, image):
        """Return the centred crop of ``image`` (an H x W [x C] array).

        If the image is smaller than the requested size along an axis, that
        axis is returned uncropped.
        """
        h, w = image.shape[:2]
        new_h, new_w = self.output_size
        # Clamp at 0: a negative offset would be interpreted as an index from
        # the end of the axis and yield an off-centre, truncated window.
        top = max((h - new_h) // 2, 0)
        left = max((w - new_w) // 2, 0)
        return image[top: top + new_h, left: left + new_w]
    

class RandomCrop(object):
    """Crop a randomly positioned window of a fixed size out of an image.

    Args:
        output_size (int or tuple): Desired (height, width) of the crop.
            An int produces a square crop.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, image):
        """Return a random crop of ``image`` (an H x W [x C] array).

        Images smaller than the requested size along either axis are resized
        to the requested size instead of being cropped.
        """
        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        if h < new_h or w < new_w:
            # NOTE(review): skimage's resize returns a float image by default,
            # so this branch may produce a different dtype/range than the
            # crop branch below - confirm downstream transforms cope with it.
            return transform.resize(image, self.output_size)

        # randint's upper bound is exclusive; the +1 makes the last valid
        # offset reachable and avoids a ValueError when the image is exactly
        # the requested size (offset range would otherwise be empty).
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)
        return image[top: top + new_h, left: left + new_w]


class ToTensor(object):
    """Turn an H x W x C numpy image into a C x H x W torch tensor."""

    def __call__(self, image):
        # numpy stores images channel-last, torch expects channel-first,
        # so move the colour axis to the front before wrapping the array.
        channel_first = image.transpose(2, 0, 1)
        tensor = torch.from_numpy(channel_first)
        return tensor
    

In [4]:
def display_examples(ds):
    """Plot samples 0, 10, 20 and 30 of ``ds`` in one row, titled with their labels.

    Args:
        ds: Indexable dataset whose items are dicts with 'image' and
            'label' entries.
    """
    plt.figure(figsize=(10, 10))

    # One subplot column per displayed sample (columns are 1-based).
    for column, index in enumerate(range(0, 40, 10), start=1):
        example = ds[index]
        axis = plt.subplot(1, 4, column)
        plt.tight_layout()
        axis.set_title(f'Sample #{index}- {example["label"]}')
        axis.axis('off')
        plt.imshow(example['image'])

    plt.show()
    
# Helper function to show a batch
def show_batch(sample_batched):
    """Show the images of a batch of samples as a single grid.

    Args:
        sample_batched (dict): Batch whose 'image' entry is a tensor of
            images accepted by ``utils.make_grid``.
    """
    images_batch = sample_batched['image']

    grid = utils.make_grid(images_batch)
    # The grid tensor is channel-first; imshow expects channel-last.
    plt.imshow(grid.numpy().transpose((1, 2, 0)))

In [5]:
# Preprocessing pipeline: random 1000x1000 crop, conversion to a tensor,
# then per-channel normalisation with the given mean / std.
data_transforms = transforms.Compose([
    RandomCrop(1000),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

retina_dataset = RetinaDataset(
    csv_file='/data/simon/trainLabels.csv',
    root_dir='/data/simon/retina_data',
    transform=data_transforms,
)

# 90 % of the samples go to training, the remainder to the held-out split.
train_size = int(0.9 * len(retina_dataset))
test_size = len(retina_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    retina_dataset, [train_size, test_size]
)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
val_loader = DataLoader(test_dataset, batch_size=4, shuffle=True, num_workers=4)

# Prefer the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(f'Dataset info:\n Train size: {train_size},\n Test size: {test_size},\n Device: {device}')

Dataset info:
 Train size: 31613,
 Test size: 3513,
 Device: cuda:0


In [None]:

# Walk over the training batches, printing each batch's tensor sizes,
# and visualise the fourth one before stopping.
for i_batch, sample_batched in enumerate(train_loader):
    print(i_batch, sample_batched['image'].shape,
          sample_batched['label'].shape)

    if i_batch != 3:
        continue

    # Fourth batch reached: draw it once and leave the loop.
    plt.figure()
    show_batch(sample_batched)
    plt.axis('off')
    plt.ioff()
    plt.show()
    break


In [6]:
# Quick check of WeightedRandomSampler: draw 5 of the 6 indices without
# replacement, using the listed per-index weights, and list the drawn indices.
list(torch.utils.data.WeightedRandomSampler([0.1, 0.9, 0.4, 0.7, 3.0, 0.6], 5, replacement=False))




[4, 1, 5, 3, 2]