In [None]:
#Import packages

import numpy as np
import pandas as pd
from PIL import Image
import os
import path

import torch
import torchvision
from torch.utils import data
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F

In [None]:
import sklearn
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
def prepare_labels(y):
    """One-hot encode a 1-D collection of class labels.

    Originally adapted from https://www.kaggle.com/pestipeti/keras-cnn-starter

    Parameters
    ----------
    y : array-like
        Class labels (ints or strings), one entry per sample.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(n_samples, n_classes)``. Columns follow the
        sorted order of the distinct labels; each row holds a single 1.0.
    """
    values = np.asarray(y).ravel()
    # np.unique sorts the distinct labels; return_inverse gives every sample's
    # position in that sorted list (the integer code LabelEncoder produced).
    classes, integer_encoded = np.unique(values, return_inverse=True)
    # Indexing an identity matrix by class position yields the one-hot rows.
    # This replaces OneHotEncoder(sparse=False): the `sparse` argument was
    # renamed to `sparse_output` and is rejected by recent scikit-learn.
    return np.eye(len(classes), dtype=np.float64)[integer_encoded.ravel()]

In [None]:
# Normally I do this with FastAI, but I want to challenge myself and do it in PyTorch to go deeper :)
# For this cell, I'm borrowing and modifying code from good old forums. But I'll explain it to make sure I understand every line of code :)

# First I'll define a custom dataset for my eye images. I can't use torchvision's ImageFolder because my labels are in a CSV.
class eyeDataset(data.Dataset):
    """Map-style dataset pairing PNG eye images with one-hot diagnosis labels.

    The CSV at ``txt_path`` is read with its first column as the index; each
    index value is the image id, i.e. the file name without the ``.png``
    extension inside ``img_dir``. The ``diagnosis`` column is one-hot encoded
    with ``prepare_labels``.

    Parameters
    ----------
    txt_path : str
        Path to the label CSV (must contain a ``diagnosis`` column).
    img_dir : str
        Directory holding the ``<id>.png`` images.
    transform : callable, optional
        Applied to the loaded PIL image before it is returned.
    """

    def __init__(self, txt_path, img_dir, transform=None):
        # The id code (aka name of the image) becomes the dataframe index
        df = pd.read_csv(txt_path, index_col=0)
        self.img_dir = img_dir
        self.txt_path = txt_path
        # Image names = the index of the dataframe
        self.img_names = df.index.values
        # One-hot labels built from the diagnosis column
        self.y = prepare_labels(df['diagnosis'].values)
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, one_hot_label)`` for the sample at ``index``."""
        # Formatting the id (instead of `id + ".png"`) also works for non-string ids
        img_file = os.path.join(self.img_dir, f"{self.img_names[index]}.png")
        # Image.open is lazy: read the pixels inside the context manager so the
        # file handle is closed right away, and force RGB so every sample has
        # three channels regardless of how the PNG was saved.
        with Image.open(img_file) as raw:
            img = raw.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        return img, self.y[index]

    def __len__(self):
        """Number of samples (one label row per image)."""
        return len(self.y)


In [None]:
# Locations of the training labels (CSV) and the training image folder
label_path, image_path = (
    "../input/aptos2019-blindness-detection/train.csv",
    "../input/aptos2019-blindness-detection/train_images",
)
# No transform yet, so indexing hands back the image exactly as it was opened
ds = eyeDataset(txt_path=label_path, img_dir=image_path)

In [None]:
# Testing it out: grab the first (image, label) pair
img, lab = ds[0]
# Bare expression on the last line so the notebook renders the image
img

In [None]:
#Looking at the size
# NOTE(review): assumes `img` is still the PIL image from the previous cell, for which `.size` is (width, height)
size = img.size
size

In [None]:
# The raw images are huge, so the first iteration trains on a scaled-down copy.
# The upside: progressive resizing later only means raising `ratio`.
#
# Transform order: resize -> center crop to a square -> random horizontal / vertical flips
ratio = 0.10

scaled_height = int(2136 * ratio)
scaled_width = int(3216 * ratio)

transform_bunch = transforms.Compose([
    transforms.Resize([scaled_height, scaled_width]),
    # Square crop whose side equals the scaled height
    transforms.CenterCrop([scaled_height, scaled_height]),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
])

In [None]:
# Rebuild the dataset so that indexing now applies the resize / crop / flip transforms
ds = eyeDataset(label_path, image_path, transform_bunch)

In [None]:
# Pull the first sample again to eyeball the transformed image
img, label = ds[0]
img

In [None]:
# #But we still need to convert to a tensor... and while we're doing that, we might as well normalize the data.

# #When computing the mean and std, we must use only the train images, because including the test images would pollute our numbers

# Shamelessly borrowed the code from online. But I'll only be running it once to get the values

# transform_bunch = transforms.Compose([transforms.Resize([int(2136 * ratio), int(3216 * ratio)]),
#                                       transforms.CenterCrop([int(2136 * ratio),int(2136 * ratio)]),
#                                       transforms.RandomHorizontalFlip(p=0.5),
#                                       transforms.RandomVerticalFlip(p=0.5),
#                                       transforms.ToTensor()])

# ds = eyeDataset(label_path, image_path, transform_bunch)

# loader = data.DataLoader(
#     ds,
#     batch_size=100,
#     num_workers=2,
#     shuffle=False
# )


# mean = 0.
# std = 0.
# nb_samples = 0.
# for data1 in loader:
#     data1 = data1[0]
#     print(data1.shape)
#     batch_samples = data1.size(0)
#     data1 = data1.view(batch_samples, data1.size(1), -1)
#     mean += data1.mean(2).sum(0)
#     std += data1.std(2).sum(0)
#     nb_samples += batch_samples

# mean /= nb_samples
# std /= nb_samples

In [None]:
# mean /= nb_samples
# std /= nb_samples

In [None]:
# nb_samples

In [None]:
# mean.size()

In [None]:
# #Alright so I ran it and I got: (tensor([0.5139, 0.2727, 0.0891]), tensor([0.1533, 0.0909, 0.0400]))
# #Mean, std
# mean, std

In [None]:
# Final pipeline: same resize / crop / flips as before, then convert to a tensor
# and normalize each channel with the mean and std values found earlier
channel_mean = (0.5139, 0.2727, 0.0891)
channel_std = (0.1533, 0.0909, 0.0400)
resized_hw = [int(2136 * ratio), int(3216 * ratio)]
# Square crop whose side equals the resized height
crop_hw = [resized_hw[0], resized_hw[0]]

pipeline_steps = [
    transforms.Resize(resized_hw),
    transforms.CenterCrop(crop_hw),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform_bunch = transforms.Compose(pipeline_steps)

ds = eyeDataset(label_path, image_path, transform=transform_bunch)

In [None]:
# Preview the first sample after the full pipeline.
# ToPILImage expects float tensors in [0, 1]; a normalized tensor is not in that
# range, so undo the normalization (x * std + mean) and clamp before converting.
# The constants must match the ones passed to transforms.Normalize.
img, x = ds[0]
norm_mean = torch.tensor((0.5139, 0.2727, 0.0891)).view(3, 1, 1)
norm_std = torch.tensor((0.1533, 0.0909, 0.0400)).view(3, 1, 1)
im = transforms.ToPILImage()((img * norm_std + norm_mean).clamp(0, 1))
display(im)

In [None]:
# With the dataset ready, wrap it in a DataLoader that serves shuffled mini-batches
bs = 100

dl = data.DataLoader(ds, batch_size=bs, shuffle=True)

In [None]:
# Sanity check: pull a single batch from the loader and print its shapes --> it works!
# The batch is also saved in xx / yy for later tests
h = 0
xx = yy = 0

for x, y in dl:
    xx, yy = x, y
    print(xx.shape, yy.shape)
    break
#     h += 1
#     if h == 10:
#         break

In [None]:
#Now I'll be making the model! It'll take elements off of nn.Module
class cnnmaker(nn.Module):
    """Small CNN classifier: five stride-2 conv blocks followed by one linear layer.

    Each block is Conv2d(3x3, stride 2, reflect padding 1) -> BatchNorm2d -> ReLU
    and doubles the channel count (the ResNet-style block layout, without the
    residual connections).

    Parameters
    ----------
    input_channels : int
        Number of channels in the input images.
    image_size : int or (int, int), optional
        Height/width of the input images; an int means a square image. It is
        only used to size the linear layer. The default (213) with three
        input channels gives the previously hard-coded 4704 features.
    num_classes : int, optional
        Number of output logits (default 5).
    """

    def __init__(self, input_channels, image_size=213, num_classes=5):
        super().__init__()

        def block(in_chan):
            # Halves the spatial size and doubles the channels
            return nn.Sequential(nn.Conv2d(in_chan, in_chan * 2, 3, 2, 1, padding_mode="reflect"),
                                 nn.BatchNorm2d(in_chan * 2),
                                 nn.ReLU())

        n_blocks = 5
        self.model = nn.Sequential(*(block(input_channels * 2 ** i) for i in range(n_blocks)))

        # Work out the flattened feature count instead of hard-coding it, so
        # other channel counts / image sizes (progressive resizing) also work.
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size
        for _ in range(n_blocks):
            # 3x3 conv, stride 2, padding 1: out = floor((in - 1) / 2) + 1
            height = (height - 1) // 2 + 1
            width = (width - 1) // 2 + 1
        flat_features = input_channels * 2 ** n_blocks * height * width

        self.second_model = nn.Sequential(nn.Linear(flat_features, num_classes))

    def forward(self, images):
        """Return raw logits of shape ``(batch, num_classes)`` for ``images``."""
        features = self.model(images)
        # Keep the batch dimension, flatten channels and spatial positions
        return self.second_model(torch.flatten(features, start_dim=1))

In [None]:
# Three input channels -- NOTE(review): assumes the dataset yields 3-channel (RGB) image tensors
model = cnnmaker(3)

In [None]:
# Bare expression: the notebook shows the module's repr (its layer structure)
model

In [None]:
# Count the trainable parameters (only tensors with requires_grad set)
total = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(total)

In [None]:
# testing_outputs = model(xx)
# print(testing_outputs, yy)

In [None]:
# testing_outputs.shape

In [None]:
# BCEWithLogitsLoss applies the sigmoid itself, so the model outputs raw logits.
# NOTE(review): targets are presumably the one-hot rows from the dataset -- confirm this is the intended loss
loss_func = nn.BCEWithLogitsLoss()
# Adam with its default hyperparameters over all of the model's parameters
optimizer = torch.optim.Adam(model.parameters())

In [None]:
# Move the model and the loss module onto the GPU (this cell needs a CUDA device)
model = model.to('cuda')
loss_func = loss_func.to('cuda')

In [None]:
def accuracy(preds, target):
    """Fraction of positions where ``preds`` equals ``target``.

    The elementwise matches are summed and divided by the length of the first
    dimension, so for 1-D inputs this is the share of correct predictions.
    Returns a 0-d tensor.
    """
    hits = preds.eq(target).float()
    return hits.sum() / len(hits)

In [None]:
def train(max_eval_batches=20):
    """Run one training epoch over ``dl``, then print accuracy on a few batches.

    Uses the notebook-level ``model``, ``dl``, ``optimizer``, ``loss_func`` and
    ``accuracy``. Batches are moved to whatever device the model lives on.

    Parameters
    ----------
    max_eval_batches : int, optional
        Number of batches from ``dl`` used for the accuracy estimate
        (default 20, the previously hard-coded value).

    Returns
    -------
    str
        ``"done"`` once the epoch and the accuracy check have finished.
    """
    # Follow the model's device instead of hard-coding 'cuda'
    device = next(model.parameters()).device

    model.train()
    for image, label in dl:
        image = image.to(device, dtype=torch.float)
        label = label.to(device, dtype=torch.float)

        optimizer.zero_grad()
        prediction = model(image)
        loss = loss_func(prediction, label)
        loss.backward()
        optimizer.step()

    # Evaluate in eval mode so BatchNorm uses (and does not update) its running stats
    model.eval()
    total_iter = 0
    total_acc = 0
    with torch.no_grad():
        for image, label in dl:
            image = image.to(device, dtype=torch.float)
            label = label.to(device, dtype=torch.float)
            prediction = model(image)
            # argmax over dim=1 on BOTH tensors: without `dim` the label argmax
            # collapses the whole one-hot batch into a single flat index.
            total_acc += accuracy(torch.argmax(prediction, dim=1).float(),
                                  torch.argmax(label, dim=1).float())
            total_iter += 1
            if total_iter == max_eval_batches:
                break

    # Report even when the loader holds fewer than max_eval_batches batches
    if total_iter:
        print(total_acc / total_iter)
    return "done"

In [None]:
# Four epochs: every train() call makes one full pass over the loader, then checks accuracy
for _ in range(4):
    train()

In [None]:
# for image, label in dl:
#     image, label = image.to('cuda'), label.to('cuda')
    
#     optimizer.zero_grad()
#     prediction = model(image)

#     soft_label = F.one_hot(label,num_classes=5).float()
    
#     print(prediction, soft_label)
    
#     break

#     loss = loss_func(prediction, soft_label)

#     loss.backward()

#     optimizer.step()