In [1]:
import sys
sys.path.insert(0, '..')

import torch.nn as nn
from torch.utils.data import DataLoader
from utils.eye_dataset import *
from eye_classifier import *
import torchvision.transforms as transforms

### The Training Dataset

We use the utility class EyeImageDataset to load the training dataset from the metadata CSV file and
the target image folder.

The training images will be loaded as needed due to memory constraints.

First, we apply transforms that resize and center-crop our images to 512x512.

Next, we convert the images to tensors and normalize them; the dataset is then ready to be used for training / testing.

In [2]:
# Locations of the ODIR-5K images and of the metadata CSV describing them.
base_dir = "../../data"
image_dir_training = f"{base_dir}/ODIR-5K/training"
image_dir_testing = f"{base_dir}/ODIR-5K/testing"
csv_file = f'{base_dir}/ODIR-5K/data.csv'

# Side length (in pixels) of the square images produced by the transforms.
input_size = 512

# Per-channel mean / std passed to Normalize (one value per RGB channel).
# NOTE(review): these match the ImageNet statistics commonly used with
# torchvision models — confirm they are appropriate for this dataset.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing steps, applied in order to every image that is loaded.
preprocessing_steps = [
    transforms.Resize(size=input_size),
    transforms.CenterCrop(size=input_size),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
apply_transforms = transforms.Compose(preprocessing_steps)

# Training dataset: images from the training folder, described by the CSV file.
ds = EyeImageDataset(
    root=image_dir_training,
    data_info_csv_file=csv_file,
    transform=apply_transforms,
)

### Building the model

To build the model, we use the utility superclass EyeClassifier. 

We create a subclass named CustomEyeClassifier and pass the intended model (a list of layers) to the superclass in its `__init__` constructor.

Here we use a small stack of convolution, max-pooling and dropout layers as the first layers, followed by fully connected layers.

The last layer will not have a transfer function because we opted to use CrossEntropyLoss as the loss function. 

You can change the loss function by calling set_loss_function() if you wish.


In [3]:
class CustomEyeClassifier(EyeClassifier):
    """Small convolutional eye-image classifier built on top of EyeClassifier.

    The network is described as an ordered list of ``(layer, transfer function)``
    pairs that is handed to the ``EyeClassifier`` constructor through its
    ``model`` argument. The stack is two Conv2d -> MaxPool2d -> Dropout stages
    followed by three Linear layers; the last Linear layer emits one value per
    class and has no transfer function.

    NOTE(review): presumably EyeClassifier applies each transfer function right
    after its layer and flattens the feature maps before the first Linear
    layer — confirm against the superclass.
    """

    def __init__(self, num_classes: int) -> None:
        """Assemble the layer list and pass it to the superclass.

        Args:
            num_classes: Number of output features of the final Linear layer.
        """
        # Short aliases for the transfer-function markers used below.
        no_transfer = TransferFunction.NotApplicable
        relu = TransferFunction.Relu

        # Both convolutions and both poolings share the same geometry.
        window = (5, 5)
        step = (2, 2)
        no_padding = (0, 0)
        no_dilation = (1, 1)

        # 13456 == 16 * 29 * 29: 16 channels of 29x29 is what the two
        # conv/pool stages produce for a 512x512 input (assuming the feature
        # maps are flattened before this layer — TODO confirm).
        flattened_features = 13456

        # Layers are instantiated in network order.
        layers = [
            # Stage 1: 3 input channels -> 6 feature maps.
            (nn.Conv2d(in_channels=3, out_channels=6, kernel_size=window,
                       stride=step, padding=no_padding, dilation=no_dilation),
             no_transfer),
            (nn.MaxPool2d(kernel_size=window, stride=step,
                          padding=no_padding, dilation=no_dilation),
             no_transfer),
            (nn.Dropout(), no_transfer),

            # Stage 2: 6 -> 16 feature maps.
            (nn.Conv2d(in_channels=6, out_channels=16, kernel_size=window,
                       stride=step, padding=no_padding, dilation=no_dilation),
             no_transfer),
            (nn.MaxPool2d(kernel_size=window, stride=step,
                          padding=no_padding, dilation=no_dilation),
             no_transfer),
            (nn.Dropout(), no_transfer),

            # Fully connected head; the output layer has no transfer function.
            (nn.Linear(in_features=flattened_features, out_features=84), relu),
            (nn.Linear(in_features=84, out_features=42), relu),
            (nn.Linear(in_features=42, out_features=num_classes), no_transfer),
        ]

        super().__init__(model=layers)

# Instantiate the classifier with one output per class known to the dataset,
# then print the resulting layer stack.
# NOTE(review): binding the model to `nn` shadows the `torch.nn` module alias
# imported at the top of the notebook. After this line runs, `nn.Conv2d` etc.
# are no longer reachable, so re-running the class definition cell fails until
# the import cell is re-executed. Consider renaming (e.g. `model`) together
# with the later cells that call `nn.train_model` / `nn.test_model`.
nn = CustomEyeClassifier(num_classes=len(ds.classes))
print(nn)

CustomEyeClassifier(
  (layer 1): Conv2d(3, 6, kernel_size=(5, 5), stride=(2, 2))
  (layer 2): MaxPool2d(kernel_size=(5, 5), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False)
  (layer 3): Dropout(p=0.5, inplace=False)
  (layer 4): Conv2d(6, 16, kernel_size=(5, 5), stride=(2, 2))
  (layer 5): MaxPool2d(kernel_size=(5, 5), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False)
  (layer 6): Dropout(p=0.5, inplace=False)
  (layer 7): Linear(in_features=13456, out_features=84, bias=True)
  (layer 8): Linear(in_features=84, out_features=42, bias=True)
  (layer 9): Linear(in_features=42, out_features=8, bias=True)
)


### Training the model

Training the model is very easy. All we have to do is to call train_model() passing the EyeImageDataset object. 

If you're having I/O constraints, you can use the set_buffer_size(n) method to cache n images in memory. Be aware that you can't use it
along with shuffle, because the cache will be constantly invalidated, unless you have a lot of memory and can cache all of the images.

In [4]:
# Train the classifier (`nn` is the CustomEyeClassifier instance created above,
# not the torch.nn module) on the training dataset `ds`. The recorded output
# shows one progress line per epoch with the current loss, out of 100 epochs.
nn.train_model(ds)


training (1%) epoch 1/100, loss = 2.6364
training (2%) epoch 2/100, loss = 2.1246
training (3%) epoch 3/100, loss = 0.7268
training (4%) epoch 4/100, loss = 1.6911
training (5%) epoch 5/100, loss = 1.3497
training (6%) epoch 6/100, loss = 0.8989
training (7%) epoch 7/100, loss = 1.1256
training (8%) epoch 8/100, loss = 2.0137
training (9%) epoch 9/100, loss = 2.2820
training (10%) epoch 10/100, loss = 1.3867
training (11%) epoch 11/100, loss = 0.6704
training (12%) epoch 12/100, loss = 0.7619


### Testing the model

Testing the model is similar to training it. All we have to do is to call test_model(), passing an EyeImageDataset object that points to the test dataset images.

If you're having I/O constraints, you can use the set_buffer_size(n) method to cache n images in memory. Be aware that you can't use it
along with shuffle, because the cache will be constantly invalidated, unless you have a lot of memory and can cache all of the images.

In [None]:
# Build the testing dataset with the SAME preprocessing pipeline that was used
# for training. `apply_transforms`, `image_dir_testing` and `csv_file` come from
# the dataset-setup cell near the top of the notebook. Reusing them, rather than
# redefining a copy-pasted pipeline here, guarantees that training and testing
# images are resized, cropped and normalized identically.
# NOTE: this rebinds `ds`, so the training dataset is replaced by the testing
# dataset from this point on.
ds = EyeImageDataset(root=image_dir_testing, data_info_csv_file=csv_file, transform=apply_transforms)


In [None]:
# Number of images the dataset keeps cached in memory during testing.
test_buffer_size = 16
ds.set_buffer_size(test_buffer_size)

# Evaluate the trained classifier (`nn` is the model instance) on the testing dataset.
nn.test_model(ds)