In [1]:
import sys
sys.path.insert(0, '..')

import torch.nn as nn
from torch.utils.data import DataLoader
from utils.eye_dataset import *
from eye_classifier import *
import torchvision.transforms as transforms
import torchvision.models as models

### The Training Dataset

We use the utility class EyeImageDataset() to load the training dataset based on the metadata CSV file and
the target image folder

The training images will be loaded as needed due to memory constraints.

Since we're using the AlexNet network, we need to resize our images to 224x224, so we apply some transforms to our dataset.

We also need to properly normalize the dataset according to the AlexNet input expectations.

In [2]:
# Locations of the ODIR-5K images and of the metadata CSV, relative to this notebook.
base_dir = "../../data"

# Alternative training source (preprocessed images): f"{base_dir}/preprocessed_images"
image_dir_training = f"{base_dir}/ODIR-5K/training"
image_dir_testing = f"{base_dir}/ODIR-5K/testing"
csv_file = f'{base_dir}/ODIR-5K/data.csv'

print('reading input dataset')

# Side length (in pixels) of the square images fed to the network.
input_size = 224

# Per-channel mean / std passed to Normalize (three values each, one per image channel).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image, in order: resize, crop to a square of
# `input_size`, convert to a tensor, then normalize each channel.
preprocessing_steps = [
    transforms.Resize(size=input_size),
    transforms.CenterCrop(size=input_size),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
apply_transforms = transforms.Compose(preprocessing_steps)

# Training dataset built from the image folder and the metadata CSV.
ds = EyeImageDataset(
    root=image_dir_training,
    data_info_csv_file=csv_file,
    transform=apply_transforms,
)

reading input dataset


### Building the model

To build the model, we use the utility superclass EyeClassifier. 

We create a subclass named ResnetEyeClassifier and pass the intended model to the superclass from its `__init__` constructor.

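Here the model consists of a single entry: a torchvision AlexNet (in fact AlexNet is itself comprised of many layers: a convolutional feature extractor
followed by its own classifier head for image classification). Note that, despite the class name, the code below does not use resnet18.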

The last layer will not have a transfer function because we opted to use CrossEntropyLoss as the loss function. 

You can change the loss function by using set_loss_function() if you wish to


In [3]:

class ResnetEyeClassifier(EyeClassifier):
    """EyeClassifier whose model is a single torchvision AlexNet.

    NOTE: despite the class name, the wrapped network is ``models.AlexNet``,
    not a ResNet.
    """

    def __init__(self, num_classes: int) -> None:
        """Build the classifier.

        Args:
            num_classes: forwarded to ``models.AlexNet(num_classes=...)``.
        """
        backbone = models.AlexNet(num_classes=num_classes)
        # One (module, transfer function) entry; no transfer function is attached to it.
        layers = [(backbone, TransferFunction.NotApplicable)]
        super().__init__(model=layers)

# One output class per class exposed by the training dataset.
class_count = len(ds.classes)

# NOTE(review): binding the classifier to `nn` rebinds the `torch.nn` alias imported in the
# first cell. The name is kept because the training and testing cells call `nn.train_model`
# and `nn.test_model`.
nn = ResnetEyeClassifier(num_classes=class_count)
print(nn)

ResnetEyeClassifier(
  (layer 1): AlexNet(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
    (classifier): Sequential(
      (0): Dropout(p=0.5, inplace=Fa

### Training the model

Training the model is very easy. All we have to do is to call train_model() passing the EyeImageDataset object. 

If you're having I/O constraints, you can use the set_buffer_size(n) method to cache n images in memory. Be aware that you can't combine it
with shuffle, because the cache will be constantly invalidated, unless you have enough memory to cache all of the images.

In [4]:
# Image caching is left disabled: training uses shuffle=True, and shuffling constantly
# invalidates the cache unless every image fits in memory.
#ds.set_buffer_size(1024)
# Train the classifier on the dataset currently bound to `ds`, with shuffling enabled.
nn.train_model(ds, shuffle=True)


training (1%) epoch 1/100, loss = 2.0758
training (2%) epoch 2/100, loss = 2.0002
training (3%) epoch 3/100, loss = 1.6103
training (4%) epoch 4/100, loss = 1.6359
training (5%) epoch 5/100, loss = 1.5097
training (6%) epoch 6/100, loss = 1.1799
training (7%) epoch 7/100, loss = 1.5411
training (8%) epoch 8/100, loss = 0.8500
training (9%) epoch 9/100, loss = 1.4629
training (10%) epoch 10/100, loss = 1.1718
training (11%) epoch 11/100, loss = 1.6333
training (12%) epoch 12/100, loss = 1.0791
training (13%) epoch 13/100, loss = 2.0819
training (14%) epoch 14/100, loss = 0.9682
training (15%) epoch 15/100, loss = 2.3420
training (16%) epoch 16/100, loss = 1.0062
training (17%) epoch 17/100, loss = 0.9314
training (18%) epoch 18/100, loss = 1.7139
training (19%) epoch 19/100, loss = 0.7175
training (20%) epoch 20/100, loss = 1.6322
training (21%) epoch 21/100, loss = 1.1615
training (22%) epoch 22/100, loss = 1.0109
training (23%) epoch 23/100, loss = 1.9683
training (24%) epoch 24/100, 

### Saving some weights

You can use the util methods save/load weights to save or load individual model layer weights or the whole model weights,
so you can try different kinds of training.

You can also freeze/unfreeze layers, preventing them from being trained, by using the freeze_layer() and unfreeze_layer() methods.

In [None]:

# Optional: uncomment to persist weights after training.
# First call: the weights of a single model layer (index 0 -- presumably the first layer; confirm).
# Second call: the weights of the whole model.
#nn.save_layer_weights(0, "eye_classification_net_full_resnet18.w")
#nn.save_weights("eye_classification_net_full.w")


### Testing the model

Testing the model is similar to training it. All we have to do is to call test_model() passing an EyeImageDataset object pointing to the test dataset images.

If you're having I/O constraints, you can use the set_buffer_size(n) method to cache n images in memory. Be aware that you can't combine it
with shuffle, because the cache will be constantly invalidated, unless you have enough memory to cache all of the images.

In [None]:
# Evaluate with exactly the preprocessing used for training. `apply_transforms` (and
# `input_size`) are defined once in the training-dataset cell, which this cell already
# depends on for `image_dir_testing` and `csv_file`. Reusing that pipeline instead of
# re-declaring a copy keeps the train and test preprocessing from drifting apart.
#
# NOTE: this rebinds `ds` to the *testing* dataset. Re-run the training-dataset cell
# before calling train_model() again, otherwise training would use the test images.
ds = EyeImageDataset(
    root=image_dir_testing,
    data_info_csv_file=csv_file,
    transform=apply_transforms,
)


In [None]:
# Cache 16 images in memory to reduce I/O; no shuffle argument is passed to test_model() below.
ds.set_buffer_size(16)
# Evaluate the classifier on the dataset currently bound to `ds` (the testing dataset
# when the cells are run in order).
nn.test_model(ds)