# Boilerplate notebook

In [None]:
# Matplotlib
import matplotlib.pyplot as plt
# Numpy
import numpy as np
# Pillow
from PIL import Image
# Torch
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torchsummary import summary
# Misc
import time
from datetime import datetime

# 1. Download dataset

In [None]:
!git clone -b data https://github.com/Oxiang/50.039-Deep-Learning.git

In [None]:
!sudo apt-get install tree

In [None]:
cd 50.039-Deep-Learning

In [None]:
%%bash

(
tree dataset -d
) 

# 2. Dataset high-level info

The images stored in the **./dataset** folder and its subfolders are 150 by 150 pixel greyscale images representing X-ray pictures of lungs.

We treat the problem as a cascade of two binary classifications: we first predict normal vs infected, and then, for the infected cases, infected_covid vs infected_non_covid.

For normal vs infected

| Description                | Class index | Tensor | Class label |
| -------------------------- | ----------- | ------ | ----------- |
| Normal                     | 0           | [1 0]  | normal      |
| People with infected lungs | 1           | [0 1]  | infected    |

For infected_covid vs infected_non_covid

| Description                              | Class index | Tensor | Class label        |
| ---------------------------------------- | ----------- | ------ | ------------------ |
| People with infected lungs and covid     | 0           | [1 0]  | covid |
| People with infected lungs and non-covid | 1           | [0 1]  | non_covid     |


In [None]:
# Index -> label for the layer-0 (normal vs infected) classifier.
classes_n_c = {0: 'normal', 1: 'infected'}
# Index -> label for the layer-1 (covid vs non-covid) classifier.
# NOTE(review): the markdown table above and L1_Lung_Dataset.__getitem__ use
# index 0 for covid and index 1 for non-covid, which is the opposite of this
# mapping - confirm before using it to decode layer-1 predictions.
classes_inc_ic = {0: 'infected_non_covid', 1: 'infected_covid'}
# Names of the dataset splits.
groups = ['train', 'test', 'val']
# Number of images per '<split>_<class>' combination.
dataset_numbers = {
    'train_normal': 1341,
    'train_infected_non_covid': 2530,
    'train_infected_covid': 1345,
    'val_normal': 8,
    'val_infected_non_covid': 8,
    'val_infected_covid': 8,    
    'test_normal': 234,
    'test_infected_non_covid': 242,
    'test_infected_covid': 138,
}
# Folder holding the images of each '<split>_<class>' combination.
dataset_paths = {
    'train_normal': './dataset/train/normal/',
    'train_infected_non_covid': './dataset/train/infected/non-covid/',
    'train_infected_covid': './dataset/train/infected/covid/',
    'val_normal': './dataset/val/normal/',
    'val_infected_non_covid': './dataset/val/infected/non-covid/',
    'val_infected_covid': './dataset/val/infected/covid/',    
    'test_normal': './dataset/test/normal/',
    'test_infected_non_covid': './dataset/test/infected/non-covid/',
    'test_infected_covid': './dataset/test/infected/covid/',    
}

View one of the images and its properties. Each image is loaded as a Numpy array with values ranging between 0 and 255. These values will be normalized.

In [None]:
# Load one sample X-ray and inspect its raw (un-normalized) pixel values.
path_to_file = './dataset/train/normal/1.jpg'
# The `with` block closes the file on exit, so no explicit f.close() is needed.
with open(path_to_file, 'rb') as f:
    im = np.asarray(Image.open(f))
plt.imshow(im)
print('Image shape is: {}'.format(im.shape))
# Images are defined as a Numpy array of values between 0 and 255
print('Image as a numpy array is:\n {}'.format(im))

# 3. Creating a Dataset object

## 3.1 Common variables

In [None]:
# Splits, in the order they appear in every per-layer table below.
_SPLIT_NAMES = ('train', 'val', 'test')

# Folder of each class, per cascade layer and split.
# Keys follow '<split>_<class>'; layer_0 is normal vs infected,
# layer_1 is covid vs non-covid (both sub-folders of 'infected').
binary_dataset_paths = {
    'layer_0': {
        split: {
            '{}_normal'.format(split): './dataset/{}/normal'.format(split),
            '{}_infected'.format(split): './dataset/{}/infected'.format(split),
        }
        for split in _SPLIT_NAMES
    },
    'layer_1': {
        split: {
            '{}_covid'.format(split): './dataset/{}/infected/covid'.format(split),
            '{}_non_covid'.format(split): './dataset/{}/infected/non-covid'.format(split),
        }
        for split in _SPLIT_NAMES
    },
}

# (first class, second class) image counts per split.
_LAYER_0_COUNTS = {'train': (1341, 3875), 'val': (8, 16), 'test': (234, 380)}
_LAYER_1_COUNTS = {'train': (1345, 2530), 'val': (8, 8), 'test': (138, 242)}

# Image count of each class, per cascade layer and split.
# Entry order matters: the dataset classes read the first value of a
# split's dict positionally.
binary_dataset_numbers = {
    'layer_0': {
        split: {
            '{}_normal'.format(split): normal_count,
            '{}_infected'.format(split): infected_count,
        }
        for split, (normal_count, infected_count) in _LAYER_0_COUNTS.items()
    },
    'layer_1': {
        split: {
            '{}_covid'.format(split): covid_count,
            '{}_non_covid'.format(split): non_covid_count,
        }
        for split, (covid_count, non_covid_count) in _LAYER_1_COUNTS.items()
    },
}

## 3.2 Layer 0 General Dataset object that is custom made for train, val, test to individually use

length method ( __ len __ )

> return the number of images present in the dataset

getitem method ( __ getitem __ )

> fetch an image and its label, using a single index value. Returns the image, along with a one-hot vector corresponding to the class of the object. Both returned parameters will be torch tensors.
- [1, 0] for normal class
- [0, 1] for infected class

In [None]:
class L0_Lung_Dataset(Dataset):
    """
    Generic Dataset class for Layer 0 (normal vs infected).

    Items are ordered as: all 'normal' images first, then the 'infected'
    images of the covid sub-class, then those of the non-covid sub-class.
    Image files are addressed as '<index>.jpg', starting at index 0.
    """

    def __init__(self, groups, dataset_numbers, dataset_paths, infected_sub_class_numbers):
        """
        Constructor for generic Dataset class for Layer0 - assembles
        the important parameters in attributes.

        Parameters
        ----------
        groups : str
            Allowed values: train, val, test
        dataset_numbers : dict
            Count of each class within specified group (e.g. normal, infected).
            Keys follow '<group>_<class>'. The first entry must be the
            'normal' count because __getitem__ reads it positionally.
        dataset_paths : dict
            Path to each class within specified group (infected has 2 sub-class dir)
        infected_sub_class_numbers : dict
            Count of each infected sub-class within specified group. The
            first entry must be the covid count because __getitem__ reads
            it positionally.
        """

        self.img_size = (150, 150)
        self.classes = {0: 'normal', 1: 'infected'}
        self.covid_status = {0: '', 1: 'covid', 2: 'non-covid'}
        self.groups = groups
        self.dataset_numbers = dataset_numbers
        self.dataset_paths = dataset_paths
        self.infected_sub_class_numbers = infected_sub_class_numbers

    def _allowed_groups(self):
        """Return the accepted group names as a tuple (a str counts as one name)."""
        if isinstance(self.groups, str):
            return (self.groups,)
        return tuple(self.groups)

    def describe(self):
        """
        Descriptor function.
        Will print details about the dataset when called.
        """

        # Generate description
        msg = "This is the {} dataset of the Lung Dataset".format(self.groups)
        msg += " used for the Small Project Demo in the 50.039 Deep Learning class"
        msg += " in March 2021. \n"
        msg += "It contains a total of {} images, ".format(sum(self.dataset_numbers.values()))
        msg += "of size {} by {}.\n".format(self.img_size[0], self.img_size[1])
        msg += "The images are stored in the following locations "
        msg += "and each one contains the following number of images:\n"
        for key, val in self.dataset_paths.items():
            msg += " - {}, in folder {}: {} images.\n".format(key, val, self.dataset_numbers[key])
        print(msg)

    def open_img(self, group_val, class_val, covid_status, index_val):
        """
        Opens image with specified parameters.

        Parameters:
        - group_val should take values in 'train', 'test' or 'val'.
        - class_val variable should be set to 'normal' or 'infected'.
        - covid_status should take values in '', 'covid' or 'non-covid'
          ('' for 'normal', a sub-class name for 'infected').
        - index_val should be an integer with 0 <= index_val < number of
          images of class_val in the group.

        Returns loaded image as a normalized Numpy array.

        Raises AssertionError when a parameter fails the checks above.
        """

        # Asserts checking for consistency in passed parameters
        err_msg = "Error - group_val variable should be set to 'train', 'test' or 'val'."
        # Compare against whole names: `in` on a plain str would also accept
        # substrings such as 'rain'.
        assert group_val in self._allowed_groups(), err_msg

        err_msg = "Error - class_val variable should be set to 'normal' or 'infected'."
        assert class_val in self.classes.values(), err_msg

        err_msg = "Error - covid_status variable should be set to '', 'covid' or 'non-covid'."
        assert covid_status in self.covid_status.values(), err_msg

        class_key = '{}_{}'.format(group_val, class_val)
        # For 'infected' this is the total over both sub-classes, so it is
        # only an upper bound on the valid index inside one sub-folder.
        max_val = self.dataset_numbers[class_key]
        err_msg = "Error - index_val variable should be an integer between 0 and the maximal number of images."
        err_msg += "\n(In {}/{}, you have {} images.)".format(group_val, class_val, max_val)
        assert isinstance(index_val, int), err_msg
        # Files are numbered from 0, so max_val itself is already out of range.
        assert 0 <= index_val < max_val, err_msg

        # 'normal' - example path: /dataset/train/normal/1.jpg
        if covid_status == "":
            path_to_file = '{}/{}.jpg'.format(self.dataset_paths[class_key], index_val)
        # 'covid' or 'non-covid' - example path: './dataset/train/infected/covid/1.jpg',
        else:
            path_to_file = '{}/{}/{}.jpg'.format(self.dataset_paths[class_key], covid_status, index_val)

        # The `with` block closes the file; no explicit close() is needed.
        with open(path_to_file, 'rb') as f:
            # Convert to Numpy array and normalize pixel values by dividing by 255.
            im = np.asarray(Image.open(f)) / 255
        return im

    def show_img(self, group_val, class_val, covid_status, index_val):
        """
        Opens, then displays image with specified parameters.

        Parameters:
        - group_val should take values in 'train', 'test' or 'val'.
        - class_val variable should be set to 'normal' or 'infected'.
        - covid_status should take values in '', 'covid' or 'non-covid'.
        - index_val should be an integer with 0 <= index_val < number of
          images of class_val in the group.
        """

        # Open image
        im = self.open_img(group_val, class_val, covid_status, index_val)

        # Display
        plt.imshow(im)

    def __len__(self):
        """
        Length special method, returns the number of images in dataset.
        """

        # Length function
        return sum(self.dataset_numbers.values())

    def __getitem__(self, index):
        """
        Getitem special method.

        Expects an integer value index, between 0 and len(self) - 1.

        Returns the image and its label as a one hot vector, both
        in torch tensor format in dataset.
        - [1, 0] for normal class
        - [0, 1] for infected class

        Raises IndexError when index is outside 0 .. len(self) - 1.
        """

        # An IndexError lets plain `for item in dataset` loops terminate
        # and gives a clear failure instead of a missing-file error.
        if not 0 <= index < len(self):
            raise IndexError(
                "index {} is out of range for a dataset of {} images".format(index, len(self))
            )

        # Relies on insertion order: the first count is the 'normal' one.
        first_val = int(list(self.dataset_numbers.values())[0])
        if index < first_val:
            class_val = 'normal'
            label = torch.Tensor([1, 0])
            covid_status = ""
        else:
            class_val = 'infected'
            label = torch.Tensor([0, 1])
            # Make the index relative to the start of the infected images.
            index = index - first_val
            # Relies on insertion order: the first sub-class count is covid.
            infected_covid_numbers = int(list(self.infected_sub_class_numbers.values())[0])
            if index < infected_covid_numbers:
                covid_status = 'covid'
            else:
                # Make the index relative to the start of the non-covid images.
                index = index - infected_covid_numbers
                covid_status = 'non-covid'
        im = self.open_img(self.groups, class_val, covid_status, index)
        # open_img already returns a fresh array, so no extra copy is needed.
        im = transforms.functional.to_tensor(im).float()
        return im, label

In [None]:
def verify_l0_dataset(group,dataset,image_overall_index,class_val,covid_status,
                   image_specific_dataset_index=1):
  """
  Smoke-check a Layer 0 dataset object by printing what it returns.

  First exercises len() and item access, then open_img and show_img.

  Parameters
  ----------
  group : str
      Allowed values: train, val, test
  dataset: object
      Object instantiated from the class
  image_overall_index: int
      Overall index in the full dataset across all classes
  class_val: str
      Image label. Example: normal, infected
  covid_status: str
      If class_val is 'infected', set this to either 'covid' or 'non-covid'
  image_specific_dataset_index : int
      image id in the specific nested directory
  """
  # Part 1: special methods.
  print('Verify the special methods __len__ and __get_item__')
  print(f'Number of images in {group} dataset: {len(dataset)}')
  print(f'Details for image id {image_overall_index} from the {group} dataset')
  sample, one_hot = dataset[image_overall_index]
  print(f'Sample image shape: {sample.shape}')
  print(f'Sample image: {sample}')
  print(f'Sample image class: {one_hot}')

  # Part 2: direct file access helpers.
  lookup = (group, class_val, covid_status, image_specific_dataset_index)
  print('\nVerify the open_img and show_img functions')
  print(f'Open and show image {image_specific_dataset_index} from the {group}_{class_val} dataset')
  opened = dataset.open_img(*lookup)
  print(f'Sample image shape: {opened.shape}')
  print(f'Sample image: {opened}')
  dataset.show_img(*lookup)

### 3.2.1 Layer 0 Train dataset

In [None]:
# Layer-0 training split: class counts/paths come from the layer_0 tables,
# the covid / non-covid breakdown from the layer_1 table.
train_group = 'train'
l0_ld_train = L0_Lung_Dataset(
    groups=train_group,
    dataset_numbers=binary_dataset_numbers['layer_0'][train_group],
    dataset_paths=binary_dataset_paths['layer_0'][train_group],
    infected_sub_class_numbers=binary_dataset_numbers['layer_1'][train_group],
)
l0_ld_train.describe()

In [None]:
# Overall index 1340 is the last 'normal' training image (file 1340.jpg).
verify_l0_dataset(
    group=train_group,
    dataset=l0_ld_train,
    image_overall_index=1340,
    class_val='normal',
    covid_status='',
    image_specific_dataset_index=1340,
)

### 3.2.2 Layer 0 Validation dataset

In [None]:
# Layer-0 validation split.
val_group = 'val'
l0_ld_val = L0_Lung_Dataset(
    groups=val_group,
    dataset_numbers=binary_dataset_numbers['layer_0'][val_group],
    dataset_paths=binary_dataset_paths['layer_0'][val_group],
    infected_sub_class_numbers=binary_dataset_numbers['layer_1'][val_group],
)
l0_ld_val.describe()

In [None]:
# Overall index 9 falls after the 8 'normal' images: infected/covid file 1.jpg.
verify_l0_dataset(
    group=val_group,
    dataset=l0_ld_val,
    image_overall_index=9,
    class_val='infected',
    covid_status='covid',
    image_specific_dataset_index=1,
)

### 3.2.3 Layer 0 Test dataset

In [None]:
# Layer-0 test split.
test_group = 'test'
l0_ld_test = L0_Lung_Dataset(
    groups=test_group,
    dataset_numbers=binary_dataset_numbers['layer_0'][test_group],
    dataset_paths=binary_dataset_paths['layer_0'][test_group],
    infected_sub_class_numbers=binary_dataset_numbers['layer_1'][test_group],
)
l0_ld_test.describe()

In [None]:
# Overall index 613 is the last test item: 234 normal + 138 covid precede
# the non-covid images, so it maps to infected/non-covid file 241.jpg.
verify_l0_dataset(
    group=test_group,
    dataset=l0_ld_test,
    image_overall_index=613,
    class_val='infected',
    covid_status='non-covid',
    image_specific_dataset_index=241,
)

## 3.3 Layer 1 General Dataset object that is custom made for train, val, test to individually use

In [None]:
class L1_Lung_Dataset(Dataset):
    """
    Generic Dataset class for Layer 1 (covid vs non_covid).

    Items are ordered as: all 'covid' images first, then the 'non_covid'
    images. Image files are addressed as '<index>.jpg', starting at index 0.
    """

    def __init__(self, groups, dataset_numbers, dataset_paths):
        """
        Constructor for generic Dataset class for Layer 1 - assembles
        the important parameters in attributes.

        Parameters
        ----------
        groups : str
            Allowed values: train, val, test
        dataset_numbers : dict
            Count of each class within specified group (e.g. covid, non_covid).
            Keys follow '<group>_<class>'. The first entry must be the
            'covid' count because __getitem__ reads it positionally.
        dataset_paths : dict
            Path to each class within specified group, keyed like
            dataset_numbers.
        """

        self.img_size = (150, 150)
        self.classes = {0: 'covid', 1: 'non_covid'}
        self.groups = groups
        self.dataset_numbers = dataset_numbers
        self.dataset_paths = dataset_paths

    def _allowed_groups(self):
        """Return the accepted group names as a tuple (a str counts as one name)."""
        if isinstance(self.groups, str):
            return (self.groups,)
        return tuple(self.groups)

    def describe(self):
        """
        Descriptor function.
        Will print details about the dataset when called.
        """

        # Generate description
        msg = "This is the {} dataset of the Lung Dataset".format(self.groups)
        msg += " used for the Small Project Demo in the 50.039 Deep Learning class"
        msg += " in March 2021. \n"
        msg += "It contains a total of {} images, ".format(sum(self.dataset_numbers.values()))
        msg += "of size {} by {}.\n".format(self.img_size[0], self.img_size[1])
        msg += "The images are stored in the following locations "
        msg += "and each one contains the following number of images:\n"
        for key, val in self.dataset_paths.items():
            msg += " - {}, in folder {}: {} images.\n".format(key, val, self.dataset_numbers[key])
        print(msg)

    def open_img(self, group_val, class_val, index_val):
        """
        Opens image with specified parameters.

        Parameters:
        - group_val should take values in 'train', 'test' or 'val'.
        - class_val variable should be set to 'covid' or 'non_covid'.
        - index_val should be an integer with 0 <= index_val < number of
          images of class_val in the group.

        Returns loaded image as a normalized Numpy array.

        Raises AssertionError when a parameter fails the checks above.
        """

        # Asserts checking for consistency in passed parameters
        err_msg = "Error - group_val variable should be set to 'train', 'test' or 'val'."
        # Compare against whole names: `in` on a plain str would also accept
        # substrings such as 'rain'.
        assert group_val in self._allowed_groups(), err_msg

        # The accepted spelling uses an underscore (the folder name uses a hyphen).
        err_msg = "Error - class_val variable should be set to 'covid' or 'non_covid'."
        assert class_val in self.classes.values(), err_msg

        class_key = '{}_{}'.format(group_val, class_val)
        max_val = self.dataset_numbers[class_key]
        err_msg = "Error - index_val variable should be an integer between 0 and the maximal number of images."
        err_msg += "\n(In {}/{}, you have {} images.)".format(group_val, class_val, max_val)
        assert isinstance(index_val, int), err_msg
        # Files are numbered from 0, so max_val itself is already out of range.
        assert 0 <= index_val < max_val, err_msg

        # Open file as before
        path_to_file = '{}/{}.jpg'.format(self.dataset_paths[class_key], index_val)
        # The `with` block closes the file; no explicit close() is needed.
        with open(path_to_file, 'rb') as f:
            # Convert to Numpy array and normalize pixel values by dividing by 255.
            im = np.asarray(Image.open(f)) / 255
        return im

    def show_img(self, group_val, class_val, index_val):
        """
        Opens, then displays image with specified parameters.

        Parameters:
        - group_val should take values in 'train', 'test' or 'val'.
        - class_val variable should be set to 'covid' or 'non_covid'.
        - index_val should be an integer with 0 <= index_val < number of
          images of class_val in the group.
        """

        # Open image
        im = self.open_img(group_val, class_val, index_val)

        # Display
        plt.imshow(im)

    def __len__(self):
        """
        Length special method, returns the number of images in dataset.
        """

        # Length function
        return sum(self.dataset_numbers.values())

    def __getitem__(self, index):
        """
        Getitem special method.

        Expects an integer value index, between 0 and len(self) - 1.

        Returns the image and its label as a one hot vector, both
        in torch tensor format in dataset.
        - [1, 0] for covid class
        - [0, 1] for non_covid class

        Raises IndexError when index is outside 0 .. len(self) - 1.
        """

        # An IndexError lets plain `for item in dataset` loops terminate
        # and gives a clear failure instead of a missing-file error.
        if not 0 <= index < len(self):
            raise IndexError(
                "index {} is out of range for a dataset of {} images".format(index, len(self))
            )

        # Relies on insertion order: the first count is the 'covid' one.
        first_val = int(list(self.dataset_numbers.values())[0])
        if index < first_val:
            class_val = 'covid'
            label = torch.Tensor([1, 0])
        else:
            class_val = 'non_covid'
            # Make the index relative to the start of the non_covid images.
            index = index - first_val
            label = torch.Tensor([0, 1])

        im = self.open_img(self.groups, class_val, index)
        # open_img already returns a fresh array, so no extra copy is needed.
        im = transforms.functional.to_tensor(im).float()
        return im, label

In [None]:
def verify_l1_dataset(group,dataset,image_overall_index,class_val,
                   image_specific_dataset_index=1):
  """
  Smoke-check a Layer 1 dataset object by printing what it returns.

  First exercises len() and item access, then open_img and show_img.

  Parameters
  ----------
  group : str
      Allowed values: train, val, test
  dataset: object
      Object instantiated from the class
  image_overall_index: int
      Overall index in the full dataset across all classes
  class_val: str
      Image label. Example: covid, non_covid
  image_specific_dataset_index : int
      image id in the specific nested directory
  """
  # Part 1: special methods.
  print('Verify the special methods __len__ and __get_item__')
  print(f'Number of images in {group} dataset: {len(dataset)}')
  print(f'Details for image id {image_overall_index} from the {group} dataset')
  sample, one_hot = dataset[image_overall_index]
  print(f'Sample image shape: {sample.shape}')
  print(f'Sample image: {sample}')
  print(f'Sample image class: {one_hot}')

  # Part 2: direct file access helpers.
  lookup = (group, class_val, image_specific_dataset_index)
  print('\nVerify the open_img and show_img functions')
  print(f'Open and show image {image_specific_dataset_index} from the {group}_{class_val} dataset')
  opened = dataset.open_img(*lookup)
  print(f'Sample image shape: {opened.shape}')
  print(f'Sample image: {opened}')
  dataset.show_img(*lookup)

### 3.3.1 Layer 1 Train dataset

In [None]:
# Layer-1 training split (covid vs non_covid).
train_group = 'train'
l1_ld_train = L1_Lung_Dataset(
    groups=train_group,
    dataset_numbers=binary_dataset_numbers['layer_1'][train_group],
    dataset_paths=binary_dataset_paths['layer_1'][train_group],
)
l1_ld_train.describe()

In [None]:
# Overall index 1 lies inside the covid images: covid file 1.jpg.
verify_l1_dataset(
    group=train_group,
    dataset=l1_ld_train,
    image_overall_index=1,
    class_val='covid',
    image_specific_dataset_index=1,
)

### 3.3.2 Layer 1 Validation dataset

In [None]:
# Layer-1 validation split (covid vs non_covid).
val_group = 'val'
l1_ld_val = L1_Lung_Dataset(
    groups=val_group,
    dataset_numbers=binary_dataset_numbers['layer_1'][val_group],
    dataset_paths=binary_dataset_paths['layer_1'][val_group],
)
l1_ld_val.describe()

In [None]:
# Overall index 15 is the last validation item: 8 covid images precede the
# non_covid ones, so it maps to non_covid file 7.jpg.
verify_l1_dataset(
    group=val_group,
    dataset=l1_ld_val,
    image_overall_index=15,
    class_val='non_covid',
    image_specific_dataset_index=7,
)

### 3.3.3 Layer 1 Test dataset

In [None]:
# Layer-1 test split (covid vs non_covid).
test_group = 'test'
l1_ld_test = L1_Lung_Dataset(
    groups=test_group,
    dataset_numbers=binary_dataset_numbers['layer_1'][test_group],
    dataset_paths=binary_dataset_paths['layer_1'][test_group],
)
l1_ld_test.describe()

In [None]:
# Overall index 379 is the last test item: 138 covid images precede the
# non_covid ones, so it maps to non_covid file 241.jpg.
verify_l1_dataset(
    group=test_group,
    dataset=l1_ld_test,
    image_overall_index=379,
    class_val='non_covid',
    image_specific_dataset_index=241,
)

# 4. Data visualization

This requires a `grouped bar chart`. Refer to [matplotlib Grouped bar chart with labels](https://matplotlib.org/stable/gallery/lines_bars_and_markers/barchart.html) for starter code

In [None]:
def plot_grouped_bar_chart(labels, train, val, test):
  '''
  Draw one group of three bars (train, val, test) per entry of `labels`
  and annotate every bar with its height.

  `train`, `val` and `test` hold one value per label, in label order.
  '''
  tick_positions = np.arange(len(labels))  # one tick per label
  bar_width = 0.25

  figure, axes = plt.subplots(figsize=(10,10))

  # Bars are created in train, val, test order (this fixes colours and the
  # legend order); test sits on the tick, train to its left, val to its right.
  series = (
      (tick_positions - bar_width, train, 'train'),
      (tick_positions + bar_width, val, 'val'),
      (tick_positions, test, 'test'),
  )
  bar_groups = [
      axes.bar(where, counts, bar_width, label=name)
      for where, counts, name in series
  ]

  axes.set_ylabel('Number of datapoints')
  axes.set_title('Number of datapoints with respect to each dataset and class')
  axes.set_xticks(tick_positions)
  axes.set_xticklabels(labels)
  axes.legend()

  # Write each bar's height just above its top edge.
  for bar_group in bar_groups:
      for bar in bar_group:
          bar_height = bar.get_height()
          bar_centre = bar.get_x() + bar.get_width() / 2
          axes.annotate('{}'.format(bar_height),
                        xy=(bar_centre, bar_height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

  figure.tight_layout()
  plt.show()

## 4.1 Overall

<u>**Discuss whether or not the dataset is balanced between classes, uniformly distributed, etc.**</u>

**Training set**

The train data for the different classes are imbalanced. From the graph plotted below, the `infected_non_covid` class has significantly more data points than the other classes. Overall, the ratio `normal:infected_non_covid:infected_covid` is approximately `1:2:1`.

This could present more complications if the model is trained in a stacking manner by first training normal vs infected. The ratio of `normal:infected` would be a ratio of `1:3` which is more imbalanced. However, if there are distinct differences in the data of the normal and infected then this large difference could be negated. Alternatively, data augmentation techniques could be used to increase the size of the training data for the `normal` class.

**Testing set**

The test set is also slightly imbalanced, with the ratio `normal:infected_non_covid:infected_covid` being approximately `2:2:1`. However, this is not as bad as an imbalanced training set because the test set is only used to gauge the model's performance after each epoch and does not directly affect the model's parameter tuning.

**Validation set**

The val set is uniformly distributed between the three classes. It is glaring, however, that there are only 8 validation samples for each of the 3 classes. This is acceptable because the set is used only for validation and will not affect the model's parameter tuning.

In [None]:
# Label order matches the value order built below: normal, covid, non-covid.
labels = ['normal', 'infected_covid', 'infected_non_covid']

# 'normal' counts come from the layer-0 datasets (kept as one-element lists).
train_normal = [l0_ld_train.dataset_numbers['train_normal']]
val_normal = [l0_ld_val.dataset_numbers['val_normal']]
test_normal = [l0_ld_test.dataset_numbers['test_normal']]

# covid / non-covid counts come from the layer-1 datasets.
train_covid_non_covid, val_covid_non_covid, test_covid_non_covid = (
    list(split.dataset_numbers.values())
    for split in (l1_ld_train, l1_ld_val, l1_ld_test)
)

train_normal_inc_ic = [*train_normal, *train_covid_non_covid]
val_normal_inc_ic = [*val_normal, *val_covid_non_covid]
test_normal_inc_ic = [*test_normal, *test_covid_non_covid]

plot_grouped_bar_chart(
    labels,
    train=train_normal_inc_ic,
    val=val_normal_inc_ic,
    test=test_normal_inc_ic,
)

## 4.2 Layer 0

The graph below gives a clearer idea of the training imbalance: the ratio of `normal:infected` is approximately `1:3`, which is the ratio seen when training the binary classifier for Layer 0.

In [None]:
# Per-split counts for the layer-0 classes, in (normal, infected) order.
labels = ['normal', 'infected']

train_normal_infected, val_normal_infected, test_normal_infected = (
    list(split.dataset_numbers.values())
    for split in (l0_ld_train, l0_ld_val, l0_ld_test)
)

plot_grouped_bar_chart(
    labels,
    train=train_normal_infected,
    val=val_normal_infected,
    test=test_normal_infected,
)

## 4.3 Layer 1

The class distribution for infected_covid vs infected_non_covid is imbalanced by about 1:2. This likely reflects reality: Covid is new and will probably not have as many data samples as the more common infected non-covid X-rays. Again, data augmentation techniques such as mirroring could be used to increase the number of training samples for covid.

In [None]:
# Per-split counts for the layer-1 classes, in (covid, non-covid) order.
labels = ['covid', 'non-covid']

train_covid_non_covid, val_covid_non_covid, test_covid_non_covid = (
    list(split.dataset_numbers.values())
    for split in (l1_ld_train, l1_ld_val, l1_ld_test)
)

plot_grouped_bar_chart(
    labels,
    train=train_covid_non_covid,
    val=val_covid_non_covid,
    test=test_covid_non_covid,
)

# 4. Why normalize the data?

To recap, the normalization can be found in the `open_img` method of the `L0_Lung_Dataset` and `L1_Lung_Dataset` classes, which does the following:

```python
# Convert to Numpy array and normalize pixel values by dividing by 255.
im = np.asarray(Image.open(f))/255
```

The images have pixel values ranging from 0 to 255. For activation functions like `sigmoid`, such a large range means that vastly different values like 100 and 255 are barely distinguishable once passed through the function: both produce a value that is close to 1.

Taking the same values as reference, if we divide by 255, for a value of 100,  $\frac{100}{255}$ we get approximately 0.39. Then for a value of 255, $\frac{255}{255}$ we get 1. For the initial value of 100 that becomes 0.39 after the division, passing it into `sigmoid(0.39)` produces a value of 0.596. Meanwhile for the initial value of 255 that becomes 1 after division, passing it into `sigmoid(1)` produces a value of 0.731. This difference in value allows us to extract meaningful differences in the pixel values.


# 5. Other possible pre-processing

From the plot below, which uses normal images from the training set as reference, it is evident that there are several differences in the photo dimensions and photo environment. 

For example, comparing image_index 1 and image_index 28, there is a clear difference in the lighting: image_index 28 is a lot brighter. One pre-processing step could be to use histogram normalization. There is a paper that recommends 14 possible normalization algorithms that can be performed (Leszczynski, 2010).

Another example is comparing "skinny" images like image_index 1 and image_index 31, which have significantly more dark background at the sides, with images like image_index 12. Perhaps an edge detection algorithm can be applied to keep only the relevant part of the image, which is the lungs.

In [None]:
# Show four 'normal' training images in a 2 x 2 grid, filled row by row.
row, col = 2, 2
selected_indices = [1, 12, 28, 31]

f, axarr = plt.subplots(row, col, figsize=(10, 7))
counter = 0
# np.ndindex walks the grid cells in row-major order.
for row_index, col_index in np.ndindex(row, col):
    image_index = selected_indices[counter]
    im = l0_ld_train.open_img('train', 'normal', '', image_index)
    axarr[row_index, col_index].set_title('Image index: {}'.format(image_index))
    axarr[row_index, col_index].imshow(im)
    counter += 1

# 6. Creating a data loader object

## 6.1 Layer 0

In [None]:
# Batch sizes for the layer-0 loaders (validation is fed one image at a time).
l0_bs_train, l0_bs_test, l0_bs_val = 32, 32, 1

# All three loaders reshuffle their data on every pass.
l0_train_loader = DataLoader(dataset=l0_ld_train, batch_size=l0_bs_train, shuffle=True)
l0_test_loader = DataLoader(dataset=l0_ld_test, batch_size=l0_bs_test, shuffle=True)
l0_val_loader = DataLoader(dataset=l0_ld_val, batch_size=l0_bs_val, shuffle=True)

## 6.2 Layer 1

In [None]:
# Batch sizes for the layer-1 loaders (validation is fed one image at a time).
l1_bs_train, l1_bs_test, l1_bs_val = 32, 32, 1

# All three loaders reshuffle their data on every pass.
l1_train_loader = DataLoader(dataset=l1_ld_train, batch_size=l1_bs_train, shuffle=True)
l1_test_loader = DataLoader(dataset=l1_ld_test, batch_size=l1_bs_test, shuffle=True)
l1_val_loader = DataLoader(dataset=l1_ld_val, batch_size=l1_bs_val, shuffle=True)

# 7. Model



## 7.1 Layer 0

In [None]:
class L0_Net(nn.Module):
    """
    Layer-0 classifier: one 3x3 convolution followed by a linear layer
    that outputs log-probabilities for 2 classes.
    """

    def __init__(self):
        super().__init__()
        # 1 input channel -> 4 feature maps, 3x3 kernel, stride 1.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1)
        # A 150x150 input leaves 4 maps of 148x148 = 87616 features.
        self.fc1 = nn.Linear(in_features=4 * 148 * 148, out_features=2)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        features = self.conv1(x)
        # Keep the batch dimension, flatten everything else.
        flat = torch.flatten(features, start_dim=1)
        logits = self.fc1(flat)
        return F.log_softmax(logits, dim=1)

In [None]:
# Activate gpu
# Default to the CPU and switch to the first GPU when CUDA is available.
device = "cpu"
if torch.cuda.is_available():
    print('using GPU')
    device = "cuda:0"
l0_model = L0_Net().to(torch.device(device))

In [None]:
# Print the torchsummary report of the layer-0 model for a (1, 150, 150) input.
summary(l0_model, (1, 150, 150))

## 7.2 Layer 1

In [None]:
class L1_Net(nn.Module):
    """
    Layer-1 classifier: one 3x3 convolution followed by a linear layer
    that outputs log-probabilities for 2 classes.
    """

    def __init__(self):
        super().__init__()
        # 1 input channel -> 4 feature maps, 3x3 kernel, stride 1.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1)
        # A 150x150 input leaves 4 maps of 148x148 = 87616 features.
        self.fc1 = nn.Linear(in_features=4 * 148 * 148, out_features=2)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        features = self.conv1(x)
        # Keep the batch dimension, flatten everything else.
        flat = torch.flatten(features, start_dim=1)
        logits = self.fc1(flat)
        return F.log_softmax(logits, dim=1)

In [None]:
# Activate gpu
# Default to the CPU and switch to the first GPU when CUDA is available.
device = "cpu"
if torch.cuda.is_available():
    print('using GPU')
    device = "cuda:0"
l1_model = L1_Net().to(torch.device(device))

In [None]:
# Print the torchsummary report of the layer-1 model for a (1, 150, 150) input.
summary(l1_model, (1, 150, 150))

# 8. Training the model

Reference material: [Towards data science: PyTorch [Tabular] — Multiclass Classification](https://towardsdatascience.com/pytorch-tabular-multiclass-classification-9f8211a123ab)

In [None]:
def multi_acc(y_pred, y_test):
    """Return the fraction of predictions whose top-scoring class equals the target.

    Parameters
    ----------
    y_pred : torch.Tensor
        Per-class scores, one row per sample. A log-softmax is applied over
        dim 1 before the highest-scoring class of each row is taken.
    y_test : torch.Tensor
        Target class indices, compared element-wise with the predicted ones.

    Returns
    -------
    torch.Tensor
        Scalar tensor: number of matches divided by the number of compared rows.
    """
    log_probs = torch.log_softmax(y_pred, dim=1)
    predicted_classes = torch.max(log_probs, dim=1).indices
    hits = (predicted_classes == y_test).float()
    return hits.sum() / len(hits)

## 8.1 Layer 0

In [None]:
# Training hyper-parameters for Layer 0: epoch budget and learning rate.
epochs, lr = 3, 0.0001
# Per-class loss weights, in the order Normal : Infected.
l0_class_weights = torch.tensor([2.8, 1.0]).to(torch.device(device))
# Class-weighted cross-entropy loss, optimised with Adam over the Layer 0 parameters.
criterion = nn.CrossEntropyLoss(weight=l0_class_weights)
optimizer = optim.Adam(params=l0_model.parameters(), lr=lr)

In [None]:
# Wall-clock start, used to report the total training time after the last epoch.
start = time.time()

# The checkpoint file name embeds the epoch budget, the model name and the start timestamp.
start_model_time = datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
model_name = 'binary_L0_net'
path = './checkpoint_model_{}_{}_{}'.format(epochs, model_name, start_model_time)

# Per-epoch history of accuracy and loss for the train and test splits.
l0_accuracy_stats_epoch = {
    'train': [],
    'test': [],
    'epoch': [],
}
l0_loss_stats_epoch = {
    'train': [],
    'test': [],
    'epoch': [],
}

for e in range(epochs):
    train_epoch_loss = 0
    train_epoch_acc = 0

    l0_model.train()
    for X_train_batch, y_train_batch in l0_train_loader:
        X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        # Loss and accuracy both take class indices: derive them once per batch
        # as the arg-max over dim 1 of the label rows.
        y_train_idx = torch.max(y_train_batch, 1)[1]

        optimizer.zero_grad()

        # Call the module itself rather than .forward() so registered hooks run.
        output = l0_model(X_train_batch)
        train_loss = criterion(output, y_train_idx)
        train_acc = multi_acc(output, y_train_idx)

        train_loss.backward()
        optimizer.step()

        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()

    # testing
    # NOTE(review): the test split is evaluated after every epoch for monitoring.
    with torch.no_grad():
        test_epoch_loss = 0
        test_epoch_acc = 0
        l0_model.eval()
        for X_test_batch, y_test_batch in l0_test_loader:
            X_test_batch, y_test_batch = X_test_batch.to(device), y_test_batch.to(device)
            y_test_idx = torch.max(y_test_batch, 1)[1]

            y_test_pred = l0_model(X_test_batch)

            test_loss = criterion(y_test_pred, y_test_idx)
            test_acc = multi_acc(y_test_pred, y_test_idx)

            test_epoch_loss += test_loss.item()
            test_epoch_acc += test_acc.item()

    # averaged over the number of batches in each loader
    train_epoch_loss = train_epoch_loss/len(l0_train_loader)
    train_epoch_acc = train_epoch_acc/len(l0_train_loader)
    test_epoch_loss = test_epoch_loss/len(l0_test_loader)
    test_epoch_acc = test_epoch_acc/len(l0_test_loader)

    # Per-epoch progress line
    now = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    print("Epoch: {}/{} - {} - ".format(e+1, epochs, now),
      "Training Loss: {:.4f} - ".format(train_epoch_loss),
      "Training Accuracy: {:.4f}".format(train_epoch_acc),
      "Test Loss: {:.4f} - ".format(test_epoch_loss),
      "Test Accuracy: {:.4f}".format(test_epoch_acc))
    # Back to training mode (also the mode the model is left in after the last epoch).
    l0_model.train()

    # Epoch metrics
    l0_loss_stats_epoch['train'].append(train_epoch_loss)
    l0_loss_stats_epoch['test'].append(test_epoch_loss)
    l0_loss_stats_epoch['epoch'].append(e+1)
    l0_accuracy_stats_epoch['train'].append(train_epoch_acc)
    l0_accuracy_stats_epoch['test'].append(test_epoch_acc)
    l0_accuracy_stats_epoch['epoch'].append(e+1)

    # auto-save a checkpoint at every epoch for evaluation and continual training.
    # `path` is the same every epoch, so each save overrides the previous checkpoint.
    checkpoint = {
        'epochs': e+1,
        'model_state_dict': l0_model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_epoch_loss,
        'test_loss': test_epoch_loss,
        'test_accuracy': test_epoch_acc,
        'training_accuracy': train_epoch_acc
    }
    torch.save(checkpoint, path)

# Report how long the whole run took.
print("Total training time: {:.1f}s".format(time.time() - start))

## 8.2 Layer 1

In [None]:
# Training hyper-parameters for Layer 1: epoch budget and learning rate.
epochs, lr = 3, 0.0001
# Per-class loss weights for the two Layer 1 classes (index 0 first).
l1_class_weights = torch.tensor([1.5, 1]).to(torch.device(device))
# Class-weighted cross-entropy loss, optimised with Adam over the Layer 1 parameters.
criterion = nn.CrossEntropyLoss(weight=l1_class_weights)
optimizer = optim.Adam(params=l1_model.parameters(), lr=lr)

In [None]:
# Wall-clock start, used to report the total training time after the last epoch.
start = time.time()

# The checkpoint file name embeds the epoch budget, the model name and the start timestamp.
start_model_time = datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
model_name = 'binary_L1_net'
path = './checkpoint_model_{}_{}_{}'.format(epochs, model_name, start_model_time)

# Per-epoch history of accuracy and loss for the train and test splits.
l1_accuracy_stats_epoch = {
    'train': [],
    'test': [],
    'epoch': [],
}
l1_loss_stats_epoch = {
    'train': [],
    'test': [],
    'epoch': [],
}

for e in range(epochs):
    train_epoch_loss = 0
    train_epoch_acc = 0

    l1_model.train()
    for X_train_batch, y_train_batch in l1_train_loader:
        X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        # Loss and accuracy both take class indices: derive them once per batch
        # as the arg-max over dim 1 of the label rows.
        y_train_idx = torch.max(y_train_batch, 1)[1]

        optimizer.zero_grad()

        # Call the module itself rather than .forward() so registered hooks run.
        output = l1_model(X_train_batch)
        train_loss = criterion(output, y_train_idx)
        train_acc = multi_acc(output, y_train_idx)

        train_loss.backward()
        optimizer.step()

        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()

    # testing
    # NOTE(review): the test split is evaluated after every epoch for monitoring.
    with torch.no_grad():
        test_epoch_loss = 0
        test_epoch_acc = 0
        l1_model.eval()
        for X_test_batch, y_test_batch in l1_test_loader:
            X_test_batch, y_test_batch = X_test_batch.to(device), y_test_batch.to(device)
            y_test_idx = torch.max(y_test_batch, 1)[1]

            y_test_pred = l1_model(X_test_batch)

            test_loss = criterion(y_test_pred, y_test_idx)
            test_acc = multi_acc(y_test_pred, y_test_idx)

            test_epoch_loss += test_loss.item()
            test_epoch_acc += test_acc.item()

    # averaged over the number of batches in each loader
    train_epoch_loss = train_epoch_loss/len(l1_train_loader)
    train_epoch_acc = train_epoch_acc/len(l1_train_loader)
    test_epoch_loss = test_epoch_loss/len(l1_test_loader)
    test_epoch_acc = test_epoch_acc/len(l1_test_loader)

    # Per-epoch progress line
    now = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    print("Epoch: {}/{} - {} - ".format(e+1, epochs, now),
      "Training Loss: {:.4f} - ".format(train_epoch_loss),
      "Training Accuracy: {:.4f}".format(train_epoch_acc),
      "Test Loss: {:.4f} - ".format(test_epoch_loss),
      "Test Accuracy: {:.4f}".format(test_epoch_acc))
    # Back to training mode (also the mode the model is left in after the last epoch).
    l1_model.train()

    # Epoch metrics
    l1_loss_stats_epoch['train'].append(train_epoch_loss)
    l1_loss_stats_epoch['test'].append(test_epoch_loss)
    l1_loss_stats_epoch['epoch'].append(e+1)
    l1_accuracy_stats_epoch['train'].append(train_epoch_acc)
    l1_accuracy_stats_epoch['test'].append(test_epoch_acc)
    l1_accuracy_stats_epoch['epoch'].append(e+1)

    # auto-save a checkpoint at every epoch for evaluation and continual training.
    # `path` is the same every epoch, so each save overrides the previous checkpoint.
    checkpoint = {
        'epochs': e+1,
        'model_state_dict': l1_model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_epoch_loss,
        'test_loss': test_epoch_loss,
        'test_accuracy': test_epoch_acc,
        'training_accuracy': train_epoch_acc
    }
    torch.save(checkpoint, path)

# Report how long the whole run took.
print("Total training time: {:.1f}s".format(time.time() - start))

# 9. Graph visualizations of learning curves

Graphs covered

1. Accuracy against steps
2. Recall against steps
3. Precision against steps
4. F1 against steps
5. Loss against steps

In [None]:
def plot_graph(data, datasets, y_axis, x_axis):
    """Draw one line per requested series of ``data`` against a shared x series.

    Keyword arguments:
    data -- dictionary holding the series to plot and the x-axis series
            (for example keys 'train', 'test' and 'epoch')
    datasets -- list of keys of ``data`` to draw, one labelled line each
    y_axis -- label of the y axis, also used in the title. example: accuracy
    x_axis -- key of ``data`` holding the x values; also the x-axis label
    """
    plt.figure(figsize=(17, 8))
    for series_name in datasets:
        y_values = data[series_name]
        x_values = data[x_axis]
        plt.plot(x_values, y_values, label=series_name)
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)
    plt.title(f'Graph of {y_axis} against {x_axis}')
    plt.legend()
    plt.show()

## 9.1 Layer 0

In [None]:
# Layer 0 learning curves: accuracy, then loss, for the train and test splits per epoch.
plot_graph(data=l0_accuracy_stats_epoch, datasets=['train', 'test'], y_axis='accuracy', x_axis='epoch')
plot_graph(data=l0_loss_stats_epoch, datasets=['train', 'test'], y_axis='loss', x_axis='epoch')

## 9.2 Layer 1

In [None]:
# Layer 1 learning curves: accuracy, then loss, for the train and test splits per epoch.
plot_graph(data=l1_accuracy_stats_epoch, datasets=['train', 'test'], y_axis='accuracy', x_axis='epoch')
plot_graph(data=l1_loss_stats_epoch, datasets=['train', 'test'], y_axis='loss', x_axis='epoch')

# 10. Validation set performance

## 10.1 Init dataset and data loader for 3 classes

In [None]:
class Lung_Dataset(Dataset):
    """
    Three-class lung X-ray dataset (normal / infected_covid / infected_non_covid)
    for a single group (train, val or test).
    """

    def __init__(self, groups, dataset_numbers, dataset_paths):
        """
        Constructor for generic Dataset class - assembles
        the important parameters in attributes.

        Parameters
        ----------
        groups : str
            Allowed values: train, val, test
        dataset_numbers : dict
            Count of each class within specified group, keyed
            '<group>_<class>'. __getitem__ reads the first two counts
            positionally, so the order must be normal, infected_covid,
            infected_non_covid.
        dataset_paths : dict
            Path to each class within specified group, with the same keys
            as dataset_numbers.
        """

        self.img_size = (150, 150)
        self.classes = {
            0: 'normal',
            1: 'infected_covid',
            2: 'infected_non_covid'
        }
        self.groups = groups
        self.dataset_numbers = dataset_numbers
        self.dataset_paths = dataset_paths


    def describe(self):
        """
        Descriptor function.
        Will print details about the dataset when called.
        """

        # Generate description
        msg = "This is the {} dataset of the Lung Dataset".format(self.groups)
        msg += " used for the Small Project Demo in the 50.039 Deep Learning class"
        msg += " in March 2021. \n"
        msg += "It contains a total of {} images, ".format(sum(self.dataset_numbers.values()))
        msg += "of size {} by {}.\n".format(self.img_size[0], self.img_size[1])
        msg += "The images are stored in the following locations "
        msg += "and each one contains the following number of images:\n"
        for key, val in self.dataset_paths.items():
            msg += " - {}, in folder {}: {} images.\n".format(key, val, self.dataset_numbers[key])
        print(msg)


    def open_img(self, group_val, class_val, index_val):
        """
        Opens image with specified parameters.

        Parameters:
        - group_val should take values in 'train', 'test' or 'val' and must
          match the group(s) this dataset was built for.
        - class_val variable should be set to 'normal' or 'infected_non_covid' or 'infected_covid'.
        - index_val should be an integer between 0 and the number of images
          of that class minus one.

        Returns loaded image as a normalized Numpy array.

        Raises AssertionError when any of the parameters is inconsistent.
        """

        # self.groups is documented as a single string: wrap it so that the
        # membership test compares whole names instead of substrings
        # (otherwise 'va' would be accepted for 'val').
        allowed_groups = (self.groups,) if isinstance(self.groups, str) else self.groups

        # Asserts checking for consistency in passed parameters
        err_msg = "Error - group_val variable should be set to 'train', 'test' or 'val'."
        assert group_val in allowed_groups, err_msg

        err_msg = "Error - class_val variable should be set to 'normal' or 'infected_non_covid' or 'infected_covid."
        assert class_val in self.classes.values(), err_msg

        max_val = self.dataset_numbers['{}_{}'.format(group_val, class_val)]
        err_msg = "Error - index_val variable should be an integer between 0 and the maximal number of images."
        err_msg += "\n(In {}/{}, you have {} images.)".format(group_val, class_val, max_val)
        assert isinstance(index_val, int), err_msg
        # Valid indices stop at max_val - 1: index max_val has no image.
        assert 0 <= index_val < max_val, err_msg

        # Open file as before
        path_to_file = '{}/{}.jpg'.format(self.dataset_paths['{}_{}'.format(group_val, class_val)], index_val)
        with open(path_to_file, 'rb') as f:
            # Convert to Numpy array and normalize pixel values by dividing by 255.
            # The conversion happens inside the with-block, which also closes the file.
            im = np.asarray(Image.open(f))/255
        return im


    def show_img(self, group_val, class_val, index_val):
        """
        Opens, then displays image with specified parameters.

        Parameters:
        - group_val should take values in 'train', 'test' or 'val'.
        - class_val variable should be set to 'normal' or 'infected_covid' or 'infected_non_covid'
        - index_val should be an integer between 0 and the number of images
          of that class minus one.
        """

        # Open image
        im = self.open_img(group_val, class_val, index_val)

        # Display
        plt.imshow(im)

    def __len__(self):
        """
        Length special method, returns the number of images in dataset.
        """

        # Length function
        return sum(self.dataset_numbers.values())


    def __getitem__(self, index):
        """
        Getitem special method.

        Expects an integer value index, between 0 and len(self) - 1.

        Returns the image and its label as a one hot vector, both
        in torch tensor format in dataset.

        Raises IndexError when index is outside that range, which also lets
        plain iteration over the dataset terminate.
        """

        if not 0 <= index < len(self):
            raise IndexError("index {} is out of range for a dataset of {} images".format(index, len(self)))

        # Indices are laid out class after class: normal first, then
        # infected_covid, then infected_non_covid.
        counts = list(self.dataset_numbers.values())
        first_val = int(counts[0])
        second_val = int(counts[1])
        if index < first_val:
            class_val = 'normal'
            label = torch.Tensor([1, 0, 0])
        elif index < (first_val+second_val):
            class_val = 'infected_covid'
            index = index - first_val
            label = torch.Tensor([0, 1, 0])
        else:
            class_val = "infected_non_covid"
            index = index - (first_val+second_val)
            label = torch.Tensor([0, 0, 1])
        im = self.open_img(self.groups, class_val, index)
        im = transforms.functional.to_tensor(np.array(im)).float()
        return im, label

In [None]:
# Group served by this dataset instance.
val_group = 'val'

# Image count per class. The key order (normal, covid, non-covid) matters:
# Lung_Dataset.__getitem__ reads the first two counts positionally.
dataset_numbers_val = {
    'val_normal': 8,
    'val_infected_covid': 8,
    'val_infected_non_covid': 8,
}

# Folder holding the images of each class, keyed like the counts above.
dataset_paths_val = {
    'val_normal': './dataset/val/normal/',
    'val_infected_covid': './dataset/val/infected/covid/',
    'val_infected_non_covid': './dataset/val/infected/non-covid/',
}

ld_val = Lung_Dataset(
    groups=val_group,
    dataset_numbers=dataset_numbers_val,
    dataset_paths=dataset_paths_val,
)
ld_val.describe()

## 10.2 Perform validation

In [None]:
def combinePred(l0_model, l1_model, val_batch):
    '''
    Cascade the two binary models into one three-class prediction.

    The Layer 0 model is run first; only when it predicts class 1 is the
    Layer 1 model run on the same batch. Both models are switched to eval
    mode before use, and inference runs without gradient tracking.

    Keyword arguments:
    l0_model -- L0_Net object
    l1_model -- L1_Net object
    val_batch -- image tensor holding a single sample (batch size of 1);
                 the predicted index is converted with int(), which only
                 works for one-element results

    Returns a one hot torch.Tensor ordered [normal, covid, non-covid].
    '''
    with torch.no_grad():
        val_batch = val_batch.to(device)

        # Call the modules themselves rather than .forward() so hooks run.
        l0_model.eval()
        l0_pred = int(torch.max(l0_model(val_batch), 1)[1])
        if l0_pred == 0:
            # normal
            return torch.Tensor([1, 0, 0])

        l1_model.eval()
        l1_pred = int(torch.max(l1_model(val_batch), 1)[1])

    # NOTE(review): this treats Layer 1 class index 0 as covid, whereas the
    # notebook's classes_inc_ic dict maps 0 to 'infected_non_covid' - confirm
    # against the labels the Layer 1 dataset actually produces.
    if l1_pred == 0:
        # covid
        return torch.Tensor([0, 1, 0])
    # non-covid
    return torch.Tensor([0, 0, 1])
    

def get_ground_truth_prediction(data_loader):
    '''Gets the predicted & ground truth labels for each image in data_loader

    Keyword arguments:
    data_loader -- DataLoader object yielding (image batch, one hot label)
                   pairs with a batch size of 1

    Returns a tuple (result, correct):
    result -- dict of parallel lists: 'im' (the image that was classified),
              'ground_truth' and 'prediction' (class names looked up in
              ld_val.classes) and 'index' (position in data_loader)
    correct -- number of images whose prediction equals the ground truth
    '''
    correct = 0
    result = {
        'im': [],
        'ground_truth': [],
        'prediction': [],
        'index': []
    }
    for batch_idx, (X_val_batch, ground_truth) in enumerate(data_loader):
        # One hot label -> class index -> class name.
        ground_truth = ld_val.classes.get(torch.max(ground_truth, 1)[1].item())
        # combinePred returns a 1-D one hot vector, hence the arg-max over dim 0.
        prediction = combinePred(l0_model, l1_model, X_val_batch)
        prediction = ld_val.classes.get(torch.max(prediction, 0)[1].item())
        if ground_truth == prediction:
            correct += 1
        # Keep the image that was actually classified. Re-reading
        # ld_val[batch_idx] would pair predictions with the wrong pictures
        # whenever data_loader is shuffled or built on another dataset.
        result['im'].append(X_val_batch[0].cpu())
        result['ground_truth'].append(ground_truth)
        result['prediction'].append(prediction)
        result['index'].append(batch_idx)
    return result, correct

def plot_ground_truth_prediction(row, col, result):
    '''Plots the subplots of images with their ground truth and prediction

    The figure title reports the overall score, computed from ``result``
    itself (number of entries whose prediction equals the ground truth).

    Keyword arguments:
    row -- int value for number of rows of plot
    col -- int value for number of columns of plot
    result -- dict containing index, prediction, ground truth, image
    '''
    # Derive the summary from `result` rather than from notebook globals.
    total = len(result['ground_truth'])
    correct = sum(
        truth == predicted
        for truth, predicted in zip(result['ground_truth'], result['prediction'])
    )
    accuracy = round(correct/total,3) * 100 if total else 0.0

    # squeeze=False keeps axarr 2-D even when row or col is 1.
    f, axarr = plt.subplots(row,col,figsize=(20,30),squeeze=False)
    f.suptitle('Validation set pictures, with predicted and ground truth labels.\nAverage performance {}/{}={}%'.format(correct,total,accuracy))
    # axarr.flat walks the grid row by row, left to right.
    for counter, ax in enumerate(axarr.flat):
        if counter >= total:
            # More grid cells than results: leave the spare panels blank.
            ax.axis('off')
            continue
        ax.set_title(
            'Ground truth label: {}\nPredicted label: {}'.format(
                result['ground_truth'][counter],
                result['prediction'][counter])
            )
        ax.imshow(torch.squeeze(result['im'][counter]))

# Un-shuffled, single-image batches: positions in `result` follow dataset order.
val_loader = DataLoader(dataset=ld_val, batch_size=1, shuffle=False)
total = len(val_loader)
result, correct = get_ground_truth_prediction(val_loader)
# Share of correct predictions, as a percentage.
accuracy = round(correct/total,3) * 100
# 6 x 4 grid: one panel for each of the 24 validation images.
row, col = 6, 4

plot_ground_truth_prediction(row=row, col=col, result=result)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
val_loader = DataLoader(ld_val, batch_size = 1, shuffle = False)

# Number of validation images predicted correctly by the cascade.
acc = 0
# row = truth, col = pred
y_true = []
y_pred = []
for X_val_batch, y_val_batch in val_loader:
    X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)

    y_val_pred = combinePred(l0_model, l1_model, X_val_batch)
    y_val_pred = y_val_pred.to(device)
    # Both tensors are one hot: reduce each to its class index once.
    true_idx = int(torch.argmax(y_val_batch))
    pred_idx = int(torch.argmax(y_val_pred))
    batch_acc = true_idx == pred_idx
    y_true.append(true_idx)
    y_pred.append(pred_idx)
    acc += batch_acc

print('combined accuracy:', acc/len(val_loader))
# Pin the label set so the matrix is always 3 x 3 and lines up with the three
# display labels, even if a class never occurs in y_true or y_pred.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1, 2])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.array(['normal','covid','non-covid']))
disp.plot()

# 11. Other useful metrics

Precision = $\frac{tp}{tp + fp}$

Recall = $\frac{tp}{tp + fn}$

For this project, the metrics can be applied differently considering the needs of different stakeholders.

**Considering Covid, we cite 2 stakeholders who could have different concerns**

For example, the general public could be concerned about the spread of the virus; hence, they could be more worried about Recall, because false negatives could mean that people with the virus are in public spaces and mingling with others.

On the other hand, clinicians could be more concerned about Precision. This is because there are limited hospital beds, and it would be a waste to allocate scarce resources to people who were predicted to have covid but do not actually have it.

In [None]:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
import json

# average=None requests one score per class, reported in the order of `labels`.
p_r_fs = precision_recall_fscore_support(y_true, y_pred, average=None, labels=[0, 1, 2])
precision, recall, f_beta_score = p_r_fs[0], p_r_fs[1], p_r_fs[2]

# Display name of each class index, in the same order as `labels`.
overall_labels = ['normal','covid','non-covid']

# Gather the three scores of every class under its display name.
overall_results = {}
for index, label in enumerate(overall_labels):
  overall_results[label] = {
      'precision': precision[index],
      'recall': recall[index],
      'f_beta_score': f_beta_score[index],
  }

print(json.dumps(overall_results, indent=4, sort_keys=True))

# 12. Feature maps

Reference: https://discuss.pytorch.org/t/visualize-feature-map/29597

In [None]:
# Visualize feature maps
activation = {}
def get_activation(name):
    """Return a forward hook that stores the hooked module's detached output in activation[name]."""
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# Register the hook once and remove it afterwards. Registering it on every
# iteration stacks up duplicate hooks that all run on each later forward pass.
hook_handle = l0_model.conv1.register_forward_hook(get_activation('conv1'))
try:
    # One row per validation batch, one column per conv1 output channel.
    # squeeze=False keeps axarr 2-D whatever the grid size.
    fig, axarr = plt.subplots(len(val_loader), l0_model.conv1.out_channels, figsize=(28,60), squeeze=False)
    with torch.no_grad():
        for batch_idx, (X_val_batch, ground_truth) in enumerate(val_loader):
            output = l0_model(X_val_batch.to(device))
            # Assumes single-image batches, so squeeze() leaves (channels, height, width).
            act = activation['conv1'].squeeze()
            # Integer mid column, so a title is placed for odd channel counts too.
            mid_point = act.size(0) // 2
            for idx in range(act.size(0)):
                if idx == mid_point:
                    axarr[batch_idx,idx].set_title(
                              'Ground Truth for Row {}: {}\nPrediction for row {}: {}'.format(
                                  batch_idx,
                                  result['ground_truth'][batch_idx],
                                  batch_idx,
                                  result['prediction'][batch_idx])
                              )
                else:
                    axarr[batch_idx,idx].set_title('')
                axarr[batch_idx, idx].imshow(act[idx].cpu().numpy())
finally:
    hook_handle.remove()

In [None]:
# Display the first 'normal' image (index 0) of the validation group.
ld_val.show_img('val', 'normal', 0)

# References

Leszczynski, M. (2010). Image Preprocessing for Illumination Invariant Face 
Verification. Journal of telecommunications and information technology, 19-25.