### Import libraries

In [11]:
import torchvision.transforms as T
from PIL import Image
from matplotlib import pyplot
import numpy as np
import os
import torch
from torchvision import datasets, models
from torch.utils.data import Dataset, DataLoader

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

import cv2
import random
import math

### Resize images and setup labels

In [15]:
# Accumulators filled by resize_and_label.
# Suffix 1 -> Model1 (external damage), suffix 2 -> Model2 (internal damage).
train_imgs1, train_labels1 = [], []
train_imgs2, train_labels2 = [], []
test_imgs1, test_labels1 = [], []
test_imgs2, test_labels2 = [], []

# Training folders, one per (model, class) combination.
filepath_no_external_damage = 'train_images/no_external_damage'
filepath_external_damage = 'train_images/external_damage'
filepath_no_internal_damage = 'train_images/no_internal_damage'
filepath_internal_damage = 'train_images/internal_damage'

# Test folders, mirroring the training layout.
filepath_test_no_ext = 'test_images/no_external_damage'
filepath_test_ext = 'test_images/external_damage'
filepath_test_no_int = 'test_images/no_internal_damage'
filepath_test_int = 'test_images/internal_damage'

# The only transform applied to the images is the conversion to a tensor.
no_transforms = T.Compose([T.ToTensor()])

def resize_and_label(filepath,train_imgs1,train_labels1,test_imgs1,test_labels1,train_imgs2,train_labels2,test_imgs2,test_labels2):
    """Load the images of one folder, preprocess them and file them with a label.

    Every ``.jpg`` / ``.png`` / ``.JPG`` file in ``filepath`` is centre-cropped
    to a square, resized to 256x256 and passed through ``no_transforms``.
    The result is appended, together with its label, to the list pair that
    corresponds to ``filepath``:

    * ``*1`` lists belong to Model1 (external damage),
    * ``*2`` lists belong to Model2 (internal damage),
    * label 0 is used for the "no damage" folders, label 1 for the
      "damage" folders.

    Args:
        filepath: Folder to scan; expected to equal one of the module-level
            ``filepath_*`` constants. For any other folder nothing is
            appended.
        train_imgs1, train_labels1: Training images/labels for Model1.
        test_imgs1, test_labels1: Test images/labels for Model1.
        train_imgs2, train_labels2: Training images/labels for Model2.
        test_imgs2, test_labels2: Test images/labels for Model2.

    Returns:
        The same eight lists (mutated in place), in the order
        ``train_imgs1, train_labels1, test_imgs1, test_labels1,
        train_imgs2, train_labels2, test_imgs2, test_labels2``.

    Raises:
        OSError: If ``filepath`` cannot be listed.
    """
    entries = os.listdir(filepath)
    # The count covers every directory entry, not only the image files.
    print('The number of images in',filepath, 'is',len(entries))

    # Routing table: (folder, image list, label list, label).
    # The first matching row wins, so if two constants point at the same
    # folder the earlier row takes precedence.
    routes = (
        (filepath_no_external_damage, train_imgs1, train_labels1, 0),
        (filepath_no_internal_damage, train_imgs2, train_labels2, 0),
        (filepath_external_damage, train_imgs1, train_labels1, 1),
        (filepath_internal_damage, train_imgs2, train_labels2, 1),
        (filepath_test_no_ext, test_imgs1, test_labels1, 0),
        (filepath_test_no_int, test_imgs2, test_labels2, 0),
        (filepath_test_ext, test_imgs1, test_labels1, 1),
        (filepath_test_int, test_imgs2, test_labels2, 1),
    )
    destination = next(
        ((imgs, labels, label)
         for folder, imgs, labels, label in routes
         if folder == filepath),
        None,
    )

    if destination is not None:
        dest_imgs, dest_labels, label = destination

        for name in entries:
            if not name.endswith(('.jpg', '.png', '.JPG')):
                continue

            # Read image; cv2.imread signals failure by returning None
            # instead of raising, so check before touching .shape.
            image_path = os.path.join(filepath, name)
            img = cv2.imread(image_path)
            if img is None:
                print('Skipping unreadable image', image_path)
                continue

            # Crop image to a centred square
            height, width = img.shape[0], img.shape[1]
            crop_size = min(height, width)
            start_row = (height - crop_size) // 2
            start_col = (width - crop_size) // 2
            img = img[start_row:start_row + crop_size,
                      start_col:start_col + crop_size]

            # Rescale image
            img = cv2.resize(img, (256,256), interpolation=cv2.INTER_CUBIC)

            # ToTensor yields a channel-first tensor; moveaxis puts the
            # channel axis last again so the stored array is H x W x C.
            # NOTE(review): torchvision documents ToTensor as scaling uint8
            # input to floats in [0, 1] -- confirm consumers expect that.
            img = no_transforms(np.array(img))
            img = np.moveaxis(img.numpy(), 0, 2)

            dest_imgs.append(img)
            dest_labels.append(label)

    return train_imgs1, train_labels1, test_imgs1, test_labels1,train_imgs2, train_labels2, test_imgs2, test_labels2

# Run the preprocessing once per folder, in a fixed order, threading the
# eight accumulator lists through every call.
for folder in (
    filepath_no_external_damage,
    filepath_no_internal_damage,
    filepath_external_damage,
    filepath_internal_damage,
    filepath_test_no_ext,
    filepath_test_no_int,
    filepath_test_ext,
    filepath_test_int,
):
    (train_imgs1, train_labels1, test_imgs1, test_labels1,
     train_imgs2, train_labels2, test_imgs2, test_labels2) = resize_and_label(
        folder,
        train_imgs1, train_labels1, test_imgs1, test_labels1,
        train_imgs2, train_labels2, test_imgs2, test_labels2,
    )

# Shuffle the training images and labels of both models.
# The generator is re-seeded with the same value before every shuffle so
# that an image list and its equally long label list are permuted the same
# way, which keeps image i paired with label i.
for sequence in (train_imgs1, train_labels1, train_imgs2, train_labels2):
    random.seed(12)
    random.shuffle(sequence)

# Uncomment to display each image together with its label.
# The loops below use the Model1 (external damage) lists; switch to
# train_imgs2/train_labels2 and test_imgs2/test_labels2 to inspect Model2.
# for i in range(len(train_imgs1)):
#     img = train_imgs1[i]
#     pyplot.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
#     pyplot.axis("off")
#     pyplot.show()
#     print(train_labels1[i])

# for i in range(len(test_imgs1)):
#     img = test_imgs1[i]
#     pyplot.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
#     pyplot.axis("off")
#     pyplot.show()
#     print(test_labels1[i])

# Report how many samples ended up in each dataset (one line per dataset).
for summary, labels in (
    ('The number of training images for external damage are', train_labels1),
    ('The number of training images for internal damage are', train_labels2),
    ('The number of test images for external damage are', test_labels1),
    ('The number of test images for internal damage are', test_labels2),
):
    print(summary, len(labels))

The number of images in train_images/no_damage is 50
The number of images in train_images/external_damage is 56
The number of images in train_images/internal_damage is 44
The number of images in test_images/no_damage is 14
The number of images in test_images/external_damage is 12
The number of images in test_images/internal_damage is 4
The number of training images for external damage are 104
The number of training images for internal damage are 92
The number of test images for external damage are 40
The number of test images for internal damage are 4


### Split training, validation and test dataset

In [16]:
# No validation split is made in this cell: the validation set is left empty
# and only these placeholders are defined.
valid_imgs, valid_labels = [], []

# Ratio-based split kept for reference. It is disabled and refers to
# train_imgs / train_labels, names that are not defined in the cells shown.
# n = len(train_imgs)
# valid_imgs = train_imgs[math.floor(0.85*n):n]
# valid_labels = train_labels[math.floor(0.85*n):n]
# test_imgs = train_imgs[math.floor(0.7*n):n]
# test_labels = train_labels[math.floor(0.7*n):n]
# train_imgs = train_imgs[0:math.ceil(0.7*n)]
# train_labels = train_labels[0:math.ceil(0.7*n)]

In [17]:
# np.savez does not create missing parent folders, so make sure the output
# folder exists; otherwise the save fails only after all preprocessing is done.
os.makedirs("Models", exist_ok=True)

# One archive per model. Both receive the same (empty) validation lists.
np.savez("Models/external", train_imgs=train_imgs1, train_labels=train_labels1, valid_imgs=valid_imgs, valid_labels=valid_labels, test_imgs=test_imgs1, test_labels=test_labels1)

np.savez("Models/internal", train_imgs=train_imgs2, train_labels=train_labels2, valid_imgs=valid_imgs, valid_labels=valid_labels, test_imgs=test_imgs2, test_labels=test_labels2)
