In [10]:
import re
import os
from os import listdir
from os.path import isfile, join

import numpy as np
import pandas as pd

from keras import applications
from keras import optimizers
import tensorflow.keras as K

from PIL import Image

# Seed the random number generators through Keras so repeated runs are comparable.
SEED = 270219
K.utils.set_random_seed(SEED)

We will start with two functions: one to get the ID of each image and one to find the index of each training-set image. The code assumes a folder named `data` that contains the images and the CSV file. We store all the image file names in the variable `images`; `IDs` holds the ID of each image (the number after `img_`), and `labels` contains each ID with its respective label. `trainIndex` then holds the positions in `images` of the images whose IDs appear in `labels.csv`.

In [11]:
def getImageID(imageList):
    """Extract the digit groups contained in each image file name.

    Parameters
    ----------
    imageList : iterable of str
        File names, e.g. ``"img_12.png"``.

    Returns
    -------
    list of list of str
        One entry per file name, holding every run of digits found in that
        name in order of appearance (``["12"]`` for ``"img_12.png"``).
        A name without digits yields an empty list.
    """
    # Raw string: "\d" inside a normal literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    digit_run = re.compile(r"\d+")
    return [digit_run.findall(name) for name in imageList]

def getTrainIndex(IDs, labelsId):
    """Find the positions in ``IDs`` that correspond to the labelled ids.

    Parameters
    ----------
    IDs : sequence of list of str
        Output of ``getImageID``: for each file, the digit groups found in
        its name. Only the first group of each entry is compared.
    labelsId : iterable
        Labelled ids; each one is converted with ``str`` before comparison.

    Returns
    -------
    list of int
        Positions in ``IDs`` whose first digit group equals a labelled id,
        ordered by the labels (and by position when several files share an id).
    """
    # Build the lookup once instead of rescanning IDs for every label.
    positions = {}
    for IDindex, ID in enumerate(IDs):
        if ID:  # file names without digits cannot match any label
            positions.setdefault(ID[0], []).append(IDindex)

    index = []
    for labeled in labelsId:
        index.extend(positions.get(str(labeled), ()))
    return index

# Build the data folder path with os.path.join so it works on every OS; the
# trailing empty component keeps the trailing separator that the
# `dataPath+image` concatenations elsewhere in the notebook rely on.
localPath = os.getcwd()
dataPath = join(localPath, "data", "")

# Exclude the labels file by name instead of dropping "the last entry":
# the order returned by listdir is not guaranteed.
labelsFile = "labels.csv"
images = [f for f in listdir(dataPath) if isfile(join(dataPath, f)) and f != labelsFile]

IDs = getImageID(images)
labels = pd.read_csv(join(dataPath, labelsFile))
# Shift the labels by one -- presumably so the classes start at 0 for the
# sparse categorical loss; TODO confirm against the values in labels.csv.
labels.malignant = labels.malignant+1
# Positions in `images` of the files whose ID appears in the labels table.
trainIndex = getTrainIndex(IDs,labels.id)

Great, so we have the images. Not all images are the same size, however, so let's find the maximum image size so that we know how much we need to pad the images.

In [12]:
# Largest width and height over all images, kept as [width, height]
# (the same order as PIL's Image.size).
imageSize = [0,0]
for image in images:
    # Image.open keeps the file handle open until the image is closed;
    # the context manager releases it as soon as the size has been read.
    with Image.open(dataPath+image) as im:
        width, height = im.size
    imageSize[0] = max(imageSize[0], width)
    imageSize[1] = max(imageSize[1], height)
print("Maximum image size will be: "+ str(imageSize))

Maximum image size will be: [896, 896]


Now that we know what the maximum size will be, let's pad all the images to our desired dimensions.

In [13]:
def add_margin(pil_img, top, right):
    """Return a copy of ``pil_img`` enlarged by ``top`` rows and ``right`` columns.

    A new image of the same mode is created with fill colour ``(0, 0, 0)``
    and the original is pasted at ``(0, top)``, so the added rows end up
    above the picture and the added columns to its right.
    """
    src_width, src_height = pil_img.size
    canvas = Image.new(pil_img.mode, (src_width + right, src_height + top), (0, 0, 0))
    canvas.paste(pil_img, (0, top))
    return canvas

# Pad every image to the maximum size and store the result in data_padded.
# Files that already exist are skipped, so re-running the cell is cheap;
# delete the folder to force all padded images to be regenerated.
paddedDir = "data_padded"
os.makedirs(paddedDir, exist_ok=True)  # first run: the folder does not exist yet

for image in images:
    target = join(paddedDir, image)
    if isfile(target):
        continue
    with Image.open(dataPath+image) as im:
        dWidth = imageSize[0] - im.size[0]
        dHeight = imageSize[1] - im.size[1]
        # Images that already have the maximum size are saved unchanged.
        padded = add_margin(im, dHeight, dWidth) if (dWidth or dHeight) else im
        padded.save(target)

In [14]:
paddedImages = [f for f in listdir("data_padded") if isfile(join("data_padded", f))]

# trainIndex holds positions in `images`, so the file names are taken from
# `images`; a second directory listing is not guaranteed to come back in the
# same order. Padded files are saved under the same names as the originals.
trainData = []
for i in trainIndex:
    with Image.open('data_padded/'+images[i]) as im:
        pixels = K.utils.img_to_array(im)
    # Standardise each image to zero mean and unit variance; a constant
    # image has zero spread and would otherwise turn into NaNs.
    spread = np.std(pixels)
    trainData.append((pixels - np.mean(pixels)) / (spread if spread > 0 else 1.0))
trainData = np.array(trainData)
labelData = np.array(labels.malignant)

# Samples follow the row order of `labels`; this only lines up when every
# labelled id matched exactly one image file.
assert len(trainData) == len(labelData), "number of training images does not match number of labels"

In [15]:
# Keras image tensors are (height, width, channels), whereas imageSize is
# stored as [width, height] (PIL order), so the two entries are swapped here.
# This is identical for the square maximum found above but stays correct for
# non-square data.
resmodel = applications.ResNet50(include_top=False, weights='imagenet', input_shape=(imageSize[1], imageSize[0], 3))

# Pre-trained ResNet50 backbone followed by a 3-way softmax classifier.
model = K.models.Sequential()
model.add(resmodel)
model.add(K.layers.Flatten())
model.add(K.layers.Dense(3, activation='softmax'))
# Freeze everything except the last two layers, i.e. the ResNet50 backbone.
for layer in model.layers[:-2]:
    layer.trainable = False
# Show which layers will be updated during training.
for i, layer in enumerate(model.layers):
    print(i, layer.name, "-", layer.trainable)
# Sparse loss: the labels are integer class ids, not one-hot vectors.
model.compile(loss = "sparse_categorical_crossentropy", optimizer = optimizers.Adam(learning_rate=0.0001), metrics=["accuracy"])

0 resnet50 - False
1 flatten_1 - True
2 dense_1 - True


In [16]:
# Fit the model on the standardised training images, then print its layout.
history = model.fit(
    x=trainData,
    y=labelData,
    batch_size=16,
    epochs=10,
    verbose=1,
)
model.summary()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 28, 28, 2048)      23587712  
                                                                 
 flatten_1 (Flatten)         (None, 1605632)           0         
                                                                 
 dense_1 (Dense)             (None, 3)                 4816899   
                                                                 
Total params: 28,404,611
Trainable params: 4,816,899
Non-trainable params: 23,587,712
_________________________________________________________________
