### Train a model on the balanced dataset

In [1]:
# Run once to install
!pip install image-classifiers==0.2.2
!pip install image-classifiers==1.0.0b1
!pip install imgaug

Collecting image-classifiers==0.2.2
  Using cached image_classifiers-0.2.2-py2.py3-none-any.whl (72 kB)
Installing collected packages: image-classifiers
Successfully installed image-classifiers-0.2.2
Collecting image-classifiers==1.0.0b1
  Using cached image_classifiers-1.0.0b1-py3-none-any.whl
Installing collected packages: image-classifiers
  Attempting uninstall: image-classifiers
    Found existing installation: image-classifiers 0.2.2
    Uninstalling image-classifiers-0.2.2:
      Successfully uninstalled image-classifiers-0.2.2
Successfully installed image-classifiers-1.0.0b1


In [2]:
# Import libs
import os 
import time
import cv2
import numpy as np
import matplotlib.pyplot as plt
from keras import optimizers
import keras
import tensorflow as tf
import keras.backend as K
from sklearn.metrics import confusion_matrix, classification_report
from keras.models import load_model
from keras.models import Sequential
from keras.regularizers import l2
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras.utils import np_utils
from imgaug import augmenters as iaa    
import itertools

Using TensorFlow backend.


In [3]:
# Import libs
# import keras
from classification_models.keras import Classifiers

In [7]:
# Seed NumPy's global RNG so NumPy-based randomness in this notebook is repeatable.
# NOTE(review): only NumPy is seeded here; TensorFlow/Keras RNGs are not.
np.random.seed(42)

# Report the framework versions in use
for _label, _framework in (("Keras Version", keras), ("Tensorflow Version", tf)):
    print(_label, _framework.__version__)


# GPU test
from tensorflow.python.client import device_lib
def get_available_gpus():
    """Return the names of all local devices whose device type is 'GPU'."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

# Print the names of the local GPU devices (the list is empty when none is reported)
print(get_available_gpus())

# Get compute specs
# NOTE(review): the return value of the call below is discarded; since it is not the
# last expression of the cell nothing is displayed — wrap it in print() to see it.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()



# Helper functions

def create_directory(directory):
    '''
    Creates the folder `directory` (including any missing parent folders) if it
    doesn't exist yet. An already existing folder is left untouched.
    INPUT
        directory: Path of the folder to be created, e.g. "folder/".
    OUTPUT
        None. The folder exists on disk after the call.
    RAISES
        FileExistsError if `directory` exists but is not a folder.
    '''
    # exist_ok=True replaces the separate "exists?" check, so a folder that
    # appears between the check and the creation no longer raises an error.
    os.makedirs(directory, exist_ok=True)


def plot_hist(img):
    """Print the smallest and largest value of `img` and show a 20-bin histogram
    of all its values."""
    pixels = img.flatten()
    print(min(pixels), max(pixels))

    # Histogram over every value of the flattened array
    plt.hist(pixels, bins=20, color='c')
    plt.xlabel("Pixel values")
    plt.ylabel("Frequency")
    plt.grid(True)

    plt.show()


# Focal loss function
##################################################################################
# Paper: https://arxiv.org/abs/1708.02002

#Focal loss down-weights the well-classified examples. This has
#the net effect of putting more training emphasis on the data that is hard to classify.
#In a practical setting where we have a data imbalance, our majority class will quickly
#become well-classified since we have much more data for it. Thus, in order to ensure that we
#also achieve high accuracy on our minority class, we can use the focal loss to give those minority
#class examples more relative weight during training.

from keras import backend as K
import tensorflow as tf

def focal_loss(gamma=2., alpha=.25):
    """Build a focal-loss function (https://arxiv.org/abs/1708.02002) for Keras.

    gamma: exponent of the modulating factor; larger values shrink the
        contribution of well-classified entries more strongly.
    alpha: weight of the term for entries with y_true == 1; the term for
        entries with y_true == 0 is weighted by (1 - alpha).

    Returns the function `focal_loss_fixed(y_true, y_pred)`, which can be
    passed as `loss=` to `model.compile`.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Keep predictions strictly inside (0, 1). A saturated output of exactly
        # 0 or 1 would otherwise produce log(0) = -inf and an inf/NaN loss.
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)

        # pt_1 holds the prediction where the label is 1 and exactly 1 elsewhere,
        # pt_0 holds the prediction where the label is 0 and exactly 0 elsewhere,
        # so the filler entries contribute 0 to their respective term.
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

        positive_term = K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))
        negative_term = K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
        return -positive_term - negative_term
    return focal_loss_fixed
##################################################################################


# Define paths (all relative to the parent of the working directory)
base_path = os.path.abspath("../")
dataset_path = os.path.join(base_path, "dataset", "isic2016numpy")
model_path = os.path.join(base_path, "models")
print(os.listdir(dataset_path))


# Load data: upsampled arrays for training, the test arrays for evaluation
x_train, y_train, x_test, y_test = (
    np.load("{}/{}".format(dataset_path, file_name))
    for file_name in ("x_upsampled.npy", "y_upsampled.npy", "x_test.npy", "y_test.npy")
)


# Shuffle training dataset (any value other than 1 keeps the stored order)
flag = 1
if flag != 1:
    print("Not shuffling...")
else:
    # One shared index permutation keeps every image aligned with its label
    print("Shuffling data")
    s = np.arange(x_train.shape[0])
    np.random.shuffle(s)
    x_train, y_train = x_train[s], y_train[s]



# Show shape
print("Dataset sample size :", *(arr.shape for arr in (x_train, y_train, x_test, y_test)))


# Sanity check on training data
#img = x_train[0]
#plot_hist(img)

#plt.imshow(x_train[0])

Keras Version 2.3.1
Tensorflow Version 1.15.5
[]
['x_test.npy', 'x_train.npy', 'x_upsampled.npy', 'y_test.npy', 'y_train.npy', 'y_upsampled.npy']
Shuffling data
Dataset sample size : (1627, 256, 256, 3) (1627, 2) (379, 256, 256, 3) (379, 2)


In [None]:


# Define architecture: look up the 'vgg16' entry, which yields the object later
# called to build the backbone plus its matching preprocessing function
arch, preprocess_input = Classifiers.get('vgg16')


# Preprocess the dataset

# 1. Use model preprocessing
#x_train = preprocess_input(x_train)
#x_test = preprocess_input(x_test)


# 2. Use standard preprocessing
prepro = False # False when using synthetic data

if not prepro:
    print("Not preprocessing training data, already preprocessed in MeGAN generator.")
else:
    print("Preprocessing training data")
    x_train = x_train.astype('float32') / 255

# Standardize test set
# NOTE(review): x_test is overwritten from itself, so re-running this cell divides
# by 255 a second time — reload the data before re-running.
x_test = x_test.astype('float32') / 255

print(x_train.shape, x_test.shape)

# Sanity check on preprocessed data
#img = x_test[0]
#plot_hist(img)
#plt.imshow(x_test[0])

In [None]:
# Experiment name (also used as output folder name and as file-name prefix)
EXP_NAME = "results"

# Create folder for the experiment
create_directory("{}/{}".format(base_path, EXP_NAME))
output_path = os.path.join(base_path, EXP_NAME)


# Callbacks
weights_path = "{}/{}.h5".format(output_path, EXP_NAME)

# Write the model to weights_path, keeping only the best one by val_loss
checkpointer = ModelCheckpoint(
    filepath=weights_path,
    monitor='val_loss',
    save_best_only=True,
    verbose=1)

# new_lr = lr * factor after 5 epochs without val_loss improvement
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='auto',
    factor=0.1,
    patience=5,
    min_lr=1e-8,
    verbose=1)

# Stop after 8 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0,
    patience=8,
    restore_best_weights=True,
    verbose=1)

# Per-epoch metrics are appended to a CSV file next to the weights
csv_logger = CSVLogger('{}/{}_training.csv'.format(output_path, EXP_NAME))


# Define class weights for imbalanced data
from sklearn.utils import class_weight

# y_train is one-hot encoded, so argmax recovers the integer class index per sample
train_labels = np.argmax(y_train, axis=1)
class_ids = np.unique(train_labels)

# Keyword arguments are used because newer scikit-learn releases no longer
# accept these parameters positionally.
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced', classes=class_ids, y=train_labels)

# Keras' fit(class_weight=...) is documented to take a dict {class index: weight},
# so convert the returned array instead of passing it through as-is.
class_weights = {int(cls): float(weight) for cls, weight in zip(class_ids, balanced_weights)}
print(class_weights)


def my_awesome_model():
    """Build and compile the classifier: `arch` backbone, global average pooling
    and a 2-unit softmax layer, trained with focal loss and Adadelta.

    Prints the model summary and returns the compiled Keras model.
    """
    # ImageNet-initialised backbone without its classification top
    backbone = arch(input_shape=(256,256,3), weights='imagenet', include_top=False)

    # Classification head: GAP followed by the 2-unit softmax output
    pooled = keras.layers.GlobalAveragePooling2D()(backbone.output)
    predictions = keras.layers.Dense(2, activation='softmax', trainable=True)(pooled)

    # No layer is frozen here (the freezing loop is disabled):
    #for layer in base_model.layers[:]:
    #layer.trainable=False

    classifier = keras.models.Model(inputs=[backbone.input], outputs=[predictions])

    # Optimizer and loss
    classifier.compile(
        optimizer=optimizers.Adadelta(lr=0.001),
        loss=[focal_loss(alpha=.25, gamma=2)],
        metrics=['accuracy'])

    # Output model configuration
    classifier.summary()

    return classifier


# Rebind `model` to None first, then build and compile a fresh model.
# NOTE(review): presumably the reset is meant to drop a model left over from a
# previous run of this cell — confirm.
model = None
model = my_awesome_model()

In [None]:
# Train the awesome model

# Configuration
batch_size = 16
epochs = 300

# Calculate the starting time
start_time = time.time()

# NOTE(review): x_test/y_test double as the validation set, so early stopping,
# LR reduction and checkpointing are all driven by the test data.
model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test),
            class_weight = class_weights,
            callbacks=[csv_logger, early_stopping, reduce_lr, checkpointer], # early_stopping, checkpointer, reduce_lr
            shuffle=False)  # no per-epoch reshuffling of the training data


end_time = time.time()
# True division: floor division (//) reported every run shorter than one hour as 0.0
print("--- Time taken to train : %s hours ---" % round((end_time - start_time) / 3600, 2))

# Save model
# If checkpointer is used, dont use this
#model.save(weights_path)

In [None]:
# Plot and save accuracy and loss curves together
def plot_loss_accu_all(history):
    """Draw train/val accuracy and train/val loss in one figure, save it and show it.

    `history.history` must provide the keys 'loss', 'val_loss', 'accuracy' and
    'val_accuracy'. The figure is written to
    '<output_path>/<EXP_NAME>_acc_loss_graph.jpg'.
    """
    logs = history.history
    train_loss, valid_loss = logs['loss'], logs['val_loss']
    train_acc, valid_acc = logs['accuracy'], logs['val_accuracy']
    x_axis = range(len(train_loss))

    # Drawing order must match the legend entries below
    curves = ((train_acc, 'r'), (valid_acc, 'b'), (train_loss, 'g'), (valid_loss, 'y'))
    for values, fmt in curves:
        plt.plot(x_axis, values, fmt)
    plt.title('Accuracy/Loss')

    plt.legend(['trainacc', 'valacc', 'trainloss', 'valloss'], loc='lower right', fontsize=10)
    plt.grid(True)
    plt.savefig('{}/{}_acc_loss_graph.jpg'.format(output_path, EXP_NAME), dpi=100)
    plt.show()

# Plot accuracy and loss curves in separate figures
def plot_loss_accu(history):
    """Show two figures: train/val loss first, then train/val accuracy.

    `history.history` must provide the keys 'loss', 'val_loss', 'accuracy' and
    'val_accuracy'. Nothing is written to disk (the savefig calls are disabled).
    """
    def _show_curves(train_key, val_key, train_fmt, val_fmt, y_label, legend_loc):
        # Draw one train/val pair against the epoch index and display the figure
        train_values = history.history[train_key]
        val_values = history.history[val_key]
        x_axis = range(len(train_values))
        plt.plot(x_axis, train_values, train_fmt)
        plt.plot(x_axis, val_values, val_fmt)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['train', 'val'], loc=legend_loc)
        plt.grid(True)
        #plt.savefig('{}/{}_<loss|acc>.jpg'.format(output_path, EXP_NAME), dpi=100)
        #plt.savefig('{}/{}_<loss|acc>.pdf'.format(output_path, EXP_NAME), dpi=300)
        plt.show()

    _show_curves('loss', 'val_loss', 'g', 'y', 'Loss %', 'upper right')
    _show_curves('accuracy', 'val_accuracy', 'r', 'b', 'Accuracy %', 'lower right')

# Show the loss and accuracy curves.
# NOTE(review): relies on `model.history` having been set by the preceding
# model.fit() call — this cell fails on a model that has not been trained.
plot_loss_accu(model.history)
print("Done training and logging!")