# Keras Image Classification for AS
## Import libraries and select the GPU to use via CUDA_VISIBLE_DEVICES

### Classify by segments

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '2'

import keras
from keras.layers import Dense, Reshape, Flatten, Dropout
from keras.models import Model
import keras.backend as K
from keras.utils import multi_gpu_model
import tensorflow as tf
import numpy as np
import shutil
import glob
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from random import shuffle
from scipy import misc
import imageio
from PIL import Image
import datetime
import keras.backend as K
from keras.utils import multi_gpu_model
from sklearn.metrics import confusion_matrix
from scipy.misc import imresize
import tensorflow as tf


%matplotlib inline
import matplotlib.pyplot as plt

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Helper functions (to be moved into a separate utils.py later)

In [2]:
def getBatchGens(trainPath, validPath, batchSize, trainShuffle=True, targetSize=(128,128)):
    """Create the directory iterators for training and validation images.

    The training iterator applies light augmentation (shear, zoom, horizontal
    and vertical flips); the validation iterator only rescales. Both resize
    every image to ``targetSize`` and are created with ``class_mode='binary'``.

    Args:
        trainPath: directory with one sub-folder per class for training.
        validPath: directory with one sub-folder per class for validation.
        batchSize: number of images per batch for both iterators.
        trainShuffle: whether the training iterator shuffles its samples.
        targetSize: (height, width) every image is resized to.

    Returns:
        Tuple ``(trainBatches, validBatches)``. The validation iterator is
        never shuffled, so its batches follow the order of ``.filenames``.
    """
    # Float literal on purpose: with integer division (Python 2) ``1/255``
    # evaluates to 0 and every pixel would be scaled to zero.
    rescale = 1. / 255

    datagenTrain = ImageDataGenerator(
            shear_range=0.1,
            horizontal_flip=True,
            vertical_flip=True,
            zoom_range=0.1,
            rescale=rescale,
            fill_mode='nearest')

    datagenValid = ImageDataGenerator(rescale=rescale)

    # To inspect augmented samples, additionally pass
    # save_to_dir=<output directory> and save_format='jpg' below.
    trainBatches = datagenTrain.flow_from_directory(
            trainPath,
            target_size=targetSize,
            batch_size=batchSize,
            class_mode='binary',
            shuffle=trainShuffle)

    validBatches = datagenValid.flow_from_directory(
            validPath,
            target_size=targetSize,
            batch_size=batchSize,
            class_mode='binary',
            shuffle=False)
    return trainBatches, validBatches

class leakyBatches(object):
    """Iterator that wraps a batch generator and emits two-column soft labels.

    Each scalar label coming from the wrapped generator is expanded to a pair:

    * label ``0``  -> ``[leak, 1 - leak]``
    * other labels -> ``[1 - r, r]`` with ``r = leak * uniform(0, 1)``

    so with ``leak=0`` the output is a plain two-column one-hot encoding in
    which label 0 occupies column 1.

    The attributes ``n``, ``batch_size``, ``filenames`` and ``batch_index`` are
    copied from the wrapped generator; ``batch_index`` is refreshed after every
    batch that is pulled.

    Args:
        gen: object supporting ``next(gen)`` that returns ``(x, labels)`` and
            exposes ``n``, ``batch_size``, ``batch_index`` and ``filenames``.
        leak: maximum amount of label mass moved to the opposite column.
    """

    def __init__(self, gen, leak):
        self.n = gen.n
        self.batch_size = gen.batch_size
        self.leak = leak
        self.gen = gen
        self.newGen = self.newerGen()
        self.batch_index = self.gen.batch_index
        self.filenames = self.gen.filenames

    def newerGen(self):
        """Endless generator yielding ``(x, soft_labels)`` as float32 arrays."""
        while True:
            x, y_gen = next(self.gen)
            # Mirror the wrapped generator's position so callers can map
            # batches back to file names.
            self.batch_index = self.gen.batch_index
            y = []
            for label in y_gen:
                if label == 0:
                    leak_i = self.leak  # fixed leak for label 0 (not randomised)
                    y.append([leak_i, 1-leak_i])
                else:
                    leak_i = self.leak*np.random.rand()
                    y.append([1-leak_i, leak_i])
            yield x, np.asarray(y, dtype='float32')

    def __iter__(self):
        # Required by the iterator protocol: without it ``iter(obj)`` and
        # ``for batch in obj`` raise TypeError.
        return self

    def __next__(self):
        return next(self.newGen)

    # Python 2 spelling of the iterator protocol.
    next = __next__

In [3]:
def leakyLoss(pred, target):
    """Softmax cross-entropy with label smoothing 0.01, reduced to its mean.

    Both arguments are forwarded positionally to
    ``tf.losses.softmax_cross_entropy``: ``pred`` first, ``target`` second.

    NOTE(review): the TF function names its first positional argument
    ``onehot_labels`` and its second ``logits``, so the parameter names here
    look swapped relative to how the values are consumed -- confirm before
    calling this with keyword arguments.
    """
    smoothed_xent = tf.losses.softmax_cross_entropy(pred, target, label_smoothing=0.01)
    return tf.reduce_mean(smoothed_xent, name='cross_entropy')

def getModel(dropout = 0.33, optimizer = 'adam', loss = 'categorical_crossentropy', inputShape = (128, 128, 3)):
    """Build and compile a MobileNet-based two-class classifier.

    An ImageNet-initialised MobileNet without its top is followed by
    Flatten -> Dense(64, relu) -> Dropout -> Dense(2, softmax).

    Args:
        dropout: dropout rate applied before the final layer.
        optimizer: optimizer name or instance handed to ``model.compile``.
        loss: loss name or callable handed to ``model.compile``. Previously this
            argument was accepted but ignored and ``'categorical_crossentropy'``
            was always used; that value is now the default, so calls that do
            not pass ``loss`` behave exactly as before.
        inputShape: (height, width, channels) of the network input.

    Returns:
        The compiled Keras ``Model`` (metric: accuracy).
    """
    baseModel = keras.applications.mobilenet.MobileNet(include_top=False, input_shape=inputShape, weights='imagenet')
    x = baseModel.output
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(dropout)(x)
    x = Dense(2, activation='softmax')(x)
    model = Model(inputs=baseModel.inputs, outputs=x)

    # multi GPU support not worth it, as bottleneck is in datagen-augmentation
    # model = multi_gpu_model(model, gpus=3)

    # Honour the caller's loss instead of a hard-coded string.
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    print("Created MobileNet Model with Dropout of {}".format(dropout))
    return model

def trainModel(model, batchSize, modelName='model_weights', epochs=1):
    """Train ``model`` one epoch at a time and save the weights after each epoch.

    Reads the notebook-level globals ``trainBatches``, ``validBatches`` and
    ``modelOutputPath`` and keeps a running global ``epochCounter`` (created
    with value 1 on first use) that is embedded in the weight file names.

    Args:
        model: compiled Keras model providing ``fit_generator``/``save_weights``.
        batchSize: batch size used to derive the number of steps per epoch.
        modelName: prefix of the saved weight files.
        epochs: number of epochs to run in this call.

    Returns:
        ``(model, fileName)`` where ``fileName`` is the name of the last weight
        file written, or ``None`` when ``epochs`` is 0 or negative (previously
        this case failed on an unbound local).
    """
    global epochCounter
    try:
        epochCounter
    except NameError:
        epochCounter = 1
    print("Start training for {} epochs with a batch size of {}. The overall current epoch is {}".format(epochs, batchSize, epochCounter))
    fileName = None  # stays None if the loop body never runs
    for epoch in range(1, epochs+1):
        print("Epoch {}".format(epoch))
        model.fit_generator(
            trainBatches,
            # at least one step even if the data set is smaller than a batch
            steps_per_epoch=max(1, trainBatches.n // batchSize),
            epochs=1,
            validation_data=validBatches,
            validation_steps=max(1, validBatches.n // batchSize),
            use_multiprocessing = True,
            class_weight={1:25., 0:1.})
        timeStamp = datetime.datetime.now().strftime("%y_%m_%d_%H_%M")
        fileName = "{}_{}_epoch_{}.h5".format(modelName, timeStamp, epochCounter)
        epochCounter += 1
        model.save_weights(os.path.join(modelOutputPath, fileName))
        print("Done! Model Name is {}".format(fileName))
    return model, fileName

In [5]:
def getPredictions(dataGen, model):
    """Run ``model`` over one full pass of ``dataGen`` and collect the results.

    Exactly ``ceil(n / batch_size)`` batches are drawn, so every sample is
    predicted once (the previous ``n // batch_size + 1`` drew a duplicate batch
    whenever ``n`` was a multiple of the batch size).

    File names are taken from ``dataGen.filenames`` starting at
    ``batch_index * batch_size`` as read *before* the batch is pulled. Reading
    the index afterwards is unreliable for the last batch, where it has already
    wrapped to 0.
    NOTE(review): this assumes ``dataGen.batch_index`` holds the index of the
    upcoming batch before ``next()`` and that the generator is not shuffled --
    confirm for generators other than the ones built in this notebook.

    Args:
        dataGen: batch iterator yielding ``(x, y)`` with two-column ``y`` and
            exposing ``n``, ``batch_size``, ``batch_index`` and ``filenames``.
        model: object with ``predict(x, batch_size=...)`` returning two columns.

    Returns:
        ``(y_true, y_pred, files)`` as numpy arrays: column 1 of the labels,
        column 1 of the predictions, and the matching file names.
    """
    y_true = []
    y_pred = []
    files = []
    batch_size = dataGen.batch_size
    rounds = (dataGen.n + batch_size - 1) // batch_size  # ceil division
    report_every = rounds // 5 + 1
    for i in range(rounds):
        if i % report_every == 0:
            print("{} of {} predictions".format(len(y_pred), dataGen.n))
        # Position of the batch we are about to pull.
        start = dataGen.batch_index * batch_size
        x, y_t = next(dataGen)
        y_p = model.predict(x, batch_size=batch_size)
        y_true.extend(y_t[:, 1])
        y_pred.extend(y_p[:, 1])
        # len(y_p) rather than batch_size: the final batch may be shorter.
        files.extend(dataGen.filenames[start : start + len(y_p)])
    print("{} of {} predictions".format(len(y_pred), dataGen.n))
    return np.asarray(y_true), np.asarray(y_pred), np.asarray(files)

def showErrors(y_pred, y_true, basicPath, threshold=0.5, mode='fn', fileNames=None, maxImages=25):
    """Print and plot the misclassified images of one error type.

    Predictions are binarised with ``y_pred > (1 - threshold)``. For
    ``mode='fp'`` the samples with ``y_true == 1`` and binarised prediction 0
    are selected; for ``mode='fn'`` those with ``y_true == 0`` and binarised
    prediction 1.
    NOTE(review): which of these is a "false positive" depends on which class
    is encoded as 1 in ``y_true`` -- confirm against the label encoding.

    Args:
        y_pred: 1-D array of predicted scores.
        y_true: 1-D array of 0/1 ground-truth values aligned with ``y_pred``.
        basicPath: directory the file names are relative to.
        threshold: decision threshold (applied as ``1 - threshold``).
        mode: ``'fp'`` or ``'fn'``.
        fileNames: file names aligned with ``y_pred``. When omitted, the
            notebook-level global ``fileNames`` is used, as before.
        maxImages: maximum number of images to plot (``None`` for all).

    Raises:
        ValueError: if ``mode`` is neither ``'fp'`` nor ``'fn'``.
        NameError: if ``fileNames`` is omitted and no global of that name exists.
    """
    if mode == 'fp':
        print("\nFalse positives:\n")
        maskParam = True
    elif mode == 'fn':
        print("\nFalse negatives\n")
        maskParam = False
    else:
        # Previously fell through to an unbound-local error further down.
        raise ValueError("mode must be 'fp' or 'fn', got {!r}".format(mode))

    if fileNames is None:
        try:
            fileNames = globals()['fileNames']
        except KeyError:
            raise NameError("fileNames was not passed and no global 'fileNames' is defined")
    fileNames = np.asarray(fileNames)

    test = (y_pred > (1-threshold)).astype(int)
    mask = np.where((y_true == int(maskParam)) & (test == int(not maskParam)))[0]
    wrongFiles = fileNames[mask]
    probs = y_pred[mask]
    print(probs)
    wrongPaths = [os.path.join(basicPath, f) for f in wrongFiles]
    print(wrongPaths[:10])
    # Slice instead of a manual counter so exactly ``maxImages`` are shown
    # (the old ``if i == 25: break`` stopped after 24).
    for i, (f, p) in enumerate(zip(wrongPaths[:maxImages], probs), start=1):
        print("{}. {}: {}".format(i, f, p))
        plt.figure()
        plt.title("Filename: {} \nProbability: {}".format(os.path.basename(f), p))
        plt.imshow(plt.imread(f), cmap='gray')
        plt.show()

def getConfusionMatrix(thresholds, y_pred, y_true):
    """Print the confusion matrix of ``y_pred`` binarised at each threshold.

    Args:
        thresholds: iterable of cut-off values; a score counts as class 1 when
            it is strictly greater than the threshold.
        y_pred: 1-D array of predicted scores.
        y_true: 1-D array of ground-truth labels aligned with ``y_pred``.

    Returns:
        Dict mapping each threshold to the matrix returned by
        ``confusion_matrix(y_true, binarised_prediction)``, so the values can be
        reused instead of only being printed.
    """
    matrices = {}
    for thres in thresholds:
        curr_pred = (y_pred > thres).astype(int)
        print("Confusion matrix for {} threshold".format(thres))
        matrices[thres] = confusion_matrix(y_true, curr_pred)
        print(matrices[thres])
    return matrices
        
def predictSpecificImage(imgPath, model=None, targetSize=(128, 128)):
    """Load one image, show it, and print and return the model's prediction.

    The image is resized with PIL's ``Image.resize(targetSize)``, converted to
    an array, divided by 255 and given a leading batch axis of length 1.

    Args:
        imgPath: path of the image file.
        model: object with ``predict``. When omitted, the notebook-level global
            ``model`` is used, as before.
        targetSize: size tuple handed to ``Image.resize``.

    Returns:
        Whatever ``model.predict`` returns for the single-image batch.

    Raises:
        NameError: if ``model`` is omitted and no global ``model`` exists.
    """
    if model is None:
        try:
            model = globals()['model']
        except KeyError:
            raise NameError("model was not passed and no global 'model' is defined")

    img = load_img(imgPath)  # this is a PIL image
    img = img.resize(targetSize)
    x = img_to_array(img)
    x = x/255 #rescale
    plt.imshow(x)
    x = x.reshape((1,) + x.shape)  # add the batch axis
    print(x.shape)
    result = model.predict(x)
    print(result)
    return result

## Set important parameters

In [6]:
# --- Run configuration -------------------------------------------------------
# Number of images per batch, shared by the generators and the training helper.
batchSize = 32

# Input folders (one sub-folder per class) and output location for weights.
trainPath = '/home/../train'
validPath = '/home/../valid'
modelOutputPath = '/home/../model_weights'

# Prefix used for the saved weight files.
modelName = 'model_we._data'

# Create the weights folder on first use.
if not os.path.exists(modelOutputPath):
    os.makedirs(modelOutputPath)

## All that is needed to generically train 

In [None]:
# Number of epochs for this training run.
epochs = 1

# Build both directory iterators and wrap each one so that it yields
# two-column labels (leak=0 -> hard labels).
trainBatches, validBatches = tuple(
    leakyBatches(rawBatches, leak=0)
    for rawBatches in getBatchGens(trainPath, validPath, batchSize))

# Fresh model without dropout, optimised with Adam.
adam = keras.optimizers.Adam(lr=0.001)
model = getModel(optimizer=adam, dropout=0)

# Train and remember the name of the last weight file written.
model, fileName = trainModel(model, batchSize, modelName=modelName, epochs=epochs)

## Load specific model

In [None]:
# Name of the weight file inside modelOutputPath that should be restored.
fileName = '.h5'

# Same generators and wrappers as for training (leak=0 -> hard labels).
trainBatches, validBatches = tuple(
    leakyBatches(rawBatches, leak=0)
    for rawBatches in getBatchGens(trainPath, validPath, batchSize))

# Rebuild the architecture, then load the stored weights into it.
adam = keras.optimizers.Adam(lr=0.001)
model = getModel(optimizer=adam, dropout=0)
model.load_weights(os.path.join(modelOutputPath, fileName))

## How training is done with a plain TensorFlow loop

In [7]:
# Fetch the session Keras is using and register it again as the Keras session,
# so that the Keras layers and the raw TensorFlow ops below share one session.
# `sess` is reused by all following cells.
sess = K.get_session()
K.set_session(sess)

### Create Keras model, add loss and train_step

In [8]:
# NOTE(review): `categorical_crossentropy` and `InputLayer` are not referenced
# in this cell, and `K` is already imported in the first cell.
from keras.objectives import categorical_crossentropy
from keras.layers import InputLayer
from keras import backend as K

# Placeholder for the two-column float labels fed at run time.
labels = tf.placeholder(tf.float32, shape=(None, 2))
# NOTE(review): this filter runs before MobileNet is instantiated in this cell,
# so it can only match variables created by earlier cells -- confirm intent.
weight_test = [v for v in tf.trainable_variables() if v.name =='conv1_bn_1/beta:0']

# Placeholder for a batch of 128x128 images with 3 channels.
img = tf.placeholder(shape=(None, 128, 128,3), dtype=tf.float32)
#img = keras.Input(shape=(128, 128,3), dtype=tf.float32)
baseModel = keras.applications.mobilenet.MobileNet(include_top=False, input_shape=(128, 128, 3), weights='imagenet')

# Apply the MobileNet base to the placeholder and stack the classifier head.
x = baseModel(img)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.8)(x)
# No activation here: `preds` are raw outputs that go into the softmax loss.
preds = Dense(2, activation=None)(x)
loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(labels, preds))
#  train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# Op that performs one Adam update on `loss` each time it is run.
train_step = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(loss)

In [9]:
# Initialize only the variables that TensorFlow still reports as uninitialized,
# instead of running an initializer over every global variable.
# 1) ask the session for the names of the uninitialized variables (bytes)
uninitialized_variables_names = sess.run(tf.report_uninitialized_variables(tf.global_variables()))
# 2) decode them to text
uninitialized_variables_names = [name.decode("utf-8") for name in uninitialized_variables_names]
# 3) pick the matching variable objects; v.name[:-2] drops the last two
#    characters of the name (presumably the ":0" output suffix)
uninitialized_variables = [v for v in tf.global_variables() if v.name[:-2] in uninitialized_variables_names]
# 4) build and run an initializer for exactly those variables
init = tf.variables_initializer(uninitialized_variables)
sess.run(init)

In [10]:
# Collect every graph operation whose name contains 'dropout' (debugging aid).
test = [op for op in sess.graph.get_operations() if 'dropout' in op.name]

In [12]:
# Display the collected dropout operations as the cell output.
test

[<tf.Operation 'dropout_1/cond/Switch' type=Switch>,
 <tf.Operation 'dropout_1/cond/switch_t' type=Identity>,
 <tf.Operation 'dropout_1/cond/switch_f' type=Identity>,
 <tf.Operation 'dropout_1/cond/pred_id' type=Identity>,
 <tf.Operation 'dropout_1/cond/mul/y' type=Const>,
 <tf.Operation 'dropout_1/cond/mul/Switch' type=Switch>,
 <tf.Operation 'dropout_1/cond/mul' type=Mul>,
 <tf.Operation 'dropout_1/cond/dropout/keep_prob' type=Const>,
 <tf.Operation 'dropout_1/cond/dropout/Shape' type=Shape>,
 <tf.Operation 'dropout_1/cond/dropout/random_uniform/min' type=Const>,
 <tf.Operation 'dropout_1/cond/dropout/random_uniform/max' type=Const>,
 <tf.Operation 'dropout_1/cond/dropout/random_uniform/RandomUniform' type=RandomUniform>,
 <tf.Operation 'dropout_1/cond/dropout/random_uniform/sub' type=Sub>,
 <tf.Operation 'dropout_1/cond/dropout/random_uniform/mul' type=Mul>,
 <tf.Operation 'dropout_1/cond/dropout/random_uniform' type=Add>,
 <tf.Operation 'dropout_1/cond/dropout/add' type=Add>,
 <tf.

In [None]:
# Select the trainable variables whose name begins with 'dense_' and print
# their current values as a sanity check.
weight_test = [v for v in tf.trainable_variables() if v.name.startswith('dense_')]
print(weight_test)
with sess.as_default():
    sanityCheck = sess.run(weight_test)
    print([v.name for v in weight_test])
    print(sanityCheck)

In [18]:
# Batches per pass over the training set: n / batch_size, with 0.5 added
# before int() truncates.
# NOTE(review): with true division this rounds to the nearest integer rather
# than up, so a trailing partial batch may not be counted -- confirm intent.
iterations_per_epoch = int(trainBatches.n/trainBatches.batch_size + 0.5)
print(iterations_per_epoch)
# Pull one (images, labels) batch for the experiments in the next cells.
batch = next(trainBatches)

3193


In [None]:
# Evaluate `preds` for the batch fetched earlier and print the outputs next to
# the labels. K.learning_phase() is not fed in this call.
with sess.as_default():
    pred = sess.run(preds, feed_dict={img: batch[0],
                             labels: batch[1]})
print(pred, batch[1])

In [None]:
# Pull one (x, y) batch straight from the wrapper's internal generator object;
# the batch is shown as the cell output.
next(iter(trainBatches.newGen))

In [20]:
# from keras.metrics import categorical_accuracy as accuracy
def categorical_accuracy(y_true, y_pred):
    """Mean fraction of rows where the argmax of both tensors agrees.

    The argmax is taken over the last axis of ``y_true`` and ``y_pred``; the
    element-wise equality is cast to the Keras float type and averaged.
    """
    true_class = K.argmax(y_true, axis=-1)
    pred_class = K.argmax(y_pred, axis=-1)
    hits = K.cast(K.equal(true_class, pred_class), K.floatx())
    return K.mean(hits)

# Accuracy tensor for the label placeholder and the network outputs.
acc_value = categorical_accuracy(labels, preds)

In [29]:
# Recreate both directory iterators and wrap each so that it yields
# two-column labels (leak=0 -> hard labels).
trainBatches, validBatches = tuple(
    leakyBatches(rawBatches, leak=0)
    for rawBatches in getBatchGens(trainPath, validPath, batchSize))

Found 102182 images belonging to 2 classes.
Found 25760 images belonging to 2 classes.


In [None]:
# Manual training loop: three passes' worth of batches. Around every optimizer
# step the loss is evaluated several times for diagnosis.
# `train` is the value fed for K.learning_phase() in the training-mode runs.
train = 1
with sess.as_default():
    for i in range(iterations_per_epoch*3):
        batch = next(trainBatches)
        # Current values of the monitored variables (overwritten every iteration).
        sanityCheck = sess.run(weight_test)
        # Loss before the update, learning phase fed as `train`.
        loss_val1 = sess.run(loss, feed_dict={img: batch[0],
                             labels: batch[1],
                                K.learning_phase(): train})
        # Same loss with the learning phase fed as 0.
        loss_val11 = sess.run(loss, feed_dict={img: batch[0],
                             labels: batch[1],
                                K.learning_phase(): 0})
        # Accuracy on the batch; K.learning_phase() is not fed here.
        acc_val1 = acc_value.eval(feed_dict={img: batch[0],
                                    labels:batch[1]})
        # Raw network outputs for the batch (not printed below).
        pred = sess.run(preds, feed_dict={img: batch[0],
                             labels: batch[1], # })
                                 K.learning_phase(): train})
        # print("Iteration {}, loss_val1: {}".format(i+1, loss_val1), end='\r')
        # The actual optimizer update.
        train_step.run(feed_dict={img: batch[0],
                                 labels: batch[1], # })
                                 K.learning_phase(): train})
        # Loss on the same batch after the update.
        loss_val2 = sess.run(loss, feed_dict={img: batch[0],
                                 labels: batch[1], # })
                                 K.learning_phase(): train})
        print("Iteration {}, acc_val1: {},  loss_val1: {} {}".format(i+1, acc_val1, loss_val1, loss_val11), "Iteration {}, loss_val2: {}".format(i+1, loss_val2))

In [None]:
from keras.metrics import categorical_accuracy as accuracy

# Accuracy tensor built with the Keras metric; this rebinds `acc_value`.
acc_value = accuracy(labels, preds)
with sess.as_default():
    # print() as a function call: the former `print expr` statement is a
    # SyntaxError on Python 3, while this form works on Python 2 and 3.
    print(acc_value.eval(feed_dict={img: batch[0],
                                    labels:batch[1]}))

In [None]:
# Instantiate an ImageNet-initialised MobileNet base without its top for
# 128x128x3 inputs. This rebinds the notebook-level name `baseModel`.
baseModel = keras.applications.mobilenet.MobileNet(include_top=False, input_shape=(128, 128, 3), weights='imagenet')


In [None]:
# Rebuild the classifier architecture by hand (same layers as getModel, but
# without compiling).
# `dropout` was never defined at notebook scope, so this cell raised NameError
# on a fresh kernel; 0.33 mirrors getModel's default rate.
dropout = 0.33

baseModel = keras.applications.mobilenet.MobileNet(include_top=False, input_shape=(128, 128, 3), weights='imagenet')
x = baseModel.output
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(dropout)(x)
x = Dense(2, activation='softmax')(x)
model = Model(inputs=baseModel.inputs, outputs=x)

## Validate model

In [None]:
# Folder with one sub-folder per class holding the extra test images.
pathImageSet = 'folder'
# NOTE(review): no `rescale` is passed here, unlike the generators built by
# getBatchGens -- confirm the downstream code rescales the pixels itself.
datagenExtraTest = ImageDataGenerator()
extraTestBatches = datagenExtraTest.flow_from_directory(
        pathImageSet,  # this is the target directory
        target_size=(1200, 1600),  # every image is resized to 1200x1600
        batch_size=batchSize,
        class_mode='binary',
        shuffle=False)  # keep the file order so predictions can be matched to file names
# get predictions
# the generator is freshly created above, so it starts at the first file
extraTestBatches = leakyBatches(extraTestBatches, leak = 0)
# NOTE(review): `getPredictionsSegment` is not defined anywhere in the visible
# notebook (only `getPredictions` is) -- confirm where it comes from.
y_true, y_pred, fileNames = getPredictionsSegment(extraTestBatches, model)
# Show confusion matrix for various thresholds
thresholds = [0.5, 0.1, 0.04, 0.004, 0.001]
getConfusionMatrix(thresholds, y_pred, y_true)
# show up to 25 false positives
showErrors(y_pred, y_true, basicPath = pathImageSet, threshold=0.5, mode='fp')
# show up to 25 false negatives
showErrors(y_pred, y_true, basicPath = pathImageSet, threshold=0.5, mode='fn')

In [None]:
# Predict a single image file.
# NOTE(review): `predictSpecificImage2Segment` is not defined in the visible
# notebook (only `predictSpecificImage` is) -- confirm where it comes from.
predictSpecificImage2Segment('x.jpg')