In [None]:
# ################################################################
# Importing libraries
# ################################################################
import os 
import zipfile 
import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras import layers 
from tensorflow.keras import Model 
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# ################################################################
# Define dataset directory
# ################################################################
# Project root; every other location below is derived from it
dir_ = '/home/jovyan/private/MSc/CNN'

# Top-level folders: 'dataset' and 'network'
base_dir, weight_dir = (os.path.join(dir_, sub) for sub in ('dataset', 'network'))

# Sub-folders 'place_365' and 'hybrid_1365' inside the network folder
weight_place365, weight_hybrid1365 = (
    os.path.join(weight_dir, sub) for sub in ('place_365', 'hybrid_1365')
)

# Dataset splits: training, validation and test
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validate', 'test')
)

# Per-class folders (slum / nonslum) of the training split
train_slum_dir, train_nonslum_dir = (
    os.path.join(train_dir, label) for label in ('slum', 'nonslum')
)

# Per-class folders (slum / nonslum) of the validation split
validation_slum_dir, validation_nonslum_dir = (
    os.path.join(validation_dir, label) for label in ('slum', 'nonslum')
)

In [None]:
# ################################################################
# Visualize some sample input data
# ################################################################

# Show a 4x4 grid of training samples: 8 slum pictures followed by 8 nonslum pictures
import matplotlib.image as mpimg
nrows = 4
ncols = 4

pic_index = 100
# os.listdir returns entries in arbitrary order; sort the listings so the
# same samples are displayed on every run.
train_slum_fnames = sorted(os.listdir(train_slum_dir))
train_nonslum_fnames = sorted(os.listdir(train_nonslum_dir))

# The 8 file names that precede position `pic_index` in each class
next_slum_pix = [os.path.join(train_slum_dir, fname)
                for fname in train_slum_fnames[pic_index-8:pic_index]]

next_nonslum_pix = [os.path.join(train_nonslum_dir, fname)
                for fname in train_nonslum_fnames[pic_index-8:pic_index]]

# Size the figure so that every picture gets a 4x4 inch cell
fig = plt.figure(figsize=(ncols*4, nrows*4))

for i, img_path in enumerate(next_slum_pix+next_nonslum_pix):
  # Set up subplot; subplot indices start at 1
  sp = fig.add_subplot(nrows, ncols, i + 1)
  sp.axis('off') # Don't show axes (or gridlines)

  img = mpimg.imread(img_path)
  sp.imshow(img)

plt.show()

In [None]:
# Training images: multiply pixel values by 1/255 and augment with
# random shear and horizontal flips
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    shear_range=0.2,
    rescale=1.0/255,
)

# Validation images are only rescaled, never augmented
validation_datagen = ImageDataGenerator(rescale=1.0/255)

# Test images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=1.0/255)

In [None]:
# Training batches: 64 images of 224x224 per batch, produced by train_datagen
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    classes=['slum', 'nonslum'],
    batch_size=64,
)

# Validation batches: 64 images of 224x224 per batch, produced by validation_datagen
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    classes=['slum', 'nonslum'],
    batch_size=64,
)

# Test batches: 64 images of 224x224 per batch, produced by test_datagen.
# Shuffling is disabled so predictions stay aligned with
# test_generator.filenames and test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    classes=['slum', 'nonslum'],
    batch_size=64,
    shuffle=False,
)

In [None]:
def plotImages(images_arr):
    """Show up to the first 10 images of ``images_arr`` side by side in one row.

    :param images_arr: iterable of images in a form accepted by ``Axes.imshow``;
        images beyond the tenth are ignored
    """
    fig, axes = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    # Hide every axis first so that unused slots stay blank when fewer
    # than 10 images are passed in.
    for ax in axes:
        ax.axis('off')
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
    plt.tight_layout()
    plt.show()

In [None]:
# Pull the next batch from the training generator (this advances the generator)
imgs, labels = next(train_generator)

# Display images from the batch, then print the labels of the whole batch
plotImages(imgs)
print(labels)

# print(np.argmax(labels, axis=-1))
# print(train_generator.classes)
# Mapping from class name to class index used in the labels
print(train_generator.class_indices)

In [None]:
# Pull the next batch from the validation generator (this advances the generator)
imgs, labels1 = next(validation_generator)

# Display images from the batch, then print the labels of the whole batch
plotImages(imgs)
print(labels1)

# print(np.argmax(labels1, axis=-1))
# print(validation_generator.classes)
# Mapping from class name to class index used in the labels
print(validation_generator.class_indices)

#### Setting up the network

In [None]:
import sys

# Make the local `vgg16_places_365` module importable. The folder is derived
# from `dir_` so it follows the project root, and it is appended only once so
# that re-running this cell does not keep growing sys.path.
model_dir = os.path.join(dir_, 'model')
if model_dir not in sys.path:
    sys.path.append(model_dir)
from vgg16_places_365 import VGG16_Places365

base_model = VGG16_Places365(
    input_shape = (224, 224, 3),  # Shape of our images
    include_top = False,          # Leave out the top (fully connected) part of the network
    weights = 'places',
    pooling = 'max')

In [None]:
# Freeze the base model: every layer of base_model is marked as
# non-trainable (no layer is left unfrozen)
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Flatten the output of the base model to 1 dimension
x = layers.Flatten()(base_model.output)

# Add two fully connected layers (512 and 128 hidden units) with ReLU activation
x = layers.Dense(512, activation='relu')(x)
x = layers.Dense(128, activation='relu')(x)

# Add a dropout rate of 0.5
x = layers.Dropout(0.5)(x)

# Final classification layer with one output per class (slum, nonslum).
# The directory generators use the default categorical (one-hot) labels and
# predictions are decoded with argmax, i.e. the two classes are mutually
# exclusive. Softmax with categorical cross-entropy models exactly that,
# and makes the 'acc' metric agree with the argmax-based evaluation.
x = layers.Dense(2, activation='softmax')(x)


model = tf.keras.models.Model(base_model.input, x)

model.compile(
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.0001, momentum=0.9),
    loss = 'categorical_crossentropy',
    metrics = ['acc'])
model.summary()

#### Configuration for training

In [None]:
# Use the callbacks that ship with tf.keras, matching the tf.keras model above
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Save the full model to `weight_dir` whenever validation accuracy improves
# (checked once per epoch, which is the callback's default frequency)
checkpoint = ModelCheckpoint(weight_dir, monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
# Stop training once validation accuracy has not improved for 10 epochs
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')

# One epoch is one full pass over each generator; the step counts are taken
# from the generators so they stay correct if the dataset size changes.
history = model.fit(
    train_generator,
    validation_data = validation_generator,
    epochs = 200,
    steps_per_epoch = len(train_generator),
    validation_steps = len(validation_generator),
    verbose = 1,
    callbacks = [checkpoint, early])

In [None]:
# ################################################################
# Learning curves
#
# To verify whether the algorithm is working, we can plot learning
# curve.
#
#   * model accuracy
# ################################################################

# Accuracy and loss live on different scales, so each gets its own panel
# with its own title and y-axis label.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_acc.plot(history.history["acc"], label="Accuracy")
ax_acc.plot(history.history['val_acc'], label="Validation Accuracy")
ax_acc.set_title("model accuracy")
ax_acc.set_ylabel("Accuracy")
ax_acc.set_xlabel("Epoch")
ax_acc.legend()

ax_loss.plot(history.history['loss'], label="loss")
ax_loss.plot(history.history['val_loss'], label="Validation Loss")
ax_loss.set_title("model loss")
ax_loss.set_ylabel("Loss")
ax_loss.set_xlabel("Epoch")
ax_loss.legend()

fig.tight_layout()
plt.show()

#### Prediction on the test data

In [None]:
# ################################################################
# Evaluation of the test data
#
# In this step, the previously created test data set is fed to
# the network. The network responds with a slum/nonslum prediction.
# The resulting data will be used to assess the accuracy of the
# network.
# ################################################################

from tensorflow.keras.models import load_model

# Reload the best model written by the ModelCheckpoint callback.
# `weight_dir` is the path that was passed to ModelCheckpoint during training.
saved_model = load_model(weight_dir)
# One prediction row per test image, covering every batch of the test generator
predictions = saved_model.predict(x=test_generator, steps=len(test_generator), verbose=0)
# Predicted class index per test image (index of the largest output)
test_ = np.argmax(predictions, axis=-1)

In [None]:
name = []
name_dir = []
class_name = []
# test_generator.filenames holds one '<class>/<file>' path per test image.
# Only the file names are needed here, so no image data is loaded, and the
# loop covers every file instead of relying on a hard-coded image count.
for image_name in test_generator.filenames:
    true_class, file_name = image_name.replace(os.sep, '/').split('/')[:2]
    class_name.append(true_class) # appending true class of image to list
    # appending image name with direction to list (extension stripped, whatever its length)
    name_dir.append(os.path.splitext(file_name)[0])
    name.append(file_name.split('_')[0]) # appending image name (part before the first '_') to list

In [None]:
import pandas as pd

# One row per test image: parsed file-name parts, predicted class index
# and true class name
df_final = pd.DataFrame({
    'image_name': name,
    'image_dir': name_dir,
    'predicted_class': test_,
    'true_class': class_name,
})
# pd.set_option('display.max_rows', df_final.shape[0])
df_final

In [None]:
file_name = "vgg16_output"
# NOTE(review): `csv_dir` is not defined anywhere in this notebook, so this
# cell failed with a NameError on a fresh kernel. It now defaults to a 'csv'
# folder under the project root -- adjust if the results belong elsewhere.
csv_dir = os.path.join(dir_, 'csv')
os.makedirs(csv_dir, exist_ok=True)
file_path = os.path.join(csv_dir, file_name + ".csv")
df_final.to_csv(file_path, index= False)

#### Accuracy assessment

In [None]:
# ################################################################
# Accuracy 
# ################################################################

%matplotlib inline
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    :param cm: 2-D numpy array; rows are plotted as true labels and
        columns as predicted labels
    :param classes: tick labels, one per row/column of `cm`
    :param normalize: if True, every row is divided by its row sum
    :param title: title of the plot
    :param cmap: matplotlib colormap used for the cells
    """
    if normalize:
        # Normalise before drawing so that the cell colours, the colour bar
        # and the cell text all show the same (normalised) values.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text for contrast
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Normalised values are shortened to two decimals to keep cells readable
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
from sklearn.metrics import confusion_matrix

# Tick labels in class-index order (the generators were created with
# classes=['slum', 'nonslum'])
cm_plot_labels = ['slum','nonslum']

# True class indices from the test generator vs. the arg-max of the predictions
cm = confusion_matrix(test_generator.classes, predictions.argmax(axis=-1))
plot_confusion_matrix(cm, cm_plot_labels, title='Confusion Matrix')

In [None]:
# ################################################################
# Accuracy Matrix
# ################################################################

import pandas
from IPython.display import display

def print_scores(title, tp, fp, fn, tn):
    """Calculates and prints the typical accuracy scores of a binary
    classifier from the four cells of its confusion matrix.

    A score whose denominator is zero (e.g. precision when nothing was
    predicted positive) is reported as 0 instead of raising an error or
    producing NaN.

    :param title: Title to be printed
    :param tp: number of true positives
    :param fp: number of false positives
    :param fn: number of false negatives
    :param tn: number of true negatives
    """
    def _ratio(numerator, denominator):
        # Guard against empty denominators (ZeroDivisionError for ints, NaN for numpy)
        return numerator / denominator if denominator else 0.0

    total = sum([tp, fp, fn, tn])
    # Accuracy
    # How many samples are classified correctly?
    accuracy = _ratio(tp + tn, total)

    # Precision
    # How many of the samples predicted positive are actually positive?
    precision = _ratio(tp, tp + fp)

    # Recall / Sensitivity
    # How many of the actual positive samples did we detect?
    recall = _ratio(tp, tp + fn)

    # F1 score
    # harmonic mean of recall & precision
    f1 = _ratio(2 * (precision * recall), precision + recall)

    # Report everything as percentages
    accuracy *= 100
    precision *= 100
    recall *= 100
    f1 *= 100

    print(f"Accuracy Assessment: {title}")
    print("=" * 32)
    print("{:<32} {:<10} %".format("Overall Accuracy", round(accuracy, 3)))
    print("{:<32} {:<10} %".format("Precision", round(precision, 3)))
    print("{:<32} {:<10} %".format("Recall", round(recall, 3)))
    print("{:<32} {:<10} %".format("F1 Score", round(f1, 3)))
    print("\n" * 3)

In [None]:
# Scores are computed with class index 1 as the positive class
# (cm rows = true class, cm columns = predicted class).
# NOTE(review): the generators use classes=['slum', 'nonslum'], so index 1 is
# 'nonslum' and precision/recall describe the nonslum class -- confirm this is intended.
print_scores(title = "Confusion Matrix",tp = cm[1,1], fp = cm[0,1], fn = cm[1,0], tn = cm[0,0])