In [None]:
import pandas as pd
import numpy as np
import itertools
import os.path
import pickle
import re
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

from keras.utils.vis_utils import model_to_dot, plot_model
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import preprocess_input
from keras.models import load_model

import keras.backend as K
K.set_image_data_format('channels_last')

In [None]:
# Load the trained classifier from its HDF5 file. Judging by the file name this is presumably
# the model after 5 transfer-learning and 15 fine-tuning epochs (TODO confirm).
model = load_model('image_classifier_tl_5_ft_15.h5')

In [None]:
# Store the model summary (output shape and parameter count of every layer) as a text file.

with open('results/image_classifier.txt', 'w') as fh:
    # summary() passes each line to print_fn; print() appends the newline and writes it to the file
    model.summary(print_fn = lambda line: print(line, file = fh))

# Show the same summary in the notebook output as well.
model.summary()

In [None]:
%%javascript
// Replace the output area's scroll check with one that always answers "no",
// so that long cell outputs are not put into a scrolling box.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# NOTE(review): this import sits in the middle of the notebook; consider moving it to the import cell.
from IPython.display import SVG
# Write a diagram of the model architecture to a PNG file ...
plot_model(model, to_file = 'results/image_classifier.png')
# ... and render the model graph inline as SVG (laid out with the `dot` program).
SVG(model_to_dot(model).create(prog = 'dot', format = 'svg'))

In [None]:
# import test set with probabilities and predictions

test_set_predictions = pd.read_csv('test_set_predictions.csv', low_memory = False)

# The image id is the first run of digits in the file name. The pattern is a raw string so that
# `\d` reaches the regex engine unchanged instead of being treated as an (invalid) string escape.
test_set_predictions['image_id'] = test_set_predictions['filename'].apply(lambda x: int(re.search(r'\d+', x).group(0)))

# Remove the stored CSV index column and the raw file name; only the numeric id is kept.
test_set_predictions = test_set_predictions.drop(['Unnamed: 0', 'filename'], axis = 1)
test_set_predictions.head()

In [None]:
# Read the second prediction file (presumably the predictions for all images, TODO confirm)
# and discard the stored CSV index column and the raw file name in the same expression.
total_predictions = (
    pd.read_csv('total_predictions.csv', low_memory = False)
      .drop(['Unnamed: 0', 'filename'], axis = 1)
)
total_predictions.head()

In [None]:
# Put the predicted class from both prediction files side by side for the images they share.
# Selecting the columns and renaming in one chained expression returns a new frame, which avoids
# an in-place rename on a column selection (the pattern pandas flags with SettingWithCopyWarning).
check_test = test_set_predictions[['image_id', 'y_hat']].rename(columns = {'y_hat': 'y_hat_test'})

check_total = total_predictions[['image_id', 'y_hat']].rename(columns = {'y_hat': 'y_hat_total'})

# The inner join keeps only image ids that occur in both frames.
overlap = check_total.merge(check_test, on = 'image_id', how = 'inner')

In [None]:
# Shape of the part of `overlap` where both files contain the same predicted class;
# its row count can be compared with overlap.shape[0] to see whether the two files agree.
overlap[overlap['y_hat_total'] == overlap['y_hat_test']].shape

In [None]:
# create confusion matrices, with and without normalization

# NOTE(review): mid-notebook import; consider moving it to the import cell at the top.
import matplotlib as mpl
# Reset all rcParams to matplotlib's defaults so that earlier style changes do not affect the plots below.
mpl.rcParams.update(mpl.rcParamsDefault)

def plot_confusion_matrix(cm, classes, normalize = False, title = 'Confusion matrix', cmap = plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map on the current figure.

    Parameters
    ----------
    cm : numpy.ndarray
        Square matrix of counts. The plot labels rows as true labels and
        columns as predicted labels.
    classes : sequence of str
        Tick labels, one per row/column of `cm`.
    normalize : bool
        If True, every row is divided by its row sum and the colour scale is
        fixed to [0, 1]. Rows whose sum is 0 stay 0.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap of the heat map.

    The function neither creates a figure nor calls plt.show(); both are left
    to the caller.
    """

    if normalize:
        row_sums = cm.sum(axis = 1)[:, np.newaxis]
        # A class without any true samples has a row sum of 0. Those rows are left at 0
        # instead of dividing by zero, which would fill them with NaN.
        cm = np.divide(cm.astype('float'), row_sums,
                       out = np.zeros(cm.shape, dtype = 'float'),
                       where = row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
    plt.title(title)

    plt.colorbar()
    if normalize:
        plt.clim(0, 1)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation = 45)
    plt.yticks(tick_marks, classes)

    # Shares are printed with two decimals, raw counts as integers.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text so the number stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 fontsize = 14,
                 horizontalalignment = "center",
                 color = "white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # tight_layout runs last so that the axis labels are included in the layout computation.
    plt.tight_layout()

# Compute confusion matrix
cnf_matrix = confusion_matrix(test_set_predictions['y_true'], test_set_predictions['y_hat'])
np.set_printoptions(precision = 2)

# Tick labels for the plots.
# NOTE(review): assumed to be in the same order as the rows/columns of cnf_matrix - confirm.
class_names = ['Animals',
               'Lifts',
               'Other',
               'People',
               'Summer activity',
               'Summer landscape',
               'Winter activity',
               'Winter landscape']

# Accuracy = correctly classified samples (the diagonal of the matrix) / all samples.
# It is independent of the normalisation used for plotting, so it is computed once
# and printed below each of the two plots.
accuracy = np.trace(cnf_matrix) / cnf_matrix.sum()

# Plot non-normalized confusion matrix
plt.figure(figsize = (10, 10))
plot_confusion_matrix(cnf_matrix, classes = class_names, title = 'Confusion matrix, without normalization')

print('\nThe accuracy is:', accuracy, '\n')

# Plot normalized confusion matrix
plt.figure(figsize = (10, 10))
plot_confusion_matrix(cnf_matrix, classes = class_names, normalize = True, title = 'Normalized confusion matrix')

print('\nThe accuracy is:', accuracy)

plt.show()

In [None]:
# import the history files
# NOTE: pickle.load can execute arbitrary code; only load files that were written by this project.
# The `with` blocks close each file even if unpickling raises.

with open('history_transfer.p', 'rb') as picklehistory:
    history_transfer = pickle.load(picklehistory)

with open('history_finetune.p', 'rb') as picklehistory:
    history_finetune = pickle.load(picklehistory)

In [None]:
# visualize the development of the loss function and accuracy, both for the training and the validation set

# NOTE(review): mid-notebook import; consider moving it to the import cell at the top.
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

# accuracy (left axis) and loss (right axis) are plotted against the epoch,
# using the values stored in the two history objects

fig = plt.figure()
fig.set_size_inches(10, 8, forward = True)

# custom line colors
color_accuracy = '#FF9933'
color_loss = '#0099FF'

def _full_history(key):
    """Return the transfer-learning values for `key` followed by the fine-tuning values as one list."""
    # list() guarantees that `+` concatenates, even if the values were stored as arrays
    return list(history_transfer[key]) + list(history_finetune[key])

# extract the series from the history
y1 = _full_history('acc')
y2 = _full_history('val_acc')
y3 = _full_history('loss')
y4 = _full_history('val_loss')

# The number of epochs per phase is read from the histories instead of being hard-coded,
# so the x values always match the length of the series.
n_transfer = len(history_transfer['acc'])
n_epochs = len(y1)

# epoch numbers, starting at 1
x = np.arange(1, n_epochs + 1, 1)

ax1 = plt.gca()

# plot the accuracy series
accuracy_train, = ax1.plot(x, y1, color = color_accuracy, linewidth = 0.75, linestyle = '-')
accuracy_validation, = ax1.plot(x, y2, color = color_accuracy, linewidth = 0.75, linestyle = '--')

ax1.set_ylim([0, 1])
yticks_major = np.round(np.linspace(0, 1, 11), 1)
# np.rint before the integer cast: a plain cast truncates, so float noise could lower a label by one
yticks_labels = [str(pct) + ' %' for pct in np.rint(yticks_major * 100).astype(int)]
ax1.set_yticks(yticks_major)
ax1.set_yticklabels(yticks_labels, fontsize = 10)

ax1.set_xlabel('epoch', fontsize = 11, labelpad = 10)
ax1.set_ylabel('accuracy', fontsize = 11)

# second y axis (loss) sharing the epoch axis
ax2 = ax1.twinx()

# plot the loss series
loss_train, = ax2.plot(x, y3, color = color_loss, linewidth = 0.75, linestyle = '-')
loss_validation, = ax2.plot(x, y4, color = color_loss, linewidth = 0.75, linestyle = '--')

ax2.set_ylim([0, 3])
ax2.set_ylabel('loss', fontsize = 11)
ax1.grid(color = '#333333', linestyle = '--', linewidth = 0.25, zorder = 1)

# a tick at every second epoch (1, 3, 5, ...)
xticks_major = np.arange(1, n_epochs, 2)
ax1.set_xticks(xticks_major)
ax1.set_xlim([0, n_epochs + 1])

ax2.set_title('\nAccuracy and Loss\n', fontsize = 14)

# shade the two training phases: transfer learning first, fine-tuning afterwards
ax2.axvspan(0, n_transfer, alpha = 0.075, color = '#0099FF', zorder = 0).set_hatch('/')
ax2.axvspan(n_transfer, n_epochs + 1, alpha = 0.075, color = '#FF9933', zorder = 0).set_hatch('/')

# Phase labels and arrows are positioned in the coordinates of the loss axis.
# The text offsets (-2.2 and -0.8) are the hand-tuned values of the original layout,
# expressed relative to the middle of each shaded region.
finetune_mid = (n_transfer + n_epochs + 1) / 2
ax2.text(finetune_mid - 2.2, 1.45, 'fine-tuning', fontsize = 12, color = '#666666', multialignment = 'center')

ax2.annotate("",
             xy = (n_transfer + 0.25, 1.6),
             xytext = (n_epochs + 0.75, 1.6),
             arrowprops = dict(arrowstyle = "<->", facecolor = '#666666'))

ax2.text(n_transfer / 2 - 0.8, 0.45, 'transfer\nlearning', fontsize = 12, color = '#666666', multialignment = 'center')

ax2.annotate("",
             xy = (0.25, 0.7),
             xytext = (n_transfer - 0.25, 0.7),
             arrowprops = dict(arrowstyle = "<->", facecolor = '#666666'))

ax2.legend([accuracy_train, accuracy_validation, loss_train, loss_validation],
           ['accuracy training set',
            'accuracy validation set',
            'loss training set',
            'loss validation set'],
           loc = 2,
           facecolor = 'white',
           edgecolor = 'black',
           borderaxespad = 1)

# Save before showing, so the file is written while the figure is certainly still intact.
filename = 'results/accuracy_loss_image_classifier.png'
fig.savefig(filename)

plt.show()

In [None]:
# Load the inputs used further down to build the image paths of all pictures:
# the analysis dataset and the image ids of the training, validation and test split.
# (This cell only reads the files; no prediction happens here.)

account_data_01 = pd.read_csv('results/dataset_analysis.csv', low_memory = False)

# one file per split; the merges further down expect an 'image_id' column in each of them
image_ids_train = pd.read_csv('results/image_ids_train.csv', low_memory = False)
image_ids_val = pd.read_csv('results/image_ids_val.csv', low_memory = False)
image_ids_test = pd.read_csv('results/image_ids_test.csv', low_memory = False)

In [None]:
def _image_path(split_dir, image_id, label):
    """Return '<split_dir>/<label>/<image_id>_256.jpg' (label and id are converted with str())."""
    return '/'.join([split_dir, str(label), str(image_id) + '_256.jpg'])

def file_path_from_train_df(image_id, label):
    """Return the path of a training image: 'training_data/<label>/<image_id>_256.jpg'."""
    return _image_path('training_data', image_id, label)

def file_path_from_validation_df(image_id, label):
    """Return the path of a validation image: 'validation_data/<label>/<image_id>_256.jpg'."""
    return _image_path('validation_data', image_id, label)

def file_path_from_test_df(image_id, label):
    """Return the path of a test image: 'test_data/<label>/<image_id>_256.jpg'."""
    return _image_path('test_data', image_id, label)

In [None]:
def _build_split_df(image_ids, path_builder):
    """Return the images of one data split with their label and file path.

    image_ids    : frame with an 'image_id' column listing the images of the split
    path_builder : function (image_id, label) -> file path

    The result has the columns image_id, likes_groups, label and path.
    """
    merged = account_data_01.merge(image_ids, on = 'image_id', how = 'inner')
    # .copy() makes the column selection an independent frame, so that adding columns
    # below does not trigger SettingWithCopyWarning
    split_df = merged[['image_id', 'likes_groups']].copy()
    # the label is the first character of the likes group
    split_df['label'] = split_df.likes_groups.apply(lambda x: str(x)[0])
    split_df['path'] = [path_builder(image_id, label)
                        for image_id, label in zip(split_df['image_id'], split_df['label'])]
    return split_df

train_df = _build_split_df(image_ids_train, file_path_from_train_df)

validation_df = _build_split_df(image_ids_val, file_path_from_validation_df)

test_df = _build_split_df(image_ids_test, file_path_from_test_df)

In [None]:
# One list of (image_id, path) tuples per split, built with the same expression for each frame.
train_paths, validation_paths, test_paths = (
    list(zip(split_df['image_id'], split_df['path']))
    for split_df in (train_df, validation_df, test_df)
)

# All images: training, validation and test entries, in that order.
paths = [*train_paths, *validation_paths, *test_paths]

In [None]:
# Load a second saved model; going by the file name presumably a variant with a
# global-average-pooling (GAP) layer - TODO confirm. It is not used in the cells visible here.
model_GAP = load_model('image_classifier_GAP.h5')

In [None]:
def _load_preprocessed_image(img_path):
    """Load an image, resize it to 224x224 and return it as a preprocessed batch of one image."""
    img = image.load_img(img_path, target_size = (224, 224))
    # add a leading batch axis before applying the VGG16 preprocessing
    batch = np.expand_dims(image.img_to_array(img), axis = 0)
    return preprocess_input(batch)

# examples of correctly classified images
img1 = _load_preprocessed_image('image_classifier/test_data/Winter activity/600702_256.jpg')
img2 = _load_preprocessed_image('image_classifier/test_data/People/3700675_256.jpg')

# examples of incorrectly classified images
img3 = _load_preprocessed_image('image_classifier/test_data/Summer activity/3701083_256.jpg')
# NOTE(review): unlike the three paths above, this one has no 'test_data/' component - confirm it is intended.
img4 = _load_preprocessed_image('image_classifier/Animals/1900249_256.jpg')

In [None]:
# Class probabilities for the fourth example image; img1-img3 are not passed to the model here.
probabilities = model.predict(img4)

In [None]:
# Position of the largest probability. np.argmax without an axis works on the flattened array,
# which equals the class index as long as `probabilities` holds a single row - presumably the
# case here, since one image was predicted (TODO confirm).
pred_class = np.argmax(probabilities)

In [None]:
# Class names as shown in the chart; the line breaks split two-word names over two lines.
class_labels = [
    'animals',
    'lifts',
    'other',
    'people',
    'summer\nactivity',
    'summer\nlandscape',
    'winter\nactivity',
    'winter\nlandscape',
]

# Pair every class name with the probability in the first row of the prediction
# and turn the pairs into a two-column frame.
label_probability_pairs = list(zip(class_labels, list(probabilities[0])))
probabilities_df = pd.DataFrame(label_probability_pairs, columns = ['class', 'probability'])

In [None]:
# Keep the three rows with the highest probability, largest first.
top3_probs = probabilities_df.nlargest(3, columns = 'probability')

In [None]:
# (Leftover debug expression `X, Y` removed: both names are first assigned in the next cell,
#  so evaluating them here raised NameError when the notebook was run top to bottom.)

In [None]:
# Horizontal bar chart of the three most probable classes, each bar annotated with its percentage.

# NOTE(review): mid-notebook import; consider moving it to the import cell at the top.
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

plt.figure()
fig = plt.gcf()

fig.set_size_inches(3, 2)

# one bar position per row of top3_probs, spread evenly over [0, 1]
X = np.linspace(0, 1, len(top3_probs))

plt.barh(X,
         top3_probs['probability'],
         0.3,
         edgecolor = None,
         color = 'red')

Y = np.round(np.array(top3_probs['probability']), 2)
# np.rint before the integer cast: a plain cast truncates, so that e.g.
# 0.29 * 100 = 28.999... would be labelled 28% instead of 29%
Y_100 = np.rint(Y * 100).astype('int')

# write the percentage slightly to the right of the end of each bar
for a, b, c in zip(X, Y, Y_100):
    plt.text(b + 0.1, a, str(c) + '%', fontsize = 8, ha = 'center', va = 'center')

# extra room on the right for the percentage labels
plt.xlim([0, 1.5])

ax = plt.gca()
# the first (most probable) class is drawn at the top
ax.invert_yaxis()
ax.xaxis.grid(color = '#333333', linestyle = '--', linewidth = 0.25)
ax.set_yticks(X)
ax.set_yticklabels(top3_probs['class'], fontsize = 8, ha = 'center', va = 'center', x = 0.90)
ax.tick_params(axis = 'y', which = 'both', length = 0)

# no x ticks: the percentages are written next to the bars
ax.set_xticks([])

ax.set_axisbelow(True)
plt.show()

In [None]:
# Show two example images side by side.
# load_img is taken from keras.preprocessing.image, which the import cell binds to `image`;
# the name `utils` used here before is never imported in the visible notebook and raised NameError.
# NOTE(review): these paths contain no 'test_data/' component, unlike the example paths used
# earlier in the notebook - confirm that the files exist at these locations.
# NOTE(review): img1 / img2 are also assigned earlier in the notebook; this rebinds them.
img1 = image.load_img('image_classifier/Winter activity/600702_256.jpg', target_size = (224, 224))
img2 = image.load_img('image_classifier/People/1600710_256.jpg', target_size = (224, 224))

f, ax = plt.subplots(1, 2)
ax[0].imshow(img1)
ax[1].imshow(img2)

plt.show()