# Basic config

In [1]:
# Mount Google Drive so that the files stored there are reachable under /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# Change the working directory to the root of the mounted Drive
cd /content/gdrive/My Drive/

/content/gdrive/My Drive


In [3]:
# Folder on the mounted Drive that holds the dataset; later cells build their paths from it
img_dir = '/content/gdrive/My Drive/dataset/'
print(img_dir)

/content/gdrive/My Drive/dataset/


In [4]:
# Work from inside the dataset folder (same path as `img_dir`) so relative paths resolve there
cd '/content/gdrive/My Drive/dataset/'

/content/gdrive/My Drive/dataset


# Data preprocessing-Split folders

In [5]:
!pip install split-folders

Collecting split-folders
  Using cached https://files.pythonhosted.org/packages/20/67/29dda743e6d23ac1ea3d16704d8bbb48d65faf3f1b1eaf53153b3da56c56/split_folders-0.3.1-py3-none-any.whl
Installing collected packages: split-folders
Successfully installed split-folders-0.3.1


In [0]:
#https://github.com/jfilter/split-folders
import os
import split_folders

# Split the class folders under `img_dir` into train / val / test subsets (70% / 15% / 15%).
# To only split into training and validation set, set a tuple to `ratio`, i.e, `(.8, .2)`.
#
# `output` is a relative path, so the result is created in the current working directory,
# which an earlier cell set to the dataset folder itself. Run the split only once: a second
# run would copy the images again and would likely treat the existing `output` folder as
# one more class.
if not os.path.isdir("output"):
    split_folders.ratio(img_dir, output="output", seed=1337, ratio=(.7, .15, .15))

# Import libraries and Prepare Data

In [6]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model, Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.optimizers import RMSprop, SGD
from keras import backend as K

import keras

import matplotlib.pyplot as plt
import os
import tensorflow as tf

# Create the TensorFlow session with `allow_growth=True` so GPU memory is requested on demand.
# NOTE(review): uses the TF 1.x API (`tf.GPUOptions`, `tf.ConfigProto`, `tf.InteractiveSession`) — confirm the runtime's TensorFlow version.
gpu_options = tf.GPUOptions(allow_growth=True)
session = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

Using TensorFlow backend.


In [7]:
#Prepare Data
# Every image is resized to this size before it is fed to the network
img_width = img_height = 200

# Location of the split dataset: one sub-folder per subset below `<img_dir>/output/`
output_dir = img_dir + 'output/'
train_data_dir, validation_data_dir, test_data_dir = (
    os.path.join(output_dir, subset) for subset in ('train', 'val', 'test')
)
print(train_data_dir)

# Number of images in the train / validation / test subsets
nb_train_samples, nb_validation_samples, nb_test_samples = 1490, 330, 330

# Training settings: epochs of the first phase, mini-batch size, number of output classes
epochs, batch_size, numclasses = 4, 16, 6

/content/gdrive/My Drive/dataset/output/train


# Data augmentation

In [8]:
# dataset
# Augmentation applied to the training images only: pixel values scaled to [0, 1] plus
# small random rotations, zooms, shifts and horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range = 0.1, # Randomly zoom image 
    width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
    #shear_range=0.2,
    vertical_flip=False,
    horizontal_flip=True)

# Validation and test images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Settings shared by the three directory iterators.
# NOTE: Keras documents `target_size` as (height, width); both are 200 here, so the order
# used below makes no difference.
flow_kwargs = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_kwargs)

validation_generator = test_datagen.flow_from_directory(validation_data_dir, **flow_kwargs)

# `shuffle=False` keeps the test images in directory order, so the rows returned by
# `predict_generator` line up with `test_generator.classes` in the evaluation cells.
# (flow_from_directory shuffles by default.)
test_generator = test_datagen.flow_from_directory(
    test_data_dir, shuffle=False, **flow_kwargs)



Found 1490 images belonging to 6 classes.
Found 330 images belonging to 6 classes.
Found 330 images belonging to 6 classes.


In [9]:
# Show the backend's image layout; the input-shape cell branches on this value
print(K.image_data_format())

channels_last


# Create the base model from the pre-trained convnets

In [0]:
#Model
# The channel axis comes first or last depending on the backend's image data format:
# (3, 200, 200) for 'channels_first', (200, 200, 3) otherwise.
input_shape = (
    (3, img_width, img_height)
    if K.image_data_format() == 'channels_first'
    else (img_width, img_height, 3)
)

In [11]:
# ResNet50 convolutional base initialised with ImageNet weights. `include_top=False` leaves
# out the network's own classifier so that a custom head can be attached to its output.
base_model = None
base_model = keras.applications.resnet50.ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)















**Freeze Convolutional base**

In [0]:
# It is important to freeze the convolutional base before you compile and train the model. 
#Freezing (by setting layer.trainable = False) prevents the weights in a given layer from being updated during training. 
#ResNet50 has many layers, so setting the entire model's trainable flag to False is meant to freeze all of them at once.

In [0]:
# Mark the ResNet50 base as non-trainable for the first training phase.
# NOTE(review): `model` is later assembled from the base's input/output tensors, so the base
# layers are used individually. Confirm with `model.summary()` that the ResNet50 weights are
# counted as non-trainable; on Keras versions where the flag is not propagated to the layers,
# each layer in `base_model.layers` has to be frozen explicitly.
base_model.trainable = False

In [14]:
# Print the layer-by-layer structure of the convolutional base
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 200, 200, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 206, 206, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 100, 100, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 100, 100, 64) 256         conv1[0][0]                      
___________________________________________________________________________________________

In [15]:
# Shape of the base model's output without the batch dimension; the classifier head below takes this as its input shape
print(base_model.output_shape[1:])

(7, 7, 2048)


In [16]:
# Classifier head that is stacked on top of the convolutional base.
# Flatten turns the base's feature map into one vector per image (batch size unaffected).
head_layers = [Flatten(input_shape=base_model.output_shape[1:])]

# Two fully connected blocks: 4096 ReLU units, each followed by dropout that zeroes half of
# the activations during training to limit overfitting.
for _ in range(2):
    head_layers += [Dense(4096, activation='relu'), Dropout(0.5)]

# Output layer: one softmax probability per class
head_layers.append(Dense(numclasses, activation='softmax'))

top_model = Sequential(head_layers)



Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [17]:
# Print the structure and parameter counts of the classifier head
top_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 4096)              411045888 
_________________________________________________________________
dropout_1 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 4096)              16781312  
_________________________________________________________________
dropout_2 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 24582     
Total params: 427,851,782
Trainable params: 427,851,782
Non-trainable params: 0
________________________________________

In [0]:
#https://keras.io/getting-started/functional-api-guide/
# Full network: the base model's input feeds through the base, and the classifier head is
# applied to the base model's output.
model = None
model = Model(inputs=base_model.input, outputs=top_model(base_model.output))

**Compile the model**

In [19]:
#model = resnet50tl(input_shape, numclasses, 'softmax')
# RMSprop with a small learning rate and learning-rate decay. Categorical cross-entropy
# matches the one-hot labels produced by the generators (`class_mode='categorical'`).
lr = 1e-5
decay = 1e-7 #0.0
optimizer = RMSprop(lr=lr, decay=decay)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])





# Display model/network

In [0]:
from keras.utils.vis_utils import plot_model

#plot_model(model, show_shapes=True, show_layer_names = True)
from IPython.display import Image

#Uncomment for visualisation

#Image(filename='model.png')

# Train Model

In [0]:
#Train
# First training phase. `len(generator)` is the number of batches needed to see every image
# once, including the final partial batch. The previous `nb_samples // batch_size` dropped
# that batch, so only 320 of the 330 validation images were scored per epoch.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/4

# Performance and Evaluation

In [0]:
# Learning curves for the training run stored in `history`
logs = history.history

# Keras < 2.3 logs accuracy as 'acc' / 'val_acc'; newer releases use 'accuracy' / 'val_accuracy'
acc_key = 'acc' if 'acc' in logs else 'accuracy'

training_loss = logs['loss']
training_acc = logs[acc_key]
val_acc = logs['val_' + acc_key]

# Epochs are numbered from 1 on the x-axis
epoch_count = range(1, len(training_loss) + 1)

fig, (ax_train, ax_val) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: training loss and training accuracy
ax_train.plot(epoch_count, training_loss, 'r--')
ax_train.plot(epoch_count, training_acc, 'b-')
ax_train.legend(['Training Loss', 'Training Accuracy'])
ax_train.set_xlabel('Epoch')
ax_train.set_ylabel('Training Loss/Acc')

# Right panel: validation accuracy against training accuracy
ax_val.plot(epoch_count, val_acc, 'r--')
ax_val.plot(epoch_count, training_acc, 'b-')
ax_val.legend(['Validation Accuracy', 'Training Accuracy'])
ax_val.set_xlabel('Epoch')
ax_val.set_ylabel('Accuracy')

plt.show()

In [0]:
import numpy
import sklearn.metrics as metrics

In [0]:
# Separate iterator over the test set for the classification report.
# `shuffle=False` is required: the report compares the predictions row by row with
# `test_gene.classes`, which is in directory order, while flow_from_directory shuffles by default.
test_gene = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

In [0]:
# Class probabilities for the test images, one row per image
# NOTE(review): the report built from these values assumes the rows come back in the same
# order as `test_gene.classes`, i.e. that `test_gene` does not shuffle — confirm how
# `test_gene` is created.
predictions = model.predict_generator(test_gene)
# Get most likely class
predicted_classes = numpy.argmax(predictions, axis=1)

#Get ground-truth classes and class-labels
true_classes = test_gene.classes
class_labels = list(test_gene.class_indices.keys()) 

In [0]:
# Per-class precision / recall / F1 of the predictions against the ground-truth labels
report = metrics.classification_report(true_classes, predicted_classes, target_names=class_labels)
print('Classification Report: \n')
print(report)

In [0]:
# Save the weights of `model` to an HDF5 file (relative path, so it lands in the current working directory)
saveweight =  'celebriytag_weight_transfer.h5'
model.save_weights(saveweight)

In [0]:
# Restore the weights written by the previous cell
model.load_weights(saveweight)

In [0]:
#Evaluate the model on test set
print(model.evaluate_generator(test_generator,steps= (nb_test_samples // batch_size), verbose = 1))

In [0]:
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import numpy as np

# Confusion matrix and classification report of `model` on the test set.
#
# Predictions are only comparable with the ground-truth labels if both are in the same order.
# A shuffling generator re-draws its order on every pass, so this cell iterates the test
# folder through its own non-shuffling generator and predicts exactly once. Both the matrix
# and the report are computed from that single pass.
ordered_test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

# One row of class probabilities per test image, in directory order
predictions = model.predict_generator(ordered_test_generator)
Y_pred = predictions

# Ground-truth class indices (same order) and class names ordered by index
classes = ordered_test_generator.classes
class_labels = list(ordered_test_generator.class_indices.keys())

# Most probable class per image
y_pred = np.argmax(Y_pred, axis=-1)
predicted_classes = y_pred

print('Confusion Matrix:')
print(confusion_matrix(classes, y_pred))

report = metrics.classification_report(classes, predicted_classes, target_names=class_labels)
print('\nClassification Report: \n')
print(report)


# Fine tuning

In [0]:
# Mark the base model as trainable again for the fine-tuning phase that follows the first
# training run (`epochs` epochs with the base marked as frozen).
base_model.trainable = True

In [0]:
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))


# Optional partial fine-tuning, currently disabled. `fine_tune_at` is not defined in the
# cells shown here and has to be set before the loop below is enabled.
# # Freeze all the layers before the `fine_tune_at` layer
# for layer in base_model.layers[:fine_tune_at]:
#   layer.trainable =  False


In [0]:
# Re-assemble the full network after changing `base_model.trainable`. It is built from the
# same `base_model` and `top_model` objects as `model`.
model2 = None
model2 = Model(inputs=base_model.input, outputs=top_model(base_model.output))

**Compile the model**

In [0]:
# Compile the fine-tuning model with a fresh RMSprop optimizer; learning rate and decay are
# the same values used for the first training phase.
lr = 1e-5
decay = 1e-7 #0.0
optimizer = RMSprop(lr=lr, decay=decay)
model2.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [0]:
# Print the structure of the fine-tuning model, including trainable / non-trainable parameter counts
model2.summary()

# Continue training the model

In [0]:
#Train no2
# Fine-tuning continues after the `epochs` epochs of the first phase and stops at
# `total_epochs`. `initial_epoch` makes Keras start counting at that point, so exactly
# `fine_tune_epochs` additional epochs are run instead of `total_epochs` new ones.
fine_tune_epochs = 12
total_epochs =  epochs + fine_tune_epochs

# `len(generator)` is the number of batches per pass including the final partial batch,
# so no training or validation image is left out.
history = model2.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=total_epochs,
    initial_epoch=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator))

# Performance and Evaluation after fine tuning

In [0]:
# Learning curves for the fine-tuning run stored in `history`
logs = history.history

# Keras < 2.3 logs accuracy as 'acc' / 'val_acc'; newer releases use 'accuracy' / 'val_accuracy'
acc_key = 'acc' if 'acc' in logs else 'accuracy'

training_loss = logs['loss']
training_acc = logs[acc_key]
val_acc = logs['val_' + acc_key]

# Epochs are numbered from 1 on the x-axis
epoch_count = range(1, len(training_loss) + 1)

fig, (ax_train, ax_val) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: training loss and training accuracy
ax_train.plot(epoch_count, training_loss, 'r--')
ax_train.plot(epoch_count, training_acc, 'b-')
ax_train.legend(['Training Loss', 'Training Accuracy'])
ax_train.set_xlabel('Epoch')
ax_train.set_ylabel('Training Loss/Acc')

# Right panel: validation accuracy against training accuracy
ax_val.plot(epoch_count, val_acc, 'r--')
ax_val.plot(epoch_count, training_acc, 'b-')
ax_val.legend(['Validation Accuracy', 'Training Accuracy'])
ax_val.set_xlabel('Epoch')
ax_val.set_ylabel('Accuracy')

plt.show()

In [0]:
import numpy
import sklearn.metrics as metrics
from sklearn.metrics import confusion_matrix

# Confusion matrix and classification report of the fine-tuned `model2` on the test set.
#
# Predictions are only comparable with the ground-truth labels if both are in the same order.
# A shuffling generator re-draws its order on every pass, so this cell iterates the test
# folder through its own non-shuffling generator and predicts exactly once. Both the matrix
# and the report are computed from that single pass.
ordered_test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

# One row of class probabilities per test image, in directory order
predictions2 = model2.predict_generator(ordered_test_generator)
Y_pred2 = predictions2

# Ground-truth class indices (same order) and class names ordered by index
classes = ordered_test_generator.classes
class_labels = list(ordered_test_generator.class_indices.keys())

# Most probable class per image
y_pred2 = numpy.argmax(Y_pred2, axis=-1)
predicted_classes2 = y_pred2

print('Confusion matrix: \n')
print(confusion_matrix(classes, y_pred2))

report2 = metrics.classification_report(classes, predicted_classes2, target_names=class_labels)
print('\nClassification Report: \n')
print(report2)


In [0]:
# Save the weights of the fine-tuned `model2` to an HDF5 file (relative path, so it lands in the current working directory)
saveweight2 =  'celebriytag_weight_transfer2.h5'
model2.save_weights(saveweight2)

In [0]:
# Restore the weights written by the previous cell
model2.load_weights(saveweight2)

In [0]:
#Evaluate the model on test set
print(model2.evaluate_generator(test_generator,steps= (nb_test_samples // batch_size), verbose = 1))

# Evaluation per image

In [0]:
#Evaluation
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from io import BytesIO
import cv2
import requests
import os

In [0]:
# Class names ordered by the index of the network's output unit. They are read from the
# training generator's folder-name -> index mapping instead of being typed by hand, so the
# names cannot drift from the class folders (the hand-written list had 'uknown', which matches
# no folder) and the order always matches what the model was trained on.
labels = sorted(train_generator.class_indices, key=train_generator.class_indices.get)

In [0]:
# Classify individual test images and show each one with the predicted label and confidence
test_imgs = ['0311_01.jpg']

new_validation_data_dir = '/content/gdrive/My Drive/dataset/output/test/aaron_tippin/'

print(new_validation_data_dir)

for test in test_imgs:
    test_img = os.path.join(new_validation_data_dir, test)

    # Same preprocessing as the generators: resize, add a batch axis, scale pixels to [0, 1]
    img = image.load_img(test_img, target_size=(img_width, img_height))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    # Class probabilities for the single image and the index of the most probable class
    result = np.squeeze(model.predict(x))
    result_indices = np.argmax(result)

    # Display the original file; OpenCV loads BGR while matplotlib expects RGB
    img = cv2.cvtColor(cv2.imread(test_img, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    plt.figure()  # one figure per image, so several entries in `test_imgs` do not overwrite each other
    plt.axis('off')
    plt.title("{}, {:.2f}%".format(labels[result_indices], result[result_indices]*100))
    plt.imshow(img)
    plt.show()

In [0]:
# Classify individual test images and show each one with the predicted label and confidence
test_imgs = ['0058_01.jpg']

new_validation_data_dir = '/content/gdrive/My Drive/dataset/output/test/abbey_clancy/'

for test in test_imgs:
    test_img = os.path.join(new_validation_data_dir, test)

    # Same preprocessing as the generators: resize, add a batch axis, scale pixels to [0, 1]
    img = image.load_img(test_img, target_size=(img_width, img_height))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    # Class probabilities for the single image and the index of the most probable class
    result = np.squeeze(model.predict(x))
    result_indices = np.argmax(result)

    # Display the original file; OpenCV loads BGR while matplotlib expects RGB
    img = cv2.cvtColor(cv2.imread(test_img, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    plt.figure()  # one figure per image, so several entries in `test_imgs` do not overwrite each other
    plt.axis('off')
    plt.title("{}, {:.2f}%".format(labels[result_indices], result[result_indices]*100))
    plt.imshow(img)
    plt.show()

In [0]:
# Classify individual test images and show each one with the predicted label and confidence
test_imgs = ['0081_01.jpg']

new_validation_data_dir = '/content/gdrive/My Drive/dataset/output/test/aaron_diaz/'

for test in test_imgs:
    test_img = os.path.join(new_validation_data_dir, test)

    # Same preprocessing as the generators: resize, add a batch axis, scale pixels to [0, 1]
    img = image.load_img(test_img, target_size=(img_width, img_height))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    # Class probabilities for the single image and the index of the most probable class
    result = np.squeeze(model.predict(x))
    result_indices = np.argmax(result)

    # Display the original file; OpenCV loads BGR while matplotlib expects RGB
    img = cv2.cvtColor(cv2.imread(test_img, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    plt.figure()  # one figure per image, so several entries in `test_imgs` do not overwrite each other
    plt.axis('off')
    plt.title("{}, {:.2f}%".format(labels[result_indices], result[result_indices]*100))
    plt.imshow(img)
    plt.show()

In [0]:
# Classify individual test images and show each one with the predicted label and confidence
test_imgs = ['0205_02.jpg']

new_validation_data_dir = '/content/gdrive/My Drive/dataset/output/test/aarti_chabria/'

for test in test_imgs:
    test_img = os.path.join(new_validation_data_dir, test)

    # Same preprocessing as the generators: resize, add a batch axis, scale pixels to [0, 1]
    img = image.load_img(test_img, target_size=(img_width, img_height))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    # Class probabilities for the single image and the index of the most probable class
    result = np.squeeze(model.predict(x))
    result_indices = np.argmax(result)

    # Display the original file; OpenCV loads BGR while matplotlib expects RGB
    img = cv2.cvtColor(cv2.imread(test_img, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    plt.figure()  # one figure per image, so several entries in `test_imgs` do not overwrite each other
    plt.axis('off')
    plt.title("{}, {:.2f}%".format(labels[result_indices], result[result_indices]*100))
    plt.imshow(img)
    plt.show()

In [0]:
# Classify individual test images and show each one with the predicted label and confidence
test_imgs = ['0037_01.jpg']

new_validation_data_dir = '/content/gdrive/My Drive/dataset/output/test/abby_elliott/'

for test in test_imgs:
    test_img = os.path.join(new_validation_data_dir, test)

    # Same preprocessing as the generators: resize, add a batch axis, scale pixels to [0, 1]
    img = image.load_img(test_img, target_size=(img_width, img_height))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    # Class probabilities for the single image and the index of the most probable class
    result = np.squeeze(model.predict(x))
    result_indices = np.argmax(result)

    # Display the original file; OpenCV loads BGR while matplotlib expects RGB
    img = cv2.cvtColor(cv2.imread(test_img, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    plt.figure()  # one figure per image, so several entries in `test_imgs` do not overwrite each other
    plt.axis('off')
    plt.title("{}, {:.2f}%".format(labels[result_indices], result[result_indices]*100))
    plt.imshow(img)
    plt.show()

In [0]:
# Classify individual test images and show each one with the predicted label and confidence
test_imgs = ['0003_01.jpg']

new_validation_data_dir = '/content/gdrive/My Drive/dataset/output/test/unkown/'

for test in test_imgs:
    test_img = os.path.join(new_validation_data_dir, test)

    # Same preprocessing as the generators: resize, add a batch axis, scale pixels to [0, 1]
    img = image.load_img(test_img, target_size=(img_width, img_height))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    # Class probabilities for the single image and the index of the most probable class
    result = np.squeeze(model.predict(x))
    result_indices = np.argmax(result)

    # Display the original file; OpenCV loads BGR while matplotlib expects RGB
    img = cv2.cvtColor(cv2.imread(test_img, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    plt.figure()  # one figure per image, so several entries in `test_imgs` do not overwrite each other
    plt.axis('off')
    plt.title("{}, {:.2f}%".format(labels[result_indices], result[result_indices]*100))
    plt.imshow(img)
    plt.show()

In [0]:
# Classify individual test images and show each one with the predicted label and confidence
test_imgs = ['0051_02 (2).jpg']

new_validation_data_dir = '/content/gdrive/My Drive/dataset/output/test/unkown/'

for test in test_imgs:
    test_img = os.path.join(new_validation_data_dir, test)

    # Same preprocessing as the generators: resize, add a batch axis, scale pixels to [0, 1]
    img = image.load_img(test_img, target_size=(img_width, img_height))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    # Class probabilities for the single image and the index of the most probable class
    result = np.squeeze(model.predict(x))
    result_indices = np.argmax(result)

    # Display the original file; OpenCV loads BGR while matplotlib expects RGB
    img = cv2.cvtColor(cv2.imread(test_img, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    plt.figure()  # one figure per image, so several entries in `test_imgs` do not overwrite each other
    plt.axis('off')
    plt.title("{}, {:.2f}%".format(labels[result_indices], result[result_indices]*100))
    plt.imshow(img)
    plt.show()