# Apply transfer learning to classify wikiart images into 25 styles
### Import VGG16 (no top layer) and extract bottleneck features for training / validation images

In [None]:
import numpy as np  
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img  
from keras.models import Sequential  
from keras.layers import Dropout, Flatten, Dense  
from keras import applications  
from keras.utils.np_utils import to_categorical  
import matplotlib.pyplot as plt  
import math  
import cv2  
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Input size of the images fed to the VGG16 convolutional base below.
img_width, img_height = 224, 224

top_model_weights_path = 'bottleneck_fc_model_25.h5'
train_data_dir = '/Users/lizbaldo/Desktop/wikiart-master/wikiart/train'
validation_data_dir = '/Users/lizbaldo/Desktop/wikiart-master/wikiart/val'

# number of epochs to train top model
epochs = 50
# batch size used by flow_from_directory and predict_generator
# (more than 16 exhausted the author's RAM)
batch_size = 16

# VGG16 without its fully connected top: used purely as a feature extractor.
model = applications.VGG16(include_top=False, weights='imagenet')

datagen = ImageDataGenerator(rescale=1. / 255)


def _bottleneck_generator(data_dir):
    """Return an unshuffled, label-free image iterator over *data_dir*."""
    return datagen.flow_from_directory(
        data_dir,
        # Keras expects target_size as (height, width).
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,
        # Keep directory order so the saved features can be matched with
        # the class labels read from a second generator later on.
        shuffle=False)


def _num_batches(n_samples):
    """Return how many batches of `batch_size` are needed for *n_samples*."""
    # float() keeps this a true division under Python 2 as well.
    return int(math.ceil(n_samples / float(batch_size)))


# TRAINING DATA
generator_train = _bottleneck_generator(train_data_dir)

nb_train_samples = len(generator_train.filenames)
num_classes = len(generator_train.class_indices)

predict_size_train = _num_batches(nb_train_samples)

bottleneck_features_train = model.predict_generator(
    generator_train, predict_size_train)

# Cache the features on disk so the top model can be trained without
# re-running VGG16 over every image.
np.save('bottleneck_features_25_train.npy', bottleneck_features_train)

# VALIDATION DATA
generator_val = _bottleneck_generator(validation_data_dir)

nb_validation_samples = len(generator_val.filenames)

predict_size_validation = _num_batches(nb_validation_samples)

bottleneck_features_validation = model.predict_generator(
    generator_val, predict_size_validation)

np.save('bottleneck_features_25_validation.npy', bottleneck_features_validation)

### Prepare data for top model

In [None]:
# The generators in this cell are only used for their metadata (file list,
# class indices, per-file class ids); the pixel data itself comes from the
# bottleneck features saved to disk earlier.

# TRAINING DATA
datagen_top = ImageDataGenerator(rescale=1./255)
generator_top = datagen_top.flow_from_directory(
         train_data_dir,
         # Keras expects target_size as (height, width).
         target_size=(img_height, img_width),
         batch_size=batch_size,
         class_mode='categorical',
         shuffle=False)

nb_train_samples = len(generator_top.filenames)
num_classes = len(generator_top.class_indices)

# load the bottleneck features saved earlier
# (file name must match the one written by the feature-extraction cell)
train_data = np.load('bottleneck_features_25_train.npy')
# get the class labels for the training data, in the original order
train_labels = generator_top.classes
# convert the training labels to categorical (one-hot) vectors
train_labels = to_categorical(train_labels, num_classes=num_classes)

# VALIDATION DATA
# NOTE: generator_top is rebound to the validation directory here; later
# cells read generator_top.class_indices from this validation generator.
generator_top = datagen_top.flow_from_directory(
         validation_data_dir,
         target_size=(img_height, img_width),
         batch_size=batch_size,
         class_mode=None,
         shuffle=False)

nb_validation_samples = len(generator_top.filenames)

validation_data = np.load('bottleneck_features_25_validation.npy')
validation_labels = generator_top.classes
# num_classes comes from the training directory; this assumes the validation
# directory contains the same class sub-folders.
validation_labels = to_categorical(validation_labels, num_classes=num_classes)

### Build and train top model

In [None]:
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from keras.models import Sequential
from keras import optimizers, callbacks
from visual_callbacks import ConfusionMatrixPlotter
import time

# Top model trained on the cached VGG16 bottleneck features.
# One extra convolutional layer is added first: dense layers alone on the
# bottleneck features were not enough.
model = Sequential()
model.add(Conv2D(512, 3, activation='relu', input_shape=train_data.shape[1:]))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(64, activation='relu'))
# Each image has exactly one style (one-hot labels) and the loss is
# categorical cross-entropy, so the output must be a softmax distribution
# over the classes rather than independent sigmoids.
model.add(Dense(num_classes, activation='softmax'))

# Took quite a lot of trial and error to get these right...
sgd = optimizers.SGD(lr=0.01, clipnorm=1.)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy', metrics=['accuracy'])

# Great script adapted from https://github.com/chasingbob/keras-visuals
# The class mapping is taken from generator_top (the generator that produced
# validation_labels) so this cell does not depend on the expensive
# feature-extraction cell having been run in the current session.
plotter = ConfusionMatrixPlotter(X_val=validation_data, classes=generator_top.class_indices, Y_val=validation_labels)

history = model.fit(train_data, train_labels,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(validation_data, validation_labels),callbacks=[plotter])

# Persist the trained top model (architecture + weights) for later prediction.
model.save('my_model_25.h5')

### Plot accuracy and loss curves

In [None]:
# Plot training vs. validation accuracy and loss per epoch from `history`.
# Older Keras releases record the accuracy metric as 'acc'/'val_acc', newer
# ones as 'accuracy'/'val_accuracy'; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

with plt.xkcd():
    plt.figure(1)
    # summarize history for accuracy
    plt.plot(history.history[acc_key])
    plt.plot(history.history['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    # the second curve ('test') is the validation set passed to model.fit
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.figure(2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()

### How to predict classes for new images

In [None]:
from PIL import Image, ImageFile
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image  import load_img,img_to_array
from keras.models import load_model
# Reload the trained top model saved by the training cell.
model = load_model('my_model_25.h5')

target_size = (224, 224) #fixed size for VGG16 architecture

img = '/Users/lizbaldo/Desktop/Ukiyo-e_example.jpg' # Replace with your own image

# preprocess image exactly as during training: resize, then rescale to [0, 1]
image = load_img(img, target_size=target_size)
image = img_to_array(image)
image = image / 255   # important! must match the 1/255 rescale used in training
image = np.expand_dims(image, axis=0)  # add the batch dimension

# build the VGG16 network
model_vgg16 = applications.VGG16(include_top=False, weights='imagenet')
# get the bottleneck prediction from the pre-trained VGG16 model
bottleneck_prediction = model_vgg16.predict(image)
# per-class scores for the single image in the batch
class_predicted = model.predict(bottleneck_prediction)

# model.predict returns one score per class; the predicted class is the
# index of the highest score (the raw score vector cannot be used as a key).
inID = int(np.argmax(class_predicted[0]))

class_dictionary = generator_top.class_indices

# invert {class name: index} into {index: class name}
inv_map = {v: k for k, v in class_dictionary.items()}

label = inv_map[inID]

# get the prediction label
print("Image Label: {}".format(label))
