### Access to Google Drive folders

In [1]:
# Mount Google Drive inside the Colab runtime at /content/gdrive
# (the call prompts for an authorization code on first use).
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


### Constants

In [0]:
# Absolute locations on the mounted Drive: source images, saved models,
# result plots and the pickled intermediate batches.
images_folder, models_folder, results_folder, prepared_data_folder = (
    '/content/gdrive/My Drive/Project/Images_300x450/',
    '/content/gdrive/My Drive/Project/Models/',
    '/content/gdrive/My Drive/Project/Results/',
    '/content/gdrive/My Drive/Project/Prepared_Data/',
)

# Sub-folders appended to images_folder when building the generators.
train_folder, validation_folder = 'Train/', 'Validation/'

# Name fragments embedded in the pickled batch file names.
titles_file, imdb_id_file = "titles", "imdb_id"
images_file, features_file = "images", "features"

# Identifiers of the selectable networks (used as keys and in file names).
model_InceptionV3, model_VGG16 = "InceptionV3", "VGG16"
model_InceptionV3_with_Fine_Tuning = "InceptionV3FineTuning"

### Libraries


In [3]:
!pip install h5py pyyaml 

import os
import json
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from keras.layers import concatenate
from keras.preprocessing import image
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16

from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.neighbors import NearestNeighbors

import tqdm
from IPython.display import HTML, display



Using TensorFlow backend.


### Parameters

In [0]:
# Chunk size: used both as the generator batch size and as the number of
# items written to each pickled part file.
batch_size = 100
# Identifier of the network to build; also embedded in output file names.
model_type = model_InceptionV3_with_Fine_Tuning
# Folder scanned for titles, IMDB ids and images.
data_folder = images_folder
number_of_recommendations = 5

# Parameters for Inception with Fine Tuning
epochs_number = 15
steps_per_epoch_number = 30
# Step counts must be whole numbers: `/` would yield the float 15.0,
# so use floor division to keep this an int.
validation_steps_number = steps_per_epoch_number // 2

### Functions

In [0]:
def preprocess_input(x):
    """Map pixel values from the [0, 255] range onto [-1, 1].

    The computation is ``(x / 255 - 0.5) * 2``. The input is never modified:
    the work is done on a copy, so the caller's array keeps its raw values.

    Args:
        x: array-like of pixel values. Floating-point arrays keep their
            dtype; any other dtype (e.g. uint8) is converted to float32
            first, because the in-place division cannot be stored in an
            integer array.

    Returns:
        A new numpy array with the rescaled values.
    """
    x = np.asarray(x)
    if np.issubdtype(x.dtype, np.floating):
        x = x.copy()
    else:
        # astype() already returns a fresh array.
        x = x.astype(np.float32)

    x /= 255.
    x -= 0.5
    x *= 2.

    return x
def load_titles(directory):
  """Extract a title from the name of every file directly inside `directory`.

  Sub-directories are ignored. For each file the extension (text after the
  last '.') is dropped and the title is whatever follows the first '-'.
  A file name without a '-' raises IndexError.

  Returns:
    List of title strings, in os.listdir order.
  """
  collected = []

  for entry in tqdm.tqdm(os.listdir(directory)):
    if not os.path.isdir(os.path.join(directory, entry)):
      stem = entry.rsplit('.', 1)[0]
      # Split only once so dashes inside the title itself are preserved.
      collected.append(stem.split('-', 1)[1])

  return collected

def load_imdb_id(directory):
  """Extract an id from the name of every file directly inside `directory`.

  Sub-directories are ignored. The id is the part of the file name that
  precedes both the first '.' and the first '-'.

  Returns:
    List of id strings, in os.listdir order.
  """
  file_names = [
      entry
      for entry in tqdm.tqdm(os.listdir(directory))
      if not os.path.isdir(os.path.join(directory, entry))
  ]

  return [entry.split('.')[0].split('-')[0] for entry in file_names]


def load_images(directory):
  """Load every file directly inside `directory` as a preprocessed image array.

  Sub-directories are skipped. Each file is opened with a 299x299 target
  size, converted to a numpy array, given a leading batch axis of length 1
  and passed through preprocess_input.

  Args:
    directory: folder to scan; a trailing path separator is optional.

  Returns:
    List with one array per file, in os.listdir order.
  """
  prepared = []

  for name in tqdm.tqdm(os.listdir(directory)):
    path = os.path.join(directory, name)
    if os.path.isdir(path):
      continue
    # Open the joined path rather than `directory + name`, which produced a
    # wrong file name whenever `directory` lacked a trailing separator.
    pixels = img_to_array(load_img(path, target_size=(299, 299)))
    # Add the batch axis so each entry can be fed to predict() on its own.
    single_batch = np.expand_dims(pixels, axis=0)
    prepared.append(preprocess_input(single_batch))

  return prepared

def load_features(images, extractor=None):
  """Run each image through a network and flatten its prediction.

  Args:
    images: iterable of inputs, each passed to `predict` on its own.
    extractor: object exposing `predict`. When omitted, the notebook-level
      `model` is used. The previous body referenced `model2`, a name that
      no cell defines, so it failed with NameError on a fresh kernel.

  Returns:
    List of 1-D arrays (the `ravel()` of each prediction), in input order.
  """
  if extractor is None:
    extractor = model

  return [extractor.predict(image).ravel() for image in tqdm.tqdm(images)]

## Prepare Model

## Functions for Inception V3 + Fine Tuning

### Load Inception V3 Model + ImageNet weights

In [6]:
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

def load_model_inceptionv3_imagenet():
  """Build a top-less InceptionV3 for 299x299x3 input and load local weights.

  The network is created without weights, then filled from the .h5 file
  stored under /tmp.

  Returns:
    The InceptionV3 instance with the weights loaded.
  """
  network = InceptionV3(weights=None, include_top=False, input_shape=(299, 299, 3))
  network.load_weights('/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5')

  return network

--2019-01-28 19:24:56--  https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.204.128, 2404:6800:4008:c03::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.204.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87910968 (84M) [application/x-hdf]
Saving to: ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’


2019-01-28 19:25:00 (26.6 MB/s) - ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’ saved [87910968/87910968]



### Create new model with additional layers

In [0]:
def prepare_model_to_fine_tuning(pretrained_model, num_classes=19):
  """Attach a dense classification head to `pretrained_model` and compile it.

  Every layer listed after 'mixed6' is marked trainable, then a new head
  (Flatten, two Dense(1024)+Dropout(0.2) pairs, final Dense) is stacked on
  the output of the 'mixed7' layer.

  Args:
    pretrained_model: network exposing `layers`, `get_layer` and `input`,
      containing layers named 'mixed6' and 'mixed7'.
    num_classes: number of units in the final layer (default 19, the value
      that used to be hard-coded).

  Returns:
    The compiled model.
  """
  unfreeze = False

  # Mark all layers after "mixed6" as trainable.
  # NOTE(review): no visible code freezes the earlier layers beforehand, so
  # they presumably remain trainable as well -- confirm whether they were
  # meant to be frozen.
  for layer in pretrained_model.layers:
    if unfreeze:
      layer.trainable = True
    if layer.name == 'mixed6':
      unfreeze = True

  # The head is built once, after the loop has finished. It used to be
  # indented inside the loop, so the function returned during the first
  # iteration and no layer was ever unfrozen.
  last_output = pretrained_model.get_layer('mixed7').output

  ##### Change model - Add 2 fully connected layers #####

  # Flatten the output layer to 1 dimension
  x = layers.Flatten()(last_output)
  # Two fully connected blocks: 1,024 ReLU units followed by 20% dropout
  x = layers.Dense(1024, activation='relu')(x)
  x = layers.Dropout(0.2)(x)
  x = layers.Dense(1024, activation='relu')(x)
  x = layers.Dropout(0.2)(x)
  # Final sigmoid layer for classification.
  # NOTE(review): sigmoid outputs are paired with categorical_crossentropy
  # below; confirm this combination is intended.
  x = layers.Dense(num_classes, activation='sigmoid')(x)

  # Configure and compile the model
  model = Model(pretrained_model.input, x)
  model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.00001, momentum=0.9), metrics=['acc'])

  return model

### Prepare Generators

In [0]:
def prepare_generators():
  """Create the directory-backed training and validation generators.

  Both generators read 299x299 images in batches of `batch_size` with
  one-hot ('categorical') labels, from `images_folder + train_folder` and
  `images_folder + validation_folder` respectively. No augmentation is
  applied to either set.

  Returns:
    Tuple (train_generator, validation_generator).
  """
  # Scale pixels with preprocess_input, the same function load_images()
  # applies before feature extraction. The earlier rescale=1./255 trained
  # the network on [0, 1] inputs while inference used [-1, 1].
  train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
  test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

  train_generator = train_datagen.flow_from_directory(
          images_folder + train_folder, # This is the source directory for training images
          target_size=(299, 299),  # All images will be resized to 299x299
          batch_size=batch_size,
          class_mode='categorical')

  # Flow validation images in batches of batch_size using test_datagen
  validation_generator = test_datagen.flow_from_directory(
          images_folder + validation_folder,
          target_size=(299, 299),
          batch_size=batch_size,
          class_mode='categorical')

  return train_generator, validation_generator

### Fit model using generators

In [0]:
def fit_model(model, train_generator, validation_generator):
  """Train `model` from the two generators using the notebook-level settings.

  Epoch count, steps per epoch and validation steps are read from
  `epochs_number`, `steps_per_epoch_number` and `validation_steps_number`.

  Returns:
    Tuple (history, model): the value returned by `fit_generator` and the
    same model object that was passed in.
  """
  fit_options = dict(
      steps_per_epoch=steps_per_epoch_number,
      epochs=epochs_number,
      validation_data=validation_generator,
      validation_steps=validation_steps_number,
      verbose=1,
  )

  return model.fit_generator(train_generator, **fit_options), model

### Plot Results of training

In [0]:
def plot_fine_tuning_results(history):
  """Plot accuracy and loss per epoch, save the figure and display it.

  Reads the 'acc', 'val_acc', 'loss' and 'val_loss' series from
  `history.history`, draws accuracy on the left panel and loss on the right,
  and writes the figure to
  `results_folder + "Fine_Tuning_Results_" + model_type + ".png"`.

  Args:
    history: object whose `history` attribute maps the four metric names to
      per-epoch sequences.
  """
  metrics = history.history
  acc, val_acc = metrics['acc'], metrics['val_acc']
  loss, val_loss = metrics['loss'], metrics['val_loss']

  # One x position per recorded epoch
  epochs = range(len(acc))

  # Draw on the axes returned by subplots() instead of the pyplot state
  # machine; the returned figure/axes used to be bound but never used.
  fig, (acc_ax, loss_ax) = plt.subplots(nrows=1, ncols=2, figsize=(18, 7))

  # Training and validation accuracy per epoch
  acc_ax.plot(epochs, acc, label='Training')
  acc_ax.plot(epochs, val_acc, label='Validation')
  acc_ax.set_title('Training and validation accuracy')
  acc_ax.set_xlabel('Epoch')
  acc_ax.set_ylabel('Accuracy')
  acc_ax.legend()

  # Training and validation loss per epoch
  loss_ax.plot(epochs, loss, label='Training')
  loss_ax.plot(epochs, val_loss, label='Validation')
  loss_ax.set_title('Training and validation loss')
  loss_ax.set_xlabel('Epoch')
  loss_ax.set_ylabel('Loss')
  loss_ax.legend()

  fig.savefig(results_folder + "Fine_Tuning_Results_" + model_type + ".png")
  plt.show()

### Remove the layers that were added for Fine Tuning

In [0]:
def remove_added_layers(model):
  """Return a new model that stops at the seventh-from-last layer of `model`.

  The new model shares `model.input` and ends at `model.layers[-7].output`,
  which drops the six layers that follow it.
  """
  return Model(model.input, model.layers[-7].output)

### Save pretrained weights

In [0]:
def save_pretrained_weights(history):
  """Write the weights and the JSON description of `history.model` to disk.

  Files are created under `models_folder`, named after `model_type`:
  '<model_type>_weights.h5' for the weights and '<model_type>.json' for the
  architecture.
  """
  target_prefix = models_folder + model_type

  history.model.save_weights(target_prefix + '_weights.h5')

  architecture = history.model.to_json()

  # NOTE(review): to_json() presumably already returns a JSON string, in
  # which case json.dump stores it as one quoted, escaped string -- confirm
  # that readers of this file expect that format.
  with open(target_prefix + '.json', 'w') as handle:
      json.dump(architecture, handle)

## Prepare Model

### Create Inception V3 model with Fine Tuning

In [0]:
def get_InceptionV3_Fine_Tuning_Model():
  """Run the whole fine-tuning pipeline and return the trimmed network.

  Steps, in order: load the base InceptionV3, attach the trainable head,
  build the data generators, train, plot the training curves, strip the
  added layers, and save the trained weights and architecture.

  Returns:
    The model produced by remove_added_layers.
  """
  base_network = load_model_inceptionv3_imagenet()
  trainable_network = prepare_model_to_fine_tuning(base_network)

  train_batches, validation_batches = prepare_generators()

  training_history, fitted_network = fit_model(
      trainable_network, train_batches, validation_batches)
  plot_fine_tuning_results(training_history)

  trimmed_network = remove_added_layers(fitted_network)
  # Saving goes through the history object, after the trimmed model is built.
  save_pretrained_weights(training_history)

  return trimmed_network

### Get Model

In [0]:
def get_model(model):
  """Build and return the network identified by `model`.

  Only the requested network is constructed. The earlier dictionary held
  already-built values, so every call instantiated InceptionV3 and VGG16 and
  ran the complete fine-tuning pipeline regardless of the key asked for.

  Args:
    model: one of model_InceptionV3, model_VGG16 or
      model_InceptionV3_with_Fine_Tuning.

  Returns:
    The constructed model.

  Raises:
    ValueError: if `model` is not a known identifier (previously the string
      "Invalid model" was returned, which only failed later when used).
  """
  builders = {
          model_InceptionV3: lambda: InceptionV3(weights='imagenet', include_top=False),
          model_VGG16: lambda: VGG16(weights='imagenet', include_top=False),
          model_InceptionV3_with_Fine_Tuning: get_InceptionV3_Fine_Tuning_Model,
  }
  if model not in builders:
    raise ValueError("Invalid model: %r" % (model,))

  return builders[model]()

# Build the network selected by model_type and bind it to the notebook-level name `model`.
model = get_model(model_type)

## Extract Titles, IDs, Images and Features

#### Titles

In [0]:
# Collect one title per file found directly in data_folder.
titles = load_titles(data_folder)
# Ceiling division via negated floor division: the number of
# batch_size-sized chunks needed to hold every title.
number_of_batches = -(-len(titles) // batch_size)

In [0]:
# Write the titles to disk in chunks of batch_size, one pickle file per chunk.
for idx, i in  tqdm.tqdm(enumerate(range(0, len(titles), batch_size))):
  part_path = prepared_data_folder + model_type + "_" + titles_file + "_part_" + str(idx) + '.p'
  # `with` closes each file as soon as it is written; the bare open() call
  # never closed its handle explicitly.
  with open(part_path, 'wb') as part_file:
    pickle.dump(np.asarray(titles[i:i+batch_size]), part_file)

#### IMDB ids


In [0]:
# Collect one id per file found directly in data_folder.
imdb_ids = load_imdb_id(data_folder)

In [0]:
# Write the ids to disk in chunks of batch_size, one pickle file per chunk.
for idx, i in  tqdm.tqdm(enumerate(range(0, len(imdb_ids), batch_size))):
  part_path = prepared_data_folder + model_type + "_" + imdb_id_file + "_part_" + str(idx) + '.p'
  # `with` closes each file as soon as it is written; the bare open() call
  # never closed its handle explicitly.
  with open(part_path, 'wb') as part_file:
    pickle.dump(np.asarray(imdb_ids[i:i+batch_size]), part_file)

#### Images

In [0]:
# Load and preprocess every image file found directly in data_folder (all kept in memory).
images = load_images(data_folder)

In [0]:
# Write the preprocessed images to disk in chunks of batch_size, one pickle file per chunk.
for idx, i in  tqdm.tqdm(enumerate(range(0, len(images), batch_size))):
  part_path = prepared_data_folder + model_type + "_" + images_file + "_part_" + str(idx) + '.p'
  # `with` closes each file as soon as it is written; the bare open() call
  # never closed its handle explicitly.
  with open(part_path, 'wb') as part_file:
    pickle.dump(np.asarray(images[i:i+batch_size]), part_file)

#### Features

In [0]:
# For each pickled image chunk: reload it, compute its features and store
# them in a matching features part file.
for idx in range(0, number_of_batches):
  filename = prepared_data_folder + model_type + "_" + images_file + "_part_" + str(idx) + '.p'
  # NOTE: pickle.load can execute code embedded in the file; only read part
  # files that this notebook wrote itself.
  with open(filename, mode='rb') as images_part:
    loaded_images = pickle.load(images_part)
  features = load_features(loaded_images)

  features_path = prepared_data_folder + model_type + "_" + features_file + "_part_" + str(idx) + '.p'
  # Context managers close both handles; the bare open() calls left them open.
  with open(features_path, 'wb') as features_part:
    pickle.dump(np.asarray(features), features_part)