The purpose of this notebook is to use transfer learning to predict the culture of an artwork from the MET given its image. To do so, we use the GPU provided by Google Colab.


# Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.layers import Input, Lambda, Dense, Flatten
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from google.colab import drive

The dataset is stored in Google Drive.

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so the
# dataset stored on Drive can be read like a local path.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# The dataset is shipped as a single zip archive on the mounted Drive;
# this is the path to that archive.
zip_file = '/content/gdrive/MyDrive/MUSEUM_IMAGES.zip'

Run the following cell if and only if you have not yet unzipped your dataset.

In [None]:
import zipfile

# Extract the archive only when the dataset folder is not there yet, so that
# "Restart & Run All" does not re-extract thousands of images onto Drive.
# The archive is expected to unpack into a MUSEUM_IMAGES folder (the folder
# the dataset-loading cell reads from).
# NOTE: an interrupted extraction leaves a partial folder behind; delete it
# manually to force a fresh extraction.
extract_root = '/content/gdrive/MyDrive'
extracted_dir = os.path.join(extract_root, 'MUSEUM_IMAGES')

if os.path.isdir(extracted_dir):
    print('Dataset already extracted at', extracted_dir, '- skipping unzip.')
else:
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_root)

In the following cell, we create the training, validation and test datasets.

In [None]:
# --- Shared loading parameters -------------------------------------------
batch_size = 64
IMG_SIZE = (224, 224)        # input resolution used with VGG16
IMG_SHAPE = IMG_SIZE + (3,)  # height, width, plus 3 colour channels

# --- Dataset locations on the mounted Drive --------------------------------
file_data = 'MUSEUM_IMAGES'
data_dir_train = os.path.join('/content/gdrive/MyDrive', file_data, 'TRAIN')
data_dir_test_balanced = os.path.join('/content/gdrive/MyDrive', file_data, 'TEST_BALANCED')
data_dir_test_unbalanced = os.path.join('/content/gdrive/MyDrive', file_data, 'TEST_UNBALANCED')

# Keyword arguments common to every loader call. The training/validation
# pair additionally shares the same split fraction and seed, so both calls
# partition the TRAIN folder consistently.
loader_args = dict(image_size=IMG_SIZE, seed=1, batch_size=batch_size)
split_args = dict(validation_split=0.2, **loader_args)

# 80 % of TRAIN is used for fitting ...
train_data = tf.keras.utils.image_dataset_from_directory(
    data_dir_train, subset="training", **split_args)

# ... and the remaining 20 % for validation.
val_data = tf.keras.utils.image_dataset_from_directory(
    data_dir_train, subset="validation", **split_args)

# Two held-out test sets, loaded in full (no split).
test_data_balanced = tf.keras.utils.image_dataset_from_directory(
    data_dir_test_balanced, **loader_args)

test_data_unbalanced = tf.keras.utils.image_dataset_from_directory(
    data_dir_test_unbalanced, **loader_args)


Found 7153 files belonging to 9 classes.
Using 5723 files for training.
Found 7153 files belonging to 9 classes.
Using 1430 files for validation.
Found 1789 files belonging to 9 classes.
Found 200 files belonging to 9 classes.


We import VGG16 in order to do transfer learning for our specific problem

In [None]:
# Convolutional part of VGG16 with ImageNet weights; the original fully
# connected classifier is left out (include_top=False).
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)

# Freeze every pretrained layer so only layers added later are trained.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False
  

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Reusable preprocessing pieces: the VGG16 input preprocessing function,
# a layer that scales pixel values by 1/255, and a flattening layer.
vgg16_module = tf.keras.applications.vgg16
keras_layers = tf.keras.layers

preprocess_input = vgg16_module.preprocess_input
rescale = keras_layers.Rescaling(scale=1.0 / 255)
flatten = keras_layers.Flatten()

We build our model. We replace the top of VGG16 with a single dense layer that has one unit per class (9 here) and a softmax activation, in order to solve our classification problem.

In [None]:
num_classes = 9  # the TRAIN directory contains 9 class folders

# The ImageNet VGG16 weights expect inputs that went through the VGG16
# `preprocess_input` (RGB -> BGR and per-channel ImageNet mean subtraction).
# The datasets yield raw [0, 255] RGB batches, so the preprocessing is placed
# inside the model: training, validation, test and any later inference on the
# saved model all receive it automatically.
inputs = Input(shape=IMG_SHAPE)
x = preprocess_input(inputs)

# Run the frozen convolutional base in inference mode on the preprocessed
# images, then flatten its feature maps for the classifier head.
x = base_model(x, training=False)
x = Flatten()(x)

# New classification head: one softmax unit per class.
prediction = Dense(num_classes, activation='softmax')(x)

# create a model object
model = Model(inputs=inputs, outputs=prediction)

# view the structure of the model
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
from tensorflow.keras import optimizers

# Labels are consumed by the sparse categorical cross-entropy loss;
# accuracy is tracked during training and validation.
adam = optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs, validating on the held-out split after each epoch.
model.fit(train_data, validation_data=val_data, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f2349a62fd0>

In [None]:
# Disabled experiment: the code below is wrapped in a string literal, so
# running this cell builds nothing. It sketches an alternative model,
# `model2`, that applies the `rescale` layer to the inputs before the frozen
# VGG16 base and then adds the same flatten + softmax head.
"""
inputs = tf.keras.Input(shape=IMG_SHAPE)
x = rescale(inputs)
x = base_model(x, training=False)
x = flatten(x)
prediction = Dense(num_classes, activation='softmax')(x)

model2 = tf.keras.Model(inputs=inputs, outputs=prediction)

model2.summary()


"""


In [None]:
# Disabled experiment: compile and train `model2` with the same loss,
# optimizer settings and epoch count used for `model`. The code is inside a
# string literal, so nothing is trained when this cell runs.
"""
from tensorflow.keras import optimizers
model2.compile(
  loss='sparse_categorical_crossentropy',
  optimizer=optimizers.Adam(learning_rate=0.01),
  metrics=['accuracy']
)


model2.fit( 
    train_data,
    validation_data=val_data,
    epochs=20,
)
"""

The loss used is sparse categorical cross-entropy (the labels are integer class indices) and the metric is accuracy.

In [None]:
# Persist the trained model to Drive so it can be reloaded later
# without retraining.
name_model = 'vgg16_culture_customized.h5'
model_path = os.path.join('/content/gdrive/MyDrive', name_model)
model.save(model_path)

In [None]:
#loaded_model = tf.keras.models.load_model(os.path.join('/content/gdrive/MyDrive', name_model))

In [None]:
# Evaluate on the balanced test set. With the single 'accuracy' metric
# compiled above, `score` is [loss, accuracy].
score = model.evaluate(test_data_balanced, verbose=False)
balanced_accuracy = score[1]
print('Test accuracy:', balanced_accuracy)

Test accuracy: 0.4924538731575012


In [None]:
# Evaluate on the unbalanced test set. With the single 'accuracy' metric
# compiled above, `score` is [loss, accuracy].
score = model.evaluate(test_data_unbalanced, verbose=False)
unbalanced_accuracy = score[1]
print('Test accuracy:', unbalanced_accuracy)

Test accuracy: 0.6200000047683716


In [None]:
# Disabled: evaluation of the experimental `model2` on the balanced test set.
# The code sits inside a string literal, so running this cell evaluates
# nothing. NOTE(review): any accuracy printed under this cell presumably
# dates from an earlier run when the code was active - confirm before citing.
"""
score = model2.evaluate(test_data_balanced, verbose=False)
print('Test accuracy:', score[1])
#score = model2.evaluate(test_data_unbalanced, verbose=False)
#print('Test accuracy:', score[1]) """

Test accuracy: 0.4684181213378906
