# Essencial imports
* Importing os to work with system files.
* Importing shutil to copy and remove files.
* Importing random to shuffle list
* Importing math to work with `floor()` method.
* Importing zipfile ro unzip downloaded zip files.
* Importing matplotlib to plot and save plots.
* Importing tensorflow to create and train model.

In [18]:
import os
import shutil
from shutil import copyfile
import random
import math
import zipfile
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from datetime import datetime
from tensorflow.keras.models import load_model

# Downloading dataset

In [2]:
if not os.path.exists('./datasets/best-artworks-of-all-time.zip'):
    !wget https://bibview.ir/static/web/data/best-artworks-of-all-time.zip

# Unzip files

In [3]:
def extract_zip():
    with zipfile.ZipFile('best-artworks-of-all-time.zip', 'r') as zip_ref:
        zip_ref.extractall('./datasets/')

In [4]:
if not os.path.exists('./datasets/best-artworks-of-all-time'):
    extract_zip()

# Defining Hyperparameters

In [24]:
print("Enter number of classes:(2 to 51)")
NUM_CLASSES = int(input())

IMG_WIDTH, IMG_HEIGHT = 250, 250

POOLING_KERNEL_SIZE = 3

num_epochs = 100

TRAIN_TO_TEST_RATIO = 0.9

Enter number of classes:(2 to 51)
10


# Reading Data Directory

In [25]:
IMAGES_DIR = './datasets/best-artworks-of-all-time/images/images/'

artists_dir = os.listdir(IMAGES_DIR)[:NUM_CLASSES]

# Creating Train and Validation Directories
* To work with data generators, it need data to be reached via related directory.

In [8]:
if NUM_CLASSES != 51:
    if os.path.exists('./datasets/training-tmp/'):
        os.rmdir('./datasets/training-tmp/')
    if os.path.exists('./datasets/validation-tmp/'):
        os.rmdir('./datasets/validation-tmp/')
        
    TRAIN_DIR = './datasets/training-tmp/'
    VALIDATION_DIR = './datasets/validation-tmp/'
    
    os.mkdir(TRAIN_DIR)
    os.mkdir(VALIDATION_DIR)
else:
    TRAIN_DIR = './datasets/training-all/'
    VALIDATION_DIR = './datasets/validation-all/'

# Seperate Train and Validation Data
* This function reads all data and seperated it to train and validation set according to given `TRAIN_TO_TEST_RATIO`.

* **Input**: Boolean print_info: printing info about each artist in images directory.

In [9]:
def seperate_data_into_training_validation_for_each_artist(print_info=1):
  for artist_dir in artists_dir:
    artist_paintings = os.listdir(IMAGES_DIR + artist_dir)
    number_of_seperation = math.floor(TRAIN_TO_TEST_RATIO * len(artist_paintings))
    random.shuffle(artist_paintings)

    artist_training_images = artist_paintings[:number_of_seperation]
    artist_validation_images = artist_paintings[number_of_seperation:]
    
    if print_info:
        print("Number of total images for %s: %d" % (artist_dir, len(artist_paintings)))
        print("Number of training images for %s: %d" % (artist_dir, len(artist_training_images)))
        print("Number of validation images for %s: %d" % (artist_dir, len(artist_validation_images)))
        print("------------------------------------------------")
    
    if NUM_CLASSES != 51:
        os.mkdir(TRAIN_DIR + '%s' % artist_dir)
        os.mkdir(VALIDATION_DIR + '%s' % artist_dir)

        for file in artist_training_images:
          copyfile(IMAGES_DIR + '/%s/%s' % (artist_dir, file), TRAIN_DIR + '%s/%s' % (artist_dir, file))

        for file in artist_validation_images:
          copyfile(IMAGES_DIR + '/%s/%s' % (artist_dir, file), VALIDATION_DIR + '%s/%s' % (artist_dir, file))
  return

In [10]:
seperate_data_into_training_validation_for_each_artist(print_info=1)

Number of total images for Gustav_Klimt: 117
Number of training images for Gustav_Klimt: 105
Number of validation images for Gustav_Klimt: 12
------------------------------------------------
Number of total images for Paul_Cezanne: 47
Number of training images for Paul_Cezanne: 42
Number of validation images for Paul_Cezanne: 5
------------------------------------------------
Number of total images for Diego_Rivera: 70
Number of training images for Diego_Rivera: 63
Number of validation images for Diego_Rivera: 7
------------------------------------------------
Number of total images for Mikhail_Vrubel: 171
Number of training images for Mikhail_Vrubel: 153
Number of validation images for Mikhail_Vrubel: 18
------------------------------------------------
Number of total images for Vincent_van_Gogh: 877
Number of training images for Vincent_van_Gogh: 789
Number of validation images for Vincent_van_Gogh: 88
------------------------------------------------
Number of total images for Paul_K

# Defining Sequential Model

In [26]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)),
    tf.keras.layers.MaxPooling2D(POOLING_KERNEL_SIZE, POOLING_KERNEL_SIZE),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(POOLING_KERNEL_SIZE, POOLING_KERNEL_SIZE),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(POOLING_KERNEL_SIZE, POOLING_KERNEL_SIZE),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
])

model.compile(optimizer=RMSprop(lr=0.001), loss='categorical_crossentropy', metrics=['acc'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 248, 248, 16)      448       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 82, 82, 16)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 80, 80, 32)        4640      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4096)              0         
__________

In [27]:
train_datagen = ImageDataGenerator(rescale=1./255,
                                  rotation_range=40,
                                  width_shift_range=0.2,
                                  height_shift_range=0.2,
                                  shear_range=0.2,
                                  zoom_range=0.2,
                                  horizontal_flip=True,
                                  fill_mode='nearest')

train_generator = train_datagen.flow_from_directory(TRAIN_DIR,
                                                    batch_size=100,
                                                    class_mode='categorical',
                                                    target_size=(IMG_WIDTH, IMG_HEIGHT))

Found 1976 images belonging to 10 classes.


In [28]:
validation_datagen = ImageDataGenerator(rescale=1./255,
                                        rotation_range=40,
                                        width_shift_range=0.3, 
                                        height_shift_range=0.2,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True,
                                        fill_mode='nearest')

validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR,
                                                              batch_size=100,
                                                              class_mode='categorical',
                                                              target_size=(IMG_WIDTH, IMG_HEIGHT))

Found 224 images belonging to 10 classes.


In [29]:
%load_ext tensorboard

logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
%tensorboard --logdir logs/scalars
history = model.fit_generator(train_generator,
                              epochs=num_epochs,
                              verbose=1,
                              validation_data=validation_generator,
                              callbacks=[tensorboard_callback])

Reusing TensorBoard on port 6006 (pid 10974), started 0:02:42 ago. (Use '!kill 10974' to kill it.)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100


Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100

In [40]:
def plot_history(history):
    %matplotlib inline

    acc=history.history['acc']
    val_acc=history.history['val_acc']
    loss=history.history['loss']
    val_loss=history.history['val_loss']

    epochs=range(len(acc)) 

    plt.plot(epochs, acc, 'r', "Training Accuracy")
    plt.plot(epochs, val_acc, 'b', "Validation Accuracy")
    plt.title('Training and validation accuracy')

    if not os.path.exists(ACC_PLOT_DIR):
        plt.savefig(ACC_PLOT_DIR)
        print("Accuracy plot %s saved." % ACC_PLOT_NAME)
    else:
        print("Accuracy plot %s already exists in directory." % ACC_PLOT_NAME)
    plt.figure()

    plt.plot(epochs, loss, 'r', "Training Loss")
    plt.plot(epochs, val_loss, 'b', "Validation Loss")

    if not os.path.exists(LOSS_PLOT_DIR):
        plt.savefig(LOSS_PLOT_DIR)
        print("Loss plot %s saved" % LOSS_PLOT_NAME)
    else:
        print("Loss plot %s already exists in directory." % LOSS_PLOT_NAME)
    plt.figure()

    ACC_PLOT_NAME = '%s_classes_%s_epochs_%.4f_%.4f.png' % (NUM_CLASSES, num_epochs, acc[-1], val_acc[-1])
    LOSS_PLOT_NAME = '%s_classes_%s_epochs_%.4f_%.4f.png' % (NUM_CLASSES, num_epochs, loss[-1], val_loss[-1])

    ACC_PLOT_DIR = os.path.join('./plots/acc/', ACC_PLOT_NAME) 
    LOSS_PLOT_DIR = os.path.join('./plots/loss/', LOSS_PLOT_NAME)
    return

In [None]:
# plot_history(history)

In [None]:
if NUM_CLASSES != 51:
    shutil.rmtree(TRAIN_DIR, ignore_errors=True)
    shutil.rmtree(VALIDATION_DIR, ignore_errors=True)

In [None]:
model.save('./model/model_%s_classes_%s_epochs_%.4f_%.4f.h5' % (NUM_CLASSES, num_epochs, acc[-1], val_acc[-1]))

In [30]:
model = load_model('./model/model_10_classes_100_0.8512_0.6205.h5')

In [31]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 248, 248, 16)      448       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 82, 82, 16)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 80, 80, 32)        4640      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4096)              0         
__________

In [35]:
out = model.evaluate_generator(validation_generator)

In [38]:
model.metrics_names
print(model.metrics_names[0] + " over test data generator: ", out[0])
print(model.metrics_names[1] + " over test data generator: ", out[1])

loss over test data generator:  0.6797053416570028
acc over test data generator:  0.7901786
