In [None]:
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import os, random

from modules.painting import painting
from modules.database import database as db

from keras.models import Sequential, Model
from keras.layers.core import (Dropout, Flatten, Dense, Activation)
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import (RandomFlip, RandomRotation, Input, BatchNormalization, 
                          RandomTranslation, RandomZoom)
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.applications.vgg19 import VGG19, preprocess_input

from tensorflow import unique_with_counts

#### Control Variables

In [None]:
# setting path variables for Working Directory and folder to save and load
#   models from.
wd = os.getcwd()
WD_PATH =  os.path.abspath(wd)
PATH_TRAINING = os.path.join(WD_PATH, "model_training")

# # initializing the database object
gallery = db()

# Total number of paintings available
TOTAL_PAINTINGS = 3971
# Size of the pictures when reduced in size
PIXEL_SIZE = 250

# the proportions to split the available paintings in training
# and testing.
keep_unused = 10
prop_train = 0.8
prop_test = 0.2

In [15]:
# checking the available paintings per artist.
num_paintings = np.zeros(10,np.int16)
for j in range(10):
    num_paintings[j] = len(gallery.get_paintingids_from_artist(j+1))
print(num_paintings)

[877 439 336 291 259 239 702 262 255 311]


In [37]:
# Weights for the model:
class_temp = TOTAL_PAINTINGS / num_paintings
class_weights = {}
for i in range(10):
    class_weights[i] = class_temp[i]
class_weights

{0: 4.5279361459521095,
 1: 9.045558086560364,
 2: 11.818452380952381,
 3: 13.646048109965635,
 4: 15.332046332046332,
 5: 16.615062761506277,
 6: 5.656695156695156,
 7: 15.15648854961832,
 8: 15.572549019607843,
 9: 12.768488745980708}

In [21]:
# coding the sizes needed as collection arrays for the input data
SIZE_P_TRAINING = 0
training_size_paintings = num_paintings.copy()
SIZE_P_TESTING = 0
testing_size_paintings = num_paintings.copy()
for i in range(10):
    train_count = int((num_paintings[i] - keep_unused)  * prop_train)
    test_count = int((num_paintings[i] - keep_unused)  * prop_test)
    # print(sum((train_count,test_count,10)),training_size_paintings[i])
    SIZE_P_TRAINING += train_count
    SIZE_P_TESTING += test_count
    training_size_paintings[i] = train_count
    testing_size_paintings[i] = test_count
print("total train paintings:", SIZE_P_TRAINING)
print("total test paintings:", SIZE_P_TESTING)

total train paintings: 3092
total test paintings: 770


#### Loading data from DB

In [26]:
## creating arrays to hold the pictures taken from the DB
training_images = np.zeros((SIZE_P_TRAINING,PIXEL_SIZE,PIXEL_SIZE,3))
index_training = 0
skip_count_training = 0
testing_images = np.zeros((SIZE_P_TESTING,PIXEL_SIZE,PIXEL_SIZE,3))
index_testing = 0
skip_count_testing = 0
# creating the arrays to hold labels. In this case they are the artist ids.
training_labels = np.array([0]*SIZE_P_TRAINING,dtype=int)
testing_labels = np.array([0]*SIZE_P_TESTING,dtype=int)
unused_paintings = np.array(
    [0]*(TOTAL_PAINTINGS-SIZE_P_TESTING-SIZE_P_TRAINING)
    ,dtype=int)
# checking if the numbers add up.
print(training_images.shape)
print(testing_images.shape)
print(training_labels.shape)
print(testing_labels.shape)
print(unused_paintings.shape)

(3092, 250, 250, 3)
(770, 250, 250, 3)
(3092,)
(770,)
(109,)


Loop over the artists and adding randomized pictures of them to the <br>
training and testing sets. There are also ids collected, which are unused, <br>
so that they can be used as "new, unseen" input for the model.

In [27]:
# filling the arrays with picture arrays. They will be resized according
# to the pixel_size value
unused_index = 0
for i in range(10):
    # loading all ids from the artist
    ids = gallery.get_paintingids_from_artist(i+1)
    # shuffle the ids to get random order for selection
    random.seed(1983)
    random.shuffle(ids)
    print("New Artist")
    
    # getting the numbers for the current artist:
    _str = training_size_paintings[i]
    _ste = testing_size_paintings[i]
    # slicing the ids for training and testing of artist with id i+1
    ids_training = ids[ : _str]
    ids_testing = ids[_str : _str + _ste]
    ids_unused = ids[ _str + _ste : ]
    
    # collecting the ids of the unused paintings
    for l, f in zip(range(unused_index,unused_index+len(ids_unused))
                    , ids_unused):
        unused_paintings[l] = f[0]
    unused_index += len(ids_unused)
    
    # retrieving the paintings from the db, resizing them and collecting
    # them in the training_images array while also filling the labels
    for k in ids_training:
        temp_p = painting("local DB", id=k[0])
        temp_p_res = cv.resize(temp_p.ndarray, dsize=(PIXEL_SIZE,PIXEL_SIZE)
                               ,interpolation=cv.INTER_CUBIC)
        if temp_p_res.shape == (PIXEL_SIZE,PIXEL_SIZE,3):
            training_images[index_training] = temp_p_res
            training_labels[index_training] = temp_p.artist_id-1
            index_training += 1
        else:
            skip_count_training += 1
            
    # retrieving the paintings from the db, resizing them and collecting
    # them in the testing_images array while also filling the labels
    for j in ids_testing:
        temp_p = painting("local DB", id=j[0])
        temp_p_res = cv.resize(temp_p.ndarray, dsize=(PIXEL_SIZE,PIXEL_SIZE)
                               ,interpolation=cv.INTER_CUBIC)
        if temp_p_res.shape == (PIXEL_SIZE,PIXEL_SIZE,3):
            testing_images[index_testing] = temp_p_res
            testing_labels[index_testing] = temp_p.artist_id-1
            # testing_labels[index_testing] = [temp_p.artist_id-1,]
            index_testing += 1
        else:
            skip_count_testing += 1

# ## dropping the last few array positions of testing images, which where 
# ## not filled.
testing_images = testing_images[:index_testing,:,:,:]
testing_labels = testing_labels[:index_testing]
# ## dropping the last few array positions of training images, which where 
# ## not filled.
training_images = training_images[:index_training,:,:,:]
training_labels = training_labels[:index_training]

# # the pixels on an image are rescaled from 0-255 to 0-1 
training_images, testing_images = training_images/255, testing_images/255 

# defining labels in a list
class_names = [i[1] for i in gallery.get_all_artists()]

New Artist
Training images status
690
3
New Artist
Training images status
1017
19
New Artist
Training images status
1277
19
New Artist
Training images status
1430
90
New Artist
Training images status
1628
91
New Artist
Training images status
1810
92
New Artist
Training images status
2363
92
New Artist
Training images status
2564
92
New Artist
Training images status
2759
93
New Artist
Training images status
2999
93


#### Checking data shapes

In [31]:
print(training_images.shape)
print(testing_images.shape)
print(training_labels.shape)
print(testing_labels.shape)
print(type(training_labels[185:200]))
print(unused_paintings.shape)

(2999, 250, 250, 3)
(746, 250, 250, 3)
(2999,)
(746,)
<class 'numpy.ndarray'>
(109,)


***
Creating the weight vector for the artists.

#### Checking data counts

In [29]:
y_tl, idx_tl, count_tl = unique_with_counts(training_labels)
y_testl, idx_testl, count_testl = unique_with_counts(testing_labels)
y_unused, idx_unused, count_unused = unique_with_counts(unused_paintings)


In [30]:
print(y_tl, count_tl)
print(y_testl, count_testl)
print(unique_with_counts(count_unused)[2])


tf.Tensor([0 1 2 3 4 5 6 7 8 9], shape=(10,), dtype=int32) tf.Tensor([690 327 260 153 198 182 553 201 195 240], shape=(10,), dtype=int32)
tf.Tensor([0 1 2 3 4 5 6 7 8 9], shape=(10,), dtype=int32) tf.Tensor([173  83  65  35  49  44 138  50  49  60], shape=(10,), dtype=int32)
tf.Tensor([109], shape=(1,), dtype=int32)


***
## Modelling part
####  creating the model


In [32]:
base_model = VGG19(include_top = False,
                   classes = 10, 
                   input_shape = (PIXEL_SIZE, PIXEL_SIZE, 3))

base_model.trainable = False
base_model.summary()

Model: "vgg19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 250, 250, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 250, 250, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 250, 250, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 125, 125, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 125, 125, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 125, 125, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 62, 62, 128)       0     

In [33]:
# Layer for the model. 
## data augmentation counteracting the small number of paintings
data_augmentation = Sequential([
    RandomFlip('horizontal'),
    RandomFlip('vertical'),
    RandomRotation(0.2),
    RandomZoom(0.1),
    RandomTranslation(0.1, 0.1),
])
prediction = Sequential([
    Flatten(),
    Dense(512),
    BatchNormalization(),
    Dropout(0.1),
    Dense(512),
    Dense(10, activation = 'softmax'),
])

In [34]:
# Creating the model from basemodel and the other layers.
inputs = Input(shape=(PIXEL_SIZE, PIXEL_SIZE, 3))
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x)
outputs = prediction(x)
model = Model(inputs, outputs)

In [35]:
# compile the model
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 250, 250, 3)]     0         
                                                                 
 sequential (Sequential)     (None, 250, 250, 3)       0         
                                                                 
 tf.__operators__.getitem (S  (None, 250, 250, 3)      0         
 licingOpLambda)                                                 
                                                                 
 tf.nn.bias_add (TFOpLambda)  (None, 250, 250, 3)      0         
                                                                 
 vgg19 (Functional)          (None, 7, 7, 512)         20024384  
                                                                 
 sequential_1 (Sequential)   (None, 10)                13115402  
                                                             

In [38]:
## training the model
epochs = 100
batch_size = 16

early_stopping = EarlyStopping(patience = 20, 
                               verbose = 2, 
                               restore_best_weights = True)
    
history = model.fit(training_images,
                    training_labels,
                    validation_data = (testing_images, testing_labels),
                    class_weight = class_weights,
                    epochs = epochs,
                    batch_size = batch_size,
                    callbacks = early_stopping)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100

In [None]:
model.save(os.path.join(PATH_TRAINING,"image_classifier_BvD_withVGG19basemodel.model"))

In [None]:
model = models.load_model(os.path.join(PATH_TRAINING,"image_classifier_BvD_withVGG19basemodel.model"))

#### check one prediction

In [None]:
test_painting = painting("local DB",random.choice(unused_paintings))
print(test_painting.id)
test_ndarray = cv.resize(test_painting.ndarray, dsize=(PIXEL_SIZE,PIXEL_SIZE)
                               ,interpolation=cv.INTER_CUBIC)
test_ndarray = test_ndarray/255
temp_array = np.zeros((1,PIXEL_SIZE,PIXEL_SIZE,3))
print(temp_array.shape)
temp_array[0] = test_ndarray
prediction = model.predict(temp_array)
np.set_printoptions(suppress=True)
print(np.round(prediction, 4))
print(np.argmax(prediction))
index = np.argmax(prediction)
print(f"Prediction is {class_names[index]}")
imgplot = plt.imshow(test_painting.ndarray)
plt.show()