In [8]:
from pathlib import Path
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import keras
import cv2
import tensorflow as tf
import keras
from keras.layers import Conv2D, Dense, Flatten, MaxPool2D
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.preprocessing import LabelEncoder
from keras.layers import Lambda
from PIL import Image
import cv2
from tensorflow.keras import layers
from keras.utils import to_categorical


In [9]:
class CustomGenerator(keras.utils.Sequence):

    def __init__(self, image_filenames, labels, batch_size):
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        return (np.ceil(len(self.image_filenames) / float(self.batch_size))).astype(int)

    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size: (idx + 1) * self.batch_size]        
        return np.array([
            (cv2.imread(str(file_name)))
            for file_name in batch_x]), np.array(batch_y)

In [10]:
path = Path("test_data_smaller")
df = pd.read_csv(path/'artists.csv')

#check distribution of paintings
artists_df = df[['name', 'paintings']].groupby(['name'], as_index = False).sum()
names = artists_df.sort_values('paintings', ascending = False)[:50]
print(names)
#remove spaces from names
images_dir = Path(path/'images/images')
artists = names['name'].str.replace(' ', '_').values

                         name  paintings
48           Vincent van Gogh        877
10                Edgar Degas        702
33              Pablo Picasso        439
38      Pierre-Auguste Renoir        336
0              Albrecht Dürer        328
35               Paul Gauguin        311
15             Francisco Goya        291
42                  Rembrandt        262
1               Alfred Sisley        259
46                     Titian        255
30               Marc Chagall        239
43              Rene Magritte        194
2           Amedeo Modigliani        193
36                  Paul Klee        188
21              Henri Matisse        186
4                 Andy Warhol        181
32             Mikhail Vrubel        171
45          Sandro Botticelli        164
29          Leonardo da Vinci        143
37          Peter Paul Rubens        141
44              Salvador Dali        139
24           Hieronymus Bosch        137
40             Pieter Bruegel        134
9             Di

In [11]:
painting_list = []
for path, subdirs, files in os.walk(images_dir):
    for name in files:
        img = os.path.join(path, name)
        painting_list.extend([img])

#only works on windows
artist_list = []
for painting in painting_list:
    artist = painting.split('\\')[3]
    artist_list.extend([artist])

y = np.array(artist_list)  

encoder = LabelEncoder()
y = encoder.fit_transform(pd.Series(y))
classes = list(encoder.classes_)
for i in range(len(classes)):
    print(i, ' represents ', classes[i])

y = keras.utils.to_categorical(y, len(classes))
print(y)

X = np.array(painting_list) 

0  represents  Francisco_Goya
1  represents  Paul_Gauguin
2  represents  Pierre-Auguste_Renoir
3  represents  Rembrandt
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 ...
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]]


In [12]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=1)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=0.5, random_state=1)

print(np.unique(y_train))
print(np.unique(y_val))
print(np.unique(y_test))
print(len(X_train))
print(len(y_train))

[0. 1.]
[0. 1.]
[0. 1.]
960
960


In [13]:
batch_size = 32
training_batch_generator = CustomGenerator(X_train, y_train, batch_size)
validation_batch_generator = CustomGenerator(X_val, y_val, batch_size)
training_size = len(X_train)
validation_size = len(X_val)
test_size = len(X_test)
print(training_size)

960


In [14]:
def make_model(input_shape, num_classes):
    inputs = keras.Input(shape=input_shape)

    # Entry block
    x = layers.Rescaling(1.0 / 255)(inputs)
    x = layers.Conv2D(128, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    for size in [256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.GlobalAveragePooling2D()(x)
    
    activation = "softmax"

    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation=activation)(x)
    return keras.Model(inputs, outputs)

In [15]:
input_shape = (cv2.imread(str(painting_list[0]))).shape
model = make_model(input_shape, len(classes))

In [16]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
model.fit_generator(generator=training_batch_generator,
                    steps_per_epoch=int(training_size // batch_size),
                    epochs=6,
                    verbose=1,
                    validation_data=validation_batch_generator,
                    validation_steps=int(validation_size // batch_size)
                   )

Epoch 1/6


  model.fit_generator(generator=training_batch_generator,


Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x1b2a034cac0>

In [17]:
model.save(Path('test.h5'))