# Libraries and setup

In [None]:
import os
import random

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import *
from tensorflow.python.keras.layers.core import Flatten

# from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# from sklearn.pipeline import Pipeline

## Colab setup

In [None]:
# Mount Google Drive under /content/drive; every data path below lives there.
from google.colab import drive
drive.mount('/content/drive')

## Read csv

In [None]:
# Load the cleaned artists table from the mounted Drive and display it.
df = pd.read_csv('/content/drive/MyDrive/Proyecto_Final/artists_clean.csv')
df

## Variables setup

### Artists dictionary

In [None]:
# Genre -> artists lookup.
#
# Images are stored on Drive as <root>/<genre>/<artist>/<artist>_<n>.jpg, e.g.
#   .../images/images/Renaissance/Leonardo_da_Vinci/Leonardo_da_Vinci_1.jpg
# Keys and artist names are used verbatim to build those paths, so their
# spelling must match the folder and file names exactly.
artist_per_genre = {
    'Abstract': [
        'Paul_Klee',
        'Vasiliy_Kandinskiy',
        'Joan_Miro',
        'Jackson_Pollock',
    ],
    'Baroque': [
        'Caravaggio',
        'Peter_Paul_Rubens',
        'Rembrandt',
    ],
    'Expresionism': [
        'Amedeo_Modigliani',
        'Edvard_Munch',
        'Marc_Chagall',
    ],
    'Renaissance': [
        'Leonardo_da_Vinci',
        'Michelangelo',
        'Raphael',
        'Titian',
        'Albrecht_Dürer',
        'Hieronymus_Bosch',
        'Jan_van_Eyck',
        'Pieter_Bruegel',
        'Sandro_Botticelli',
    ],
    'Impressionism': [
        'Alfred_Sisley',
        'Camille_Pissarro',
        'Claude_Monet',
        'Edgar_Degas',
        'Georges_Seurat',
        'Henri_de_Toulouse-Lautrec',
        'Paul_Cezanne',
        'Pierre-Auguste_Renoir',
        'Vincent_van_Gogh',
    ],
    'Realism': [
        'Edouard_Manet',
        'Gustave_Courbet',
    ],
    'Romanticism': [
        'Eugene_Delacroix',
        'Francisco_Goya',
        'William_Turner',
    ],
    'Surrealism': [
        'Frida_Kahlo',
        'Henri_Rousseau',
        'Rene_Magritte',
        'Salvador_Dali',
    ],
    'Cubism': [
        'Kazimir_Malevich',
        'Pablo_Picasso',
    ],
}

In [None]:
# Genre names in dictionary order; a genre's position in this list is used as its integer class label.
genres = list(artist_per_genre.keys())

### Path to artists folders

In [None]:
# Root image folder: one sub-folder per genre, each holding one sub-folder per artist.
path = '/content/drive/MyDrive/Proyecto_Final/pictures/images/images'

### Neural Networks variables


In [None]:
# Shape of every image fed to the networks (used for reshaping and as the models' input_shape).
input_shape = (180, 180, 3)

In [None]:
# Size of the softmax output layer: one unit per genre key in artist_per_genre (9 keys).
labels_qty = 9

# Functions

In [None]:
def jpg_to_dic(artist, painting_quantity=10):
    """
    Load an artist's first paintings as arrays keyed by painting number.

    Files are read from f'{relative_path}{artist}_{n}.jpg' for
    n = 1..painting_quantity (e.g. Salvador_Dali_13.jpg). Every image is
    decoded as RGB and resized to 180x180 with nearest-neighbour interpolation.

    NOTE(review): `relative_path` is a notebook-level global that is not
    defined in the cells visible here; it must be set before calling, and
    presumably has to end with a path separator because it is concatenated
    directly with the file name.

    Parameters
    ----------
    artist : str
        Artist name exactly as it appears in the file names.
    painting_quantity : int, default 10
        Number of paintings to load, starting at painting 1.

    Returns
    -------
    dict
        Maps painting number (int) to the array returned by img_to_array.
    """

    dic = dict()

    for paint_num in range(1, painting_quantity+1):
        painting = f'{relative_path}{artist}_{paint_num}.jpg'

        # `grayscale` is deprecated in favour of `color_mode` and no longer
        # accepted by newer Keras releases, so only `color_mode` is passed.
        img = tf.keras.preprocessing.image.load_img(
            painting,
            color_mode="rgb",
            target_size=(180, 180),
            interpolation="nearest",
        )
        dic[paint_num] = tf.keras.preprocessing.image.img_to_array(img)
    print('\nDone!\n')
    return dic

In [None]:
def jpg_to_array(artists, painting_quantity=10):
    """
    Load an artist's first paintings as a list of arrays.

    Files are read from f'{relative_path}{artists}_{n}.jpg' for
    n = 1..painting_quantity (e.g. Salvador_Dali_13.jpg). Every image is
    decoded as RGB and resized to 180x180 with nearest-neighbour interpolation.

    NOTE(review): `relative_path` is a notebook-level global that is not
    defined in the cells visible here; it must be set before calling.

    Parameters
    ----------
    artists : str
        Name of ONE artist, exactly as it appears in the file names. (The
        plural parameter name is kept for backward compatibility.)
    painting_quantity : int, default 10
        Number of paintings to load, starting at painting 1.

    Returns
    -------
    list
        One array (from img_to_array) per painting, in painting-number order.
    """

    lst = list()

    for paint_num in range(1, painting_quantity+1):
        # The original body read an undefined name `artist`; the value the
        # caller passes in is `artists`.
        painting = f'{relative_path}{artists}_{paint_num}.jpg'

        # `grayscale` is deprecated in favour of `color_mode` and no longer
        # accepted by newer Keras releases, so only `color_mode` is passed.
        img = tf.keras.preprocessing.image.load_img(
            painting,
            color_mode="rgb",
            target_size=(180, 180),
            interpolation="nearest",
        )
        lst.append(tf.keras.preprocessing.image.img_to_array(img))
    print('\nDone!\n')
    return lst

In [None]:
def get_df(dictionary, artist, artist_number, size=97200):
  """
  Build a DataFrame with one flattened painting per row plus a label column.

  Parameters
  ----------
  dictionary : dict
      Maps artist name to a mapping {painting number: image array}.
  artist : str
      Key of `dictionary` whose paintings are converted.
  artist_number : int
      Label written to every row of the 'Y' column.
  size : int, default 97200
      Number of values in one flattened painting (180 * 180 * 3 by default).

  Returns
  -------
  pandas.DataFrame
      One row per painting, in the mapping's iteration order, with `size`
      pixel columns followed by the label column 'Y'. Every row keeps index 0,
      as in the original row-by-row concatenation.

  Raises
  ------
  ValueError
      If the artist has no paintings.
  """
  paintings = dictionary[artist]
  if not paintings:
    raise ValueError(f'No paintings found for artist {artist!r}')

  # Use every painting that is present instead of assuming the keys are
  # exactly 1..10, and concatenate once rather than once per painting.
  rows = [pd.DataFrame(array.reshape(1, size)) for array in paintings.values()]
  DF = pd.concat(rows)
  DF['Y'] = artist_number
  print('\nDone!\n')
  return DF

In [None]:
def painting_arrays(n):
  """
  Load the first `n` paintings of every artist.

  NOTE(review): iterates over the notebook-level globals `artists` and
  `paint_qty`, neither of which is defined in the cells visible here; both
  must exist before calling.

  Parameters
  ----------
  n : int
      Number of paintings to load per artist (forwarded to jpg_to_array).

  Returns
  -------
  list
      One list of image arrays per artist, in `artists` order.
  """
  arrays_list = list()
  # zip() is kept so iteration still stops at the shorter of the two globals;
  # the per-artist quantity itself is not used.
  for artist, _qty in zip(artists, paint_qty):
    print(artist)
    # Honour `n`; the original always loaded 10 regardless of the argument.
    arrays_list.append(jpg_to_array(artist, n))
  print('\nDone!\n')
  return arrays_list

In [None]:
def jpg_to_array_from_folders(path, genre, painting_quantity=10):
    """
    Load numbered images whose file names start with `genre` as arrays.

    Files are read from f'{path}{genre}_{n}.jpg' for n = 1..painting_quantity.
    `path` and `genre` are concatenated without a separator, so `path` must
    already end with one. Every image is decoded as RGB and resized to
    180x180 with nearest-neighbour interpolation.

    Parameters
    ----------
    path : str
        Prefix placed directly in front of the file name.
    genre : str
        File-name stem that precedes '_<number>.jpg'.
    painting_quantity : int, default 10
        Number of images to load, starting at image 1.

    Returns
    -------
    list
        One array (from img_to_array) per image, in number order.
    """

    lst = list()

    for paint_num in range(1, painting_quantity+1):
        painting = f'{path}{genre}_{paint_num}.jpg'

        # `grayscale` is deprecated in favour of `color_mode` and no longer
        # accepted by newer Keras releases, so only `color_mode` is passed.
        img = tf.keras.preprocessing.image.load_img(
            painting,
            color_mode="rgb",
            target_size=(180, 180),
            interpolation="nearest",
        )
        lst.append(tf.keras.preprocessing.image.img_to_array(img))
    print('\nDone!\n')
    return lst

In [None]:
def arrays_from_genre(path, genre, genre_dict=artist_per_genre, painting_quantity=10, target_size=(180, 180)):
  """
  Load the first paintings of every artist of one genre as image arrays.

  Expected folder layout below `path`:

  images:
    genre_1:
      artist_1:
        artist_1_1.jpg
        artist_1_2.jpg
      artist_2:
        artist_2_1.jpg
        artist_2_2.jpg
    genre_2:
      ...

  Parameters
  ----------
  path : str
      Root folder that contains the genre folders.
  genre : str
      Genre folder name; must be a key of `genre_dict`.
  genre_dict : dict, default artist_per_genre
      Maps genre name to the list of artist folder names.
  painting_quantity : int, default 10
      Paintings loaded per artist, starting at painting 1.
  target_size : tuple, default (180, 180)
      Size every image is resized to.

  Returns
  -------
  list
      Image arrays, grouped by artist in `genre_dict[genre]` order and by
      painting number within each artist.
  """
  arrays_genre = list()

  # Read from the dictionary that was passed in; the original ignored
  # `genre_dict` and always used the global artist_per_genre.
  list_artists = genre_dict[genre]
  for artist in list_artists:
    print(f' Currently in {genre}, {artist} ')
    for paint_num in range(1, painting_quantity+1):

      painting = f'{path}/{genre}/{artist}/{artist}_{paint_num}.jpg'

      # `grayscale` is deprecated in favour of `color_mode` and no longer
      # accepted by newer Keras releases, so only `color_mode` is passed.
      img = tf.keras.preprocessing.image.load_img(
          painting,
          color_mode="rgb",
          target_size=target_size,
          interpolation="nearest",
      )
      arrays_genre.append(tf.keras.preprocessing.image.img_to_array(img))

  print('\nDone!\n')
  return arrays_genre

In [None]:
# path = '/content/drive/MyDrive/Proyecto_Final/pictures/images/images'
def paintings_to_predict(path, dic=artist_per_genre, train_samples=20, predict_samples=5, target_size=(180, 180)):
  """
  Pick one random painting that lies outside the training range and load it.

  A genre, then one of its artists, then a painting number are drawn at
  random. The number is drawn from train_samples+1 up to
  train_samples+predict_samples, capped at 30.

  Parameters
  ----------
  path : str
      Root folder that contains the genre folders.
  dic : dict, default artist_per_genre
      Maps genre name to the list of artist folder names.
  train_samples : int, default 20
      Paintings per artist already used for training; these are skipped.
  predict_samples : int, default 5
      How many paintings past the training range may be drawn.
  target_size : tuple, default (180, 180)
      Size the image is resized to.

  Returns
  -------
  tuple
      (artist name, image array from img_to_array). Pixel values are not
      rescaled here.

  Raises
  ------
  ValueError
      From random.randint when train_samples + 1 exceeds the upper bound.
  """
  rd_genre = random.choice(list(dic.keys()))
  rd_artist = random.choice(dic[rd_genre])

  # Highest painting number that may be drawn.
  max_paint = 30
  last_paint = min(train_samples + predict_samples, max_paint)
  rd_paint = random.randint(train_samples + 1, last_paint)

  painting = f'{path}/{rd_genre}/{rd_artist}/{rd_artist}_{rd_paint}.jpg'

  # `grayscale` is deprecated in favour of `color_mode` and no longer
  # accepted by newer Keras releases, so only `color_mode` is passed.
  img = tf.keras.preprocessing.image.load_img(
      painting,
      color_mode="rgb",
      target_size=target_size,
      interpolation="nearest",
  )
  array = tf.keras.preprocessing.image.img_to_array(img)

  return (rd_artist, array)

In [None]:
# Plot the training graph
def plot_training(history):
    """
    Plot the accuracy and loss curves of a training run side by side.

    Parameters
    ----------
    history : mapping or object with a `.history` mapping
        Either the per-epoch metrics dictionary or the object returned by
        `model.fit()`. The accuracy series may be stored under 'acc' or
        'accuracy'. Validation series ('val_<accuracy key>', 'val_loss') are
        plotted only when present.
    """
    # Accept the object returned by model.fit() as well as its metrics dict.
    metrics = getattr(history, 'history', history)

    # This notebook compiles with metrics=['accuracy'], which is logged under
    # 'accuracy'; the short name 'acc' is still honoured when it is present.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    val_acc_key = f'val_{acc_key}'

    acc = metrics[acc_key]
    loss = metrics['loss']
    epochs = range(len(acc))

    fig, axes = plt.subplots(1, 2, figsize=(15,5))

    axes[0].plot(epochs, acc, 'r-', label='Training Accuracy')
    # fit() called without validation data logs no 'val_*' series.
    if val_acc_key in metrics:
        axes[0].plot(epochs, metrics[val_acc_key], 'b--', label='Validation Accuracy')
    axes[0].set_title('Training and Validation Accuracy')
    axes[0].legend(loc='best')

    axes[1].plot(epochs, loss, 'r-', label='Training Loss')
    if 'val_loss' in metrics:
        axes[1].plot(epochs, metrics['val_loss'], 'b--', label='Validation Loss')
    axes[1].set_title('Training and Validation Loss')
    axes[1].legend(loc='best')

    plt.show()
    
# plot_training(history)

In [None]:
def predict_paintings(n_predictions, model=None):
  """
  Classify random held-out paintings and count hits and misses.

  For each prediction a painting is drawn with paintings_to_predict(path),
  scaled the same way as the training images, and classified. A prediction
  counts as correct when the drawn artist belongs to the predicted genre in
  artist_per_genre.

  Parameters
  ----------
  n_predictions : int
      Number of paintings to classify.
  model : object with a `predict` method, optional
      Model to use. When omitted, the notebook-level variable `model` is
      looked up at call time, so the function can be defined before any
      model has been built.

  Returns
  -------
  tuple
      (number of correct predictions, number of wrong predictions).

  Raises
  ------
  NameError
      If no model is passed and no global `model` exists.
  """
  if model is None:
    # Late binding: a `model=model` default would be evaluated when this cell
    # runs, which is before the model cells in a top-to-bottom execution.
    try:
      model = globals()['model']
    except KeyError:
      raise NameError("no model given and no global 'model' is defined") from None

  bullseye = 0
  miss = 0
  for _ in range(n_predictions):

    artist, image = paintings_to_predict(path)

    # The networks are trained on pixels scaled to [0, 1] (reescale_images),
    # so the raw 0-255 array must be scaled identically before predicting.
    batch = np.expand_dims(image, axis=0).astype('float32') / 255
    prediction = model.predict(batch)

    # Take the most probable class. Testing for a probability exactly equal
    # to 1 left the label undefined (or stale) for any non-saturated output.
    predicted_label = genres[int(np.argmax(prediction[0]))]

    if artist in artist_per_genre[predicted_label]:
      print(f'The model predicted {predicted_label} for {artist}. Prediction Correct! :D\n')
      bullseye += 1
    else:
      print(f'The model predicted {predicted_label} for {artist}, Prediction Failed :(\n')
      miss += 1
  return bullseye, miss

In [None]:
def paintings_array(path, painting_quantity=20):

  """
  Load the paintings of every genre as arrays, one list per genre.

  Uses 'arrays_from_genre()' for each key of artist_per_genre.

  Parameters
  ----------
  path : str
      Root folder that contains the genre folders.
  painting_quantity : int, default 20
      Paintings loaded per artist.

  Returns
  -------
  list
      One list of image arrays per genre, in artist_per_genre key order.
  """

  array_list = list()

  for genre in artist_per_genre.keys():
    # Forward the caller's quantity; the original always passed 20.
    array_list.append(arrays_from_genre(path, genre, painting_quantity=painting_quantity))
  print(f'Array size is: {len(array_list)}')
  print('-----'*10)
  return array_list

In [None]:
def get_paintings_list(array_list):
  """
  Flatten the per-genre lists of image arrays into one numpy array.

  The paintings keep their order: genre by genre, and within each genre in
  the order they appear in that genre's list. The resulting shape is printed.
  """
  flattened = [painting for genre_arrays in array_list for painting in genre_arrays]
  data_array = np.array(flattened)

  print(f'the data array shape is: {data_array.shape}')
  print('\n-----'*10)
  return data_array

In [None]:
# genres = list(artist_per_genre.keys())

def get_labels(genres, data_array, paintings_per_artist=20, genre_dict=None):
  """
  Build the integer genre label for every painting.

  Each genre's position in `genres` is its label, repeated once per painting
  of that genre (number of artists * paintings_per_artist). Label and data
  lengths are printed so a mismatch can be spotted.

  Parameters
  ----------
  genres : list
      Genre names; the index of a genre is its label.
  data_array : sized
      The image data; only its length is used, for the printed comparison.
  paintings_per_artist : int, default 20
      Number of paintings that were loaded per artist.
  genre_dict : dict, optional
      Maps genre name to its list of artists. Defaults to the notebook's
      artist_per_genre.

  Returns
  -------
  list
      Integer labels, grouped by genre in `genres` order.
  """
  if genre_dict is None:
    genre_dict = artist_per_genre

  labels_codified = []
  labels = []

  for code, actual_genre in enumerate(genres):
    genre_paintings = len(genre_dict[actual_genre]) * paintings_per_artist
    labels.extend([actual_genre] * genre_paintings)
    labels_codified.extend([code] * genre_paintings)
  print(f'The codified labels length is: {len(labels_codified)}. And here are the first 35 of them:\n')
  print(list(zip(labels, labels_codified))[0:35])
  print()
  print(f'\nCodifed labels : Data array ---> {len(labels_codified), len(data_array)}\n')
  print(f'The shape of the codified labels array is: {np.array(labels_codified).shape}')
  print('\n-----'*10)

  return labels_codified

In [None]:
input_shape = (180, 180, 3)
def reescale_images(train_images, test_images, n_train, n_test, input_shape):
  """
  Reshape both image sets to (n, *input_shape) and scale pixels by 1/255.

  Returns new float32 arrays (train, test); the inputs are left untouched.
  """
  height, width, channels = input_shape[0], input_shape[1], input_shape[2]

  def _prepare(images, count):
    # Fix the layout, then convert to float32 (astype makes a copy).
    prepared = images.reshape(count, height, width, channels).astype('float32')
    # Scale pixel values from 0-255 down to 0-1.
    prepared /= 255
    return prepared

  return _prepare(train_images, n_train), _prepare(test_images, n_test)

# Main

In [None]:
# Load the paintings of every genre from Drive: one list of image arrays per genre.
array_list = paintings_array(path, painting_quantity=20)

In [None]:
# Flatten the per-genre lists into a single numpy array of images.
data_array = get_paintings_list(array_list)

In [None]:
# Integer genre label per image, built genre by genre in the same genre order as data_array.
labels_codified = get_labels(genres, data_array)

In [None]:
# Features are the image arrays; targets are the integer genre codes.
X = data_array
y = np.array(labels_codified) #(num_list)
# labels_qty = 9

# Hold out 25% of the images for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.25,
                                                    random_state=42)

print(f'Train_test_split shapes: {x_train.shape, x_test.shape, y_train.shape, y_test.shape}\n')

# Uncomment the [:,:,:,0] suffix to keep only the first colour channel instead of all three
train_images = x_train#[:,:,:,0]
test_images = x_test#[:,:,:,0]
train_labels = y_train
test_labels = y_test
# print(f'{train_images.shape, test_images.shape, train_labels.shape, test_labels.shape}')

# Number of images in each split, needed by reescale_images() for reshaping.
n_train = train_images.shape[0]
n_test = test_images.shape[0]

print(f'number of arrays: {n_train, n_test}\n')

In [None]:
# Reshape both splits to (n, *input_shape), cast to float32 and scale the pixels by 1/255.
# NOTE: this rebinds train_images/test_images, so re-running only this cell scales them again.
train_images, test_images = reescale_images(train_images, 
                                            test_images, 
                                            n_train, 
                                            n_test,
                                            input_shape)

## Dense ANN

In [None]:
# Baseline: fully connected network on the flattened pixels.
# NOTE: the name `model` is rebound by each later model section.
# def FFNN(train_images, train_labels, test_images, test_labels, labels_qty, epochs=30, batch_size=32):
model = keras.Sequential([
    layers.Flatten(),                      
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(labels_qty, activation='softmax')
])

# Sparse categorical cross-entropy: the labels are integer class codes, not one-hot vectors.
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy']
              )

model.fit(train_images, train_labels, epochs=30, batch_size=32)

# Score on the held-out split.
print('Evaluation:\n')
model.evaluate(test_images, test_labels)

In [None]:
# FFNN(train_images, train_labels, test_images, test_labels, labels_qty, epochs=30, batch_size=32)

In [None]:
# model.evaluate(test_images, test_labels)

## Convolutional Neural Network (simple)

In [None]:
# Two stacked convolutions followed directly by the softmax classifier.
# NOTE: this rebinds `model`, replacing the dense network defined above.
# def simple_CNN(train_images, train_labels, test_images, test_labels, labels_qty, epochs=10, batch_size=32):
model = keras.Sequential([
    layers.Conv2D(180, 3, activation='relu', input_shape=input_shape), 
    # Conv2D(filters, kernel_size): 3 is the kernel size here; strides are left at their default
    layers.Conv2D(90, 3, activation='relu'),
    layers.Flatten(),
    layers.Dense(labels_qty, activation='softmax')
])

# Sparse categorical cross-entropy: the labels are integer class codes, not one-hot vectors.
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy']
              )

model.fit(train_images, train_labels, epochs=10, batch_size=32)
# Score on the held-out split.
print('Evaluation:\n')
model.evaluate(test_images, test_labels)

In [None]:
# simple_CNN(train_images, train_labels, test_images, test_labels, labels_qty, epochs=10, batch_size=32)

In [None]:
# model.evaluate(test_images, test_labels)

## Convolutional Neural Network (custom)

In [None]:
# Custom CNN: downsample first, then two convolutions, pooling, dropout and a dense head.
# NOTE: this rebinds `model`; it is the model used by the prediction cells below.
model = keras.Sequential([
    # AveragePooling2D(pool_size, strides): 6x6 windows moved 3 pixels at a time
    layers.AveragePooling2D(6, 3, input_shape=input_shape),
    layers.Conv2D(180*2, 3, activation='relu'),
    layers.Conv2D(90*2, 3, activation='relu'),
    layers.MaxPool2D(2,2),
    layers.Dropout(0.5),
    layers.Flatten(),
    layers.Dense(360*2, activation='relu'),
    layers.Dense(labels_qty, activation='softmax')
])
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras releases.
# NOTE(review): this optimizer is NOT passed to compile() below, which uses the
# default 'adam'. Pass `optimizer=optimizer` there if the lower rate is intended.
optimizer = Adam(learning_rate=0.0001)

# model.compile(loss='categorical_crossentropy',
#               optimizer=optimizer, 
#               metrics=['accuracy'])

# Sparse categorical cross-entropy: the labels are integer class codes, not one-hot vectors.
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy']
              )

# Keep the History object so the training curves can be inspected afterwards.
history = model.fit(train_images, train_labels, epochs=25, batch_size=32)
# Score on the held-out split.
print('Evaluation:\n')
model.evaluate(test_images, test_labels)

In [None]:
# model.evaluate(test_images, test_labels)

## Predictions

In [None]:
# Draw one random painting outside the training range and classify it.
y_predict = paintings_to_predict(path)
# The model was trained on pixels scaled by 1/255 (reescale_images), so the raw
# 0-255 array must be scaled the same way before it is passed to predict().
prediction = model.predict(y_predict[1].reshape(1,180,180,3).astype('float32') / 255)
# list(prediction[0])

[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]

# Results

In [None]:
# Classify 10 random paintings; the returned tuple is (correct, failed).
predict_paintings(10)

The model predicted Impressionism for Pablo_Picasso, Prediction Failed :(

The model predicted Impressionism for Paul_Cezanne. Prediction Correct! :D

The model predicted Abstract for Paul_Klee. Prediction Correct! :D

The model predicted Impressionism for William_Turner, Prediction Failed :(

The model predicted Impressionism for Peter_Paul_Rubens, Prediction Failed :(

The model predicted Impressionism for Joan_Miro, Prediction Failed :(

The model predicted Impressionism for Francisco_Goya, Prediction Failed :(

The model predicted Impressionism for Peter_Paul_Rubens, Prediction Failed :(

The model predicted Impressionism for Edgar_Degas. Prediction Correct! :D

The model predicted Impressionism for Jackson_Pollock, Prediction Failed :(



(3, 7)