In [1]:
# Install the plaidml backend
import plaidml.keras
plaidml.keras.install_backend()

In [2]:
import pandas as pd
import numpy as np
import os
import sys
from collections import Counter, defaultdict
import matplotlib.pyplot as plt
import tqdm

In [45]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import cv2

import keras
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Dense, Input, Activation, Dropout, GlobalAveragePooling2D, \
    BatchNormalization, concatenate, AveragePooling2D
from keras.optimizers import Adam

ModuleNotFoundError: No module named 'tensorflow'

# 1. Input data

In [4]:
PATH_DATA = "/Users/david/TFM_DATA/spec"
data_files = os.listdir(PATH_DATA)

In [5]:
# PATH triplets
PATH_TRIPLETS = os.path.join("..","triplets", "triplets.csv")
df = pd.read_csv(PATH_TRIPLETS, delimiter= ";")

In [6]:
# List
triplets_input = list(df["output"])
size_triplets_input = len(triplets_input)

# Sample
triplets_input_v1 = triplets_input[:1000]
size_triplets_input = len(triplets_input_v1)

# Artists
artists_labels = np.sort(df.a1.unique())
df_artists = pd.DataFrame(data = artists_labels, columns = ["artist"]).reset_index()
df_artists.columns = ["id","artist"]

# DF Artists with artists as index
df_artists_index = df_artists.set_index("artist")
num_artists = df_artists.shape[0]

## Labels 

In [9]:
labels_mat = keras.utils.to_categorical(df_artists_index.id)

In [10]:
labels = dict()
for i, row in df_artists.iterrows():
    labels[row.artist] = labels_mat[row.id,].astype(int)

## 1.1 Dataframe Classifier

In [13]:
df1 = df[["a1","tr1","win1","ini1","fin1"]].copy()
df2 = df[["a1","tr2","win2","ini2","fin2"]].copy()

# Column names
colnames = ["art","tr","win","ini","fin"]
df1.columns = colnames
df2.columns = colnames

In [14]:
df_concat = pd.concat([df1, df2])

In [15]:
df_concat["tr"] = df_concat.tr + "__" + df_concat.win.astype(str) +  \
    "__" + df_concat.ini.astype(str) + "__" + df_concat.fin.astype(str) + ".jpg"

In [16]:
df_concat.head()

Unnamed: 0,art,tr,win,ini,fin
0,0InCPtI0kadS7s3cZrcbbY,3mUonXr6ECjJs2XPDYxWhG__10__192__222.jpg,10,192,222
1,0InCPtI0kadS7s3cZrcbbY,3mUonXr6ECjJs2XPDYxWhG__2__40__70.jpg,2,40,70
2,0InCPtI0kadS7s3cZrcbbY,3mUonXr6ECjJs2XPDYxWhG__9__180__210.jpg,9,180,210
3,0InCPtI0kadS7s3cZrcbbY,3mUonXr6ECjJs2XPDYxWhG__3__60__90.jpg,3,60,90
4,0InCPtI0kadS7s3cZrcbbY,3mUonXr6ECjJs2XPDYxWhG__4__80__110.jpg,4,80,110


In [20]:
df_concat.tail()

Unnamed: 0,art,tr,win,ini,fin
13870795,0InCPtI0kadS7s3cZrcbbY,42yFmT6lc2kOBqooibIX08__6__120__150.jpg,6,120,150
13870796,0InCPtI0kadS7s3cZrcbbY,42yFmT6lc2kOBqooibIX08__5__100__130.jpg,5,100,130
13870797,0InCPtI0kadS7s3cZrcbbY,42yFmT6lc2kOBqooibIX08__7__140__170.jpg,7,140,170
13870798,0InCPtI0kadS7s3cZrcbbY,42yFmT6lc2kOBqooibIX08__7__140__170.jpg,7,140,170
13870799,0InCPtI0kadS7s3cZrcbbY,42yFmT6lc2kOBqooibIX08__3__60__90.jpg,3,60,90


In [19]:
df_concat.groupby("art")["tr"].count().reset_index().sort_values("tr",ascending=False)

Unnamed: 0,art,tr
0,00XhexlJEXQstHimpZN910,100000
1,00me4Ke1LsvMxt5kydlMyU,100000
166,3Isy6kedDrgPYoTS1dazA9,100000
167,3KV3p5EY4AvKxOlhGHORLg,100000
326,6oMuImdp5ZcFhWP0ESe6mG,100000
...,...,...
251,5AsWcQXw4RJFRWBbwa0ti0,200
99,1rs3y69kDwkIdGJcOYngQt,200
168,3L2SIGZah4QZSvN4wC8rHl,200
130,2PaZWGu5T5nHjY2xxAkFsT,200


## 1.2 Data Generator

In [163]:
class DataGenerator(keras.utils.Sequence):
    def __init__(self, df, scaling, x_col, y_col=None, batch_size=10, num_classes=None, shuffle=True):
        self.batch_size = batch_size
        self.df = df
        self.indices = self.df.index.tolist()
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.x_col = x_col
        self.y_col = y_col
        self.dim = (int(x_col / scaling), int(y_col / scaling), 1) #one input channel
        self.on_epoch_end()

    def __len__(self):
        return len(self.indices) // self.batch_size

    def __getitem__(self, index):
        idx = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in idx]
        
        X, y = self.__get_data(batch)
        return X, y

    def on_epoch_end(self):
        pass

    def __get_data(self, batch):
        X = np.empty((self.batch_size, *self.dim))
        y = np.empty((self.batch_size, self.num_classes), dtype=int)
        
        # Get the list of image files and corresponding artists
        df_imgs_files = self.df.iloc[batch]
        
        list_imgs = list(df_imgs_files["tr"])
        list_art = list(df_imgs_files["art"])
        
        for ii in range(len(list_imgs)):
            
            # Read image using cv2
            path_img = os.path.join(PATH_DATA, list_imgs[ii])
            img = cv2.cvtColor(cv2.imread(path_img), cv2.COLOR_BGR2GRAY)
            img = np.round(img / 255.,5)
            img = cv2.resize(img, (self.y_col, self.x_col))
            img = cv2.resize(img, (self.dim[1], self.dim[0]))
            img = np.expand_dims(img, axis = 2) # add the dimension of the channel 
            
            # Put it into X matrix
            X[ii,] = img
            y[ii] = labels[list_art[ii]]

        return X, y

In [173]:
x_col, y_col = (256, 937)
batch_size = 10
num_classes = df_artists.shape[0]
shuffle = False
scaling = 4

training_generator = DataGenerator(df=df_concat, 
                                   scaling = scaling,
                                   x_col=x_col, 
                                   y_col=y_col,
                                   batch_size=batch_size, 
                                   num_classes=num_artists,
                                   shuffle=False)

# Model

In [174]:
def conv_layer(conv_x, filters):
    conv_x = BatchNormalization()(conv_x)
    conv_x = Activation('relu')(conv_x)
    conv_x = Conv2D(filters, (3, 3), kernel_initializer='he_uniform', padding='same', use_bias=False)(conv_x)
    conv_x = Dropout(0.2)(conv_x)

    return conv_x


def dense_block(block_x, filters, growth_rate, layers_in_block):
    for i in range(layers_in_block):
        each_layer = conv_layer(block_x, growth_rate)
        block_x = concatenate([block_x, each_layer], axis=-1)
        filters += growth_rate

    return block_x, filters

def transition_block(trans_x, tran_filters):
    trans_x = BatchNormalization()(trans_x)
    trans_x = Activation('relu')(trans_x)
    trans_x = Conv2D(tran_filters, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False)(trans_x)
    trans_x = AveragePooling2D((2, 2), strides=(2, 2))(trans_x)

    return trans_x, tran_filters

def dense_net(filters, growth_rate, classes, dense_block_size, layers_in_block, in_shape):
    input_img = Input(shape=in_shape)
    x = Conv2D(3, (3, 3), kernel_initializer='he_uniform', padding='same', use_bias=False)(input_img)

    dense_x = BatchNormalization()(x)
    dense_x = Activation('relu')(x)

    dense_x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(dense_x)
    for block in range(dense_block_size - 1):
        dense_x, filters = dense_block(dense_x, filters, growth_rate, layers_in_block)
        dense_x, filters = transition_block(dense_x, filters)

    dense_x, filters = dense_block(dense_x, filters, growth_rate, layers_in_block)
    dense_x = BatchNormalization()(dense_x)
    dense_x = Activation('relu')(dense_x)
    dense_x = GlobalAveragePooling2D()(dense_x)

    output = Dense(classes, activation='softmax')(dense_x)

    return Model(input_img, output)

## Quick model

In [169]:
def quick_CNN(input_shape, num_classes):

    model = Sequential()
    model.add(Conv2D(2, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(4, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    
    return model

## Instantiate the model

In [170]:
dense_block_size = 1
layers_in_block = 1

in_shape = (int(x_col / scaling), int(y_col / scaling), 1)
growth_rate = 2
classes = num_artists

#model = dense_net(growth_rate * 2, growth_rate, classes, dense_block_size, layers_in_block, in_shape)
model = quick_CNN(input_shape=in_shape, num_classes = classes)

## Parameters

In [171]:
# training
batch_size = 10
epochs = 10
optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics=['accuracy'])

In [172]:
model.fit_generator(generator = training_generator, 
          epochs=epochs, 
          shuffle=True)

Epoch 1/10
    352/2774160 [..............................] - ETA: 60:22:09 - loss: 5.9007 - acc: 0.0068

KeyboardInterrupt: 