In [144]:
import os
from typing import Literal, Union

import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt

import ast
from tqdm.auto import tqdm
tqdm.pandas()

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.applications import VGG16, InceptionV3, ResNet50

In [145]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)


In [146]:
BASE_PATH = "."
TEST_SIZE = 0.1
BATCH_SIZE = 32
IN_SHAPE = (300, 300, 3)
NUMBER_OF_CLASSES = (129, 11, 27) # artist, genre, style
DATASET_SIZE = 81444

In [147]:
def resize_img(img, new_width = IN_SHAPE[0], new_height = IN_SHAPE[1]):
  """Resize an image using new  width and height"""
  new_points = (new_width, new_height)
  return cv2.resize(img, new_points, interpolation= cv2.INTER_LINEAR)

In [148]:
files = os.listdir(f'{BASE_PATH}/wikiart/data/csv/')

# Use generator directly
def read_data(chosen_label: Union[Literal['artist'], Literal['genre'], Literal['style']] = 'genre'):
    for file_name in files:
        chunks = pd.read_csv(f'{BASE_PATH}/wikiart/data/csv/' + file_name, chunksize=BATCH_SIZE * 2)
        for chunk in chunks:
            images = chunk["image"].map(lambda img: tf.io.decode_image(ast.literal_eval(img).get('bytes')).numpy()).values
            labels = chunk[chosen_label].values
            
            yield (np.asarray([resize_img(image) for image in images]), np.asarray(labels))

# Generator for tf dataset
def read_tf_dataset(chosen_label: Union[Literal[b'artist'], Literal[b'genre'], Literal[b'style']] = b'genre'):
    for file_name in files:
        chunks = pd.read_csv(f'{BASE_PATH}/wikiart/data/csv/' + file_name, chunksize=BATCH_SIZE * 2)
        
        for chunk in chunks:
            images = chunk["image"].map(lambda img: resize_img(tf.io.decode_image(ast.literal_eval(img).get('bytes')).numpy())).values
            labels = chunk[chosen_label.decode('utf-8')].values
            
            for row_index in range(chunk.shape[0]):
                yield images[row_index], labels[row_index]

In [149]:
dataset = tf.data.Dataset.from_generator(read_tf_dataset,
                                         args=['genre'],
                                         output_signature=(tf.TensorSpec(shape=IN_SHAPE, dtype=tf.uint8),
                                                           tf.TensorSpec(
                                                               shape=(), dtype=tf.uint8)
                                                           )).batch(BATCH_SIZE, drop_remainder=True)

In [150]:
dataset = dataset.shuffle(buffer_size=DATASET_SIZE)
dataset_train = dataset.skip(np.floor(DATASET_SIZE * TEST_SIZE))
dataset_test = dataset.take(np.floor(DATASET_SIZE * TEST_SIZE))

In [151]:
my_net = Sequential([Conv2D(32, kernel_size = (5,5), input_shape = IN_SHAPE, activation='relu'),
                          MaxPool2D(pool_size=(2, 2)),
                          Conv2D(32, kernel_size = (5,5), activation='relu'),
                          MaxPool2D(pool_size=(2, 2))
                        ])

In [152]:
model_my_net = Sequential([
     my_net,
     # resnet,
     Flatten(),
     Dense(256, activation='relu'),
     Dense(128, activation='relu'),
     Dense(NUMBER_OF_CLASSES[1], activation='softmax')])

model_my_net.compile(loss='sparse_categorical_crossentropy', 
              optimizer=Adam(learning_rate=0.0001), 
              metrics=['accuracy'])

In [153]:
model_my_net.fit(dataset, epochs = 5, verbose = 1, batch_size=BATCH_SIZE)

Epoch 1/5


2023-04-30 19:59:46.024403: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string
	 [[{{node Placeholder/_2}}]]
2023-04-30 19:59:46.024613: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2023-04-30 19:59:58.201499: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 13 of 81444
