# Import

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
sns.set_theme(style="darkgrid")
import matplotlib.pyplot as plt
import os
from shutil import copyfile
import splitfolders

In [3]:
from neuralart.data import *

# Get Data

In [4]:
# Root folder of the WikiArt data; also used below as the output location.
output_path = "../raw_data/wikiart"

# Inputs handed to get_data(): the "chan" CSV folder and the "chan" image folder.
chan_csv_path = f"{output_path}/csv_chan"
chan_image_path = f"{output_path}/dataset_chan"

# Generic dataset directory name (not referenced by the cells visible here).
dataset_dir_name = "dataset"

In [5]:
# Load the table with rm_image_duplicate=False (presumably keeps duplicated images).
data_full = get_data(
    chan_csv_path,
    chan_image_path,
    rm_image_duplicate=False,
)
# Optional export: save_csv(data_full, output_path, 'data_full.csv')
data_full.shape

(81446, 10)

In [6]:
# Same load with rm_image_duplicate=True (presumably drops duplicated images).
data = get_data(
    chan_csv_path,
    chan_image_path,
    rm_image_duplicate=True,
)
# Optional export: save_csv(data, output_path, 'data.csv')
data.shape

(78748, 10)

In [7]:
# Class-merging scheme passed to get_dataset() as `class_`.
# 'merging' maps each original movement to the class it is folded into.
# A value of None marks a movement with no target class (presumably dropped
# by get_dataset -- confirm in neuralart.data). The non-None values form
# 8 distinct classes.
merge_mov_1 = {
    "name": "merge_mov-1",
    "merging": {
        "abstract_expressionism": "abstract",
        "action_painting": "abstract",
        "analytical_cubism": "cubism",
        "art_nouveau_modern": None,
        "baroque": None,
        "color_field_painting": "color_field_painting",
        "contemporary_realism": None,
        "cubism": "cubism",
        "early_renaissance": "renaissance",
        "expressionism": "expressionism",
        "fauvism": None,
        "high_renaissance": "renaissance",
        "impressionism": "impressionism",
        "mannerism_late_renaissance": None,
        "minimalism": None,
        "naive_art_primitivism": None,
        "new_realism": None,
        "northern_renaissance": "renaissance",
        "pointillism": None,
        "pop_art": None,
        "post_impressionism": None,
        "realism": "realism",
        "rococo": None,
        "romanticism": "romanticism",
        "symbolism": None,
        "synthetic_cubism": "cubism",
        "ukiyo_e": None,
    },
}

In [8]:
# Build the movement-targeted dataset from the de-duplicated table,
# using the merge_mov_1 class mapping and a fixed random_state.
data_sample = get_dataset(
    data,
    target="movement",
    class_=merge_mov_1,
    random_state=123,
    keep_genre=True,
    output_path=output_path,
)


In [9]:
# Copy the images of data_sample from chan_image_path into a named dataset
# folder under output_path (progress is printed by the helper).
create_dataset_directory(
    data_sample,
    chan_image_path,
    output_path,
    'wikiart-movement-genre_True-class_8-merge_mov-1',
    flat=False,
)


2500 images copied
5000 images copied
7500 images copied
10000 images copied
12500 images copied
15000 images copied
17500 images copied
20000 images copied
22500 images copied
25000 images copied
27500 images copied
30000 images copied
32500 images copied
35000 images copied
37500 images copied
40000 images copied
Done: 41268 image(s) copied, 8 image(s) in the folder


In [12]:
# Name shared by the dataset folder and its companion CSV under output_path.
dataset_name = "wikiart-movement-genre_True-class_8-merge_mov-1"

input_dir = os.path.join(output_path, dataset_name)
csv_path = os.path.join(output_path, f"{dataset_name}.csv")

# Destination folder for the train/val/test split.
output_dir = os.path.join(output_path, "train_val_test_True_all")

In [13]:
# Split the dataset folder 80/10/10 into train/val/test with a fixed seed.
# The call is the last expression, so its return value is shown as cell output.
get_train_val_test_directory(
    output_dir,
    input_dir=input_dir,
    csv_path=csv_path,
    save_csv=True,
    train_ratio=0.8,
    val_ratio=0.1,
    test_ratio=0.1,
    seed=1337,
)

Unnamed: 0,file_name,movement,genre,artist,split
0,early-renaissance_filippo-lippi_two-saints.jpg,renaissance,religious_painting,filippo-lippi,val
1,early-renaissance_filippo-lippi_madonna-del-ce...,renaissance,religious_painting,filippo-lippi,train
2,early-renaissance_piero-della-francesca_annunc...,renaissance,religious_painting,piero-della-francesca,train
3,early-renaissance_pietro-perugino_christ-in-th...,renaissance,religious_painting,pietro-perugino,train
4,early-renaissance_antonello-da-messina_madonna...,renaissance,religious_painting,antonello-da-messina,test
...,...,...,...,...,...
41263,synthetic-cubism_pablo-picasso_student-with-pi...,cubism,sketch_and_study,pablo-picasso,train
41264,synthetic-cubism_georges-braque_mandolin-1914.jpg,cubism,still_life,georges-braque,train
41265,synthetic-cubism_juan-gris_harlequin-at-a-tabl...,cubism,portrait,juan-gris,train
41266,synthetic-cubism_juan-gris_strawberry-jam-1917...,cubism,still_life,juan-gris,train


# Data Visualization

In [None]:
col = ['cs_split_artist','cs_split_genre','cs_split_style','cs_path']

# Display label for each counted column. Labels are looked up by column name
# so they always follow the bars, whatever order the counts sort into.
target_labels = {
    'cs_path': 'total',
    'cs_split_style': 'cs-movement (style)',
    'cs_split_genre': 'cs-genre',
    'cs_split_artist': 'cs-artist',
}


def plot_image_counts(frame, axis, title):
    """Draw the number of non-null values of each `col` column of `frame`.

    Bars are sorted by decreasing count, each bar is annotated with its value,
    and the y tick labels are taken from `target_labels`.

    Parameters
    ----------
    frame : DataFrame containing every column listed in `col`.
    axis : matplotlib Axes to draw on.
    title : str, title of the panel.
    """
    counts = frame[col].count()
    # The ranking is computed from `frame` itself, so the bar order and the
    # annotations below can never come from two different tables.
    ranked = counts.sort_values(ascending=False)

    sns.barplot(y=counts.index, x=counts.values, order=ranked.index, ax=axis)

    for row, total in enumerate(ranked.values):
        axis.text(total, row, str(total), color='blue', fontweight='bold')

    # Pin the tick positions before labelling them.
    axis.set_yticks(range(len(ranked)))
    axis.set_yticklabels([target_labels[name] for name in ranked.index])
    axis.set_title(title)
    axis.set_xlabel("Count")
    axis.set_ylabel("Target")


fig, ax = plt.subplots(2,1,figsize=(12,10))
plot_image_counts(data, ax[0], "Number of images per target")
plot_image_counts(data_full, ax[1], "Number of images per target (Full)");

In [None]:
col = ['artist','genre','movement']

fig, ax = plt.subplots(2,1,figsize=(15,10))

# One panel per table: de-duplicated data on top, full data below.
panels = (
    (ax[0], data, "Number of classes per target"),
    (ax[1], data_full, "Number of classes per target (Full)"),
)

for axis, frame, title in panels:
    # Distinct values per target column, then ranked for the bar order.
    n_classes = frame[col].nunique()
    ranked = n_classes.sort_values(ascending=False)

    sns.barplot(y=n_classes.index, x=n_classes.values, order=ranked.index, ax=axis)

    # Write each value at the end of its bar.
    for row, total in enumerate(ranked.values):
        axis.text(total, row, str(total), color='blue', fontweight='bold')

    axis.set_title(title)
    axis.set_xlabel("Count")
    axis.set_ylabel("Target")

In [None]:
fig, ax = plt.subplots(2,1,figsize=(15,15))

# One panel per table: de-duplicated data on top, full data below.
panels = (
    (ax[0], data, "Number of images per movement"),
    (ax[1], data_full, "Number of images per movement (Full)"),
)

for axis, frame, title in panels:
    # value_counts() is already sorted by decreasing frequency.
    per_movement = frame["movement"].value_counts()

    sns.countplot(data=frame, y="movement", order=per_movement.index, ax=axis)

    # Write each count at the end of its bar.
    for row, total in enumerate(per_movement.values):
        axis.text(total, row, str(total), color='blue', fontweight='bold')

    axis.set_title(title)
    axis.set_ylabel("Movement (style)")

In [None]:
fig, ax = plt.subplots(2,1,figsize=(15,10))

# One panel per table: de-duplicated data on top, full data below.
panels = (
    (ax[0], data, "Number of images per genre"),
    (ax[1], data_full, "Number of images per genre (Full)"),
)

for axis, frame, title in panels:
    # value_counts() is already sorted by decreasing frequency.
    per_genre = frame["genre"].value_counts()

    sns.countplot(data=frame, y="genre", order=per_genre.index, ax=axis)

    # Write each count at the end of its bar.
    for row, total in enumerate(per_genre.values):
        axis.text(total, row, str(total), color='blue', fontweight='bold')

    axis.set_title(title)

In [None]:
# Number of distinct artists in each movement, and the same ranked for display.
artists_per_movement = data.groupby("movement")["artist"].nunique()
ranked = artists_per_movement.sort_values(ascending=False)

fig, ax = plt.subplots(1,1,figsize=(15,10))
sns.barplot(
    y=artists_per_movement.index,
    x=artists_per_movement.values,
    order=ranked.index,
    ax=ax,
)

# Write each value at the end of its bar.
for row, total in enumerate(ranked.values):
    ax.text(total, row, str(total), color='blue', fontweight='bold')

ax.set_title("Number of artists per movement")
ax.set_xlabel("Count");
ax.set_xlabel("Count");

In [None]:
# Keep only the rows whose cs_split_genre value is present.
has_genre = data["cs_split_genre"].notnull()
data2 = data[has_genre]

# value_counts() is already sorted by decreasing frequency.
per_movement = data2["movement"].value_counts()

fig, ax = plt.subplots(1,1,figsize=(15,10))
sns.countplot(data=data2, y="movement", order=per_movement.index, ax=ax)

# Write each count at the end of its bar.
for row, total in enumerate(per_movement.values):
    ax.text(total, row, str(total), color='blue', fontweight='bold')

ax.set_title("Number of images with genre per movement (style)")
ax.set_ylabel("Movement (style)");