# Analysis of model results and embedding experiment

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras import layers, models, Sequential, Input, Model
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.data.experimental import cardinality
from tensorflow.data.experimental import AUTOTUNE
from tensorflow.keras.models import load_model

## Load model and results analysis

In [3]:
# Restore the saved classifier from its export directory and print its
# layer-by-layer summary.
MODEL_DIR = '../models/20201210_170338_VGG16_v2_0/'
model = load_model(MODEL_DIR)
model.summary()

Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 224, 224, 3)]     0         
_________________________________________________________________
tf_op_layer_strided_slice_4  (None, 224, 224, 3)       0         
_________________________________________________________________
tf_op_layer_BiasAdd_4 (Tenso (None, 224, 224, 3)       0         
_________________________________________________________________
sequential_11 (Sequential)   (None, 100)               17223588  
_________________________________________________________________
dense_12 (Dense)             (None, 12)                1212      
Total params: 17,224,800
Trainable params: 17,224,800
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Root folder of the small test set (one sub-folder per artist class).
# The location can be overridden with the ARTIST_TEST_DIR environment variable
# so the notebook also runs on machines other than the author's; when the
# variable is unset, the original local path is used unchanged.
path0 = os.environ.get(
    'ARTIST_TEST_DIR',
    r'C:\Users\pitip\OneDrive\Bureau\raw_data\Clean_Data\Test_small',
)

In [79]:
# Build the evaluation dataset from the test folder.
BATCH_SIZE, IMG_SIZE = 32, (224, 224)
test_dir = path0

# shuffle=False keeps a fixed iteration order, so labels collected by
# iterating the dataset line up with the rows returned by model.predict.
raw_test_dataset = image_dataset_from_directory(
    test_dir,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# class_names must be read before prefetch(): it is taken from the dataset
# object returned by image_dataset_from_directory.
class_names = raw_test_dataset.class_names
n_artist = len(class_names)

test_dataset = raw_test_dataset.prefetch(buffer_size=AUTOTUNE)
n_test_batches = cardinality(test_dataset)
print('Number of test batches: %d' % n_test_batches)

Found 58 files belonging to 12 classes.
Number of test batches: 2


In [80]:
# Evaluate the model on the whole test set; evaluate() yields the loss
# followed by the accuracy metric.
test_metrics = model.evaluate(test_dataset)
loss, accuracy = test_metrics
print('Test accuracy :', accuracy)

Test accuracy : 0.8275862336158752


In [81]:
# Per-image class scores for every image of the test set.
predictions = model.predict(x=test_dataset)

In [82]:
# Sanity check: one row per test image, one column per class.
np.shape(predictions)

(58, 12)

In [83]:
# Index of the highest-scoring class for each test image.
predictions.argmax(axis=1)

array([10,  0,  0, 11,  0,  0,  1,  1,  5, 11,  2,  2,  2,  2,  2,  3,  3,
        3,  3,  3,  4,  0,  4,  4,  8,  5, 11,  5,  5,  5,  6,  6, 11,  6,
        6,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9, 10,
       10, 10, 10,  3, 11, 11, 11], dtype=int64)

In [128]:
# Gather the one-hot label batches yielded by the dataset and stack them
# into a single array (the images of each batch are ignored here).
label_batches = [batch_labels for _, batch_labels in test_dataset]
label = np.concatenate(label_batches)
label

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.,

In [89]:
# Sanity check: the label array should have the same shape as `predictions`.
np.shape(label)

(58, 12)

In [90]:
# Convert the one-hot labels back to integer class indices.
label.argmax(axis=1)

array([ 0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,
        3,  3,  3,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  6,  6,  6,  6,
        6,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9, 10,
       10, 10, 10, 11, 11, 11, 11], dtype=int64)

In [94]:
# Recompute the accuracy by hand: fraction of images whose predicted class
# index equals the true class index (compare with model.evaluate).
predicted_idx = predictions.argmax(axis=1)
true_idx = label.argmax(axis=1)
np.mean(predicted_idx == true_idx)

0.8275862068965517

In [86]:
# Class scores predicted for the first test image.
predictions[0, :]

array([1.2306109e-01, 5.9489469e-04, 2.3959687e-03, 4.3867840e-03,
       1.5665267e-03, 3.5306279e-04, 4.2976048e-02, 7.6306229e-03,
       8.4147359e-05, 5.3108239e-04, 7.9866463e-01, 1.7755089e-02],
      dtype=float32)

OK: both the predictions and the true artist labels can be retrieved when the data is loaded with `image_dataset_from_directory`.

## Test one image at a time

In [115]:
# n_art: numeric suffix of the artist folder ("_<n_art>") to pick an image from.
# n_im: position of the image inside that folder's file listing.
n_art, n_im = 9, 3

In [116]:
# Pick a single image from the chosen artist folder and turn it into a
# one-image batch that can be fed to model.predict.
folder = os.path.join(path0, f"_{n_art}")
# NOTE(review): os.listdir returns entries in arbitrary order, so n_im is not
# guaranteed to select the same file on every machine.
file_list = os.listdir(folder)
image_to_predict = os.path.join(folder, file_list[n_im])
print(image_to_predict)

# The deprecated `grayscale` argument is dropped; color_mode='rgb' already
# requests a 3-channel image.
pil_image = load_img(image_to_predict, color_mode='rgb', target_size=(224, 224), interpolation='bilinear')

# Converting the PIL image directly yields a (height, width, 3) array.
# The previous getdata()/reshape used (width, height), which is only correct
# for square images. float32 matches the dtype of the batches produced by
# image_dataset_from_directory.
im_224 = np.asarray(pil_image, dtype='float32')

# Add the leading batch axis: (224, 224, 3) -> (1, 224, 224, 3).
im_224 = np.expand_dims(im_224, axis=0)

C:\Users\pitip\OneDrive\Bureau\raw_data\Clean_Data\Test_small\_9\521.jpg


In [117]:
# Predict the single-image batch and report the class scores together with
# the name of the highest-scoring class.
pred = model.predict(im_224)
proba = pred[0]  # scores of the only image in the batch
artiste_index = proba.argmax()
print(proba)
print(class_names[artiste_index])


[4.3762571e-01 3.9337771e-03 1.0041299e-02 3.7460264e-03 9.0952860e-03
 4.8218057e-03 1.8679758e-03 6.6875771e-04 1.1162045e-04 6.9850724e-04
 6.1482888e-02 4.6590632e-01]
_9


OK: the model identifies the correct artist most of the time.

## Clean implementation

In [123]:
# Self-contained version of the pipeline: load the test set, then derive the
# true artist class name of every image.
BATCH_SIZE, IMG_SIZE = 32, (224, 224)
test_dir = path0

# shuffle=False keeps a fixed iteration order, so the labels gathered below
# line up with the rows returned by model.predict on the same dataset.
unbatched_source = image_dataset_from_directory(
    test_dir,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# class_names is read from the dataset object before prefetch() is applied.
class_names = unbatched_source.class_names
n_artist = len(class_names)

test_dataset = unbatched_source.prefetch(buffer_size=AUTOTUNE)
batch_count = cardinality(test_dataset)
print('Number of test batches: %d' % batch_count)

# Stack the one-hot label batches, then map each row to its class name.
label = np.concatenate([batch_labels for _, batch_labels in test_dataset])
true_idx = label.argmax(axis=1)
artist_id = [class_names[idx] for idx in true_idx]
artist_id[::10]

Found 58 files belonging to 12 classes.
Number of test batches: 2


['_1', '_11', '_2', '_4', '_6', '_8']

In [124]:
# Predict the whole test set and translate each highest-scoring class index
# into its class name.
predictions = model.predict(test_dataset)
predicted_idx = predictions.argmax(axis=1)
predict_artist_id = [class_names[idx] for idx in predicted_idx]
predict_artist_id[::10]

['_8', '_11', '_2', '_4', '_6', '_8']

In [127]:
# Side-by-side table of the true and predicted artist class for each image.
comparison_columns = {
    'artist_class': artist_id,
    'predicted_artist_class': predict_artist_id,
}
results_comp_df = pd.DataFrame(data=comparison_columns)
results_comp_df

Unnamed: 0,artist_class,predicted_artist_class
0,_1,_8
1,_1,_1
2,_1,_1
3,_1,_9
4,_1,_1
5,_10,_1
6,_10,_10
7,_10,_10
8,_10,_3
9,_10,_9
