In [1]:
import common
from keras import models
from keras import metrics
import functools
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
train_3 = common.load_train_dataset('dataset_mini/train.csv')

train_3.loc[~train_3["attribute_ids"].isin(["51", "13"]), "attribute_ids"] = "-1"

print('Number of train samples: ', train_3.shape[0])
display(train_3.head())

train_48 = common.load_train_dataset('dataset_mini/train.csv')

train_48 = train_48[~train_48["attribute_ids"].isin(["51", "13"])]

print('Number of train samples: ', train_48.shape[0])
display(train_48.head())

test = common.load_train_dataset('dataset_mini/test.csv')

print('Number of test samples: ', test.shape[0])
display(test.head())

Number of train samples:  19999


Unnamed: 0,id,attribute_ids
10708,8475e5a61489cea1.png,13
7775,6565ccb846235e4c.png,-1
5074,47331714e1338d0f.png,51
378,13c03d4c9e5bb97f.png,-1
18114,d601b1072f1e23fc.png,13


Number of train samples:  4991


Unnamed: 0,id,attribute_ids
16988,c92f0591a22ebc6e.png,70
7685,64792be688f3b29c.png,22
7193,5f28a836bda961d1.png,45
8241,6a149822754ea08b.png,25
9798,7ab7d5ebc3a5644b.png,70


Number of test samples:  2097


Unnamed: 0,id,attribute_ids
1565,fa70dcd047462197.png,45
1158,f6539f1f401926da.png,13
258,ecf6c6a9a1261c73.png,51
788,f2687dd500b13443.png,14
1128,f60639307774a54b.png,51


In [3]:
train_generator_3, _, test_generator_3 = common.create_image_generators("dataset_mini/data", "dataset_mini/data", train_3, test, 64, (224, 224), data_augmentation=False)

train_generator_48, _, _ = common.create_image_generators("dataset_mini/data", "dataset_mini/data", train_48, test, 64, (224, 224), data_augmentation=False)

Found 15984 validated image filenames belonging to 3 classes.
Found 3995 validated image filenames belonging to 3 classes.
Found 2097 validated image filenames belonging to 50 classes.
Found 3988 validated image filenames belonging to 48 classes.
Found 996 validated image filenames belonging to 48 classes.
Found 2097 validated image filenames belonging to 50 classes.


In [4]:
top3_acc = functools.partial(metrics.top_k_categorical_accuracy, k=3)
top3_acc.__name__ = 'top3_acc'

top5_acc = functools.partial(metrics.top_k_categorical_accuracy, k=5)
top5_acc.__name__ = 'top5_acc'

model3 = models.load_model('task1_cnn_3')
model48 = models.load_model('task1_cnn_48', custom_objects={'top3_acc': top3_acc, 'top5_acc': top5_acc})

# Initial 3-class prediction
model3_predictions = model3.predict(test_generator_3)
y_pred_model3 = np.argmax(model3_predictions, axis=1)

# Get the dataset without the predicted 13 and 51
usefulIndexes = []
for i in range(len(y_pred_model3)):
     if(y_pred_model3[i] == train_generator_3.class_indices['-1']):
         usefulIndexes.append(i)

newOthersTest = test.loc[usefulIndexes]
y_pred_others = []

if(len(usefulIndexes) > 0):
    #pass this dataset into the Others CNN
    imageGenerator = ImageDataGenerator(
        rescale=1./255,
    )
    test_generator_last = imageGenerator.flow_from_dataframe(  
        dataframe=newOthersTest,
        directory="dataset_mini/data",    
        x_col="id",
        y_col="attribute_ids",
        target_size=(224, 224),
        batch_size=1,
        shuffle=False,
        class_mode="categorical"
    )

    probabilitiesOthers = model48.predict(test_generator_last)
    y_pred_others = np.argmax(probabilitiesOthers, axis=-1)


# join the two prediction arrays
inv_indices_3 = {v: k for k, v in train_generator_3.class_indices.items()}
inv_indices_others = {v: k for k, v in train_generator_48.class_indices.items()}
j = 0
y_pred = []
for i in range(0, len(y_pred_model3)):
    if(y_pred_model3[i] == train_generator_3.class_indices['-1']):
        y_pred.append(inv_indices_others[y_pred_others[j]])
        j += 1
    else:
        y_pred.append(inv_indices_3[y_pred_model3[i]])

y_pred = np.array(y_pred)
y_pred = y_pred.astype(str)

# get results
y_true = np.array(test['attribute_ids'])

Found 228 validated image filenames belonging to 28 classes.


In [5]:
matrix = confusion_matrix(y_true, y_pred)
print(matrix)
labels = np.unique(test["attribute_ids"])
print(classification_report(y_true, y_pred, labels=labels))

[[0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00        11
          10       0.00      0.00      0.00         2
          12       0.00      0.00      0.00         1
          13       0.64      0.12      0.21       888
          14       0.06      0.03      0.04        39
          15       0.00      0.00      0.00         2
          16       0.00      0.00      0.00         1
          17       0.00      0.00      0.00         1
          18       0.00      0.00      0.00        10
           2       0.00      0.00      0.00         2
          22       0.00      0.00      0.00         4
          23       0.25      0.06      0.10        17
          24       0.00      0.00      0.00         5
          25       0.00      0.00      0.00        55
          26   