In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report, balanced_accuracy_score

import accuracy
import create_datasets
import create_model
import create_test_dataset

Define hyperparameters for the model to be used for submission and the directory for the saved model. 

In [12]:
# Hyperparameters of the model used for the submission.
# NOTE(review): unit_sizes is presumably the list of dense-layer widths consumed by
# create_model.get_model -- confirm against that module.
unit_sizes = [2048, 1024]
reg = 1e-15
batch_size = 128
base_model = 'densenet201'
regularization = tf.keras.regularizers.l2(l=reg)

# Directory of the saved model. The name is assembled from the hyperparameters above,
# including the unit sizes, so it cannot drift out of sync with `unit_sizes`.
model_dir = 'logs4/{}-batch-{}-regularization-{}-unitsize-{}'.format(
    base_model,
    batch_size,
    reg,
    '-'.join(str(size) for size in unit_sizes),
)

Create the model and load the trained weights. The model is trained by `hyperparameter_tuning.py`.

In [10]:
# Build the network with the chosen regularizer and unit sizes, then restore the
# trained weights from the checkpoint stored under `model_dir`.
checkpoint_path = model_dir + '/cp.ckpt'
model = create_model.get_model(regularization, unit_sizes)
model.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x144ccd9d0>

Create the `tf.data` datasets from the train/validation/test tfrecords. These records are created by `create_tfrecords.py` and `test_tfrecords.py`.

In [13]:
# Locations of the tfrecord files for the test images and for the train/validation split.
record_dir = 'data/tfrecords/test/'
valid_record_dir = 'data/tfrecords/densenet201-valid-2000/'

# The test dataset is iterated below as (features, image id) pairs.
test_dataset = create_test_dataset.create_dataset(record_dir, base_model)
# batch_size=1 so that the validation set can be walked one example at a time.
train_dataset, valid_dataset = create_datasets.create_dataset(valid_record_dir, batch_size=1)

In [7]:
# Baseline: score the loaded model on the validation dataset before any
# post-processing of its outputs. The report is printed by accuracy.test_accuracy.
accuracy.test_accuracy(model, valid_dataset)


Test accuracy: 77.65%
[[250  54  15   7  24   1   2   4]
 [ 99 819  26   3  56   5   6   3]
 [  3   7 215  14  10   4   1   8]
 [  5   1   5  48   7   0   0   2]
 [ 11  12  12   4 162   1   1   4]
 [  1   2   1   0   1  14   0   0]
 [  0   1   1   0   0   1  17   0]
 [  1   1  13   3   3   1   0  28]]
              precision    recall  f1-score   support

           0       0.68      0.70      0.69       357
           1       0.91      0.81      0.86      1017
           2       0.75      0.82      0.78       262
           3       0.61      0.71      0.65        68
           4       0.62      0.78      0.69       207
           5       0.52      0.74      0.61        19
           6       0.63      0.85      0.72        20
           7       0.57      0.56      0.57        50

    accuracy                           0.78      2000
   macro avg       0.66      0.75      0.70      2000
weighted avg       0.79      0.78      0.78      2000


Balanced accuracy: 0.745


The next code cell creates a dataframe, `val_df`, containing the predictions of the model on the validation dataset. We also get the true labels in a list: `labels`.

In [21]:
# Class names, in the order of the model's output scores.
columns = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']

# Collect the true labels and the per-class scores example by example, then build the
# dataframe once at the end (growing a DataFrame row by row with .loc is slow).
labels = []
val_rows = []

for features, label in valid_dataset:
    # The model is fed one flattened example at a time, i.e. an input of shape (1, -1).
    pred = model.predict(tf.reshape(features, (1, -1)))
    labels.append(label.numpy())
    val_rows.append(pred.reshape(-1))

# One row per validation example, one column per class; stored as float64 so that the
# later score conversion is carried out in double precision.
val_df = pd.DataFrame(val_rows, columns=columns, dtype=np.float64)

Let's take a look at the first 5 predictions. The true labels for the first five images are `[1,1,1,4,0]`. The model predictions show that the indices of the highest-probability classes are 1, 1, 1, 4 and 0, so the model predicts the first 5 images correctly. 

In [25]:
labels[:5]

[1, 1, 1, 4, 0]

In [24]:
val_df.head()

Unnamed: 0,MEL,NV,BCC,AK,BKL,DF,VASC,SCC
0,0.078269,0.821563,0.0002064102,6.097851e-05,0.097452,3.523193e-05,4.467446e-06,0.002409718
1,0.000943,0.996884,1.799427e-05,4.749096e-08,0.002153,6.655838e-07,1.204819e-06,4.341561e-09
2,0.178645,0.712248,0.001553758,4.758224e-07,0.107552,1.209348e-07,4.924299e-09,1.021091e-06
3,0.370577,0.076942,3.073253e-10,1.906462e-10,0.552481,5.340592e-12,1.525878e-14,9.232036e-14
4,0.644929,0.032506,0.2660513,0.008504489,0.045172,4.757296e-05,8.666333e-06,0.002781038


Submissions are required to contain the binary classification scores as floating point values in the interval `[0,1]`, and any class with a value above the binary classification threshold 0.5 is predicted as positive. The class corresponding to the greatest value in a row is the overall prediction of the model. The following function applies a sigmoid conversion to the softmax scores predicted by the model and returns the binary classification scores. 
The sigmoid conversion is defined by the following function
$$
\frac{1}{1+e^{-a(x-b)}}
$$
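where $a$ is the inverse of the measured standard deviation of the softmax scores, and $b$ is the softmax score that is mapped onto the binary threshold 0.5 (the code below uses $b = 0.15$). 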

In [51]:
# Standard deviation of all softmax scores in val_df, pooled over every row and class.
# Its inverse is the slope `a` used by convert_to_binary.
np.std(val_df.values.reshape(-1))

0.2842671171013

In [52]:
def convert_to_binary(x, a=1 / 0.28426711, b=0.15):
    """Convert softmax score(s) into binary classification score(s).

    Evaluates the shifted, scaled sigmoid ``1 / (1 + exp(-a * (x - b)))``.

    Parameters
    ----------
    x : float or numpy array
        Softmax score(s). Arrays are converted element-wise.
    a : float, optional
        Slope of the sigmoid. The default is the inverse of the standard deviation
        measured on the validation softmax scores (0.28426711).
    b : float, optional
        Score that is mapped to exactly 0.5, i.e. the point above which a class
        counts as positive after conversion. Defaults to 0.15.

    Returns
    -------
    float or numpy array
        Value(s) in the open interval (0, 1), with the same shape as ``x``.
    """
    return 1 / (1 + np.exp(-(a * (x - b))))

Apply this function to every entry of `val_df`, which holds the softmax scores, to get the binary classification scores. 

In [46]:
# Push every softmax score of the validation dataframe through the sigmoid conversion
# and keep the result as a plain array (one row per example, one column per class).
binary_scores = val_df[columns].applymap(convert_to_binary)
predictions = binary_scores.values

predictions[:5]

array([[0.43724837, 0.91391759, 0.37122932, 0.37110991, 0.45391726,
        0.37108878, 0.37106352, 0.37304031],
       [0.37183402, 0.95162484, 0.37107462, 0.37105989, 0.37282932,
        0.3710604 , 0.37106084, 0.37105986],
       [0.52517059, 0.87845566, 0.37233634, 0.37106024, 0.46273786,
        0.37105995, 0.37105986, 0.37106069],
       [0.68480616, 0.43610051, 0.37105985, 0.37105985, 0.80468781,
        0.37105985, 0.37105985, 0.37105985],
       [0.85082279, 0.39811576, 0.60066755, 0.37806828, 0.40883949,
        0.37109891, 0.37106697, 0.37334586]])

In [48]:
# Reduce the binary scores of each validation example to a single predicted class.
# NOTE(review): the rule below looks at the true label to pick the prediction, so the
# accuracy computed from `val_predictions` is an optimistic figure (an example counts as
# correct whenever its true class is scored positive, even if another class scores
# higher). It cannot be applied to unlabelled data.
val_predictions = np.array([
    # If the true class is classified as positive (> 0.5), take it as the prediction;
    # otherwise fall back to the class with the highest binary score.
    labels[idx] if row[labels[idx]] > 0.5 else np.argmax(row)
    for idx, row in enumerate(predictions)
])

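With these new predictions the (balanced) accuracy increases, because an image now counts as correctly classified whenever its true class is scored as positive (above 0.5), even if that class does not have the highest score. Note that this rule makes use of the true labels, so the numbers below are an optimistic estimate and are not directly comparable with the argmax accuracy reported above. 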

In [50]:
# Score the threshold-based validation predictions against the true labels.
# (confusion_matrix, classification_report and balanced_accuracy_score are imported
# in the import cell at the top of the notebook.)
labels_arr = np.asarray(labels)

# Plain accuracy in percent: share of examples whose prediction equals the true label.
test_accuracy = 100 * np.sum(val_predictions == labels_arr) / len(val_predictions)
print('\nTest accuracy: %.2f%%' % test_accuracy)

print(confusion_matrix(labels, val_predictions))
print(classification_report(labels, val_predictions))
balanced_accuracy = balanced_accuracy_score(labels, val_predictions)
print('\nBalanced accuracy: {:.3f}'.format(balanced_accuracy) )


Test accuracy: 89.15%
[[303  24   8   5  13   1   1   2]
 [ 25 944  10   3  27   1   4   3]
 [  2   3 238   6   4   3   0   6]
 [  5   1   2  53   5   0   0   2]
 [  4   8   9   3 179   0   1   3]
 [  1   2   0   0   1  15   0   0]
 [  0   1   1   0   0   0  18   0]
 [  1   1   9   3   2   1   0  33]]
              precision    recall  f1-score   support

           0       0.89      0.85      0.87       357
           1       0.96      0.93      0.94      1017
           2       0.86      0.91      0.88       262
           3       0.73      0.78      0.75        68
           4       0.77      0.86      0.82       207
           5       0.71      0.79      0.75        19
           6       0.75      0.90      0.82        20
           7       0.67      0.66      0.67        50

    accuracy                           0.89      2000
   macro avg       0.79      0.83      0.81      2000
weighted avg       0.90      0.89      0.89      2000


Balanced accuracy: 0.835


The test dataset contains classes that are not included in the training set. Hence the model should be able to detect unknown classes in addition to the classes that it was trained on. The basic approach for this is to interpret the prediction of the model as `unknown` if the highest probability predicted by the model is less than a certain threshold. The function below takes a single prediction as returned by the model and applies the threshold.  

In [53]:
def is_unknown(pred, th):
    """Append the `unknown` score to a single model prediction.

    Parameters
    ----------
    pred : numpy array
        Class scores of one example (any shape; it is flattened).
    th : float
        Threshold on the highest class score.

    Returns
    -------
    list
        The flattened class scores followed by 0.0 when the highest score reaches
        `th`; otherwise all-zero class scores followed by 1.0 (unknown class).
    """
    flat_scores = pred.reshape(-1)

    if pred.max() < th:
        # No class is confident enough: blank out every class and flag as unknown.
        blank_scores = np.zeros(len(flat_scores))
        return list(blank_scores) + [1.]

    # Confident prediction: keep the scores and mark the example as known.
    return list(flat_scores) + [0.]

The following code cell creates a dataframe consisting of the predictions on the test dataset. Each row contains the 'image id' and the model predictions for the image including the unknown class. 

In [61]:
# Images whose highest class score is below this threshold are labelled unknown (UNK).
th = 0.35

# Collect one row per test image -- the image id followed by the class scores and the
# UNK flag -- and build the dataframe once at the end (growing a DataFrame row by row
# with .loc is slow).
submission_rows = []

for features, img_id in test_dataset:
    # The model is fed one flattened example at a time, i.e. an input of shape (1, -1).
    pred = model.predict(tf.reshape(features, (1, -1)))
    submission_rows.append([img_id.numpy().decode("utf-8")] + is_unknown(pred, th))

df = pd.DataFrame(
    submission_rows,
    columns=['image', 'MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC', 'UNK'],
)

In [62]:
df.head()

Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK
0,ISIC_0058528,0.992823,0.006722,5.542355e-09,4.132913e-07,0.000455,9.449431e-09,1.192686e-11,5.469223e-13,0.0
1,ISIC_0056505,0.010768,0.00217,0.6630822,0.0031613,0.001397,0.004158059,6.404098e-06,0.3152575,0.0
2,ISIC_0067900,0.018964,0.012862,0.09632377,0.07227323,0.799259,3.073578e-05,1.165813e-05,0.0002760254,0.0
3,ISIC_0035920,0.228007,0.60149,6.234879e-05,3.092606e-07,0.17044,5.690288e-09,1.248311e-08,1.311737e-08,0.0
4,ISIC_0065871,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [55]:
# Count how many test images were flagged as unknown (UNK == 1.0) versus
# assigned to the known classes (UNK == 0.0).
df['UNK'].value_counts()

0.0    8116
1.0     122
Name: UNK, dtype: int64

As we did on the validation dataset, now convert the probability scores into binary classification scores. 

In [63]:
# Replace the probability scores (all classes plus UNK) by their binary classification scores.
# NOTE: this overwrites the columns of `df` in place, so the cell is not idempotent --
# running it a second time applies the sigmoid conversion again.
score_columns = columns + ['UNK']
df[score_columns] = df[score_columns].applymap(convert_to_binary)

In [66]:
df.head()

Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK
0,ISIC_0058528,0.950963,0.376595,0.37106,0.37106,0.371434,0.37106,0.37106,0.37106,0.37106
1,ISIC_0056505,0.379942,0.372843,0.858748,0.373659,0.372208,0.37448,0.371065,0.641377,0.37106
2,ISIC_0067900,0.386758,0.381679,0.452934,0.432066,0.907541,0.371085,0.371069,0.371286,0.37106
3,ISIC_0035920,0.568176,0.830371,0.371111,0.37106,0.517968,0.37106,0.37106,0.37106,0.37106
4,ISIC_0065871,0.37106,0.37106,0.37106,0.37106,0.37106,0.37106,0.37106,0.37106,0.952127


In [65]:
# Write the submission file; index=False keeps the dataframe's row index out of the CSV.
df.to_csv('submission.csv', index=False)