## Processing

In [1]:
import glob
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Image dimensions and batch-size setting for the notebook.
img_height, img_width = 320, 320
batch_size = 8

# Directory that holds the image files.
train_data_dir = "./DHA/IMAGES"

# Containers that a later cell fills with image arrays and their labels.
data, labels = [], []

# Load the per-image metadata and turn the race column into integer codes.
# NOTE(review): pd.factorize numbers values in order of first appearance, so
# which integer each race receives depends on the row order of metadata.csv.
# The code -> name mapping applied in the following cell relies on that
# order -- confirm it against the raw file.
metadata_df = pd.read_csv('metadata.csv')
race_codes = pd.factorize(metadata_df.race)[0]
metadata_df['race'] = pd.Categorical(race_codes)
metadata_df.head()

Unnamed: 0,filename,race,gender
0,5599.jpg,0,F
1,5004.jpg,0,F
2,5024.jpg,0,F
3,6114.jpg,0,F
4,6127.jpg,0,F


In [2]:
# Swap each integer race code for a readable name, one code at a time.
# NOTE(review): this table assumes the codes were assigned in exactly this
# order when the column was factorized -- verify against the source CSV.
race_names = {0: 'Asian', 1: 'Black', 2: 'Caucasian', 3: 'Hispanic'}
for race_code, race_name in race_names.items():
    metadata_df['race'] = metadata_df['race'].replace(race_code, race_name)

In [3]:
# Keep only the rows for the two classes used in this binary task.
# `.copy()` makes the result an independent frame: a later cell assigns a new
# `race` column on it, and doing that on a filtered slice of the original
# frame triggers pandas' SettingWithCopyWarning.
metadata_df = metadata_df[metadata_df['race'].isin(['Black', 'Caucasian'])].copy()

In [4]:
# Number of classes actually present after filtering. `nunique()` counts only
# observed values; `value_counts()` on a categorical column also lists
# categories with zero rows, which would inflate the count and oversize the
# model's output layer.
num_classes = metadata_df['race'].nunique()
metadata_df['race'].value_counts()

Black        358
Caucasian    333
Name: race, dtype: int64

In [5]:
from sklearn.preprocessing import LabelEncoder

# Encode the race names as integers. The fitted encoder is kept in `le` so
# that `le.classes_` (displayed below) records which name maps to which code.
le = LabelEncoder()
encoded_race = le.fit_transform(metadata_df['race'])
metadata_df['race'] = encoded_race
le.classes_

array(['Black', 'Caucasian'], dtype=object)

In [6]:
# Gender distribution of the rows that remain after filtering.
metadata_df.gender.value_counts()

M    351
F    340
Name: gender, dtype: int64

In [7]:
# Start from empty containers so that re-running this cell does not append a
# second copy of every image to lists left over from a previous run.
data = []
labels = []

# Walk the filename/label pairs row by row. Pairing the two columns directly
# avoids re-scanning the whole frame for every file and keeps each image tied
# to the label of its own row, even if a filename appears more than once.
for filename, label in zip(metadata_df['filename'], metadata_df['race']):
    filepath = train_data_dir + '/' + filename
    image = tf.keras.preprocessing.image.load_img(
        filepath, color_mode='rgb', target_size=(img_height, img_width))
    data.append(np.array(image))
    labels.append(label)

# 80/20 train/test split, then 12.5% of the remaining training portion is
# held out for validation (0.125 * 0.80 = 10% of all samples).
# NOTE(review): neither split is stratified by label -- confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.20, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.125, random_state=42)

In [8]:
import pickle

# Persist the held-out test split. The `with` block guarantees the file is
# flushed and closed even if pickling raises part-way through.
test_data = (X_test, y_test)
with open('test_data_dha_2_classes.pkl', 'wb') as pkl_file:
    pickle.dump(test_data, pkl_file)

In [9]:
# Convert every split to a NumPy array: images first, then labels.
X_train, X_val, X_test = (np.asarray(split) for split in (X_train, X_val, X_test))
y_train, y_val, y_test = (np.asarray(split) for split in (y_train, y_val, y_test))

In [10]:
from matplotlib import pyplot as plt
# Sanity check: show one training image together with its encoded label.
sample_idx = 10
plt.imshow(X_train[sample_idx], interpolation='nearest')
plt.show()
display(y_train[sample_idx])

<Figure size 640x480 with 1 Axes>

1

## Model creation

In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, concatenate, add, GlobalAveragePooling2D, BatchNormalization, Input
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import optimizers
from tensorflow.keras.applications.densenet import DenseNet121
from classification_models.tfkeras import Classifiers
from tensorflow.keras.models import load_model

# Classifiers.get returns a (model constructor, preprocessing function) pair.
# The key requested is 'resnet50', so the constructor is named to match;
# `resnet34` stays bound as an alias because that was the original name.
resnet50, preprocess_input = Classifiers.get('resnet50')
resnet34 = resnet50
# NOTE(review): preprocess_input is obtained here but is not applied to the
# image arrays anywhere in the visible notebook -- confirm whether training on
# the raw pixel values is intended.

# Keras image tensors are ordered height-before-width. The images are loaded
# elsewhere as (height, width, 3) arrays, i.e. channels-last.
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_height, img_width)
else:
    input_shape = (img_height, img_width, 3)

# ImageNet-pretrained backbone without its classification head.
input_a = Input(shape=input_shape)
base_model = resnet50(input_tensor=input_a, include_top=False, input_shape=input_shape, weights='imagenet')

# New head: global average pooling -> one logit per class -> softmax.
# The softmax layer is pinned to float32 via its dtype argument.
x = GlobalAveragePooling2D()(base_model.output)
x = layers.Dense(num_classes, name='dense_logits')(x)
output = layers.Activation('softmax', dtype='float32', name='predictions')(x)
model = Model(inputs=[input_a], outputs=[output])

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
# NOTE(review): the optimizer starts at 1e-6, which equals min_lr below, so
# ReduceLROnPlateau has no room to lower the rate -- confirm whether a higher
# starting rate or a lower floor was intended.
adam_opt = Adam(learning_rate=0.000001)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.1, patience=2, min_lr=1e-6, verbose=1)

# Categorical cross-entropy expects one-hot labels; ROC and PR AUC are tracked.
model.compile(
    optimizer=adam_opt,
    loss=tf.losses.CategoricalCrossentropy(),
    metrics=[
        tf.keras.metrics.AUC(curve='ROC', name='ROC-AUC'),
        tf.keras.metrics.AUC(curve='PR', name='PR-AUC'),
    ],
)

In [12]:
# Use the Keras bundled with TensorFlow, like the rest of the notebook, rather
# than mixing in the standalone `keras` package.
from tensorflow.keras.utils import to_categorical

# One-hot encode the labels. Passing num_classes fixes the width of every
# matrix to the model's output size, even if a split happens to be missing
# the highest-numbered class.
y_train_cat = to_categorical(y_train, num_classes=num_classes)
y_test_cat = to_categorical(y_test, num_classes=num_classes)
y_val_cat = to_categorical(y_val, num_classes=num_classes)

Using TensorFlow backend.


## Model Training

In [13]:
# Train for a fixed 100 epochs, validating on the held-out validation split,
# then save the model as it stands after the final epoch.
# No batch_size is passed to fit(), so Keras' default is used.
model_path = './classification_model_2_classes'
history = model.fit(
    x=X_train,
    y=y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=100,
    callbacks=[reduce_lr],
)
tf.keras.models.save_model(model, model_path)

Train on 483 samples, validate on 69 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

In [14]:
# Dimensions of the test image array.
np.shape(X_test)

(139, 320, 320, 3)

In [15]:
from sklearn.metrics import classification_report,confusion_matrix, roc_auc_score

# Reload the saved model from disk and score the test images.
model = tf.keras.models.load_model(model_path)
predictions = model.predict(X_test)
# Predicted class = index of the largest value in each prediction row.
predictions_rounded = predictions.argmax(axis=1)

In [16]:
# Distinct classes the model actually predicted. np.unique returns them
# sorted, and tolist() converts them to plain Python ints, so the displayed
# list is stable across runs and library versions.
np.unique(predictions_rounded).tolist()

[0, 1]

In [17]:
from sklearn.metrics import roc_curve, auc

# One-vs-rest AUROC for every output class. The scores computed earlier in
# `predictions` are reused instead of re-running inference on each iteration,
# and the loop covers every output column, so a class the model never
# predicted is still reported.
for p in range(predictions.shape[1]):
    fpr, tpr, thresholds = roc_curve(y_test, predictions[:, p], pos_label=p)
    auroc = round(auc(fpr, tpr), 2)
    print('{} - {}'.format(p, auroc))

0 - 0.87
1 - 0.87


In [19]:
# Confusion matrix of true test labels versus predicted classes.
confusion_matrix(y_true=y_test, y_pred=predictions_rounded)

array([[74,  8],
       [20, 37]])

In [20]:
# Per-class precision, recall and F1 on the test split.
report = classification_report(y_test, predictions_rounded)
print(report)

              precision    recall  f1-score   support

           0       0.79      0.90      0.84        82
           1       0.82      0.65      0.73        57

    accuracy                           0.80       139
   macro avg       0.80      0.78      0.78       139
weighted avg       0.80      0.80      0.79       139

