In [32]:
import requests, zipfile, io
import numpy as np
from PIL import Image
from keras.preprocessing import image 
from tqdm import tqdm

In [40]:
def load_dataset(file):
    base_url = 'https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/'
    r = requests.get(base_url + file, stream=True)
    archive = zipfile.ZipFile(io.BytesIO(r.content))
    
    tensors = None
    targets = []
    for name in tqdm(archive.namelist()):
        if name.endswith('.jpg'):
            img = Image.open(io.BytesIO(archive.read(name))).resize((224,224))
            x = image.img_to_array(img)
            x = np.expand_dims(x, axis=0)
            if tensors is None:
                tensors = x
            else:
                tensors = np.vstack((tensors, x))
            targets.append(name.split('/')[1])
            
    return tensors, targets

In [41]:
train_tensors, train_targets = load_dataset('train.zip')
valid_tensors, valid_targets = load_dataset('valid.zip')
test_tensors, test_targets = load_dataset('test.zip')

100%|██████████████████████████████████████████████████████████████████████████████| 2004/2004 [17:43<00:00,  1.88it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 154/154 [00:51<00:00,  2.99it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 604/604 [06:56<00:00,  1.45it/s]


In [42]:
train_tensors.shape, valid_tensors.shape, test_tensors.shape

((2000, 224, 224, 3), (150, 224, 224, 3), (600, 224, 224, 3))

In [43]:
import smtplib
server = smtplib.SMTP( "smtp.gmail.com", 587 )
server.starttls()
server.login( 'nicholas.d.vasko@gmail.com', 'Fibonacci2' )
server.sendmail( '', '7247141468@vtext.com', '%s\n%s\n%s' % (train_tensors.shape, valid_tensors.shape, test_tensors.shape) )

{}

In [49]:
from extract_bottleneck_features import *

def extract_bottleneck(tensors, extract_fn):
    temp = None
    for tensor in tqdm(tensors):
        tensor = np.expand_dims(tensor, axis=0)
        bottleneck_features = extract_fn(tensor)
        if temp is None:
            temp = bottleneck_features
        else:
            temp = np.vstack((temp, bottleneck_features))
    return temp

In [58]:
train_tensors = train_tensors[:2]
valid_tensors = valid_tensors[:2]
test_tensors = test_tensors[:2]

In [66]:
print('VGG19 Bottleneck')
#train_VGG19 = extract_bottleneck(train_tensors, extract_VGG19)
#valid_VGG19 = extract_bottleneck(valid_tensors, extract_VGG19)
#test_VGG19 = extract_bottleneck(test_tensors, extract_VGG19)

print('ResNet50 Bottleneck')
#train_Resnet50 = extract_bottleneck(train_tensors, extract_Resnet50)
#valid_Resnet50 = extract_bottleneck(valid_tensors, extract_Resnet50)
#test_Resnet50 = extract_bottleneck(test_tensors, extract_Resnet50)

print('InceptionV3 Bottleneck')
train_Inception = extract_bottleneck(train_tensors, extract_InceptionV3)
valid_Inception = extract_bottleneck(valid_tensors, extract_InceptionV3)
test_Inception = extract_bottleneck(test_tensors, extract_InceptionV3)

print('Xception Bottleneck')
train_Xception = extract_bottleneck(train_tensors, extract_Xception)
valid_Xception = extract_bottleneck(valid_tensors, extract_Xception)
test_Xception = extract_bottleneck(test_tensors, extract_Xception)

VGG19 Bottleneck
ResNet50 Bottleneck
Inception Bottleneck


  0%|                                                                                            | 0/2 [00:00<?, ?it/s]

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [04:08<00:00, 124.36s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [04:28<00:00, 134.50s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [05:22<00:00, 161.19s/it]


Xception Bottleneck


100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [04:39<00:00, 139.77s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [04:47<00:00, 143.85s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [05:04<00:00, 152.18s/it]


In [67]:
train_targets_MM = [1 if x == 'malanoma' else 0 for x in train_targets]
valid_targets_MM = [1 if x == 'malanoma' else 0 for x in valid_targets]
test_targets_MM = [1 if x == 'malanoma' else 0 for x in test_targets]

train_targets_SK = [0 if x == 'malanoma' else 1 for x in train_targets]
valid_targets_SK = [0 if x == 'malanoma' else 1 for x in valid_targets]
test_targets_SK = [0 if x == 'malanoma' else 1 for x in test_targets]

In [None]:
from keras.layers import GlobalAveragePooling2D
from keras.layers import Dense
from keras.models import Sequential

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [69]:
VGG19_model_MM = Sequential()
VGG19_model_MM.add(GlobalAveragePooling2D(input_shape=train_VGG19.shape[1:]))
VGG19_model_MM.add(Dense(1, activation='sigmoid'))

Resnet50_model_MM = Sequential()
Resnet50_model_MM.add(GlobalAveragePooling2D(input_shape=train_Resnet50.shape[1:]))
Resnet50_model_MM.add(Dense(1, activation='sigmoid'))

Inception_model_MM = Sequential()
Inception_model_MM.add(GlobalAveragePooling2D(input_shape=train_Inception.shape[1:]))
Inception_model_MM.add(Dense(1, activation='sigmoid'))

Xception_model_MM = Sequential()
Xception_model_MM.add(GlobalAveragePooling2D(input_shape=train_Xception.shape[1:]))
Xception_model_MM.add(Dense(1, activation='sigmoid'))

In [70]:
VGG19_model_SK = Sequential()
VGG19_model_SK.add(GlobalAveragePooling2D(input_shape=train_VGG19.shape[1:]))
VGG19_model_SK.add(Dense(1, activation='sigmoid'))

Resnet50_model_SK = Sequential()
Resnet50_model_SK.add(GlobalAveragePooling2D(input_shape=train_Resnet50.shape[1:]))
Resnet50_model_SK.add(Dense(1, activation='sigmoid'))

Inception_model_SK = Sequential()
Inception_model_SK.add(GlobalAveragePooling2D(input_shape=train_Inception.shape[1:]))
Inception_model_SK.add(Dense(1, activation='sigmoid'))

Xception_model_SK = Sequential()
Xception_model_SK.add(GlobalAveragePooling2D(input_shape=train_Xception.shape[1:]))
Xception_model_SK.add(Dense(1, activation='sigmoid'))

In [76]:
from keras.callbacks import ModelCheckpoint  
import tensorflow as tf

def as_keras_metric(method):
    import functools
    from keras import backend as K
    import tensorflow as tf
    @functools.wraps(method)
    def wrapper(self, args, **kwargs):
        """ Wrapper for turning tensorflow metrics into keras metrics """
        value, update_op = method(self, args, **kwargs)
        K.get_session().run(tf.local_variables_initializer())
        with tf.control_dependencies([update_op]):
            value = tf.identity(value)
        return value
    return wrapper

auc_roc = as_keras_metric(tf.metrics.auc)

VGG19_model_MM.compile(loss='mean_squared_error', optimizer='adam', metrics=[auc_roc])
#Resnet50_model_MM.compile(loss='mean_squared_error', optimizer='adam', metrics=[auc_roc])
#Inception_model_MM.compile(loss='mean_squared_error', optimizer='adam', metrics=[auc_roc])
#Xception_model_MM.compile(loss='mean_squared_error', optimizer='adam', metrics=[auc_roc])

#VGG19_model_SK.compile(loss='mean_squared_error', optimizer='adam', metrics=[auc_roc])
#Resnet50_model_SK.compile(loss='mean_squared_error', optimizer='adam', metrics=[auc_roc])
#Inception_model_SK.compile(loss='mean_squared_error', optimizer='adam', metrics=[auc_roc])
#Xception_model_SK.compile(loss='mean_squared_error', optimizer='adam', metrics=[auc_roc])

In [77]:
from keras.callbacks import ModelCheckpoint  

epochs = 20
batch_size = 20

In [79]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.VGG19_MM.hdf5', 
                               verbose=1, save_best_only=True)

VGG19_model_MM.fit(train_VGG19, train_targets_MM[:2], 
          validation_data=(valid_VGG19, valid_targets_MM[:2]),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer], verbose=1)

Train on 2 samples, validate on 2 samples
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.00181, saving model to saved_models/weights.best.VGG19_MM.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 0.00181 to 0.00181, saving model to saved_models/weights.best.VGG19_MM.hdf5
Epoch 3/20

Epoch 00003: val_loss improved from 0.00181 to 0.00181, saving model to saved_models/weights.best.VGG19_MM.hdf5
Epoch 4/20

Epoch 00004: val_loss improved from 0.00181 to 0.00181, saving model to saved_models/weights.best.VGG19_MM.hdf5
Epoch 5/20

Epoch 00005: val_loss improved from 0.00181 to 0.00181, saving model to saved_models/weights.best.VGG19_MM.hdf5
Epoch 6/20

Epoch 00006: val_loss improved from 0.00181 to 0.00181, saving model to saved_models/weights.best.VGG19_MM.hdf5
Epoch 7/20

Epoch 00007: val_loss improved from 0.00181 to 0.00181, saving model to saved_models/weights.best.VGG19_MM.hdf5
Epoch 8/20

Epoch 00008: val_loss improved from 0.00181 to 0.00181, saving model to saved_mode

<keras.callbacks.History at 0x1c2fad891d0>

In [None]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.Resnet50_MM.hdf5', 
                               verbose=1, save_best_only=True)

Resnet50_model_MM.fit(train_Resnet50, train_targets_MM, 
          validation_data=(valid_Resnet50, valid_targets_MM),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer], verbose=1)

In [None]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.Inception_MM.hdf5', 
                               verbose=1, save_best_only=True)

Inception_model_MM.fit(train_Inception, train_targets_MM, 
          validation_data=(valid_Inception, valid_targets_MM),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer], verbose=1)

In [None]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.Xception_MM.hdf5', 
                               verbose=1, save_best_only=True)

Xception_model_MM.fit(train_Xception, train_targets_MM, 
          validation_data=(valid_Xception, valid_targets_MM),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer], verbose=1)

In [None]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.VGG19_SK.hdf5', 
                               verbose=1, save_best_only=True)

VGG19_model_SK.fit(train_VGG19, train_targets_SK, 
          validation_data=(valid_VGG19, valid_targets_SK),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer], verbose=1)

In [None]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.Resnet50_SK.hdf5', 
                               verbose=1, save_best_only=True)

Resnet50_model_SK.fit(train_Resnet50, train_targets_SK, 
          validation_data=(valid_Resnet50, valid_targets_SK),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer], verbose=1)

In [None]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.Inception_SK.hdf5', 
                               verbose=1, save_best_only=True)

Inception_model_SK.fit(train_Inception, train_targets_SK, 
          validation_data=(valid_Inception, valid_targets_SK),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer], verbose=1)

In [None]:
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.Xception_SK.hdf5', 
                               verbose=1, save_best_only=True)

Xception_model_SK.fit(train_Xception, train_targets_SK, 
          validation_data=(valid_Xception, valid_targets_SK),
          epochs=epochs, batch_size=batch_size, callbacks=[checkpointer], verbose=1)

In [1]:
VGG19_model_MM.load_weights('saved_models/weights.best.VGG19_MM.hdf5')
#Resnet50_model_MM.load_weights('saved_models/weights.best.Resnet50_MM.hdf5')
#Inception_model_MM.load_weights('saved_models/weights.best.Inception_MM.hdf5')
#Xception_model_MM.load_weights('saved_models/weights.best.Xception_MM.hdf5')

#VGG19_model_SK.load_weights('saved_models/weights.best.VGG19_SK.hdf5')
#Resnet50_model_SK.load_weights('saved_models/weights.best.Resnet50_SK.hdf5')
#Inception_model_SK.load_weights('saved_models/weights.best.Inception_SK.hdf5')
#Xception_model_SK.load_weights('saved_models/weights.best.Xception_SK.hdf5')

NameError: name 'VGG19_model_MM' is not defined

In [None]:
# get index of prediction for each image in test set
VGG19_predictions_MM = [np.argmax(VGG19_model_MM.predict(np.expand_dims(feature, axis=0))) for feature in test_VGG19]
Resnet50_predictions_MM = [np.argmax(Resnet50_model_MM.predict(np.expand_dims(feature, axis=0))) for feature in test_Resnet50]
Inception_predictions_MM = [np.argmax(Inception_model_MM.predict(np.expand_dims(feature, axis=0))) for feature in test_Inception]
Xception_predictions_MM = [np.argmax(Xception_model_MM.predict(np.expand_dims(feature, axis=0))) for feature in test_Xception]

# report test accuracy
VGG19_test_accuracy_MM = 100*np.sum(np.array(VGG19_predictions_MM)==np.argmax(test_targets_MM, axis=1))/len(VGG19_predictions_MM)
Resnet50_test_accuracy_MM = 100*np.sum(np.array(Resnet50_predictions_MM)==np.argmax(test_targets_MM, axis=1))/len(Resnet50_predictions_MM)
Inception_test_accuracy_MM = 100*np.sum(np.array(Inception_predictions_MM)==np.argmax(test_targets_MM, axis=1))/len(Inception_predictions_MM)
Xception_test_accuracy_MM = 100*np.sum(np.array(Xception_predictions_MM)==np.argmax(test_targets_MM, axis=1))/len(Xception_predictions_MM)

# get index of prediction for each image in test set
VGG19_predictions_SK = [np.argmax(VGG19_model_SK.predict(np.expand_dims(feature, axis=0))) for feature in test_VGG19]
Resnet50_predictions_SK = [np.argmax(Resnet50_model_SK.predict(np.expand_dims(feature, axis=0))) for feature in test_Resnet50]
Inception_predictions_SK = [np.argmax(Inception_model_SK.predict(np.expand_dims(feature, axis=0))) for feature in test_Inception]
Xception_predictions_SK = [np.argmax(Xception_model_SK.predict(np.expand_dims(feature, axis=0))) for feature in test_Xception]

# report test accuracy
VGG19_test_accuracy_SK = 100*np.sum(np.array(VGG19_predictions_SK)==np.argmax(test_targets_SK, axis=1))/len(VGG19_predictions_SK)
Resnet50_test_accuracy_SK = 100*np.sum(np.array(Resnet50_predictions_SK)==np.argmax(test_targets_SK, axis=1))/len(Resnet50_predictions_SK)
Inception_test_accuracy_SK = 100*np.sum(np.array(Inception_predictions_SK)==np.argmax(test_targets_SK, axis=1))/len(Inception_predictions_SK)
Xception_test_accuracy_SK = 100*np.sum(np.array(Xception_predictions_SK)==np.argmax(test_targets_SK, axis=1))/len(Xception_predictions_SK)

print('Melonoma detection')
print('-----------------------------')
print('VGG19 test accuracy: %.4f%%' % VGG19_test_accuracy_MM)
print('Resnet50 test accuracy: %.4f%%' % Resnet50_test_accuracy_MM)
print('Inception test accuracy: %.4f%%' % Inception_test_accuracy_MM)
print('Xception test accuracy: %.4f%%' % Xception_test_accuracy_MM)

print('Melanocytic detection')
print('-----------------------------')
print('VGG19 test accuracy: %.4f%%' % VGG19_test_accuracy_SK)
print('Resnet50 test accuracy: %.4f%%' % Resnet50_test_accuracy_SK)
print('Inception test accuracy: %.4f%%' % Inception_test_accuracy_SK)
print('Xception test accuracy: %.4f%%' % Xception_test_accuracy_SK)

In [None]:
best_predictions_MM = 
best_predictions_SK = 

In [None]:
path = 'data/test'
filenames =  glob(path + '/melanoma/*')
filenames.extend(glob(path + '/nevus/*'))
filenames.extend(glob(path + '/seborrheic_keratosis/*'))

output = pd.DataFrame()
output['Id'] = filenames
output['task_1'] = best_predictions_MM
output['task_2'] = best_predictions_SK

output.to_csv('submission.csv')