In [13]:
import pandas as pd
import numpy as np
import keras
from keras.layers import Dense,GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.applications.mobilenet import preprocess_input
from keras.models import Model
from PIL import Image
import os

In [14]:
cwd = os.getcwd()


def _load_images(directory, ids):
    """Read ``<directory>/<id>.tif`` for every id and stack the pixel data.

    Parameters
    ----------
    directory : str
        Folder that contains the ``.tif`` files.
    ids : iterable of int
        File stems to read, in the order they should be stacked.

    Returns
    -------
    numpy.ndarray
        One entry per image, in the same order as ``ids``.
    """
    images = []
    for i in ids:
        # The context manager closes the file handle as soon as the pixels have
        # been copied out; a bare Image.open() leaves one handle open per image.
        with Image.open(os.path.join(directory, '{}.tif'.format(i))) as im:
            images.append(np.array(im))
    return np.array(images)


# loading training data (files 0.tif .. 1499.tif)
x_train = _load_images(os.path.join(cwd, 'training'), range(1500))
# The ImageNet-pretrained MobileNet expects inputs scaled to [-1, 1];
# preprocess_input performs that scaling.
# NOTE(review): assumes the .tif files hold 8-bit (0-255) pixel values -- confirm.
x_train = preprocess_input(x_train)

# Column 1 of the CSV (after the header row) is used as the training target.
y_train = np.genfromtxt(os.path.join(cwd, 'labels_training.csv'),
                        delimiter=',', skip_header=1)
y_train = y_train[:, 1]

# Fail early if the image and label counts disagree.
assert len(x_train) == len(y_train), 'number of training images and labels differ'

# loading testing data (files 1500.tif .. 2057.tif), preprocessed the same way
x_test = _load_images(os.path.join(cwd, 'testing'), range(1500, 2058))
x_test = preprocess_input(x_test)

In [15]:
# Backbone: MobileNet with ImageNet weights; include_top=False leaves out the
# original 1000-way classification layer.
base_model = MobileNet(weights='imagenet', include_top=False)

# New head: global average pooling, three ReLU dense layers, then a single
# sigmoid unit as the output.
x = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 1024, 512):
    x = Dense(units, activation='relu')(x)
preds = Dense(1, activation='sigmoid')(x)

# Full model: backbone input -> sigmoid prediction
model = Model(inputs=base_model.input, outputs=preds)



In [16]:
# Disabled cell: everything below is wrapped in a triple-quoted string, so none
# of it executes -- the notebook only echoes the string back as the cell output.
# The disabled code would (1) print the index and name of every layer in
# `model`, and (2) define two Keras-backend metric helpers:
#   sensitivity = rounded true positives / rounded actual positives
#   specificity = rounded true negatives / rounded actual negatives
# each with K.epsilon() added to the denominator.
'''
for i,layer in enumerate(model.layers):
    print(i,layer.name)

from keras import backend as K

def sensitivity(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())

def specificity(y_true, y_pred):
    true_negatives = K.sum(K.round(K.clip((1-y_true) * (1-y_pred), 0, 1)))
    possible_negatives = K.sum(K.round(K.clip(1-y_true, 0, 1)))
    return true_negatives / (possible_negatives + K.epsilon())
'''

'\nfor i,layer in enumerate(model.layers):\n    print(i,layer.name)\n\nfrom keras import backend as K\n\ndef sensitivity(y_true, y_pred):\n    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))\n    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))\n    return true_positives / (possible_positives + K.epsilon())\n\ndef specificity(y_true, y_pred):\n    true_negatives = K.sum(K.round(K.clip((1-y_true) * (1-y_pred), 0, 1)))\n    possible_negatives = K.sum(K.round(K.clip(1-y_true, 0, 1)))\n    return true_negatives / (possible_negatives + K.epsilon())\n'

In [17]:
# Training hyper-parameters
batch_size = 50
epochs = 30

# Class weights: class 0 keeps weight 1, class 1 is weighted by
# (len(y_train) - sum(y_train)) / sum(y_train).
# NOTE(review): this is the negative-to-positive ratio only if y_train holds
# 0/1 labels -- confirm.
n_pos = np.sum(y_train)
n_neg = len(y_train) - n_pos
class_weight = {1: n_neg / n_pos,
                0: 1}

optimizer = keras.optimizers.Adadelta()
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# The fit below takes roughly 20-40 minutes.
fit_kwargs = dict(epochs=epochs,
                  batch_size=batch_size,
                  class_weight=class_weight,
                  verbose=1,
                  validation_split=0.2)
model.fit(x_train, y_train, **fit_kwargs)

Train on 1200 samples, validate on 300 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0xb4d1c9cf8>

In [23]:
# Predict on the test images and write the scores to a CSV file
y = model.predict(x_test)

# One row per test image: ids 1500..2057 paired with the first output column.
test_ids = np.arange(1500, 2058, 1)
df = pd.DataFrame({'id': test_ids, 'score': y[:, 0]})
df.to_csv('submission.csv', index=False)