In [1]:
import os
import PIL
from PIL import Image
from keras.applications.resnet50 import preprocess_input, ResNet50
from keras.models import load_model
import numpy as np
from keras import backend as K

Using TensorFlow backend.


In [2]:
import pandas as pd

# Input/output locations, all relative to the notebook's directory.
TEST_DATA = '../data/test_data.h5.npy'
OUTPUT_FILE = '../kaggle/submission.csv'
METADATA = '../metadata.npy'

# `.item()` unwraps a single-element array into the Python object it holds
# (presumably a dict written with np.save -- TODO confirm). Object arrays are
# stored pickled, and NumPy >= 1.16.3 refuses to read them unless
# allow_pickle=True is passed explicitly.
# NOTE(review): unpickling can execute arbitrary code; only load a metadata
# file that this project produced itself.
metadata = np.load(METADATA, allow_pickle=True).item()

In [3]:
# Load the precomputed test-set array from TEST_DATA. Despite its name, `preds`
# is the *input* handed to every model's predict_proba() further down, not a
# set of predictions (presumably ResNet50 bottleneck features -- TODO confirm).
preds = np.load(TEST_DATA)
# The recorded output of this cell shows the shape (794, 1, 1, 2048).
preds.shape

(794, 1, 1, 2048)

In [4]:
def recall(y_true, y_pred):
    """Keras metric: matched positives divided by labelled positives.

    An entry counts as matched when ``y_true * y_pred``, clipped to [0, 1]
    and rounded, equals 1; it counts as labelled positive when ``y_true``,
    clipped and rounded the same way, equals 1. ``K.epsilon()`` is added to
    the denominator so that a batch without positive labels gives 0 instead
    of dividing by zero.
    """
    matched = K.round(K.clip(y_true * y_pred, 0, 1))
    labelled = K.round(K.clip(y_true, 0, 1))
    return K.sum(matched) / (K.sum(labelled) + K.epsilon())

def precision(y_true, y_pred):
    """Keras metric: matched positives divided by predicted positives.

    An entry counts as matched when ``y_true * y_pred``, clipped to [0, 1]
    and rounded, equals 1; it counts as predicted positive when ``y_pred``,
    clipped and rounded the same way, equals 1. ``K.epsilon()`` is added to
    the denominator so that a batch without positive predictions gives 0
    instead of dividing by zero.
    """
    matched = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(matched) / (K.sum(predicted) + K.epsilon())

def f1(y_true, y_pred):
    """Keras metric: harmonic mean of precision and recall.

    Precision and recall are computed inline with the same clip-and-round
    counting used by the module-level ``precision`` and ``recall`` metrics,
    so this function does not depend on any sibling definition.

    ``K.epsilon()`` is added to the final denominator as well: without it,
    a batch with no matched positives makes precision and recall both 0 and
    the score evaluates 0/0, which is NaN.

    Args:
        y_true: ground-truth label tensor.
        y_pred: predicted score tensor, same shape as ``y_true``.

    Returns:
        Scalar tensor holding the F1 score for the batch.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision_value = true_positives / (predicted_positives + K.epsilon())
    recall_value = true_positives / (possible_positives + K.epsilon())

    # Epsilon keeps the score finite (0 rather than NaN) when both terms are 0.
    harmonic_denominator = precision_value + recall_value + K.epsilon()
    return 2 * ((precision_value * recall_value) / harmonic_denominator)

In [5]:
from sklearn.externals import joblib

# Base names (without the '.h5' suffix) of the saved CNN models to ensemble.
models_filenames = [
    'KF_CNN_Model1',
    'KF_CNN_Model2',
    'KF_CNN_Model3',
    'KF_CNN_Model4',
    'KF_CNN_Model5',
    'KF_CNN_Model6',
    'KF_CNN_Model7',
]

# Handed to load_model so the custom metric names can be resolved on load
# (presumably the models were compiled with these metrics -- TODO confirm).
custom_metrics = {'recall': recall, 'precision': precision, 'f1': f1}

# One probability array per model, all computed on the same test array.
probabilities = []
for m_filename in models_filenames:
    model_path = '../models/Kaggle/CNN/' + m_filename + '.h5'
    cnn_model = load_model(model_path, custom_objects=custom_metrics)
    probabilities.append(cnn_model.predict_proba(preds))

# Every model gets weight 1, so the weighted average is a plain mean over the
# model axis (axis 0 of the stacked array).
weights = [1] * len(models_filenames)
proba = np.average(np.asarray(probabilities), axis=0, weights=weights)
proba



array([[  1.99823732e-05,   9.45388256e-04,   6.99904473e-04, ...,
          9.90803506e-01,   2.63274661e-03,   5.55966635e-05],
       [  9.59144963e-03,   1.62272452e-06,   2.99387272e-06, ...,
          2.21521759e-06,   5.22874388e-06,   1.68314589e-06],
       [  1.97613882e-03,   1.31476138e-03,   5.10739475e-01, ...,
          4.35473892e-02,   1.13876805e-02,   2.92080109e-03],
       ..., 
       [  5.49004918e-06,   5.10091303e-06,   1.97210670e-05, ...,
          1.57501145e-04,   2.24631036e-05,   5.55363977e-05],
       [  3.61063954e-07,   2.71989386e-06,   9.99985371e-01, ...,
          1.84972159e-06,   2.14031309e-06,   8.58782736e-07],
       [  3.40651507e-04,   1.60331943e-03,   8.42123965e-04, ...,
          2.37457618e-04,   3.51861177e-05,   9.64895214e-01]])

In [6]:
# Index of the highest averaged probability in each row.
# NOTE(review): this rebinds `preds`, which until now held the array passed to
# predict_proba(); re-running the ensemble cell after this one would feed these
# labels to the models. Restart the kernel and run all cells in order.
preds = np.argmax(proba, axis=1)
preds

array([ 9,  6,  2, 11,  1,  6,  7,  8,  3,  6,  7,  9,  2,  4,  6,  5, 11,
        6,  0,  8,  3,  8, 11,  8, 10,  6,  3,  7,  6,  2, 10,  0, 11,  6,
        6, 10,  4,  1,  3,  1,  4,  3,  5,  2,  3,  2, 10,  2,  7, 11,  8,
        8,  6,  8,  6,  6,  5, 11,  3,  6,  8,  0,  8,  1, 10,  8,  3,  6,
       10,  5,  1,  8,  1, 11,  8, 10,  3, 10,  6,  6,  6,  1,  2,  7, 11,
        1,  8,  8,  7, 10,  5,  2,  6,  6,  5, 10,  6,  6,  3,  4,  7,  6,
        8,  0,  2,  3,  3,  1,  3,  3,  3,  8, 11,  5,  8,  1,  6,  7,  6,
        6,  8,  3,  8,  8,  6,  1,  3,  1,  5,  9,  3,  0,  1,  6,  2, 11,
        0,  6,  6,  3,  3,  3,  3,  8,  9,  8,  6,  8, 10,  4,  5,  3,  5,
        1,  6,  5,  2,  6,  1, 10,  9,  2, 11, 11,  6,  0,  6,  8, 11,  5,
        1, 11,  8,  8,  7,  8,  9,  6,  3, 11,  6, 10,  9,  3,  6,  9, 10,
        8,  6, 11, 10,  5,  6,  3, 11, 11,  2,  6,  6,  8, 10,  9,  2, 10,
       10, 11,  2,  2,  9,  1,  8,  3,  6, 10,  3,  7, 11,  3,  3, 11,  6,
        6,  5,  3, 11, 11

In [7]:
DATA_FOLDER = '../data/test/'

# NOTE(review): os.listdir() returns entries in arbitrary order. The rows of
# the prediction array are matched to these names purely by position, so this
# listing must come back in the same order as when the test array was built
# -- confirm against the notebook that produced it.
files = os.listdir(DATA_FOLDER)
n_files = len(files)

# Submission skeleton: 'file' is filled now, 'species' stays empty until the
# predicted labels are attached.
cols = ['file', 'species']
df_stg1 = pd.DataFrame({'file': list(files)}, columns=cols)
df_stg1.head()

Unnamed: 0,file,species
0,1b490196c.png,
1,85431c075.png,
2,506347cfe.png,
3,7f46a71db.png,
4,668c1007c.png,


In [8]:
# Species name for each predicted class index.
# NOTE(review): the position of each name must match the label encoding the
# models were trained with -- confirm against the training notebook.
classes = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]

# Translate every predicted index into its species name, keeping row order.
species = [classes[pred] for pred in preds]
species

['Shepherds Purse',
 'Loose Silky-bent',
 'Cleavers',
 'Sugar beet',
 'Charlock',
 'Loose Silky-bent',
 'Maize',
 'Scentless Mayweed',
 'Common Chickweed',
 'Loose Silky-bent',
 'Maize',
 'Shepherds Purse',
 'Cleavers',
 'Common wheat',
 'Loose Silky-bent',
 'Fat Hen',
 'Sugar beet',
 'Loose Silky-bent',
 'Black-grass',
 'Scentless Mayweed',
 'Common Chickweed',
 'Scentless Mayweed',
 'Sugar beet',
 'Scentless Mayweed',
 'Small-flowered Cranesbill',
 'Loose Silky-bent',
 'Common Chickweed',
 'Maize',
 'Loose Silky-bent',
 'Cleavers',
 'Small-flowered Cranesbill',
 'Black-grass',
 'Sugar beet',
 'Loose Silky-bent',
 'Loose Silky-bent',
 'Small-flowered Cranesbill',
 'Common wheat',
 'Charlock',
 'Common Chickweed',
 'Charlock',
 'Common wheat',
 'Common Chickweed',
 'Fat Hen',
 'Cleavers',
 'Common Chickweed',
 'Cleavers',
 'Small-flowered Cranesbill',
 'Cleavers',
 'Maize',
 'Sugar beet',
 'Scentless Mayweed',
 'Scentless Mayweed',
 'Loose Silky-bent',
 'Scentless Mayweed',
 'Loose Sil

In [9]:
# Attach the predicted species names; values are matched to rows by position.
df_stg1 = df_stg1.assign(species=species)
df_stg1.head()

Unnamed: 0,file,species
0,1b490196c.png,Shepherds Purse
1,85431c075.png,Loose Silky-bent
2,506347cfe.png,Cleavers
3,7f46a71db.png,Sugar beet
4,668c1007c.png,Charlock


In [10]:
# Order the rows by file name, then write the submission without the index
# column so the CSV contains only 'file' and 'species'.
df_stg1 = df_stg1.sort_values('file')
df_stg1.to_csv(OUTPUT_FILE, index=False)