In [1]:
import os
import PIL
from PIL import Image
from keras.applications.resnet50 import preprocess_input, ResNet50
from keras.models import load_model
import numpy as np
from keras import backend as K
from sklearn.externals import joblib

Using TensorFlow backend.


In [2]:
import pandas as pd

# Input and output locations, relative to the notebook's working directory.
TEST_DATA = '../data/test_data.h5.npy'
OUTPUT_FILE = '../kaggle/submission.csv'
METADATA = '../metadata.npy'

# `.item()` unwraps a size-1 array into the single Python object it holds
# (presumably a dict of dataset metadata -- TODO confirm).
# NumPy >= 1.16.3 defaults to allow_pickle=False, which makes loading a
# pickled object array raise ValueError, so the flag is passed explicitly.
# Only load files you produced yourself: unpickling can execute arbitrary code.
metadata = np.load(METADATA, allow_pickle=True).item()

In [3]:
# Load the array stored at TEST_DATA. The variable name suggests outputs of a
# pre-trained network for the test images (presumably ResNet50 bottleneck
# features -- TODO confirm against the script that produced the file).
pre_trained_preds = np.load(TEST_DATA)
# Display the array shape; the recorded run shows (794, 1, 1, 2048).
pre_trained_preds.shape

(794, 1, 1, 2048)

In [4]:
def recall(y_true, y_pred):
    """Keras metric: predicted-and-actual positives over actual positives.

    The element-wise product ``y_true * y_pred`` and ``y_true`` itself are
    each clipped to [0, 1] and rounded before being summed, so both counts
    are taken over hard 0/1 values. ``K.epsilon()`` is added to the
    denominator so that a batch without positives does not divide by zero.

    Args:
        y_true: Backend tensor of ground-truth values.
        y_pred: Backend tensor of predicted values.

    Returns:
        A scalar backend tensor holding the recall.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())

def precision(y_true, y_pred):
    """Keras metric: predicted-and-actual positives over predicted positives.

    The element-wise product ``y_true * y_pred`` and ``y_pred`` itself are
    each clipped to [0, 1] and rounded before being summed, so both counts
    are taken over hard 0/1 values. ``K.epsilon()`` is added to the
    denominator so that a batch without positive predictions does not divide
    by zero.

    Args:
        y_true: Backend tensor of ground-truth values.
        y_pred: Backend tensor of predicted values.

    Returns:
        A scalar backend tensor holding the precision.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged_positives + K.epsilon())

def f1(y_true, y_pred):
    """Keras metric: harmonic mean of precision and recall (F1 score).

    Precision and recall are computed here with the same clip/round/sum
    formulas as the stand-alone ``precision`` and ``recall`` metrics, sharing
    a single true-positive count. The function is self-contained so it can be
    passed on its own through ``custom_objects`` when loading a saved model.

    Args:
        y_true: Backend tensor of ground-truth values.
        y_pred: Backend tensor of predicted values.

    Returns:
        A scalar backend tensor holding the F1 score. It evaluates to 0
        (rather than NaN) when there are no true positives.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision_value = true_positives / (predicted_positives + K.epsilon())
    recall_value = true_positives / (possible_positives + K.epsilon())

    # With no true positives both terms are 0; without the epsilon the
    # expression below would be 0/0 and the reported metric would be NaN.
    return 2 * ((precision_value * recall_value)
                / (precision_value + recall_value + K.epsilon()))

In [6]:
def get_cnn_predictions(pre_trained_preds, weights=None):
    """Average the outputs of the seven saved CNN models.

    Each ``KF_CNN_Model<i>.h5`` file under ``../models/Kaggle/CNN/`` is loaded
    with the custom metrics it was saved with and run on the same input. The
    per-model outputs are then combined by a weighted average.

    Args:
        pre_trained_preds: Array passed unchanged to every model's
            ``predict`` (presumably pre-computed image features -- TODO
            confirm the shape the models expect).
        weights: Optional sequence with one weight per model, in the order of
            the file list below. Defaults to equal weights.

    Returns:
        A tuple ``(proba, preds)``: ``proba`` is the weighted average of the
        model outputs and ``preds`` is its argmax along axis 1.
    """
    models_filenames = ['KF_CNN_Model1', 'KF_CNN_Model2',
                        'KF_CNN_Model3', 'KF_CNN_Model4',
                        'KF_CNN_Model5', 'KF_CNN_Model6',
                        'KF_CNN_Model7']

    if weights is None:
        weights = [1] * len(models_filenames)

    # The metric functions must be supplied so Keras can deserialize the
    # compiled models that reference them by name.
    custom_objects = {'recall': recall, 'precision': precision, 'f1': f1}

    probabilities = []
    for m_filename in models_filenames:
        cnn_model = load_model('../models/Kaggle/CNN/' + m_filename + '.h5',
                               custom_objects=custom_objects)
        # `predict` returns the same values as the old Sequential-only
        # `predict_proba`, but also exists on functional models and on Keras
        # versions where `predict_proba` has been removed.
        probabilities.append(cnn_model.predict(pre_trained_preds))

    proba = np.average(np.asarray(probabilities), axis=0, weights=weights)
    preds = proba.argmax(1)

    return proba, preds

def get_gb_predictions(pre_trained_preds):
    """Run the saved "GB" model on the flattened input.

    The input is reshaped to two dimensions (first axis kept, all remaining
    axes merged) before being handed to the estimator.

    Args:
        pre_trained_preds: Array whose first axis indexes samples.

    Returns:
        A tuple ``(proba, preds)`` holding the estimator's ``predict_proba``
        and ``predict`` results for the flattened input.
    """
    n_samples = pre_trained_preds.shape[0]
    flat_features = pre_trained_preds.reshape(n_samples, -1)

    # Despite the .h5 extension, this file is read with joblib, not Keras.
    gb_estimator = joblib.load("../models/Kaggle/GB_Model.h5")

    return (gb_estimator.predict_proba(flat_features),
            gb_estimator.predict(flat_features))

def get_voting_predictions(pre_trained_preds, voting_weights):
    """Blend the CNN and GB class probabilities by weighted soft voting.

    Args:
        pre_trained_preds: Input forwarded to both ``get_cnn_predictions``
            and ``get_gb_predictions``.
        voting_weights: Two weights in the order ``[cnn, gb]``, or ``None``
            to weight both equally.

    Returns:
        A tuple ``(proba, preds)``: the weighted average of the two
        probability arrays and its argmax along axis 1.
    """
    # Only the probabilities are blended; each model's own hard labels are
    # not needed here.
    cnn_proba, _ = get_cnn_predictions(pre_trained_preds)
    gb_proba, _ = get_gb_predictions(pre_trained_preds)

    weights = [1,1] if voting_weights is None else voting_weights

    stacked = np.asarray([cnn_proba, gb_proba])
    blended = np.average(stacked, axis=0, weights=weights)

    return blended, blended.argmax(1)


In [7]:
def get_predictions(pre_trained_preds, model="CNN", voting_weights=None):
    """Return hard class predictions from the selected model.

    Args:
        pre_trained_preds: Input forwarded to the chosen prediction function.
        model: Which predictor to use: ``"CNN"``, ``"GB"`` or ``"VOTING"``.
        voting_weights: Weights passed to ``get_voting_predictions``; only
            used when ``model`` is ``"VOTING"``.

    Returns:
        The predictions produced by the chosen function (the probabilities
        it also returns are discarded).

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    if model == "CNN":
        _, preds = get_cnn_predictions(pre_trained_preds)
    elif model == "GB":
        _, preds = get_gb_predictions(pre_trained_preds)
    elif model == "VOTING":
        _, preds = get_voting_predictions(pre_trained_preds, voting_weights)
    else:
        # Without this branch an unknown name would reach `return preds`
        # with `preds` unbound and fail with an opaque UnboundLocalError.
        raise ValueError(
            "Unknown model {!r}; expected 'CNN', 'GB' or 'VOTING'".format(model))
    return preds

In [13]:
# Final predictions from the CNN + GB vote. The weights follow the stacking
# order used in get_voting_predictions ([cnn_proba, gb_proba]), so the CNN
# ensemble counts twice as much as the GB model.
preds = get_predictions(pre_trained_preds, model="VOTING", voting_weights=[2,1])



In [14]:
DATA_FOLDER = '../data/test/'

# NOTE(review): os.listdir returns entries in arbitrary order, and these names
# are later paired with the rows of the prediction array purely by position.
# This presumably relies on the features having been extracted in the same
# listing order -- TODO confirm.
files = os.listdir(DATA_FOLDER)

n_files = len(files)

# Start from an empty frame with the two columns written to the CSV; only
# 'file' is populated in this cell.
cols = ['file','species']
df_stg1 = pd.DataFrame(columns=cols)

df_stg1['file'] = [f for f in files]
df_stg1.head()

Unnamed: 0,file,species
0,1b490196c.png,
1,85431c075.png,
2,506347cfe.png,
3,7f46a71db.png,
4,668c1007c.png,


In [15]:
# Species names; position i is the name used for predicted class index i
# (the order presumably matches the label encoding used at training time --
# TODO confirm).
classes = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]
# Translate every predicted class index into its species name.
species = [classes[class_index] for class_index in preds]

In [16]:
# Fill the 'species' column with the predicted names; rows are matched to
# file names by position only.
df_stg1['species'] = species
df_stg1.head()

Unnamed: 0,file,species
0,1b490196c.png,Shepherds Purse
1,85431c075.png,Loose Silky-bent
2,506347cfe.png,Cleavers
3,7f46a71db.png,Sugar beet
4,668c1007c.png,Charlock


In [17]:
# Order the rows by file name, then write the CSV to OUTPUT_FILE.
# index=False keeps the DataFrame index out of the file, so only the 'file'
# and 'species' columns are written.
df_stg1 = df_stg1.sort_values(by=['file'])
df_stg1.to_csv(OUTPUT_FILE, index=False)