In [1]:
import keras
import xgboost
from sklearn.metrics import accuracy_score

import pickle

import pandas as pd
import numpy as np

In [2]:
# Load the pre-built dataset dictionary from disk.
# NOTE: pickle.load can execute arbitrary code while deserialising, so this
# file must come from a trusted source.
with open('data_dct.pkl','rb') as f:
    data_dct = pickle.load(f)
# Show which entries the dictionary provides.
data_dct.keys()

dict_keys(['X', 'y', 'folds'])

In [3]:
# Pull the three dataset entries out of the loaded dictionary in one step.
X, y, folds = (data_dct[key] for key in ('X', 'y', 'folds'))
# Number of cross-validation folds available.
nfolds = len(folds)

In [4]:
# Convert each label row of `y` into an integer class index: the position of
# the first entry equal to 1 (same rule as list(row).index(1)), computed for
# all rows at once instead of in a Python-level loop.
# Assumes y is a 2-D array-like with one row per sample - TODO confirm.
is_hot = np.asarray(y) == 1
if not is_hot.any(axis=1).all():
    # The per-row lookup would also have raised ValueError for such a row.
    raise ValueError("every row of y must contain a 1")
# argmax over a boolean mask returns the index of the first True in each row.
y_new = is_hot.argmax(axis=1)

## Conduct Experiment

For each fold, we load the CNN model previously trained for that fold in CNN_Model.ipynb and extract the outputs of its feature layer (the Flatten layer). We then fit an XGBoost model on these extracted features and evaluate it on the held-out fold.

In [5]:
# Cross-validated experiment: for every fold, load that fold's saved CNN, use
# one of its intermediate layers as a feature extractor, fit an XGBoost
# classifier on the extracted features, and record train/test accuracy.

# Index of the CNN layer whose output is used as the feature vector
# (described in the notebook text as the Flatten layer).
FEATURE_LAYER_INDEX = 4
# Number of boosting rounds passed to xgboost.train.
NUM_BOOST_ROUNDS = 100

# Booster settings are identical for every fold, so they are built once here
# instead of on every loop iteration.
# NOTE(review): num_class is hard-coded; confirm it matches the number of
# label columns in y.
param = {
    'objective': 'multi:softmax',
    'num_class': 11,
    'learning_rate': .1,
    'max_depth': 5,
    'alpha': 10,
}

train_accuracies = []
test_accuracies = []
for fold_idx in range(nfolds):
    print("Running on fold " + str(fold_idx+1))

    # The current fold is held out; every other fold goes into training.
    X_test = X[folds[fold_idx]]
    y_test = y_new[folds[fold_idx]]

    train_fold_idxs = [i for i in range(nfolds) if i != fold_idx]
    train_idxs = np.concatenate([list(folds[idx]) for idx in train_fold_idxs])
    X_train = X[train_idxs]
    y_train = y_new[train_idxs]

    # Drop the Keras global state left behind by the previous fold's model so
    # that memory does not grow with every model loaded in this loop. Clearing
    # before the load (not after use) leaves the last fold's model intact
    # once the loop finishes.
    keras.backend.clear_session()
    trained_model = keras.models.load_model('CNN_models/model_fold_' + str(fold_idx+1))

    # Based on https://alan.do/deep-gradient-boosted-learning-4e33adaf2969
    # Truncated copy of the CNN that stops at the feature layer.
    feature_layer_model = keras.Model(
        inputs=trained_model.input,
        outputs=trained_model.get_layer(index=FEATURE_LAYER_INDEX).output)

    # Fit the booster on the CNN features of the training folds.
    feature_layer_output_train = feature_layer_model.predict(X_train)
    dtrain = xgboost.DMatrix(feature_layer_output_train, label=y_train)
    xgb_model = xgboost.train(param, dtrain, NUM_BOOST_ROUNDS)
    train_preds = xgb_model.predict(dtrain)

    # Score the held-out fold using the same feature extractor.
    feature_layer_output_test = feature_layer_model.predict(X_test)
    dtest = xgboost.DMatrix(feature_layer_output_test)
    test_preds = xgb_model.predict(dtest)

    train_accuracy = accuracy_score(y_train, train_preds)
    test_accuracy = accuracy_score(y_test, test_preds)

    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)


Running on fold 1
Running on fold 2
Running on fold 3


In [6]:
# Held-out accuracy of the XGBoost model for each fold, in fold order.
test_accuracies

[0.943034404963339, 0.9414961800748602, 0.9445213556888684]

In [7]:
# Mean held-out accuracy across all folds.
np.mean(test_accuracies)

0.9430173135756892