# Classifying Data Using a Neural Network

### Load Data

In [None]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import random
from keras.utils.np_utils import to_categorical


# Seed NumPy's global RNG for reproducibility.
# A fresh seed can be drawn with: seed = random.randint(0, 2**32)
seed = 2690082906
np.random.seed(seed)

# Note that data is already 1-hot encoded; every column is cast to float32.
train = pd.read_csv("train.csv", index_col=None).astype("float32")

train = shuffle(train)
train.head(10)

# Separate the features (every column after the first) from the labels
# (the "Class" column, one-hot encoded with to_categorical).
X = train.iloc[:, 1:]
Y = to_categorical(train["Class"])
input_len, input_sz = X.shape
# Alternative seed kept for reference:
# seed = 3983249514
# np.random.seed(seed)

## Grid Search

### Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import GaussianDropout, AlphaDropout, GaussianNoise
from keras.optimizers import RMSprop, SGD, Adamax
from keras.initializers import Constant

# Fully connected network with one hidden layer (Dense layers only).
def create_model(activation = 'relu',
                 momentum = 0.1,
                 decay_rate = 0.1,
                 dropout_rate = 0.2,
                 learn_rate = 0.1,
                 weight_constraint = None,
                 neurons = 5,
                 init = 'normal',
                 seed = None,
                 optimiser = 'adamax'):
    """Build and compile the classifier network.

    The network is: Dense(neurons) -> GaussianDropout -> Dense(2, softmax).
    The input width is read from the module-level ``input_sz``.

    Parameters:
        activation: activation of the hidden layer.
        momentum: momentum passed to the SGD optimiser.
        decay_rate: learning-rate decay passed to the SGD optimiser.
        dropout_rate: rate of the GaussianDropout layer.
        learn_rate: learning rate passed to the SGD optimiser.
        weight_constraint: accepted for interface compatibility; not used.
        neurons: number of units in the hidden layer.
        init: kernel initialiser of the hidden layer.
        seed: if not None, NumPy's global RNG is re-seeded with it first.
        optimiser: accepted for interface compatibility; currently ignored,
            the model is always compiled with SGD.

    Returns:
        The compiled Sequential model.
    """
    if seed is not None:
        np.random.seed(seed)

    # create model
    model = Sequential()
    model.add(Dense(neurons, input_dim = input_sz, kernel_initializer=init, activation=activation))
    model.add(GaussianDropout(dropout_rate))
    # Output layer: one unit per class, weights start at a constant 0.5.
    model.add(Dense(2, input_dim = neurons, kernel_initializer=Constant(0.5), activation='softmax'))

    # Use a separate local name so the (ignored) `optimiser` argument is
    # not silently rebound.
    sgd = SGD(lr=learn_rate, momentum=momentum, decay=decay_rate)

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

def make_prediction(model, test_path="test.csv", output_path="predcnn.csv"):
    """Score the test set with ``model`` and write the scores to a CSV file.

    Parameters:
        model: object whose ``predict(features)`` returns a 2-D array; the
            second column (index 1) is written out as the score.
        test_path: CSV file to read. It must contain a "TestId" column, and
            every column after the first is used as a feature.
        output_path: CSV file to write, with the columns "TestId" and
            "PredictedScore" and no index column.

    Returns:
        None. The result is the file written to ``output_path``.
    """
    test = pd.read_csv(test_path)
    X_test, Id = test.iloc[:, 1:], test["TestId"]

    # Keep only the score of the second output column.
    Y_pred = model.predict(X_test)[:, 1]
    print(Y_pred.shape)

    # Both frames carry the default 0..n-1 index, so the join pairs each id
    # with the score predicted for the same row.
    pred_df = pd.DataFrame(Id)
    pred_df = pred_df.join(pd.DataFrame({"PredictedScore": Y_pred}))
    pred_df.to_csv(output_path, index=False)

### Setup Gridsearch

In [None]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
from keras.constraints import maxnorm

# Grid Searching with Keras.
# Code based off the following source:
########################################
# Title: How to Grid Search Hyperparameters for Deep Learning Models in Python With Keras
# Author: Jason Brownlee
# Date: 09/08/2018
# Available: https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/
########################################
#
# Number of cross-validation folds (passed as `cv` to GridSearchCV and as
# the fold count of the stratified split used for the ROC analysis).
kfolds = 8

def grid_search_learning_params():
    """Grid-search the optimiser-related hyper-parameters.

    Varies momentum, dropout rate, learning rate and decay rate while the
    activation, hidden-layer size, epochs and batch size are held fixed.
    The search is fitted on the module-level X and Y with `kfolds`-fold
    cross-validation, then the best result and the mean/std test score of
    every parameter combination are printed.
    """
    # Momentum and dropout rate share the same candidate values.
    rate_candidates = np.linspace(0, 0.5, 3)

    param_grid = {
        'activation': ['relu'],
        'momentum': rate_candidates,
        'learn_rate': [0.001, 0.01, 0.1, 0.5],
        'decay_rate': [0.1, 0.005, 0.00005, 0],
        'dropout_rate': rate_candidates,
        'neurons': [20],
        'epochs': [200],
        'batch_size': [2000],
    }

    classifier = KerasClassifier(build_fn=create_model)
    search = GridSearchCV(estimator=classifier, cv=kfolds, param_grid=param_grid, n_jobs=1)
    outcome = search.fit(X, Y)

    # summarize results
    print("Best: %f using %s" % (outcome.best_score_, outcome.best_params_))
    rows = zip(outcome.cv_results_['mean_test_score'],
               outcome.cv_results_['std_test_score'],
               outcome.cv_results_['params'])
    for mean, stdev, param in rows:
        print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
def grid_search_neurons():
    """Grid-search the number of hidden-layer neurons.

    Every other hyper-parameter is pinned to a single value; modify those
    values depending on the best parameters found by the previous grid
    search. The search is fitted on the module-level X and Y with
    `kfolds`-fold cross-validation, then the best result and the mean/std
    test score of every candidate are printed.
    """
    param_grid = {
        'activation': ['relu'],
        'momentum': [0.25],
        'learn_rate': [0.5],
        'decay_rate': [5e-05],
        'dropout_rate': [0],
        # The only dimension that is actually searched.
        'neurons': [1, 5, 10, 20, 25, 30],
        'epochs': [200],
        'batch_size': [2000],
    }

    classifier = KerasClassifier(build_fn=create_model)
    search = GridSearchCV(estimator=classifier, cv=kfolds, param_grid=param_grid, n_jobs=1)
    outcome = search.fit(X, Y)

    # summarize results
    print("Best: %f using %s" % (outcome.best_score_, outcome.best_params_))
    rows = zip(outcome.cv_results_['mean_test_score'],
               outcome.cv_results_['std_test_score'],
               outcome.cv_results_['params'])
    for mean, stdev, param in rows:
        print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
def grid_search_activation():
    """Grid-search the hidden-layer activation function.

    Every other hyper-parameter is pinned to a single value. The search is
    fitted on the module-level X and Y with `kfolds`-fold cross-validation,
    then the best result and the mean/std test score of every candidate
    are printed.
    """
    param_grid = {
        # The only dimension that is actually searched.
        'activation': ['hard_sigmoid', 'sigmoid', 'tanh', 'relu', 'linear'],
        'momentum': [0.25],
        'learn_rate': [0.5],
        'decay_rate': [5e-05],
        'dropout_rate': [0],
        'neurons': [25],
        'epochs': [200],
        'batch_size': [2000],
    }

    classifier = KerasClassifier(build_fn=create_model)
    search = GridSearchCV(estimator=classifier, cv=kfolds, param_grid=param_grid, n_jobs=1)
    outcome = search.fit(X, Y)

    # summarize results
    print("Best: %f using %s" % (outcome.best_score_, outcome.best_params_))
    rows = zip(outcome.cv_results_['mean_test_score'],
               outcome.cv_results_['std_test_score'],
               outcome.cv_results_['params'])
    for mean, stdev, param in rows:
        print("%f (%f) with: %r" % (mean, stdev, param))

## Find Best Model

In [None]:
import multiprocessing
    
    
if __name__ == "__main__":
    # Find best params. Run in separate process to make sure memory is deallocated afterwards.
    # Exactly one search is launched per run; un-comment a different target below to switch.
#     p = multiprocessing.Process(target=grid_search_learning_params)
#     p = multiprocessing.Process(target=grid_search_neurons)
    p = multiprocessing.Process(target=grid_search_activation)

    # Start the child process and block until the grid search has finished.
    p.start()
    p.join()

### Best Learning Parameters

### Best Neuron Count

### Best Activation Function

## Cross Validation analysis.

### Setup ROC and AUC code.

In [None]:
%matplotlib inline 
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# ROC plot and AUC statistic, based on the code from the following source:
########################################
# Title: Tutorial 3-Titanic3 Cross-validation with ROC analysis
# Author: Chuan Lu
# Date: 06/04/2017
# Code Version: 47c58c0
# Available: https://github.com/aberML/CSM6420/
########################################

# (Hacked together, but does the job ;-) )
# Module-level state shared by ROC_AUC() and display_plot().
# mean_acc, mean_auc and all_tpr are only ever reset in the code visible
# here; they are never read.
mean_acc = 0.0
mean_auc = 0.0
all_tpr = []
# Per-fold accuracy and AUC values, appended to by ROC_AUC().
all_acc = []
all_auc = []

# Fold counter, incremented by ROC_AUC() and used in the plot legend.
i=0

def ROC_AUC(y_test, y_prob):
    """Score one fold, add its ROC curve to the current plot and record it.

    Parameters:
        y_test: true labels of the fold.
        y_prob: predicted scores for the same samples.

    Returns:
        (acc, roc_auc): the fold's accuracy and area under the ROC curve.
        Both values are also appended to the module-level `all_acc` and
        `all_auc` lists, and the module-level fold counter `i` is advanced.
    """
    global i
    i += 1

    # Hard class labels: the scores rounded to zero decimals.
    predicted_labels = np.around(y_prob, decimals=0)

    # Fraction of samples whose rounded score equals the true label.
    n_correct = np.sum(y_test == predicted_labels)
    acc = n_correct * 1. / len(y_test)
    print("Prediction accuracy:", acc)

    # ROC curve and the area under it, computed from the raw scores.
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    roc_auc = auc(fpr, tpr)
    print("Area under ROC curve (AUC):", roc_auc)

    plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    all_acc.append(acc)
    all_auc.append(roc_auc)
    return acc, roc_auc

In [None]:
def display_plot(title = 'Receiver operating characteristic example'):
    """Finish the ROC figure, print the summary statistics and reset state.

    Adds the diagonal reference line, axis limits, labels, title and legend
    to the current matplotlib figure. Then prints the per-fold accuracies
    and AUCs collected in `all_acc` / `all_auc`, together with their mean
    and 1.96 times their standard deviation. Finally resets the
    module-level accumulators and the fold counter so the helpers can be
    re-used.

    Parameters:
        title: title placed on the plot.
    """
    global i, mean_acc, mean_auc, all_tpr, all_acc, all_auc

    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6))
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")

    acc_values = np.asarray(all_acc)
    auc_values = np.asarray(all_auc)

    # Mean +/- 1.96 * std over the folds, for both accuracy and AUC.
    print(acc_values)
    print("Mean Accuracy: %0.3f (+/- %0.3f)" % (acc_values.mean(), acc_values.std() * 1.96))
    print(auc_values)
    print("Mean AUC: %0.3f (+/- %0.3f)" % (auc_values.mean(), auc_values.std() * 1.96))

    # Reset values for re-use.
    mean_acc = mean_auc = 0.0
    all_tpr, all_acc, all_auc = [], [], []
    i = 0

### ROC analysis.

In [None]:
import multiprocessing

# sklearn.cross_validation was deprecated and later removed from
# scikit-learn; sklearn.model_selection (already used for GridSearchCV in
# this file) provides the replacement splitter under the same alias.
from sklearn.model_selection import StratifiedKFold as SKFold

# Kept for reference. It is not passed to the splitter because shuffling is
# off (the default), in which case a random_state has no effect.
random_seed = 1234

# Stratify on the first one-hot label column. The model_selection splitter
# is not iterable itself, so the (train_indices, test_indices) pairs are
# materialised once; `for train, test in scv` keeps working.
scv = list(SKFold(n_splits=kfolds).split(X, Y[:, 0]))

def optimal_model(samples):
    """Train the tuned model on one fold and score that fold and test.csv.

    Parameters:
        samples: tuple ``(X_train, y_train, X_test, y_test)`` for one
            cross-validation fold; the label arrays are one-hot encoded
            with two columns.

    Returns:
        A tuple ``(y_true, y_score, pred_df)`` where ``y_true`` is the
        second label column of the fold's held-out data, ``y_score`` is
        the model's second output column for that data, and ``pred_df`` is
        a DataFrame with "TestId" and "PredictedScore" columns for the
        rows of test.csv.
    """
    X_train, y_train, X_test, y_test = samples

    # build the model with the hyper-parameters chosen by the grid searches
    model = create_model(activation='sigmoid',
                         decay_rate=5e-05,
                         dropout_rate=0,
                         learn_rate=0.5,
                         momentum=0.25,
                         neurons=10)

    # Fit the model, reporting the held-out fold as validation data.
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=200, batch_size=2000, verbose=2)

    # scores[1] is the first compiled metric (create_model compiles with
    # metrics=['accuracy']).
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Baseline Error: %.2f%%" % (100-scores[1]*100))

    # Prepare test predictions for submission.
    test = pd.read_csv("test.csv")
    X_sub, Id = test.iloc[:,1:], test["TestId"]

    # predict() already returns the softmax outputs, and it is the call
    # make_prediction() uses too.
    Y_pred = model.predict(X_sub)[:,1]

    pred_df = pd.DataFrame(Id)
    pred_df = pred_df.join(pd.DataFrame({"PredictedScore": Y_pred}))

    return y_test[:,1], model.predict(X_test)[:,1], pred_df

In [None]:
def ROC_analysis():
    """Cross-validate the tuned model and produce the ROC plot.

    For every (train, test) index pair in the module-level `scv`, the
    matching slices of X and Y are handed to optimal_model() through a
    one-process pool. Each fold is then scored with ROC_AUC(). Whenever a
    fold's accuracy + AUC beats the best sum seen so far, that fold's
    test-set predictions are written to "prednn.csv". Finally the plot is
    completed with display_plot() and saved as "ROC_NN.pdf".
    """
    dat_split = [
        (X.iloc[training_set], Y[training_set], X.iloc[test_set], Y[test_set])
        for training_set, test_set in scv
    ]

    # The context manager shuts the worker down once map() has returned
    # (or raised), so the pool is never left open.
    with multiprocessing.Pool(processes=1) as pool:
        results = pool.map(optimal_model, dat_split)

    best_auc = best_acc = 0

    for y_test, y_pred, pred_df in results:
        print(pred_df.shape)
        acc, roc_auc = ROC_AUC(y_test, y_pred)

        if acc + roc_auc > best_auc + best_acc:
            # Save predictions for the best result.
            best_acc = acc
            best_auc = roc_auc
            print("Using the fit with an acc of", acc, "and an AUC of", roc_auc)
            pred_df.to_csv("prednn.csv", index = False)

    display_plot("Neural Network Receiver Operating Characteristic")
    plt.savefig("ROC_NN.pdf")
    
# Run the cross-validated ROC analysis only when executed as the main module.
if __name__ == "__main__":
    ROC_analysis()

In [1]:
import sys
import sklearn
import keras
import tensorflow
import pandas as pd

# Report the interpreter and library versions in use.
for label, version in (
    ('Python: ', sys.version_info),
    ('Pandas: ', pd.__version__),
    ('Sklearn: ', sklearn.__version__),
    ('Keras: ', keras.__version__),
    ('tensorflow: ', tensorflow.__version__),
):
    print(label, version)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Python:  sys.version_info(major=3, minor=6, micro=5, releaselevel='final', serial=0)
Pandas:  0.22.0
Sklearn:  0.19.1
Keras:  2.1.6
tensorflow:  1.8.0
