In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import os
import matplotlib.pylab as plt
from sys import getsizeof
from pandas import DataFrame
import scipy.signal
from split_safe import SafeDataFilter



In [3]:
def group_into_bands(fft, fft_freq, nfreq_bands):
    if nfreq_bands == 178:
        bands = range(1, 180)
    elif nfreq_bands == 4:
        bands = [0.1, 4, 8, 12, 30]
    elif nfreq_bands == 6:
        bands = [0.1, 4, 8, 12, 30, 70, 180]
    # http://onlinelibrary.wiley.com/doi/10.1111/j.1528-1167.2011.03138.x/pdf
    elif nfreq_bands == 8:
        bands = [0.1, 4, 8, 12, 30, 50, 70, 100, 180]
    elif nfreq_bands == 12:
        bands = [0.1, 4, 8, 12, 30, 40, 50, 60, 70, 85, 100, 140, 180]
    elif nfreq_bands == 9:
        bands = [0.1, 4, 8, 12, 21, 30, 50, 70, 100, 180]
    else:
        raise ValueError('wrong number of frequency bands')
    freq_bands = np.digitize(fft_freq, bands)
    df = DataFrame({'fft': fft, 'band': freq_bands})
    df = df.groupby('band').mean()
    return df.fft[1:-1]

# returns channels x bins x time-frames
def compute_fft(x, data_length_sec, sampling_frequency, nfreq_bands, win_length_sec, stride_sec, features):
    n_channels = x.shape[0]
    n_timesteps = (data_length_sec - win_length_sec) / stride_sec + 1
    n_fbins = nfreq_bands + 1 if 'std' in features else nfreq_bands

    x2 = np.zeros((n_channels, n_fbins, n_timesteps))
    for i in range(n_channels):
        xc = np.zeros((n_fbins, n_timesteps))
        for frame_num, w in enumerate(range(0, data_length_sec - win_length_sec + 1, stride_sec)):
            #print frame_num, w
            xw = x[i, w * sampling_frequency: (w + win_length_sec) * sampling_frequency]
            fft = np.log10(np.absolute(np.fft.rfft(xw)))
            fft_freq = np.fft.rfftfreq(n=xw.shape[-1], d=1.0 / sampling_frequency)
            xc[:nfreq_bands, frame_num] = group_into_bands(fft, fft_freq, nfreq_bands)
            if 'std' in features:
                xc[-1, frame_num] = np.std(xw)
        x2[i, :, :] = xc
    return x2

# filters out the low freq and high freq 
def filter_freq(x, new_sampling_frequency, data_length_sec, lowcut, highcut):
    x1 = scipy.signal.resample(x, new_sampling_frequency * data_length_sec, axis=1)

    nyq = 0.5 * new_sampling_frequency
    b, a = sc.signal.butter(5, np.array([lowcut, highcut]) / nyq, btype='band')
    x_filt = sc.signal.lfilter(b, a, x1, axis=1)
    return np.float32(x_filt)


data_length_sec = 600
sampling_frequency = 400
nfreq_bands = 6    # can play around with these:
win_length_sec = 60 
stride_sec = 60
features = "meanlog_std"  # will create a new additional bin of standard deviation of other bins



# Computes X and y from all the .npy files in a directory
# X = n x channels x filters x time-frames
# y = n x 1
def compute_X_Y(direc):
    safesplit = SafeDataFilter()

    n = len([name for name in os.listdir(direc)])
    X = np.zeros((n, 16, 7, 10))
    y = np.empty((n, 1))
    for i, filename in enumerate(os.listdir(direc)):
        if filename.endswith('.npy'):
            print("Computing ", filename)
            f = np.load(direc + filename)
            f = f.T
            filtered = filter_freq(f, 400, 600, 0.1, 180.0)
            new_x = compute_fft(filtered, data_length_sec, sampling_frequency, nfreq_bands, 
                                win_length_sec, stride_sec, features)
            X[i, ] = new_x
#             if filename.endswith('1.npy'):
#                 y[i] = 1
#             elif filename.endswith('0.npy'):
#                 y[i] = 0
#             continue
            label = safesplit.get_label(filename)
            if label is '1':
                y[i] = 1
            elif label is '0':
                y[i] = 0
        else:
            continue
    
    return X, y

direc_train = '/Users/Anuar_The_Great/desktop/ML/all_new_tests/'
X_train, y_train1 = compute_X_Y(direc_train)
X_train = X_train.reshape((X_train.shape[0], 10, 112))
ys = np.zeros((y_train1.shape[0], 2))
ys[:, 1] = (y_train1 > 0).reshape(y_train1.shape[0],)
ys[:, 0] = (y_train1 < 1).reshape(y_train1.shape[0],)
y_train = ys

np.save('/Users/Anuar_The_Great/desktop/X_test.npy', X_train)
np.save('/Users/Anuar_The_Great/desktop/y_test.npy', y_train)

# direc_test = '/Users/Anuar_The_Great/desktop/ML/train01/'
# X_test, y_test1 = compute_X_Y(direc_test)

# # Reshaping the (bins x channels)
# X_test1 = X_test.reshape((X_test.shape[0], 10, 112))

# # making the binary output value into a vector [0, 1] or [1, 0]
# yt = np.zeros((y_test1.shape[0], 2))
# yt[:, 1] = (y_test1 > 0).reshape(y_test1.shape[0],)
# yt[:, 0] = (y_test1 < 1).reshape(y_test1.shape[0],)
# y_test = yt

('Computing ', 'new_1_1.npy')
('Computing ', 'new_1_10.npy')
('Computing ', 'new_1_100.npy')
('Computing ', 'new_1_101.npy')


KeyboardInterrupt: 

In [2]:
X_train = np.load('/Users/Anuar_The_Great/desktop/X_train.npy')
y_train = np.load('/Users/Anuar_The_Great/desktop/y_train.npy')
X_test = np.load('/Users/Anuar_The_Great/desktop/X_test.npy')
X_train[X_train < -100000] = 0
X_test[X_test < -100000] = 0
X_train = np.abs(X_train)
X_test = np.abs(X_test)
print(X_train.shape)
print(X_test.shape)

(2790, 10, 112)
(1908, 10, 112)


In [4]:
#from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Reshape
from keras.layers import Convolution1D
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import roc_auc_score
from numpy.random import randint, random
from keras.regularizers import l2, activity_l2



# create your model using this function
def create_model(nb_filter1=16, nb_filter2=32, activation1='relu', l2_weight1=0.0, l2_weight2=0.0,
                 dropout_rate=0.3, optimizer='adam', nb_epoch=10, hidden_dims=112):
    #batch_size = 100
    filter_length = 1
    n = X_train.shape[0]
    
    print('Build model...')
    model = Sequential()
    model.add(Convolution1D(nb_filter=nb_filter1,
                            filter_length=filter_length,
                            init='glorot_normal',
                            border_mode='valid',
                            activation=activation1,
                            subsample_length=1,
                            W_regularizer=l2(l2_weight1),
                            input_shape=(10, 112)))
    
    model.add(Convolution1D(nb_filter=nb_filter2,
                            filter_length=1,
                            init='glorot_normal',
                            border_mode='valid',
                            subsample_length=1,
                            W_regularizer=l2(l2_weight2),
                            activation=activation1))    

    model.add(Reshape((nb_filter2*10,)))
    model.add(Dense(hidden_dims))
    model.add(Activation(activation1))
    model.add(Dropout(dropout_rate))
    #model.add(Dense(hidden_dims))
    #model.add(Activation('sigmoid'))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['binary_accuracy'])
    #model.summary()
    return model
    


model = KerasClassifier(build_fn=create_model, batch_size=32, verbose=0)
param_grid = {'nb_filter1': [16, 32],
              'nb_filter2': [32, 48],
              'activation1': ['relu'],
              'dropout_rate': [0.3, 0.5],
              'optimizer': ['adam'],
              'nb_epoch': [10, 20]}

# param_grid = {'nb_filter1': [16, 32],
#               'nb_filter2': [16, 32, 48],
#               'activation1': ['sigmoid', 'relu'],
#               'dropout_rate': [0.3, 0.4, 0.5, 0.6],
#               'optimizer': ['adam', 'rmsprop', 'nadam'],
#               'nb_epoch': [10, 30, 50]}
dropout_array = np.random.random(500)*0.5+0.20
param_distributions = {'nb_filter1': randint(8, 32, 500),
                       'nb_filter2': randint(8, 64, 500),
                       'activation1': ['relu'],
                       'dropout_rate': sp.stats.uniform(),
                       'optimizer': ['adam', 'rmsprop', 'nadam'],
                       'nb_epoch': randint(15, 80, 500),
                       'l2_weight1': list(np.linspace(0, 3.5, 100)),
                       'l2_weight2': list(np.linspace(0, 3.5, 100)),
                       'hidden_dims': randint(8, 64, 500)}
n_iter_search = 30

fit_params = {'class_weight': {0: 1.0, 1: 9.14}}

# param_dist = {"max_depth": [3, None],
#               "max_features": sp_randint(1, 11),
#               "min_samples_split": sp_randint(1, 11),
#               "min_samples_leaf": sp_randint(1, 11),
#               "bootstrap": [True, False],
#               "criterion": ["gini", "entropy"],}

# grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=5, n_jobs=1, scoring='roc_auc', 
#                     fit_params=fit_params)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, verbose=10, n_jobs=1,
                          scoring='roc_auc', fit_params=fit_params, n_iter=n_iter_search, cv=4)
X = X_train
Y = y_train
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


Fitting 4 folds for each of 30 candidates, totalling 120 fits
[CV] dropout_rate=0.574399165801, optimizer=rmsprop, hidden_dims=53, l2_weight1=2.08585858586, nb_epoch=34, l2_weight2=0.106060606061, nb_filter1=20, nb_filter2=23, activation1=relu 
Build model...
[CV]  dropout_rate=0.574399165801, optimizer=rmsprop, hidden_dims=53, l2_weight1=2.08585858586, nb_epoch=34, l2_weight2=0.106060606061, nb_filter1=20, nb_filter2=23, activation1=relu, score=0.650620 -  10.1s
[CV] dropout_rate=0.574399165801, optimizer=rmsprop, hidden_dims=53, l2_weight1=2.08585858586, nb_epoch=34, l2_weight2=0.106060606061, nb_filter1=20, nb_filter2=23, activation1=relu 
Build model...


[Parallel(n_jobs=1)]: Done   1 tasks       | elapsed:   10.1s


[CV]  dropout_rate=0.574399165801, optimizer=rmsprop, hidden_dims=53, l2_weight1=2.08585858586, nb_epoch=34, l2_weight2=0.106060606061, nb_filter1=20, nb_filter2=23, activation1=relu, score=0.352220 -   9.2s
[CV] dropout_rate=0.574399165801, optimizer=rmsprop, hidden_dims=53, l2_weight1=2.08585858586, nb_epoch=34, l2_weight2=0.106060606061, nb_filter1=20, nb_filter2=23, activation1=relu 
Build model...
[CV]  dropout_rate=0.574399165801, optimizer=rmsprop, hidden_dims=53, l2_weight1=2.08585858586, nb_epoch=34, l2_weight2=0.106060606061, nb_filter1=20, nb_filter2=23, activation1=relu, score=0.184940 -   8.6s
[CV] dropout_rate=0.574399165801, optimizer=rmsprop, hidden_dims=53, l2_weight1=2.08585858586, nb_epoch=34, l2_weight2=0.106060606061, nb_filter1=20, nb_filter2=23, activation1=relu 
Build model...
[CV]  dropout_rate=0.574399165801, optimizer=rmsprop, hidden_dims=53, l2_weight1=2.08585858586, nb_epoch=34, l2_weight2=0.106060606061, nb_filter1=20, nb_filter2=23, activation1=relu, scor

[Parallel(n_jobs=1)]: Done   4 tasks       | elapsed:   37.4s


[CV]  dropout_rate=0.656785000054, optimizer=nadam, hidden_dims=10, l2_weight1=0.247474747475, nb_epoch=19, l2_weight2=3.00505050505, nb_filter1=29, nb_filter2=58, activation1=relu, score=0.500000 -  22.8s
[CV] dropout_rate=0.656785000054, optimizer=nadam, hidden_dims=10, l2_weight1=0.247474747475, nb_epoch=19, l2_weight2=3.00505050505, nb_filter1=29, nb_filter2=58, activation1=relu 
Build model...
[CV]  dropout_rate=0.656785000054, optimizer=nadam, hidden_dims=10, l2_weight1=0.247474747475, nb_epoch=19, l2_weight2=3.00505050505, nb_filter1=29, nb_filter2=58, activation1=relu, score=0.500000 -  16.7s
[CV] dropout_rate=0.656785000054, optimizer=nadam, hidden_dims=10, l2_weight1=0.247474747475, nb_epoch=19, l2_weight2=3.00505050505, nb_filter1=29, nb_filter2=58, activation1=relu 
Build model...
[CV]  dropout_rate=0.656785000054, optimizer=nadam, hidden_dims=10, l2_weight1=0.247474747475, nb_epoch=19, l2_weight2=3.00505050505, nb_filter1=29, nb_filter2=58, activation1=relu, score=0.500000

[Parallel(n_jobs=1)]: Done   7 tasks       | elapsed:  1.6min


[CV]  dropout_rate=0.656785000054, optimizer=nadam, hidden_dims=10, l2_weight1=0.247474747475, nb_epoch=19, l2_weight2=3.00505050505, nb_filter1=29, nb_filter2=58, activation1=relu, score=0.500000 -  21.2s
[CV] dropout_rate=0.931358973622, optimizer=rmsprop, hidden_dims=53, l2_weight1=1.06060606061, nb_epoch=71, l2_weight2=1.09595959596, nb_filter1=19, nb_filter2=35, activation1=relu 
Build model...
[CV]  dropout_rate=0.931358973622, optimizer=rmsprop, hidden_dims=53, l2_weight1=1.06060606061, nb_epoch=71, l2_weight2=1.09595959596, nb_filter1=19, nb_filter2=35, activation1=relu, score=0.500000 -  21.2s
[CV] dropout_rate=0.931358973622, optimizer=rmsprop, hidden_dims=53, l2_weight1=1.06060606061, nb_epoch=71, l2_weight2=1.09595959596, nb_filter1=19, nb_filter2=35, activation1=relu 
Build model...
[CV]  dropout_rate=0.931358973622, optimizer=rmsprop, hidden_dims=53, l2_weight1=1.06060606061, nb_epoch=71, l2_weight2=1.09595959596, nb_filter1=19, nb_filter2=35, activation1=relu, score=0.50

[Parallel(n_jobs=1)]: Done  12 tasks       | elapsed:  3.4min


[CV]  dropout_rate=0.771533163872, optimizer=nadam, hidden_dims=41, l2_weight1=1.27272727273, nb_epoch=22, l2_weight2=2.22727272727, nb_filter1=8, nb_filter2=10, activation1=relu, score=0.500000 -   7.3s
[CV] dropout_rate=0.771533163872, optimizer=nadam, hidden_dims=41, l2_weight1=1.27272727273, nb_epoch=22, l2_weight2=2.22727272727, nb_filter1=8, nb_filter2=10, activation1=relu 
Build model...
[CV]  dropout_rate=0.771533163872, optimizer=nadam, hidden_dims=41, l2_weight1=1.27272727273, nb_epoch=22, l2_weight2=2.22727272727, nb_filter1=8, nb_filter2=10, activation1=relu, score=0.499687 -   6.4s
[CV] dropout_rate=0.771533163872, optimizer=nadam, hidden_dims=41, l2_weight1=1.27272727273, nb_epoch=22, l2_weight2=2.22727272727, nb_filter1=8, nb_filter2=10, activation1=relu 
Build model...
[CV]  dropout_rate=0.771533163872, optimizer=nadam, hidden_dims=41, l2_weight1=1.27272727273, nb_epoch=22, l2_weight2=2.22727272727, nb_filter1=8, nb_filter2=10, activation1=relu, score=0.178333 -   6.6s


[Parallel(n_jobs=1)]: Done  17 tasks       | elapsed:  4.1min


[CV]  dropout_rate=0.860408774442, optimizer=adam, hidden_dims=63, l2_weight1=0.0353535353535, nb_epoch=27, l2_weight2=1.37878787879, nb_filter1=19, nb_filter2=60, activation1=relu, score=0.595938 -  12.6s
[CV] dropout_rate=0.860408774442, optimizer=adam, hidden_dims=63, l2_weight1=0.0353535353535, nb_epoch=27, l2_weight2=1.37878787879, nb_filter1=19, nb_filter2=60, activation1=relu 
Build model...
[CV]  dropout_rate=0.860408774442, optimizer=adam, hidden_dims=63, l2_weight1=0.0353535353535, nb_epoch=27, l2_weight2=1.37878787879, nb_filter1=19, nb_filter2=60, activation1=relu, score=0.167381 -  12.4s
[CV] dropout_rate=0.860408774442, optimizer=adam, hidden_dims=63, l2_weight1=0.0353535353535, nb_epoch=27, l2_weight2=1.37878787879, nb_filter1=19, nb_filter2=60, activation1=relu 
Build model...
[CV]  dropout_rate=0.860408774442, optimizer=adam, hidden_dims=63, l2_weight1=0.0353535353535, nb_epoch=27, l2_weight2=1.37878787879, nb_filter1=19, nb_filter2=60, activation1=relu, score=0.586559

[Parallel(n_jobs=1)]: Done  24 tasks       | elapsed:  5.6min


[CV]  dropout_rate=0.551820883043, optimizer=nadam, hidden_dims=30, l2_weight1=2.61616161616, nb_epoch=56, l2_weight2=1.34343434343, nb_filter1=20, nb_filter2=40, activation1=relu, score=0.500000 - 1.0min
[CV] dropout_rate=0.551820883043, optimizer=nadam, hidden_dims=30, l2_weight1=2.61616161616, nb_epoch=56, l2_weight2=1.34343434343, nb_filter1=20, nb_filter2=40, activation1=relu 
Build model...
[CV]  dropout_rate=0.551820883043, optimizer=nadam, hidden_dims=30, l2_weight1=2.61616161616, nb_epoch=56, l2_weight2=1.34343434343, nb_filter1=20, nb_filter2=40, activation1=relu, score=0.500000 - 1.0min
[CV] dropout_rate=0.551820883043, optimizer=nadam, hidden_dims=30, l2_weight1=2.61616161616, nb_epoch=56, l2_weight2=1.34343434343, nb_filter1=20, nb_filter2=40, activation1=relu 
Build model...
[CV]  dropout_rate=0.551820883043, optimizer=nadam, hidden_dims=30, l2_weight1=2.61616161616, nb_epoch=56, l2_weight2=1.34343434343, nb_filter1=20, nb_filter2=40, activation1=relu, score=0.500000 -  5

[Parallel(n_jobs=1)]: Done  31 tasks       | elapsed: 11.5min


[CV]  dropout_rate=0.402852526596, optimizer=adam, hidden_dims=39, l2_weight1=1.41414141414, nb_epoch=46, l2_weight2=1.02525252525, nb_filter1=30, nb_filter2=37, activation1=relu, score=0.542594 -  38.5s
[CV] dropout_rate=0.65242156451, optimizer=nadam, hidden_dims=8, l2_weight1=2.61616161616, nb_epoch=34, l2_weight2=1.90909090909, nb_filter1=8, nb_filter2=35, activation1=relu 
Build model...
[CV]  dropout_rate=0.65242156451, optimizer=nadam, hidden_dims=8, l2_weight1=2.61616161616, nb_epoch=34, l2_weight2=1.90909090909, nb_filter1=8, nb_filter2=35, activation1=relu, score=0.500000 -  13.6s
[CV] dropout_rate=0.65242156451, optimizer=nadam, hidden_dims=8, l2_weight1=2.61616161616, nb_epoch=34, l2_weight2=1.90909090909, nb_filter1=8, nb_filter2=35, activation1=relu 
Build model...
[CV]  dropout_rate=0.65242156451, optimizer=nadam, hidden_dims=8, l2_weight1=2.61616161616, nb_epoch=34, l2_weight2=1.90909090909, nb_filter1=8, nb_filter2=35, activation1=relu, score=0.500000 -  14.1s
[CV] dro

[Parallel(n_jobs=1)]: Done  40 tasks       | elapsed: 13.9min


[CV]  dropout_rate=0.683230920447, optimizer=adam, hidden_dims=31, l2_weight1=3.11111111111, nb_epoch=39, l2_weight2=1.06060606061, nb_filter1=28, nb_filter2=20, activation1=relu, score=0.500000 -  25.7s
[CV] dropout_rate=0.683230920447, optimizer=adam, hidden_dims=31, l2_weight1=3.11111111111, nb_epoch=39, l2_weight2=1.06060606061, nb_filter1=28, nb_filter2=20, activation1=relu 
Build model...
[CV]  dropout_rate=0.683230920447, optimizer=adam, hidden_dims=31, l2_weight1=3.11111111111, nb_epoch=39, l2_weight2=1.06060606061, nb_filter1=28, nb_filter2=20, activation1=relu, score=0.579650 -  24.5s
[CV] dropout_rate=0.683230920447, optimizer=adam, hidden_dims=31, l2_weight1=3.11111111111, nb_epoch=39, l2_weight2=1.06060606061, nb_filter1=28, nb_filter2=20, activation1=relu 
Build model...
[CV]  dropout_rate=0.683230920447, optimizer=adam, hidden_dims=31, l2_weight1=3.11111111111, nb_epoch=39, l2_weight2=1.06060606061, nb_filter1=28, nb_filter2=20, activation1=relu, score=0.249940 -  23.0s


[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed: 21.8min


[CV]  dropout_rate=0.913200041308, optimizer=nadam, hidden_dims=26, l2_weight1=1.66161616162, nb_epoch=58, l2_weight2=0.742424242424, nb_filter1=23, nb_filter2=52, activation1=relu, score=0.500000 - 1.1min
[CV] dropout_rate=0.913200041308, optimizer=nadam, hidden_dims=26, l2_weight1=1.66161616162, nb_epoch=58, l2_weight2=0.742424242424, nb_filter1=23, nb_filter2=52, activation1=relu 
Build model...
[CV]  dropout_rate=0.913200041308, optimizer=nadam, hidden_dims=26, l2_weight1=1.66161616162, nb_epoch=58, l2_weight2=0.742424242424, nb_filter1=23, nb_filter2=52, activation1=relu, score=0.500000 - 1.1min
[CV] dropout_rate=0.913200041308, optimizer=nadam, hidden_dims=26, l2_weight1=1.66161616162, nb_epoch=58, l2_weight2=0.742424242424, nb_filter1=23, nb_filter2=52, activation1=relu 
Build model...
[CV]  dropout_rate=0.913200041308, optimizer=nadam, hidden_dims=26, l2_weight1=1.66161616162, nb_epoch=58, l2_weight2=0.742424242424, nb_filter1=23, nb_filter2=52, activation1=relu, score=0.500000

[Parallel(n_jobs=1)]: Done  60 tasks       | elapsed: 26.4min


[CV]  dropout_rate=0.516599352734, optimizer=adam, hidden_dims=9, l2_weight1=2.33333333333, nb_epoch=58, l2_weight2=0.848484848485, nb_filter1=17, nb_filter2=30, activation1=relu, score=0.333925 -  34.8s
[CV] dropout_rate=0.516599352734, optimizer=adam, hidden_dims=9, l2_weight1=2.33333333333, nb_epoch=58, l2_weight2=0.848484848485, nb_filter1=17, nb_filter2=30, activation1=relu 
Build model...
[CV]  dropout_rate=0.516599352734, optimizer=adam, hidden_dims=9, l2_weight1=2.33333333333, nb_epoch=58, l2_weight2=0.848484848485, nb_filter1=17, nb_filter2=30, activation1=relu, score=0.500000 -  35.7s
[CV] dropout_rate=0.516599352734, optimizer=adam, hidden_dims=9, l2_weight1=2.33333333333, nb_epoch=58, l2_weight2=0.848484848485, nb_filter1=17, nb_filter2=30, activation1=relu 
Build model...
[CV]  dropout_rate=0.516599352734, optimizer=adam, hidden_dims=9, l2_weight1=2.33333333333, nb_epoch=58, l2_weight2=0.848484848485, nb_filter1=17, nb_filter2=30, activation1=relu, score=0.387143 -  33.2s


[Parallel(n_jobs=1)]: Done  71 tasks       | elapsed: 32.1min


[CV]  dropout_rate=0.193543398096, optimizer=nadam, hidden_dims=43, l2_weight1=3.11111111111, nb_epoch=26, l2_weight2=0.60101010101, nb_filter1=14, nb_filter2=59, activation1=relu, score=0.649711 -  24.1s
[CV] dropout_rate=0.748048629798, optimizer=rmsprop, hidden_dims=56, l2_weight1=0.247474747475, nb_epoch=47, l2_weight2=0.388888888889, nb_filter1=30, nb_filter2=60, activation1=relu 
Build model...
[CV]  dropout_rate=0.748048629798, optimizer=rmsprop, hidden_dims=56, l2_weight1=0.247474747475, nb_epoch=47, l2_weight2=0.388888888889, nb_filter1=30, nb_filter2=60, activation1=relu, score=0.647041 -  18.2s
[CV] dropout_rate=0.748048629798, optimizer=rmsprop, hidden_dims=56, l2_weight1=0.247474747475, nb_epoch=47, l2_weight2=0.388888888889, nb_filter1=30, nb_filter2=60, activation1=relu 
Build model...
[CV]  dropout_rate=0.748048629798, optimizer=rmsprop, hidden_dims=56, l2_weight1=0.247474747475, nb_epoch=47, l2_weight2=0.388888888889, nb_filter1=30, nb_filter2=60, activation1=relu, sco

[Parallel(n_jobs=1)]: Done  84 tasks       | elapsed: 37.0min


[CV]  dropout_rate=0.177094691694, optimizer=rmsprop, hidden_dims=12, l2_weight1=0.0707070707071, nb_epoch=41, l2_weight2=2.19191919192, nb_filter1=13, nb_filter2=44, activation1=relu, score=0.670668 -  11.0s
[CV] dropout_rate=0.177094691694, optimizer=rmsprop, hidden_dims=12, l2_weight1=0.0707070707071, nb_epoch=41, l2_weight2=2.19191919192, nb_filter1=13, nb_filter2=44, activation1=relu 
Build model...
[CV]  dropout_rate=0.177094691694, optimizer=rmsprop, hidden_dims=12, l2_weight1=0.0707070707071, nb_epoch=41, l2_weight2=2.19191919192, nb_filter1=13, nb_filter2=44, activation1=relu, score=0.321238 -  11.7s
[CV] dropout_rate=0.177094691694, optimizer=rmsprop, hidden_dims=12, l2_weight1=0.0707070707071, nb_epoch=41, l2_weight2=2.19191919192, nb_filter1=13, nb_filter2=44, activation1=relu 
Build model...
[CV]  dropout_rate=0.177094691694, optimizer=rmsprop, hidden_dims=12, l2_weight1=0.0707070707071, nb_epoch=41, l2_weight2=2.19191919192, nb_filter1=13, nb_filter2=44, activation1=relu,

[Parallel(n_jobs=1)]: Done  97 tasks       | elapsed: 44.1min


[CV]  dropout_rate=0.131272604726, optimizer=rmsprop, hidden_dims=8, l2_weight1=3.00505050505, nb_epoch=67, l2_weight2=1.13131313131, nb_filter1=14, nb_filter2=59, activation1=relu, score=0.556903 -  17.3s
[CV] dropout_rate=0.131272604726, optimizer=rmsprop, hidden_dims=8, l2_weight1=3.00505050505, nb_epoch=67, l2_weight2=1.13131313131, nb_filter1=14, nb_filter2=59, activation1=relu 
Build model...
[CV]  dropout_rate=0.131272604726, optimizer=rmsprop, hidden_dims=8, l2_weight1=3.00505050505, nb_epoch=67, l2_weight2=1.13131313131, nb_filter1=14, nb_filter2=59, activation1=relu, score=0.185238 -  16.2s
[CV] dropout_rate=0.131272604726, optimizer=rmsprop, hidden_dims=8, l2_weight1=3.00505050505, nb_epoch=67, l2_weight2=1.13131313131, nb_filter1=14, nb_filter2=59, activation1=relu 
Build model...
[CV]  dropout_rate=0.131272604726, optimizer=rmsprop, hidden_dims=8, l2_weight1=3.00505050505, nb_epoch=67, l2_weight2=1.13131313131, nb_filter1=14, nb_filter2=59, activation1=relu, score=0.500000

[Parallel(n_jobs=1)]: Done 112 tasks       | elapsed: 47.3min


[CV]  dropout_rate=0.861324023153, optimizer=rmsprop, hidden_dims=62, l2_weight1=0.883838383838, nb_epoch=17, l2_weight2=3.25252525253, nb_filter1=16, nb_filter2=58, activation1=relu, score=0.500000 -   6.9s
[CV] dropout_rate=0.861324023153, optimizer=rmsprop, hidden_dims=62, l2_weight1=0.883838383838, nb_epoch=17, l2_weight2=3.25252525253, nb_filter1=16, nb_filter2=58, activation1=relu 
Build model...
[CV]  dropout_rate=0.861324023153, optimizer=rmsprop, hidden_dims=62, l2_weight1=0.883838383838, nb_epoch=17, l2_weight2=3.25252525253, nb_filter1=16, nb_filter2=58, activation1=relu, score=0.569383 -   6.6s
[CV] dropout_rate=0.861324023153, optimizer=rmsprop, hidden_dims=62, l2_weight1=0.883838383838, nb_epoch=17, l2_weight2=3.25252525253, nb_filter1=16, nb_filter2=58, activation1=relu 
Build model...
[CV]  dropout_rate=0.861324023153, optimizer=rmsprop, hidden_dims=62, l2_weight1=0.883838383838, nb_epoch=17, l2_weight2=3.25252525253, nb_filter1=16, nb_filter2=58, activation1=relu, scor

[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed: 52.0min finished


Best: 0.581311 using {'dropout_rate': 0.8613240231529385, 'optimizer': 'rmsprop', 'hidden_dims': 62, 'l2_weight1': 0.88383838383838376, 'nb_epoch': 17, 'l2_weight2': 3.2525252525252526, 'nb_filter1': 16, 'nb_filter2': 58, 'activation1': 'relu'}


In [54]:
grid_result.best_estimator_.model.get_weights()

[array([[[[ -1.40788077e-04,  -5.57521707e-04,  -2.48825760e-04, ...,
            -1.97560985e-05,  -9.98064876e-04,   1.10688145e-13],
          [ -1.35392620e-04,  -5.27751166e-04,  -2.57838285e-04, ...,
            -1.78509272e-05,  -9.49142326e-04,   1.14213977e-13],
          [ -1.35603885e-04,  -4.32697416e-04,  -2.38675348e-04, ...,
            -2.00157847e-05,  -9.68892244e-04,   1.34192553e-11],
          ..., 
          [ -7.26687373e-04,  -1.29189345e-07,   3.38539503e-05, ...,
            -1.78791990e-04,   1.25565464e-04,  -4.64217987e-12],
          [ -3.48266418e-04,  -3.75335389e-08,   1.06253603e-03, ...,
            -1.90441846e-04,  -4.01050114e-04,  -7.76841647e-11],
          [ -1.58353985e-04,  -5.06358592e-05,   4.85011435e-04, ...,
            -1.02640719e-04,   7.89151294e-04,  -1.22917487e-09]]]], dtype=float32),
 array([-0.08475769, -0.09048506, -0.0821263 , -0.12018622, -0.10854337,
        -0.06909075, -0.07121591, -0.09329048, -0.0867572 , -0.07982019,
   

In [46]:
# JUST SOME TESTING. NVM THIS
model = create_model(16, 32, 'sigmoid', 0.3, 'adam', 20, 30)
model_result = model.fit(X_train, y_train, class_weight={0: 1.0, 1: 9.14}, verbose=2, nb_epoch=100)

preds = model_result.model.predict_classes(X_train)
#preds_submission = np.argmax(preds, axis=1).astype('int32')

print("\n")
print(roc_auc_score(y_train[:, 1], preds))

Build model...
Epoch 1/100
0s - loss: 1.3684 - binary_accuracy: 0.4344
Epoch 2/100
0s - loss: 1.3302 - binary_accuracy: 0.4115
Epoch 3/100
0s - loss: 1.3202 - binary_accuracy: 0.3011
Epoch 4/100
0s - loss: 1.3086 - binary_accuracy: 0.4380
Epoch 5/100
0s - loss: 1.3063 - binary_accuracy: 0.2563
Epoch 6/100
0s - loss: 1.2921 - binary_accuracy: 0.3556
Epoch 7/100
0s - loss: 1.2927 - binary_accuracy: 0.3108
Epoch 8/100
0s - loss: 1.2792 - binary_accuracy: 0.4534
Epoch 9/100
0s - loss: 1.2567 - binary_accuracy: 0.5548
Epoch 10/100
0s - loss: 1.2387 - binary_accuracy: 0.5588
Epoch 11/100
0s - loss: 1.2337 - binary_accuracy: 0.6366
Epoch 12/100
0s - loss: 1.2322 - binary_accuracy: 0.6068
Epoch 13/100
0s - loss: 1.2362 - binary_accuracy: 0.6115
Epoch 14/100
0s - loss: 1.2092 - binary_accuracy: 0.6276
Epoch 15/100
0s - loss: 1.2121 - binary_accuracy: 0.6290
Epoch 16/100
0s - loss: 1.2054 - binary_accuracy: 0.6441
Epoch 17/100
0s - loss: 1.2165 - binary_accuracy: 0.5846
Epoch 18/100
0s - loss: 1

In [37]:
# Training using the optimal hyperparameters from RandomizedSearchCV
# on the full dataset
best_params = grid_result.best_params_
model = create_model(best_params['nb_filter1'], best_params['nb_filter2'], best_params['activation1'],
                     best_params['l2_weight1'], best_params['l2_weight2'],
                     best_params['dropout_rate'], best_params['optimizer'], best_params['hidden_dims'])
model_result = model.fit(X_train, y_train, class_weight={0: 1.0, 1: 9.14}, verbose=2, 
                         nb_epoch=17)
preds = model_result.model.predict_classes(X_train)
print(roc_auc_score(y_train[:, 1], preds))

Build model...
Epoch 1/17
0s - loss: 2.0964 - binary_accuracy: 0.6550
Epoch 2/17
0s - loss: 1.3276 - binary_accuracy: 0.5014
Epoch 3/17
0s - loss: 1.3152 - binary_accuracy: 0.4771
Epoch 4/17
0s - loss: 1.3188 - binary_accuracy: 0.4391
Epoch 5/17
0s - loss: 1.2993 - binary_accuracy: 0.5082
Epoch 6/17
0s - loss: 1.3088 - binary_accuracy: 0.5004
Epoch 7/17
0s - loss: 1.3062 - binary_accuracy: 0.4222
Epoch 8/17
0s - loss: 1.3029 - binary_accuracy: 0.5090
Epoch 9/17
0s - loss: 1.2861 - binary_accuracy: 0.4971
Epoch 10/17
0s - loss: 1.3010 - binary_accuracy: 0.4606
Epoch 11/17
0s - loss: 1.2995 - binary_accuracy: 0.4803
Epoch 12/17
0s - loss: 1.3140 - binary_accuracy: 0.4140
Epoch 13/17
0s - loss: 1.2800 - binary_accuracy: 0.4864
Epoch 14/17
0s - loss: 1.2952 - binary_accuracy: 0.5011
Epoch 15/17
0s - loss: 1.2750 - binary_accuracy: 0.5222
Epoch 16/17
0s - loss: 1.2772 - binary_accuracy: 0.4896
Epoch 17/17
0s - loss: 1.2796 - binary_accuracy: 0.5018


In [39]:
# After RandomSearch you can just run this to generate a submission file
# It computes the predictions from the best model obtained from RandomizedSearch
# NOT the one trained on the full dataset.
import pandas as pd
from pandas import DataFrame

#preds_submission = model_result.model.predict_classes(X_test)
grid_result.best_estimator_.model.predict_classes(X_test)

sample_submission = pd.read_csv('/Users/Anuar_The_Great/Desktop/ML/sample_submission.csv')
sample_submission['Class'] = preds_submission
sample_submission.to_csv('/Users/Anuar_The_Great/desktop/submission.csv', index=False)

