In [1]:
# import the required packages
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
from tensorflow import random

Using TensorFlow backend.


In [2]:
# Load the dataset
X = pd.read_csv('../data/HCV_feats.csv')
y = pd.read_csv('../data/HCV_target.csv')

sc = StandardScaler()
X = pd.DataFrame(sc.fit_transform(X), columns=X.columns)

In [3]:
# Create the function that returns the keras model 1
def build_model_1(activation='relu', optimizer='adam'):
    # create model 1
    model = Sequential()
    model.add(Dense(4, input_dim=X.shape[1], activation=activation))
    model.add(Dense(4, activation=activation))
    model.add(Dense(4, activation=activation))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Create the function that returns the keras model 2
def build_model_2(activation='relu', optimizer='adam'):
    # create model 2
    model = Sequential()
    model.add(Dense(4, input_dim=X.shape[1], activation=activation))
    model.add(Dense(2, activation=activation))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Create the function that returns the keras model 3
def build_model_3(activation='relu', optimizer='adam'):
    # create model 3
    model = Sequential()
    model.add(Dense(8, input_dim=X.shape[1], activation=activation))
    model.add(Dense(8, activation=activation))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [4]:
# define a seed for random number generator so the result will be reproducible
seed = 2
np.random.seed(seed)
random.set_seed(seed)

# determine the number of folds for k-fold cross validation, number of epochs and batch size
n_folds = 5
batch_size=20
epochs=50
# define the list to store cross validation scores
results_1 =[]
# define the possible options for the model
models = [build_model_1, build_model_2, build_model_3]
# loop over models
for m in range(len(models)):
    # build the Scikit-Learn interface for the keras model
    classifier = KerasClassifier(build_fn=models[m], epochs=epochs, batch_size=batch_size, verbose=0, shuffle=False)
    # define the cross validation iterator
    kfold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
    # perform the k-fold cross validation and store the scores in result
    result = cross_val_score(classifier, X, y, cv=kfold)
    # add the scores to the results list 
    results_1.append(result)

In [5]:
# Print cross validation score for each model
for m in range(len(models)):
    print("Model", m+1,"Test Accuracy =", results_1[m].mean())

Model 1 Test Accuracy = 0.5046931445598603
Model 2 Test Accuracy = 0.5054151535034179
Model 3 Test Accuracy = 0.507581228017807


In [6]:
# define a seed for random number generator so the result will be reproducible
np.random.seed(seed)
random.set_seed(seed)
# determine the number of folds for k-fold cross validation
n_folds = 5
# define possible options for epochs and batch_size
epochs = [100, 200]
batches = [10, 20]
# define the list to store cross validation scores
results_2 =[]
# loop over all possible pairs of epochs, batch_size
for e in range(len(epochs)):
    for b in range(len(batches)):
        # build the Scikit-Learn interface for the keras model
        classifier = KerasClassifier(build_fn=build_model_2, epochs=epochs[e], batch_size=batches[b], verbose=0, shuffle=False)
        # define the cross validation iterator
        kfold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
        # perform the k-fold cross validation. 
        # store the scores in result
        result = cross_val_score(classifier, X, y, cv=kfold)
        # add the scores to the results list 
        results_2.append(result)

In [7]:
# Print cross validation score for each possible pair of epochs, batch_size
c = 0
for e in range(len(epochs)):
    for b in range(len(batches)):
        print("batch_size =", batches[b],", epochs =", epochs[e], ", Test Accuracy =", results_2[c].mean())
        c += 1

batch_size = 10 , epochs = 100 , Test Accuracy = 0.5371841073036194
batch_size = 20 , epochs = 100 , Test Accuracy = 0.5025270760059357
batch_size = 10 , epochs = 200 , Test Accuracy = 0.49386281371116636
batch_size = 20 , epochs = 200 , Test Accuracy = 0.49891695380210876


In [8]:
# define a seed for random number generator so the result will be reproducible
np.random.seed(seed)
random.set_seed(seed)
# determine the number of folds for k-fold cross validation, number of epochs and batch size
n_folds = 5
batch_size=20
epochs=100
# define the list to store cross validation scores
results_3 =[]
# define possible options for optimizer and activation
optimizers = ['rmsprop', 'adam','sgd']
activations = ['relu', 'tanh']
# loop over all possible pairs of optimizer, activation
for o in range(len(optimizers)):
    for a in range(len(activations)):
        optimizer = optimizers[o]
        activation = activations[a]
        # build the Scikit-Learn interface for the keras model
        classifier = KerasClassifier(build_fn=build_model_2, epochs=epochs, batch_size=batch_size, verbose=0, shuffle=False)
        # define the cross validation iterator
        kfold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
        # perform the k-fold cross validation. 
        # store the scores in result
        result = cross_val_score(classifier, X, y, cv=kfold)
        # add the scores to the results list 
        results_3.append(result)

In [9]:
# Print cross validation score for each possible pair of optimizer, activation
c = 0
for o in range(len(optimizers)):
    for a in range(len(activations)):
        print("activation = ", activations[a],", optimizer = ", optimizers[o], ", Test accuracy = ", results_3[c].mean())
        c += 1

activation =  relu , optimizer =  rmsprop , Test accuracy =  0.5523465752601624
activation =  tanh , optimizer =  rmsprop , Test accuracy =  0.5032491028308869
activation =  relu , optimizer =  adam , Test accuracy =  0.5039711177349091
activation =  tanh , optimizer =  adam , Test accuracy =  0.5119133591651917
activation =  relu , optimizer =  sgd , Test accuracy =  0.5046931326389312
activation =  tanh , optimizer =  sgd , Test accuracy =  0.4830324828624725
