<a href="https://colab.research.google.com/github/DipankarJDutta/Biomarker-discovery-for-FASD-GDM-with-B-T-cells/blob/Optimizing-dropout-rate-and-weight-constraint/B-T-median_optimizing%20dropout%20rate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#Loading essentials
import numpy
from numpy import loadtxt
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm

In [0]:
# Reproducibility: seed NumPy's and TensorFlow's global random generators
# with one shared value before any model is built.
# NOTE(review): these seeds are set in the notebook process only -- confirm
# they also take effect wherever the models are actually trained.
from numpy.random import seed
from tensorflow import set_random_seed

SEED = 7
seed(SEED)
set_random_seed(SEED)

In [0]:
def create_model(dropout_rate=0.0):
    """Build and compile the binary classifier used as the KerasClassifier build function.

    Layer stack, in order: Dense(27, relu) on 29 inputs -> Dropout ->
    Dense(1, relu) -> Dropout -> Dense(1, sigmoid). Every Dense kernel uses
    the 'uniform' initializer.

    Args:
        dropout_rate: rate passed to both Dropout layers (default 0.0).

    Returns:
        A Sequential model compiled with binary cross-entropy loss, the
        'adam' optimizer and accuracy as the reported metric.
    """
    # NOTE(review): the second hidden layer has a single unit and is itself
    # followed by Dropout -- confirm this one-unit bottleneck is intended.
    layer_stack = [
        Dense(27, input_dim=29, kernel_initializer='uniform', activation='relu'),
        Dropout(dropout_rate),
        Dense(1, kernel_initializer='uniform', activation='relu'),
        Dropout(dropout_rate),
        Dense(1, kernel_initializer='uniform', activation='sigmoid'),
    ]
    network = Sequential(layer_stack)
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [0]:
# Read the comma-separated data file into a NumPy array.
DATA_FILE = 'B-T-median.csv'
dataset = loadtxt(DATA_FILE, delimiter=',')

In [0]:
# Separate model inputs from the output variable:
# the first 29 columns are the inputs (x), column index 29 is the output (y).
N_FEATURES = 29
x, y = dataset[:, :N_FEATURES], dataset[:, N_FEATURES]

In [0]:
# Wrap the Keras build function in a scikit-learn style classifier so it can
# be handed to the grid search. The training settings are passed through the
# wrapper's constructor.
fit_settings = dict(epochs=240, batch_size=10, verbose=0)
model = KerasClassifier(build_fn=create_model, **fit_settings)

In [15]:
# Grid search over the dropout rate passed to create_model (the value is used
# by both Dropout layers). Candidate rates run from 0.0 to 0.9 in steps of 0.1.
# (The original note states the input dataset has 47 samples.)
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = dict(dropout_rate=dropout_rate)
# Run every fit in the notebook process (n_jobs=1). With n_jobs=-1 the fits are
# dispatched to separate worker processes, where the NumPy/TensorFlow seeds set
# at the top of this notebook are not in effect, so the scores change from run
# to run. Sequential execution is slower but keeps the search under the seeds.
# cv=10 requests 10-fold cross-validation.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=10)
grid_result = grid.fit(x, y)





In [16]:
# Report the best mean cross-validated score and its parameters, followed by
# one line per candidate: mean test score, standard deviation across folds,
# and the parameter setting that produced them.
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for score_row in zip(means, stds, params):
    # score_row is (mean, std, params), matching the three placeholders.
    print("%f (%f) with: %r" % score_row)

Best: 0.526667 using {'dropout_rate': 0.6}
0.460000 (0.220504) with: {'dropout_rate': 0.0}
0.426667 (0.183666) with: {'dropout_rate': 0.1}
0.460000 (0.254209) with: {'dropout_rate': 0.2}
0.493333 (0.241201) with: {'dropout_rate': 0.3}
0.503333 (0.212106) with: {'dropout_rate': 0.4}
0.520000 (0.271702) with: {'dropout_rate': 0.5}
0.526667 (0.227938) with: {'dropout_rate': 0.6}
0.483333 (0.246419) with: {'dropout_rate': 0.7}
0.486667 (0.210924) with: {'dropout_rate': 0.8}
0.476667 (0.245877) with: {'dropout_rate': 0.9}
