<a href="https://colab.research.google.com/github/DipankarJDutta/WBC-Biomarker-ID-for-FASD-GDM-/blob/Optimal-Dropout-rate-for-MLP-model/MLP_T_cells_AS_EtOH_GDM_z_score_optimization_dropout%20rate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#Loading essentials for the dropout-rate / weight-constraint grid search (Source: scikit-learn grid search)
import numpy
from numpy import loadtxt
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm

In [0]:
# Seed the global random generators of NumPy and TensorFlow with the same
# value (7) so repeated runs start from the same random state
from numpy.random import seed
from tensorflow import set_random_seed

seed(7)
set_random_seed(7)

In [0]:
#Creating model required for KerasClassifier
def create_model(dropout_rate=0.0, weight_constraint=0, input_dim=25):
    """Build and compile the MLP handed to ``KerasClassifier`` as ``build_fn``.

    Architecture: Dense(12, relu) -> Dropout -> Dense(8, relu) -> Dropout
    -> Dense(1, sigmoid). The model is compiled with binary cross-entropy
    loss, the Adam optimizer and the accuracy metric.

    Parameters
    ----------
    dropout_rate : float
        Rate passed to both ``Dropout`` layers.
    weight_constraint : float or None
        Max-norm limit applied to the kernels of the two hidden layers.
        ``None`` or a non-positive value (including the default ``0``) means
        "no constraint".
    input_dim : int
        Number of input features expected by the first layer (default 25).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # A max-norm limit of 0 would force every constrained kernel down to zero
    # norm, leaving a network that ignores its inputs. Treat "no positive
    # limit" as "unconstrained" instead of passing it through to maxnorm.
    constrained = weight_constraint is not None and weight_constraint > 0

    def kernel_constraint():
        # A fresh constraint object per layer, mirroring one maxnorm() per Dense.
        return maxnorm(weight_constraint) if constrained else None

    #create model
    model = Sequential()
    model.add(Dense(12, input_dim=input_dim, kernel_initializer='uniform', activation='relu', kernel_constraint=kernel_constraint()))
    model.add(Dropout(dropout_rate))
    model.add(Dense(8, kernel_initializer='uniform', activation='relu', kernel_constraint=kernel_constraint()))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    #compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [0]:
#Read the comma-separated T-cell matrix (per the original note: z-scored inclevel values
#of significantly AS genes in T-cells common to the EtOH & GDM datasets)
dataset = loadtxt(fname='z-score_T-cell.csv', delimiter=',')

In [0]:
#Columns 0-24 are the 25 input variables; column 25 is the single output variable
x, y = dataset[:, :25], dataset[:, 25]

In [0]:
#Wrap create_model in a KerasClassifier so GridSearchCV can use it as an estimator
model = KerasClassifier(
    build_fn=create_model,
    epochs=100,
    batch_size=21,
    verbose=0,
)

In [8]:
#Define grid-search parameters: every combination of dropout rate and max-norm
#weight constraint (10 x 5 = 50 candidates) for the input T-cell dataset
#NOTE(review): this comment previously said 47 samples, but the recorded scores are
#exact multiples of 1/48 - confirm the sample count
weight_constraint = [1, 2, 3, 4, 5]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = dict(dropout_rate=dropout_rate, weight_constraint=weight_constraint)
#5-fold cross-validation, run in this process (n_jobs=1). With n_jobs=-1 the fits are
#dispatched to worker processes that may not pick up the NumPy/TensorFlow seeds set
#earlier in the notebook, so scores can differ from run to run
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=5)
grid_result = grid.fit(x, y)







Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where














In [9]:
#Report the winning configuration, then mean (std) test score for every candidate
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for row in zip(means, stds, params):
    # row is a (mean, std, params) triple, matching the three format fields
    print("%f (%f) with: %r" % row)

Best: 0.645833 using {'dropout_rate': 0.5, 'weight_constraint': 3}
0.583333 (0.054857) with: {'dropout_rate': 0.0, 'weight_constraint': 1}
0.541667 (0.101493) with: {'dropout_rate': 0.0, 'weight_constraint': 2}
0.604167 (0.122781) with: {'dropout_rate': 0.0, 'weight_constraint': 3}
0.625000 (0.111285) with: {'dropout_rate': 0.0, 'weight_constraint': 4}
0.625000 (0.128650) with: {'dropout_rate': 0.0, 'weight_constraint': 5}
0.520833 (0.155251) with: {'dropout_rate': 0.1, 'weight_constraint': 1}
0.541667 (0.101493) with: {'dropout_rate': 0.1, 'weight_constraint': 2}
0.437500 (0.110894) with: {'dropout_rate': 0.1, 'weight_constraint': 3}
0.604167 (0.102204) with: {'dropout_rate': 0.1, 'weight_constraint': 4}
0.604167 (0.109845) with: {'dropout_rate': 0.1, 'weight_constraint': 5}
0.625000 (0.099187) with: {'dropout_rate': 0.2, 'weight_constraint': 1}
0.541667 (0.138193) with: {'dropout_rate': 0.2, 'weight_constraint': 2}
0.583333 (0.149071) with: {'dropout_rate': 0.2, 'weight_constraint': 