In [38]:
import pandas as pd
import math

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [118]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [14]:
from sklearn.preprocessing import MinMaxScaler

In [21]:
from sklearn.model_selection import train_test_split

In [119]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

### Input Dataset (Diabetes)

In [8]:
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df_diabetes = pd.read_csv('diabetes.csv', names = names)

In [9]:
df_diabetes.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### Define the features and target response variables

In [94]:
X = df_diabetes[['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']]
y = df_diabetes['class']

In [95]:
### Normalize the input features so that they are on the same scale. This is so that the errors calculated for back-propagation are calculated from a similar scale of features. This would mean smaller initial errors compared to that from non-normalised feature data. Smaller scale of errors leads to faster convergence of the gradient descent when adjusting the weights using the chosen cost function.

In [96]:
scaler = MinMaxScaler(feature_range=[0, 1])
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age'])
X.head(3)

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333


In [97]:
X= X.to_numpy()
y = y.to_numpy()

In [98]:
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.75, random_state = 1, stratify =y, shuffle = True)

### Function to linearly decrease or increase the number of nodes for the layers between the first layer and last layer  

In [41]:
def FindLayerNodesLinear(n_layers, first_layer_nodes, last_layer_nodes):
    layers = []
    
    nodes_increment = (last_layer_nodes - first_layer_nodes)/ (n_layers-1)
    nodes = first_layer_nodes
    for i in range(1, n_layers+1):
        layers.append(math.ceil(nodes))
        nodes = nodes + nodes_increment
    
    return layers

In [132]:
FindLayerNodesLinear(5, 50, 10)

[50, 40, 30, 20, 10]

In [126]:
FindLayerNodesLinear(4, 15, 15)

[15, 15, 15, 15]

In [128]:
FindLayerNodesLinear(5, 5, 45)

[5, 15, 25, 35, 45]

### Function to vary the parameters of a tensor flow model by creating a new model based on given parameters

In [100]:
def createmodel(n_layers, first_layer_nodes, last_layer_nodes, activation_func, loss_func):
    model = Sequential()
    n_nodes = FindLayerNodesLinear(n_layers, first_layer_nodes, last_layer_nodes)
    for i in range(1, n_layers):
        if i==1:
            model.add(Dense(first_layer_nodes, input_dim=X_train.shape[1], activation=activation_func))
        else:
            model.add(Dense(n_nodes[i-1], activation=activation_func))
            
    #Finally, the output layer should have a single node in binary classification
    model.add(Dense(1, activation=activation_func))
    model.compile(optimizer='adam', loss=loss_func, metrics = ["accuracy"]) #note: metrics could also be 'mse'
    
    return model

##Wrap model into scikit-learn
model =  KerasClassifier(build_fn=createmodel, verbose = False)            

### Define the grid for searching the optimal parameters within the grid

In [103]:
activation_funcs = ['sigmoid', 'relu', 'tanh'] 
loss_funcs = ['binary_crossentropy','hinge']
param_grid = dict(n_layers=[2,3], first_layer_nodes = [64,32,16], last_layer_nodes = [4],  activation_func = activation_funcs, loss_func = loss_funcs, batch_size = [100], epochs = [20,60])
grid = GridSearchCV(estimator = model, param_grid = param_grid)

In [104]:
grid.fit(X,y)

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier object at 0x0000028ED88EF3C8>,
             iid='warn', n_jobs=None,
             param_grid={'activation_func': ['sigmoid', 'relu', 'tanh'],
                         'batch_size': [100], 'epochs': [20, 60],
                         'first_layer_nodes': [64, 32, 16],
                         'last_layer_nodes': [4],
                         'loss_func': ['binary_crossentropy', 'hinge'],
                         'n_layers': [2, 3]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

### Optimal Grid Parameters

In [105]:
print(grid.best_score_)
print(grid.best_params_)

0.7682291666666666
{'activation_func': 'relu', 'batch_size': 100, 'epochs': 60, 'first_layer_nodes': 64, 'last_layer_nodes': 4, 'loss_func': 'binary_crossentropy', 'n_layers': 3}


In [133]:
pd.DataFrame(grid.cv_results_)[['mean_test_score', 'std_test_score', 'params']].to_csv('GridOptimization.csv')