In [153]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Activation, Dense
from keras import optimizers
from keras.layers import Dropout

In [114]:
iris = load_iris()

In [115]:
iris.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [116]:
data = pd.DataFrame(iris['data'], columns = iris['feature_names'] )
target = pd.DataFrame(iris['target'],columns = ['target'])

In [117]:
#combine the input predictors and target so that it can be split into training and testing
data_target = data.join(target)

In [118]:
data_target.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [119]:
data_target['target'].value_counts()

2    50
1    50
0    50
Name: target, dtype: int64

In [120]:
#for this implementaion, let's make the problem a binary. So considering only '0' and '1' as the target
X = np.array(data_target[(data_target['target'] == 0) | (data_target['target'] == 1)].drop('target', axis=1))
y = np.array(data_target[(data_target['target'] == 0) | (data_target['target'] == 1)]['target']).reshape(100,1)

In [121]:
train_x,test_x,train_y,test_y = train_test_split(X,y,test_size = 0.2)

print('Shape of train_x:', train_x.shape)
print('Shape of train_y:', train_y.shape)
print('Shape of test_x:', test_x.shape)
print('Shape of test_y:', test_y.shape)

Shape of train_x: (80, 4)
Shape of train_y: (80, 1)
Shape of test_x: (20, 4)
Shape of test_y: (20, 1)


Splitting the data into 9 mini batches

In [122]:
batches = 9

#get the total number of batches
batch_count = train_x.shape[0] // batches

batch_trainx = []
batch_trainy = []

for i in range(0, batch_count):
  begin = i * batches
  end = (i + 1) * batches

  batch_trainx.append(train_x[begin:end])
  batch_trainy.append(train_y[begin:end])

#when the total count is not exactly divisible by batches
left_out = train_x.shape[0] % batches

if left_out != 0:
  batch_trainx.append(train_x[end: end + left_out])
  batch_trainy.append(train_y[end: end + left_out])

In [123]:
# intializing the weights for each layer
def intialize_weights(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)
    
    #initialise the value of weights based on the number of layers
    for i in range(1,L-1):
        #for the hidden layers we will use He initialisation because of relu activation
        parameters['W'+str(i)] = np.random.randn(layer_dims[i-1],layer_dims[i]) * np.sqrt(2/layer_dims[i-1])
        parameters['b'+str(i)] = np.zeros([1, layer_dims[i]])   
            
    #for the last layer we can use Xavier initialisation 
    parameters['W' + str(i+1)] = np.random.randn(layer_dims[i],layer_dims[i+1]) * np.sqrt(1/layer_dims[i])
    parameters['b'+str(i+1)] = np.zeros([1, layer_dims[i+1]])    
    
    return parameters

In [140]:
#forward propagation
def forward_propagation(layer_dims,train_x,parameters, keep_probs,mode):
    
    caches = []
    Aprev = train_x
    L = len(layer_dims)
    
    #forward propagation for all the layers except last layer
    for i in range(1,L-1): 
        W = parameters['W'+ str(i)]
        b = parameters['b' + str(i)] 
        Z = np.dot(Aprev, W) + b  
        Aprev = np.maximum(0,Z)   
        if mode == 'train':
          drop = np.random.rand(Aprev.shape[0],Aprev.shape[1])
          drop = (drop < keep_probs).astype(int)
          Aprev = np.multiply(Aprev, drop)
          Aprev = np.divide(Aprev,keep_probs)     
          cache = Aprev, W, b, drop
          caches.append(cache)     
        else:
          cache = Aprev, W, b
          caches.append(cache)
    
    #forward propagation for the last layer
    W = parameters['W'+ str(L-1)]
    b = parameters['b' + str(L-1)]
    Zlast = np.dot(Aprev, W) + b    
    Alast = 1/(1 + np.exp(-Zlast))   
    if mode == 'train':      
      drop = 0  #dummy value since in the last layer we do not have any dropout
      cache = Alast, W, b, drop
    else:
      cache = Alast, W, b

    caches.append(cache)
    return caches

In [141]:
#cost function calculation
def cost_calculate(predict_y,train_y):
    m = train_y.shape[0]
    cost = -(np.dot(train_y.T, np.log(predict_y)) + np.dot((1-train_y).T, np.log(1-predict_y)))/m
    return cost

In [147]:
def backward_propagation(layer_dims, caches, parameters, train_y, learning_rate):
    #backward propagation for the last layer
    #Extract the last array from the caches, as this corresponds to the final output
    L = len(layer_dims)    
    Acurr,Wcurr,bcurr,dcurr = caches[L - 2]  
    Aprev,Wprev,bprev,dprev = caches[L - 3]
    
    m = train_y.shape[0]    
    
    dzprev = (Acurr - train_y)    
    dwlast = np.dot(Aprev.T, dzprev)/m    
    dblast = np.sum(dzprev, keepdims = True, axis = 0)/m        
    parameters['W' + str(L-1)]= parameters['W' + str(L-1)] - (learning_rate * dwlast)    
    parameters['b' + str(L-1)]= parameters['b' + str(L-1)] - (learning_rate * dblast)    
        
    for i in reversed(range(L-2)):
        Anext,Wnext,bnext,dnext = caches[i+1]
        Acurr,Wcurr,bcurr,dcurr = caches[i]  
        if i == 0:
            Aprev = train_x
        else:            
            Aprev,Wprev,bprev,dprev = caches[i-1]
                
        dzcurr = np.where(Acurr > 0,1,Acurr)             
        da = np.dot(dzprev,Wnext.T)
        da = da * dcurr
        da = da / keep_probs
        dzprev = np.multiply(da, dzcurr)
        
        dW = np.dot(Aprev.T,dzprev)/m
        db = np.sum(dzprev, keepdims = True, axis = 0)/m  
        parameters['W' + str(i+1)]= parameters['W' + str(i+1)] - (learning_rate * dW)
        parameters['b' + str(i+1)]= parameters['b' + str(i+1)] - (learning_rate * db)     
        return parameters

In [143]:
def complete_model(layer_dims, train_x, train_y, learning_rate, iterations, keep_probs,mode):
    
    L = len(layer_dims)
    # Intialize the weights
    parameters = intialize_weights(layer_dims)
    
    for i in range(iterations):
      for j in range(len(batch_trainx)):
        #forward propagation
        caches = forward_propagation(layer_dims,batch_trainx[j],parameters, keep_probs,mode)
        
        #calculate the cost 
        A,W,b,d = caches[-1]
        cost = cost_calculate(A,batch_trainy[j])
        if i%1000 == 0:
            print('The cost after iteration {}: {}'.format(i, np.squeeze(cost)))
                  
        #backward propagation
        parameters = backward_propagation(layer_dims, caches, parameters, batch_trainy[j], learning_rate)
    return parameters

In [144]:
layer_dims = [4,5,3,1]
learning_rate = 0.15
iterations = 14900
keep_probs = 0.8
mode = 'train'
parameters = complete_model(layer_dims, train_x, train_y, learning_rate, iterations, keep_probs, mode)

The cost after iteration 0: 0.7522831878182904
The cost after iteration 0: 0.6585108248345981
The cost after iteration 0: 0.43804564385972017
The cost after iteration 0: 0.31757307213269453
The cost after iteration 0: 0.8603093749658406
The cost after iteration 0: 0.6287096739506934
The cost after iteration 0: 0.8662410491814225
The cost after iteration 0: 0.9053140580471664
The cost after iteration 0: 0.5596619583020677
The cost after iteration 1000: 0.6604974997014464
The cost after iteration 1000: 0.7906566942646328
The cost after iteration 1000: 0.710896799105365
The cost after iteration 1000: 0.8941763334511647
The cost after iteration 1000: 0.4521113491463764
The cost after iteration 1000: 0.5214789379845669
The cost after iteration 1000: 0.40745081008682144
The cost after iteration 1000: 0.5280723409772821
The cost after iteration 1000: 0.6070039223495167
The cost after iteration 2000: 0.4964077922351091
The cost after iteration 2000: 0.4866097808319927
The cost after iteration 

In [145]:
mode = 'test'
predict = forward_propagation(layer_dims,test_x,parameters,keep_probs,mode)[-1][0]
test_cost = cost_calculate(predict,test_y)

In [146]:
test_cost

array([[0.44282985]])

In [157]:
model = Sequential()
model.add(Dense(50, input_shape = (4, ),kernel_initializer='he_normal'))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(50,kernel_initializer='he_normal'))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(1,kernel_initializer='glorot_normal'))
model.add(Activation('sigmoid'))

In [158]:
sgd = optimizers.SGD(lr = 0.01)
model.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

In [156]:
model.fit(train_x, train_y, validation_data = (test_x, test_y), epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f9713041908>