Import the required libraries

In [58]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Activation, Dense
from keras import optimizers

We will use iris dataset for the implementation.

In [33]:
iris = load_iris()

In [34]:
iris.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [35]:
data = pd.DataFrame(iris['data'], columns = iris['feature_names'] )
target = pd.DataFrame(iris['target'],columns = ['target'])

In [36]:
#combine the input predictors and target so that it can be split into training and testing
data_target = data.join(target)

In [37]:
data_target.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [38]:
data_target['target'].value_counts()

2    50
1    50
0    50
Name: target, dtype: int64

In [39]:
#for this implementaion, let's make the problem a binary. So considering only '0' and '1' as the target
X = np.array(data_target[(data_target['target'] == 0) | (data_target['target'] == 1)].drop('target', axis=1))
y = np.array(data_target[(data_target['target'] == 0) | (data_target['target'] == 1)]['target']).reshape(100,1)

In [40]:
train_x,test_x,train_y,test_y = train_test_split(X,y,test_size = 0.2)

print('Shape of train_x:', train_x.shape)
print('Shape of train_y:', train_y.shape)
print('Shape of test_x:', test_x.shape)
print('Shape of test_y:', test_y.shape)

Shape of train_x: (80, 4)
Shape of train_y: (80, 1)
Shape of test_x: (20, 4)
Shape of test_y: (20, 1)


In [41]:
# intializing the weights for each layer
def intialize_weights(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)
    
    #initialise the value of weights based on the number of layers
    for i in range(1,L):
        parameters['W'+str(i)] = np.random.randn(layer_dims[i-1],layer_dims[i]) * 0.01
        parameters['b'+str(i)] = np.zeros([1, layer_dims[i]])    
            
    return parameters

In [42]:
#forward propagation
def forward_propagation(layer_dims,train_x,parameters):
    
    caches = []
    Aprev = train_x
    L = len(layer_dims)
    
    #forward propagation for all the layers except last layer
    for i in range(1,L-1): 
        W = parameters['W'+ str(i)]
        b = parameters['b' + str(i)] 
        Z = np.dot(Aprev, W) + b  
        Aprev = np.maximum(0,Z)        
        cache = Aprev, W,b
        caches.append(cache)         
    
    #forward propagation for the last layer
    W = parameters['W'+ str(L-1)]
    b = parameters['b' + str(L-1)]
    Zlast = np.dot(Aprev, W) + b    
    Alast = 1/(1 + np.exp(-Zlast))    
    cache = Alast, W, b
    caches.append(cache)
    return caches

In [43]:
#cost function calculation
def cost_calculate(predict_y,train_y):
    m = train_y.shape[0]
    cost = -(np.dot(train_y.T, np.log(predict_y)) + np.dot((1-train_y).T, np.log(1-predict_y)))/m
    return cost

In [44]:
def backward_propagation(layer_dims, caches, parameters, train_y, learning_rate):
    #backward propagation for the last layer
    #Extract the last array from the caches, as this corresponds to the final output
    L = len(layer_dims)    
    Acurr,Wcurr,bcurr = caches[L - 2]  
    Aprev,Wprev,bprev = caches[L - 3]
    
    m = train_y.shape[0]    
    
    dzprev = (Acurr - train_y)    
    dwlast = np.dot(Aprev.T, dzprev)/m    
    dblast = np.sum(dzprev, keepdims = True, axis = 0)/m        
    parameters['W' + str(L-1)]= parameters['W' + str(L-1)] - (learning_rate * dwlast)    
    parameters['b' + str(L-1)]= parameters['b' + str(L-1)] - (learning_rate * dblast)    
        
    for i in reversed(range(L-2)):
        Anext,Wnext,bnext = caches[i+1]
        Acurr,Wcurr,bcurr = caches[i]  
        if i == 0:
            Aprev = train_x
        else:            
            Aprev,Wprev,bprev = caches[i-1]
                
        dzcurr = np.where(Acurr > 0,1,Acurr)     
        dzprev = np.multiply(np.dot(dzprev,Wnext.T), dzcurr)
        dW = np.dot(Aprev.T,dzprev)/m
        db = np.sum(dzprev, keepdims = True, axis = 0)/m  
        parameters['W' + str(i+1)]= parameters['W' + str(i+1)] - (learning_rate * dW)
        parameters['b' + str(i+1)]= parameters['b' + str(i+1)] - (learning_rate * db)     
        return parameters

In [45]:
def complete_model(layer_dims, train_x, train_y, learning_rate, iterations):
    
    L = len(layer_dims)
    # Intialize the weights
    parameters = intialize_weights(layer_dims)
    
    for i in range(iterations):
        #forward propagation
        caches = forward_propagation(layer_dims,train_x,parameters)
        
        #calculate the cost 
        A,W,b = caches[-1]
        cost = cost_calculate(A,train_y)
        if i%1000 == 0:
            print('The cost after iteration {}: {}'.format(i, np.squeeze(cost)))
                  
        #backward propagation
        parameters = backward_propagation(layer_dims, caches, parameters, train_y, learning_rate)
    return parameters

        

In [46]:
layer_dims = [4,5,3,1]
learning_rate = 0.15
iterations = 14900
parameters = complete_model(layer_dims, train_x, train_y, learning_rate, iterations)

The cost after iteration 0: 0.6931458662427084
The cost after iteration 1000: 0.6928176749538959
The cost after iteration 2000: 0.6926375234627491
The cost after iteration 3000: 0.6905539990663362
The cost after iteration 4000: 0.6674492997222916
The cost after iteration 5000: 0.4943033781872236
The cost after iteration 6000: 0.22287462328522417
The cost after iteration 7000: 0.09010404330852087
The cost after iteration 8000: 0.05181617021726566
The cost after iteration 9000: 0.035345626279470674
The cost after iteration 10000: 0.02656350008290693
The cost after iteration 11000: 0.021191446335718244
The cost after iteration 12000: 0.017610774821345116
The cost after iteration 13000: 0.015068752173866001
The cost after iteration 14000: 0.013179961770325327


In [54]:
predict = forward_propagation(layer_dims,test_x,parameters)[-1][0]
test_cost = cost_calculate(predict,test_y)

In [55]:
test_cost

array([[0.0122371]])

In [56]:
model = Sequential()
model.add(Dense(50, input_shape = (4, )))
model.add(Activation('relu'))
model.add(Dense(50))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))


In [59]:
sgd = optimizers.SGD(lr = 0.01)
model.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

In [60]:
model.fit(train_x, train_y, validation_data = (test_x, test_y), epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f49cff0a940>