In [None]:
import random
import seaborn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from sklearn.model_selection import train_test_split
seaborn.set(style='whitegrid'); seaborn.set_context('talk')
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
def format_output(data):
    y1 = data.pop('Y1')
    y1 = np.array(y1)
    y2 = data.pop('Y2')
    y2 = np.array(y2)
    return pd.DataFrame((y1, y2))
def norm(x):
    return (x - train_stats['mean']) / (10 * train_stats['std'])

In [None]:
URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx'
df = pd.read_excel(URL)
df = df.sample(frac=1).reset_index(drop=True)
df.pop('X8')
df.pop('X3')
df.pop('X4')
df.pop('X5')
df.pop('X6')
df.pop('X7') # the diagram only has 2 input hence removing all other inputs
train, test = train_test_split(df, test_size=0.2)
train_stats = train.describe()
train_stats.pop('Y1')
train_stats.pop('Y2')
train_stats = train_stats.transpose()
train_Y = format_output(train)
test_Y = format_output(test)
norm_train_X = norm(train)
norm_test_X = norm(test)

In [None]:
def layer_sizes(X, Y):
    n_x = X.shape[1]
    m=X.shape[0]
    n_h = 3
    n_y = Y.shape[0]
    return (n_x, n_h, n_y,m)

In [None]:
n_x, n_h, n_y,m=layer_sizes(norm_train_X,train_Y)

In [None]:
layer_sizes(norm_train_X,train_Y)

(2, 3, 2, 614)

In [None]:
def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(2) 
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y,1))
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters
parameters = initialize_parameters(n_x, n_h, n_y)

print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[-0.00416758 -0.00056267]
 [-0.02136196  0.01640271]
 [-0.01793436 -0.00841747]]
b1 = [[0.]
 [0.]
 [0.]]
W2 = [[ 0.00502881 -0.01245288 -0.01057952]
 [-0.00909008  0.00551454  0.02292208]]
b2 = [[0.]
 [0.]]


In [None]:
print("The size of the input layer is: n_x = " + str(n_x))
print("The size of the hidden layer is: n_h = " + str(n_h))
print("The size of the output layer is: n_y = " + str(n_y))

The size of the input layer is: n_x = 2
The size of the hidden layer is: n_h = 3
The size of the output layer is: n_y = 2


In [None]:
def softmax(x):
    y = np.exp(x - np.max(x))
    f_x = y / np.sum(np.exp(x))
    return f_x

In [None]:
def forward_propagation(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    print(W1.shape)
    print(b1.shape)
    print(W2.shape)
    print(b2.shape)
    
    Z1 = np.dot(W1, np.transpose(X)) + b1
    A1 =  np.tanh((Z1))
    Z2 = np.dot(W2, A1) + b2
    A2 = 1/(1 - np.exp(-Z2))
    A2 = np.maximum(A2, 0)
    A2 = softmax(Z2)

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    
    return A2, cache

In [None]:
t_X, parameters = norm_train_X,parameters
A2, cache = forward_propagation(t_X, parameters)
print("A2 = " + str(A2))

(3, 2)
(3, 1)
(2, 3)
(2, 1)
A2 = [[0.00081425 0.00081419 0.00081423 ... 0.00081425 0.00081418 0.00081419]
 [0.00081424 0.00081428 0.00081425 ... 0.00081424 0.00081429 0.00081428]]


In [None]:
print(train_Y.transpose())

         0      1
0    38.65  43.73
1    16.99  19.65
2    14.50  17.03
3    40.78  39.55
4    17.14  17.20
..     ...    ...
609  42.08  38.23
610  19.50  27.30
611  40.00  36.26
612  13.00  14.47
613  17.35  21.19

[614 rows x 2 columns]


In [None]:
def compute_cost(A2, Y, parameters):
    A2=A2.transpose()
    Y=Y.transpose()
    m = Y.shape[0] 
    print(m)
    logprobs = np.multiply(np.log(np.abs(A2)),Y) + np.multiply(np.log(np.abs(1-A2)),1-Y)
    cost = -(1/m)*np.sum(logprobs)
    cost = (np.squeeze(cost)) 
    return cost

In [None]:
cost = compute_cost(pd.DataFrame(A2),train_Y,parameters)
print("cost = " + str(cost))

614
cost = 0    159.285085
1    175.351486
dtype: float64


In [None]:
def backward_propagation(parameters, cache, X, Y):
    m = X.shape[0]
    print(m)
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
    dZ2 = A2 - Y
    dW2 = (1/m)*np.dot(dZ2,A1.T)
    db2 = (1/m)*np.sum(dZ2, axis =1)
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1,2))
    dW1 = (1/m) * np.dot(dZ1,X)
    db1 = (1/m) * np.sum(dZ1, axis = 1)
    
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    
    return grads

In [None]:
grads = backward_propagation(parameters, cache, norm_train_X, train_Y)
print ("dW1 = "+ str(grads["dW1"]))
print ("db1 = "+ str(grads["db1"]))
print ("dW2 = "+ str(grads["dW2"]))
print ("db2 = "+ str(grads["db2"]))

614
dW1 = [[ 0.00230737 -0.00247349]
 [ 0.00450757 -0.00477789]
 [-0.00713875  0.00764058]]
db1 = [ 0.11148251  0.14292156 -0.32818113]
dW2 = [[0.00224283 0.0243753  0.00564742]
 [0.00214554 0.02337049 0.00538615]]
db2 = 0   -22.394407
1   -24.653241
dtype: float64


In [None]:
def update_parameters(parameters, grads, learning_rate = 0.001):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    print(W1.shape)
    print(b1.shape)
    print(W2.shape)
    print(b2.shape)
    print(dW1.shape)
    print(db1.shape)
    print(dW2.shape)
    print(db2.shape)
    

    W1 = W1 - (learning_rate * dW1)
    b1 = b1 - (learning_rate * db1)
    W2 = W2 - (learning_rate * dW2)
    b2 = b2 - (learning_rate * db2)

    # b1=b1[0]
    # b1 = np.reshape(b1, (-1, 1))
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [None]:
parameters, grads = parameters, grads
parameters = update_parameters(parameters, grads)

print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

(3, 2)
(3, 1)
(2, 3)
(2, 1)
(3, 2)
(3,)
(2, 3)
(2,)


ValueError: ignored

In [None]:
def nn_model(X, Y, n_h, num_iterations = 100):
    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]
    parameters = initialize_parameters(n_x, n_h, n_y)
    for i in range(0, num_iterations):
        A2, cache = forward_propagation(X, parameters)
        print(A2)
        cost = compute_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache,X,Y)
        parameters = update_parameters(parameters, grads)
    return parameters

In [None]:
X_assess, Y_assess = norm_train_X,train_Y
parameters = nn_model(X_assess, Y_assess, n_h, num_iterations=1000)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

In [None]:
def predict(parameters, X):
    A2, cache = forward_propagation(X, parameters)
    return A2

In [None]:
parameters, X_assess = parameters,norm_train_X

predictions = predict(parameters, X_assess)
print(predictions)