In [None]:
import numpy as np
from sklearn.datasets import make_moons
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split

In [None]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    The textbook form computes ``exp(-z)``, which overflows for large
    negative ``z``. Here the exponential is only ever taken of a
    non-positive number, so it stays in ``(0, 1]``.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid of ``z`` with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) can underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function, rearranged.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] turns a 0-d result back into a NumPy scalar and leaves real arrays untouched.
    return result[()]

In [None]:
def der_sigmoid(z):
    """Derivative of the logistic sigmoid at pre-activation ``z``.

    Uses the identity ``s'(z) = s(z) * (1 - s(z))`` where ``s`` is ``sigmoid``.
    ``z`` must be the pre-activation, not an already-activated value.
    """
    s = sigmoid(z)
    return np.multiply(s, 1 - s)

In [None]:
def relu(z):
    """Rectified linear unit: element-wise ``max(0, z)``."""
    rectified = np.maximum(0, z)
    return rectified

In [None]:
def der_relu(z):
    """Sub-gradient of ReLU as a boolean mask: True where ``z`` is strictly positive.

    The value at exactly 0 is False (i.e. gradient 0).
    """
    is_active = np.less(0, z)
    return is_active

In [None]:
def der_tanh(z):
    """Return ``1 - z**2`` element-wise.

    This is the derivative of tanh expressed in terms of the tanh *output*
    (d/dx tanh(x) = 1 - tanh(x)**2), so the argument should be the activation
    value, which is how back_prop_activation calls it.
    """
    squared = np.power(z, 2)
    return 1 - squared

In [None]:
# Builds the initial weight matrices and bias vectors for every layer of the network.

def init_params(layers):
    """Create the initial parameters for a fully connected network.

    Inputs
    layers: sequence with the number of units in each layer, input layer included.

    Outputs
    parameters: dict with keys 'w1', 'b1', ..., one pair per non-input layer.
        'w<l>' has shape (layers[l], layers[l-1]) and holds multiples of 0.01
        in [0, 0.99]; 'b<l>' has shape (layers[l], 1) and is all zeros.

    Note: the global NumPy RNG is re-seeded with 2 on every call, so the
    returned weights are identical from call to call.
    """
    np.random.seed(2)
    parameters = {}

    # Pair each layer size with the size of the layer feeding into it.
    layer_pairs = zip(layers[:-1], layers[1:])
    for idx, (fan_in, fan_out) in enumerate(layer_pairs, start=1):
        parameters['w' + str(idx)] = (np.random.randint(100, size = (fan_out, fan_in))) * 0.01
        parameters['b' + str(idx)] = np.zeros((fan_out, 1))

    return parameters

In [None]:
def forward_prop_linear(A, W, b):
    """Affine step of one layer: returns ``W . A + b``.

    ``b`` is added with NumPy broadcasting.
    """
    return np.dot(W, A) + b

In [None]:
def forward_prop_activation(Z, activation):
    """Apply the named activation function to the pre-activation ``Z``.

    Parameters
    ----------
    Z : array-like
        Pre-activation values of one layer.
    activation : str
        One of 'relu', 'sigmoid' or 'tanh'.

    Returns
    -------
    The activated values, same shape as ``Z``.

    Raises
    ------
    ValueError
        If ``activation`` is not a supported name. Previously an unknown
        name silently produced an empty list, which only failed later with
        an unrelated shape error.
    """
    if activation == 'relu':
        return relu(Z)
    if activation == 'sigmoid':
        return sigmoid(Z)
    if activation == 'tanh':
        return np.tanh(Z)
    raise ValueError(
        "Unknown activation %r; expected 'relu', 'sigmoid' or 'tanh'" % (activation,)
    )

In [None]:
def forward_prop(X, params, activations):
    """Run a full forward pass and keep every intermediate value.

    ``params`` holds one 'w<l>'/'b<l>' pair per layer, so the layer count is
    half its length. ``activations[l]`` names the activation of layer ``l``;
    index 0 is never read here.

    Returns a dict with 'A0' (the input) followed by 'Z<l>' and 'A<l>' for
    each layer l = 1..number of layers.
    """
    n_layers = len(params) // 2
    cache = {'A0': X}
    current = X
    for idx in range(1, n_layers + 1):
        tag = str(idx)
        pre_activation = forward_prop_linear(current, params['w' + tag], params['b' + tag])
        cache['Z' + tag] = pre_activation
        current = forward_prop_activation(pre_activation, activations[idx])
        cache['A' + tag] = current
    return cache

In [None]:
def calc_cost(y, A):
    """Mean binary cross-entropy between labels ``y`` and predictions ``A``.

    Parameters
    ----------
    y : numpy.ndarray
        Labels; ``y.shape[1]`` is used as the number of examples ``m``.
    A : numpy.ndarray
        Predicted probabilities, same shape as ``y``.

    Returns
    -------
    The cost ``-(1/m) * sum(y*log(A) + (1-y)*log(1-A))`` with singleton
    dimensions squeezed away.
    """
    m = y.shape[1]
    # A saturated output (exactly 0 or 1) would give log(0) = -inf and then
    # 0 * -inf = nan, so keep the probabilities strictly inside (0, 1).
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    # Element-wise products: a matrix product y . log(A).T would add
    # cross terms between different output rows when there is more than one.
    log_likelihood = y * np.log(A_safe) + (1 - y) * np.log(1 - A_safe)
    cost = (-1 / m) * np.sum(log_likelihood)
    return np.squeeze(cost)

In [None]:
def back_prop_activation(w, dz, A, activation):
    """Propagate the error signal one layer backwards.

    Parameters
    ----------
    w : numpy.ndarray
        Weights of the *next* layer (the one ``dz`` belongs to).
    dz : numpy.ndarray
        Error signal of the next layer.
    A : numpy.ndarray
        Activation *output* of the current layer.
    activation : str
        Activation of the current layer: 'relu', 'sigmoid' or 'tanh'.

    Returns
    -------
    ``(w.T . dz) * g'``, where ``g'`` is the activation derivative of the
    current layer.

    Raises
    ------
    ValueError
        If ``activation`` is not a supported name.
    """
    # Every derivative below is written in terms of the activation output A,
    # because that is what this function receives.
    if activation == 'relu':
        local_grad = der_relu(A)
    elif activation == 'sigmoid':
        # A is already sigmoid(Z), so the derivative is A * (1 - A).
        # der_sigmoid(A) would apply the sigmoid a second time.
        local_grad = np.multiply(A, 1 - A)
    elif activation == 'tanh':
        local_grad = der_tanh(A)
    else:
        raise ValueError(
            "Unknown activation %r; expected 'relu', 'sigmoid' or 'tanh'" % (activation,)
        )

    return np.multiply(np.dot(w.T, dz), local_grad)

In [None]:
def back_prop(y, cache, params, act_funcs):
    """Compute gradients for every layer, walking from the output layer back.

    ``cache`` is expected to hold 'A0' plus a 'Z<l>'/'A<l>' pair per layer,
    so the layer count is ``len(cache) // 2``. The output-layer error is
    taken as ``A - y``; earlier layers get theirs from back_prop_activation.

    Returns a dict with 'dz<l>', 'dw<l>' and 'db<l>' for each layer.
    """
    n_layers = len(cache) // 2
    grads = {}
    m = y.shape[1]
    for idx in range(n_layers, 0, -1):
        here, above, below = str(idx), str(idx + 1), str(idx - 1)
        if idx == n_layers:
            # Output layer: error is prediction minus label.
            delta = cache['A' + here] - y
        else:
            delta = back_prop_activation(params['w' + above], grads['dz' + above], cache['A' + here], act_funcs[idx])
        grads['dz' + here] = delta
        # Average over the m examples.
        grads['dw' + here] = 1/m * np.dot(delta, cache['A' + below].T)
        grads['db' + here] = 1/m * np.sum(delta, axis = 1, keepdims=True)
    return grads

In [None]:
def update_params(L, params, grads, learning_rate = 0.1):
    """One gradient-descent step on every 'w<l>' and 'b<l>' for l in 1..L-1.

    ``params`` is updated in place (each key is rebound to a new array) and
    also returned. The gradient for key ``k`` is read from ``grads['d' + k]``.
    """
    for idx in range(1, L):
        for prefix in ('w', 'b'):
            key = prefix + str(idx)
            params[key] = params[key] - learning_rate * grads['d' + key]

    return params

In [None]:
def nn_model(X, Y, learning_rate, num_iterations, layers, activation_functions):
    """Train the network with plain gradient descent and return its parameters.

    Each iteration runs a forward pass, computes the cost, back-propagates and
    applies one update. The layer count is printed once at the start and the
    cost is printed every 5000 iterations.

    Note: the RNG is seeded with 3 here, but init_params re-seeds it itself
    before drawing the weights.
    """
    np.random.seed(3)
    n_layers = len(layers)
    print(n_layers)
    weights = init_params(layers)
    output_key = 'A' + str(n_layers - 1)  # activation of the last layer
    for step in range(num_iterations):
        pass_cache = forward_prop(X, weights, activation_functions)
        cost = calc_cost(Y, pass_cache[output_key])
        if step % 5000 == 0:
            print(cost)
        gradients = back_prop(Y, pass_cache, weights, activation_functions)
        weights = update_params(n_layers, weights, gradients, learning_rate)

    return weights

In [None]:
def predict(x_test, params, activations):
    """Classify ``x_test``: True where the final-layer activation is >= 0.5.

    The final layer index is half the number of entries in ``params``
    (one 'w'/'b' pair per layer).
    """
    last_layer = len(params) // 2
    outputs = forward_prop(x_test, params, activations)['A' + str(last_layer)]
    return outputs >= 0.5

In [None]:
def plot_decision_boundary(model, X, y):
    """Draw the model's decision regions with the data points on top.

    ``model`` is called once with an (n_points, 2) array of grid coordinates
    and its result is reshaped to the grid. ``X`` is indexed as
    (2, n_samples); ``y`` must be 2-D, with its second axis holding the labels.
    """
    padding = 1
    step = 0.01  # grid resolution
    x1, x2 = X[0, :], X[1, :]

    # Regular grid covering the data range plus some padding on every side.
    x1_axis = np.arange(x1.min() - padding, x1.max() + padding, step)
    x2_axis = np.arange(x2.min() - padding, x2.max() + padding, step)
    xx, yy = np.meshgrid(x1_axis, x2_axis)

    # Evaluate the model on every grid point, then fold back to the grid shape.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = model(grid_points)
    Z = Z.reshape(xx.shape)
    labels = y.reshape(y.shape[1])

    # Filled contour for the regions, scatter for the samples.
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(x1, x2, c=labels, cmap=plt.cm.Spectral)

In [None]:
def accuracy(predictions, y_test):
    """Percentage of predictions that match the labels, as a string like '92.5%'.

    Parameters
    ----------
    predictions : numpy.ndarray
        Predicted labels (boolean or 0/1).
    y_test : numpy.ndarray
        True labels; ``y_test.shape[1]`` is used as the number of examples.

    Returns
    -------
    str
        The accuracy in percent followed by '%'.
    """
    # Count mismatches with != rather than subtracting: NumPy refuses to
    # subtract two boolean arrays, and predict() returns booleans.
    mismatches = np.sum(np.asarray(predictions) != np.asarray(y_test))
    acc = 100 - ((mismatches / y_test.shape[1]) * 100)
    return str(acc) + "%"

In [None]:
# Synthetic two-class "moons" data; make_moons returns a (features, labels) pair.
dataset = make_moons(noise=0.3, random_state=2)

X = dataset[0]
Y = dataset[1]

# Seed the split as well, so Restart & Run All yields the same train/test
# partition (and therefore the same training run and accuracy) every time.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=.4, random_state=2)

# The network code expects labels as a single row: shape (1, n_samples).
y_test = y_test.reshape(1, y_test.shape[0])
y_train = y_train.reshape(1, y_train.shape[0])

# ...and features with one column per sample.
X_train = X_train.T
X_test = X_test.T

In [None]:
# Rebind X and Y for plotting: features transposed to one column per sample,
# labels as a flat vector.
X = dataset[0].T
Y = dataset[1].reshape(dataset[1].shape[0])

# Scatter of the full dataset, coloured by class.
plt.scatter(X[0, :], X[1, :], c = Y);

In [None]:
# Architecture: input layer, five hidden tanh layers of 4 units, one sigmoid output unit.
# act_funcs[0] is only a placeholder for the input layer.
layers = [X_train.shape[0]] + [4] * 5 + [1]
act_funcs = ['input'] + ['tanh'] * 5 + ['sigmoid']

In [None]:
# Train on the training split; the cost is printed every 5000 iterations.
params = nn_model(
    X_train,
    y_train,
    learning_rate=0.65,
    num_iterations=10000,
    layers=layers,
    activation_functions=act_funcs,
)

In [None]:
# Boolean class predictions for the held-out split.
predictions = predict(x_test=X_test, params=params, activations=act_funcs)

In [None]:
# The plotting helper hands the model grid points as rows (n_points, 2),
# while predict expects one column per sample, hence the transpose.
plot_decision_boundary(
    lambda grid: predict(grid.T, params, act_funcs),
    X_test,
    y_test,
)


In [None]:
# Test-set accuracy as a percentage string; being the last expression, it is the cell's output.
accuracy(predictions, y_test)
