# First Model

In [1]:
import numpy as np

In [2]:
# Data
X = [[1,1,0,0,1,1,1,1],[1,1,0,0,0,1,1,1],[1,0,0,0,1,1,1,1],[0,1,1,1,0,0,0,1],[0,0,0,1,0,0,0,0],[0,0,0,1,1,0,0,0]]
Y = [1,1,1,0,0,0]
labels = [0, 1]

In [3]:
def create_neural_net(layer_array, input_dims):
    weights = []
    biases = []
    activations = []
    
    for i in range(len(layer_array)):
        node_num = layer_array[i][0]
        weights_of_layer = []
        biases_of_layer = []
        if i == 0:
            last_layer_node_number = input_dims
        else:
            last_layer_node_number = layer_array[i-1][0]
        
        for n in range(0,node_num):
            weights_of_node = []
            for l in range(0, last_layer_node_number):
                weights_of_node.append(1) 
            weights_of_layer.append(weights_of_node)
            biases_of_layer.append(0)
            
        weights.append(weights_of_layer)
        biases.append(biases_of_layer)
        activations.append(layer_array[i][1])
    return [weights, biases, activations]

In [4]:
layer_array = [[len(labels), 'sigmoid']]
input_dims = 8
neural_net = create_neural_net(layer_array, input_dims)

print(' weights:',neural_net[0],'\n\n biases:',neural_net[1],'\n\n activations:', neural_net[2])

 weights: [[[1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]]] 

 biases: [[0, 0]] 

 activations: ['sigmoid']


In [5]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
    
def sigmoid_deriv(x):
    return x * (1 - x)

def relu(x):
    if x < 0:
        return 0
    else:
        return x

In [6]:
def predict_ratio(data, neural_net):
    weights = neural_net[0]
    biases = neural_net[1]
    activations = neural_net[2]
    
    layer_num = len(weights)
    
    for l in range(0, layer_num):
        data = np.dot(weights[l], data)
        for t in range(len(data)):
            data[t] += biases[l][t]
        if activations[l] == 'sigmoid':
            data = sigmoid(data)
        elif activations[l] == 'relu':
            data = relu(data)
        else:
            data = sigmoid(data)
            print('activation function', activations[l], 'cannot be found. Sigmoid is used')   
    return data

def predict(data, neural_net):
    data = predict_ratio(data, neural_net)
    
    class_num = len(data)
    
    highest_class = None
    highest_class_probability = -1
    
    for i in range(0, class_num):
        if highest_class == None:
            highest_class = i
            highest_class_probability = data[i]
        elif data[i] > highest_class_probability:
            highest_class = i
            highest_class_probability = data[i]
            
    return highest_class, highest_class_probability

In [7]:
def train_network(X, Y, labels, neural_net, epochs=1000):
    for epoch in range(0, epochs):
        for d in range(0, len(X)):
            prediction = predict_ratio(X[d], neural_net)
            true_prediction = []
            for i in range(0, len(labels)):
                true_prediction.append(0)
            true_prediction[labels.index(Y[d])] = 1
            
            errors = []
            for t in range(len(prediction)):
                errors.append(true_prediction[t] - prediction[t]) 
            adjust_deriv = errors * sigmoid_deriv(prediction)
            
            for k in range(0, len(adjust_deriv)):
                adjustment = np.dot(X[d], adjust_deriv[k])
                neural_net[0][0][k] += adjustment
    return neural_net

In [8]:
neural_net = train_network(X, Y, labels, neural_net, epochs=1000)

In [9]:
for i in range(len(X)):
    print(predict(X[i], neural_net))

(1, 0.9919398375371878)
(1, 0.9936373425420446)
(1, 0.9925307416847557)
(0, 0.9903682943291514)
(0, 0.9836167677309535)
(0, 0.9876082070557368)


# Second Model with MNIST dataset

In [None]:
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
import time

x, y = fetch_openml('mnist_784', version=1, return_X_y=True)
x = (x/255).astype('float32')
y = to_categorical(y)

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state=42)

In [10]:
import numpy as np

class DeepNeuralNetwork():
    def __init__(self, sizes, epochs=10, l_rate=0.001):
        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = l_rate
        self.params = self.initialization()

    def sigmoid(self, x, derivative=False):
        if derivative:
            return (np.exp(-x))/((np.exp(-x)+1)**2)
        return 1/(1 + np.exp(-x))

    def softmax(self, x):
        exps = np.exp(x - x.max())
        return exps / np.sum(exps, axis=0)

    def initialization(self):
        input_layer=self.sizes[0]
        hidden_1=self.sizes[1]
        hidden_2=self.sizes[2]
        output_layer=self.sizes[3]

        params = {
            'W1':np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
            'W2':np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
            'W3':np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer)
        }

        return params

    def forward_pass(self, x_train):
        params = self.params
        params['A0'] = x_train
        params['Z1'] = np.dot(params["W1"], params['A0'])
        params['A1'] = self.sigmoid(params['Z1'])
        params['Z2'] = np.dot(params["W2"], params['A1'])
        params['A2'] = self.sigmoid(params['Z2'])
        params['Z3'] = np.dot(params["W3"], params['A2'])
        params['A3'] = self.softmax(params['Z3'])

        return params['A3']

    def backward_pass(self, y_train, output):
   

        params = self.params
        change_w = {}

        error = output - y_train
        change_w['W3'] = np.dot(error, params['A3'])

        error = np.multiply( np.dot(params['W3'].T, error), self.sigmoid(params['Z2'], derivative=True) )
        change_w['W2'] = np.dot(error, params['A2'])

        error = np.multiply( np.dot(params['W2'].T, error), self.sigmoid(params['Z1'], derivative=True) )
        change_w['W1'] = np.dot(error, params['A1'])

        return change_w

    def update_network_parameters(self, changes_to_w):
        for key, value in changes_to_w.items():
            for w_arr in self.params[key]:
                w_arr -= self.l_rate * value

    def compute_accuracy(self, x_val, y_val):
        predictions = []
        for x, y in zip(x_val, y_val):
            output = self.forward_pass(x)
            pred = np.argmax(output)
            predictions.append(pred == y)
        
        summed = sum(pred for pred in predictions) / 100.0
        return np.average(summed)

    def train(self, x_train, y_train, x_val, y_val):
        start_time = time.time()
        for iteration in range(self.epochs):
            for x,y in zip(x_train, y_train):
                output = self.forward_pass(x)
                changes_to_w = self.backward_pass(y, output)
                self.update_network_parameters(changes_to_w)
            
            accuracy = self.compute_accuracy(x_val, y_val)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2}'.format(
                iteration+1, time.time() - start_time, accuracy
            ))
            
dnn = DeepNeuralNetwork(sizes=[784, 128, 64, 10])
dnn.train(x_train, y_train, x_val, y_val)

Epoch: 1, Time Spent: 32.31s, Accuracy: 94.5
Epoch: 2, Time Spent: 66.70s, Accuracy: 94.5
Epoch: 3, Time Spent: 101.08s, Accuracy: 94.5
Epoch: 4, Time Spent: 134.38s, Accuracy: 94.5
Epoch: 5, Time Spent: 171.25s, Accuracy: 94.5
Epoch: 6, Time Spent: 206.02s, Accuracy: 94.5
Epoch: 7, Time Spent: 238.57s, Accuracy: 94.5
Epoch: 8, Time Spent: 273.84s, Accuracy: 94.5
Epoch: 9, Time Spent: 310.04s, Accuracy: 94.5
Epoch: 10, Time Spent: 344.32s, Accuracy: 94.5
