In [1]:
import os
import pandas as pd
import numpy as np
import random
from scipy.stats import truncnorm


### put it in a class

In [2]:
#Sigmoid activation function with forward pass
@np.vectorize
def sigmoid (x):
  return 1/(1+np.e**-x)

In [3]:
#Sigmoid activation function with backward pass
@np.vectorize
def d_sigmoid (x):
  return x*(1.0-x)

In [4]:
#ReLU activation function with forward pass
@np.vectorize
def relu (x):
  return max(0,x)

In [5]:
#ReLU activation function with backward pass
@np.vectorize
def d_relu (x):
  if x<0:
    return 0
  if x>0:
    return 1

In [6]:
def softmax(x):
    a = x - np.max(x, axis=0, keepdims=True)
    new_a = np.exp(a)
    result = new_a / np.new_a(new_a, axis=0, keepdims=True)
    return s

In [7]:
def cross_entropy(target, output):
    return -np.mean(target*np.log(output))

def cross_entropy_matrix(output, target):
    target = np.array(target)
    output = np.array(output)
    product = target*np.log(output)
    errors = -np.sum(product, axis=1)
    m = len(errors)
    errors = np.sum(errors) / m
    return errors

In [8]:
def truncated_normal(mean = 0, sd =1, low = 0, upp = 0):
    return truncnorm((low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)

### class NN


In [9]:
class NeuralNetwork:
    def __init__(self, 
                 no_input_nodes, 
                 no_output_nodes, 
                 no_hidden_nodes, 
                 learning_rate, 
                 no_hidden_layers,
                 activation_function, 
                 activation_derivative):
        
        self.no_input_nodes = no_input_nodes
        self.no_output_nodes = no_output_nodes       
        self.no_hidden_nodes = no_hidden_nodes
        self.learning_rate = learning_rate 
        self.no_hidden_layers = no_hidden_layers          
        self.activation_function = activation_function
        self.activation_derivative = activation_derivative
        self.weights()

   #method to initialise the weight matrices of the NN     
    def weights(self):
        
        weights =[]
        tn = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        
        
        #shape parameters of wih
        n = self.no_input_nodes * self.no_hidden_nodes
        #random variates of weights connecting input and hidden nodes
        self.wih = tn.rvs(n).reshape(self.no_hidden_nodes, self.no_input_nodes )
        weights.append(self.wih)
        
        if self.no_hidden_layers>=1:
            for x in range(self.no_hidden_layers):
                n = self.no_hidden_nodes * self.no_hidden_nodes
                self.W = tn.rvs(n).reshape(self.no_hidden_nodes, self.no_hidden_nodes)
                weights.append(self.W)

        #shape parameters of who
        n = self.no_hidden_nodes * self.no_output_nodes
        #weights connecting hidden and output nodes
        self.who = tn.rvs(n).reshape(self.no_output_nodes,self.no_hidden_nodes)
        weights.append(self.who)
        return weights
    
    def create_biases(self):    
        bias = []
        tn = truncated_normal(mean=2, sd=1, low=-0.5, upp=0.5)
        if self.no_hidden_layers>=1:
            for x in range(self.no_hidden_layers):
                self.b = tn.rvs(self.no_hidden_nodes).reshape(-1,1)  
                bias.append(self.b)
        self.b = tn.rvs(self.no_output_nodes).reshape(-1,1) 
        bias.append(self.b)
        return bias

    def forward (self, X): 
        weights = self.weights()
        bias = self.create_biases()
        forward_l = []
        Z=[]
        A=[]
        A.append(X)
        a = X.T

        for x in range(1, self.no_hidden_layers):
            z = np.dot(weights[x], a) + bias[x]
            a = self.activation_function(z)
            Z.append(z)
            A.append(a)
        return Z, A, forward_l   # A3 z3 A2 Z2 A1 Z1
  
    
    def backprop(self, X, target):
        dZ=[]
        dW = []
        db=[]
        forward = self.forward(X)
        forward_A = forward[1]
        forward_Z = forward[0]
        weights = self.weights()
        forward_a = sorted(forward_A, reverse = True) #A3, A2, A1
        forward_z = sorted(forward_Z, reverse = True) #Z3, Z2, Z1
        del forward_z[0] 
        new_a = forward_a[2:]
        weights_l = sorted(weights, reverse = True) # W3, W2, W1
        
        
        m = X.shape[0]
        # deltas
        #output
        dZ3 = forward[-1] - target                            
        dW3 = dZ3.dot(forward[-2].T)/m                        
        db3 = np.sum(dZ3, axis=1, keepdims=True)/m 
        dZ.append(dZ3)
        dW.append(dW3)
        db.append(db3)
        #hidden 
        
        for x in range(new_a): 
            dZ = weights_l[x].T.dot(dZ3)*self.activation_derivative(forward_z[x])   
            dW = dZ.dot(new_a[x].T)/m  
            db = np.sum(dZ, axis=1, keepdims=True)/m  
            dZ3 = dZ
            dZ.append(dZ)
            dW.append(dW)
            db.append(db)
            return dZ, dW, db
        
        #input
        dZ1 = weights_A[1].T.dot(dZ[-1])*self.hidden_derivative_1(forward_Z[0])  
        dW1 = dZ1.dot(X)/m                                     
        db1 = np.sum(dZ1, axis=1, keepdims=True)/m  
        dZ.append(dZ1)
        dW.append(dW1)
        db.append(db1)
     
        # Update
        lr = self.learning_rate
        W=[]
        B=[]
        for x in len (dZ):
            w -=lr*dW[x]
            b -=lr*db[x]
            W.append(w)
            B.append(b)
            return W, B
            
    def predict(self, X_predict):
        forward_list = self.forward(X_predict)
        A_list = forward_list[1]
        A = A_list[-1]
        return A
    
    
    def predict_class(self, X_predict):
        A = self.predict(X_predict)
        y_pred = np.argmax(A, axis=0)
        return y_pred
    
    def run(self, X_train, target, epochs=10):
        costs = []
        for i in range(epochs):
            A = self.predict(X_train)
            cost = cross_entropy(target, A)
            costs.append(cost)
            if i%100 == 0:
                print(f'Loss after epoch {i} : {cost}')
            self.backprop(X_train, target)
        return costs  
    
    
    def evaluate(self, X_evaluate, target):
        '''
        return accuracy score, target must be the classes and not the hot encoded target
        '''
        
        y_pred = self.predict_class(X_evaluate)
        accuracy = classification_rate(y_pred, target)
        print('Accuracy :', accuracy)
        return accuracy
          
 

In [10]:
from tensorflow.keras.datasets import fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

X_train.reshape(60000,784)
X_test.reshape(10000,784)

X_train = X_train / 255 
X_test = X_test / 255 

from tensorflow.keras.utils import to_categorical
y_train = to_categorical(y_train, 10).T
y_test = to_categorical(y_test, 10).T


D = X_train.shape[1]
K = y_train.shape[0]
M=5

the value of each pixel in the image is in the interval [0.255]. 
implementing a normalization function, and then apply it to each image in the training and test datasets.

In [11]:
# X_train = x_train / 255 
# X_test = x_test / 255 

In [12]:
# image_size = 28 #width and length

In [13]:
# image_pixels = 784

In [14]:
nn = NeuralNetwork(no_input_nodes =D, 
                 no_output_nodes= K, 
                 no_hidden_nodes = D+1, 
                 learning_rate = 0.01, 
                 no_hidden_layers = 1,  
                 activation_function = relu, 
                 activation_derivative = d_relu)

In [15]:
c = nn.run(X_train, y_train, epochs=5)

  return -np.mean(target*np.log(output))


ValueError: operands could not be broadcast together with shapes (10,60000) (60000,28,28) 