Used resources:
- Lab 6 and 7


In [1]:
import numpy as np
from scipy.stats import truncnorm
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical

In [2]:
#Sigmoid activation function with forward pass
@np.vectorize
def sigmoid(x):
    return 1 / (1 + np.e ** -x)

#Sigmoid activation function with backward pass
@np.vectorize
def d_sigmoid(x):
    return x * (1.0 - x)

#ReLU activation function with forward pass
@np.vectorize
def relu (x):
  return max(0,x)

#ReLU activation function with backward pass
@np.vectorize
def d_relu (x):
  if x<0:
    return 0
  if x>0:
    return 1

#loss method - cross entropy
def cross_entropy(output, target):
    return -np.mean(target*np.log(output))

# output function - softmax
def softmax(x):
    a = x - np.max(x, axis=0, keepdims=True)
    new_a = np.exp(a)
    result = new_a / np.new_a(new_a, axis=0, keepdims=True)
    

def truncated_normal(mean=0, sd=1, low=0, upp=10):
    return truncnorm(
        (low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)

In [3]:
class NeuralNetwork:
    
    def __init__(self, 
                 no_nodes,
                 learning_rate,
                 epochs):
        self.no_nodes = no_nodes
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.create_weight_matrices()
        
    # bring as an output weigths list with weight considering input nodes and output nodes    
    def create_weight_matrices(self): 
        """ A method to initialize the weight matrices of the neural network"""
        weights = []
        for i in range(len(self.no_nodes)-1):
            rad = 0.5 
            X = truncated_normal(mean=1, sd=1, low=-rad, upp=rad)
            weight = X.rvs((self.no_nodes[i], self.no_nodes[i+1]))
            weights.append(weight)
        return weights  
    
    #bias
    def f_bias (self):
        biases = []
        for i in range(1, len(self.no_nodes)):
            rad = 0.5 
            tn = truncated_normal(mean=2, sd=1, low=-rad, upp=rad)
            bias = tn.rvs(self.no_nodes[i]).reshape(-1,1) 
            biases.append(bias)
        return biases
    
    #forward pass
    def forward(self, X_train_trans):
        biases = self.f_bias()
        weights = self.create_weight_matrices()
        output_list = []
        for i in range(len(weights)):
            #input vector
            if i == 0:
                output_vector = np.dot(weights[i].T, X_train_trans) + biases[i]
                output_vector_in = activation_function(output_vector)
                output_list.append(output_vector_in)
            else:
                output_vector = np.dot (weights[i].T, output_list[i-1]) + biases[i]
                output_vector_out = activation_function(output_vector)
                output_list.append(output_vector_out)
        return output_vector_out, output_list
    

    #training with forward pass and backpropagation
    def train(self, X_train, y_train):
        weights = self.create_weight_matrices()
        # input_vector and target_vector can be tuple, list or ndarray
        X_train_trans = np.array(X_train, ndmin=2).T
        y_train_trans = np.array(y_train, ndmin=2).T
    
        for i in range(self.epochs):
        
            # forward pass
            forward = self.forward(X_train_trans)
            output = forward[0]            
            output_list = forward[1]
            
            #backprop   
            for i in reversed(range(len(weights))):
               
                if i == (len(weights)-1):
                    # derivative of the loss for the output
                    output_errors = (y_train_trans - output)
                    # derivative of the activation function
                    derivative_output = activation_derivative (output)  
                    tmp = output_errors * derivative_output
                    # multiply with the previous activation (output_vector_hidden)
                    who_update = self.learning_rate * (np.dot(tmp, output_list[i-1].T))
                    weights[i] += who_update.T 

                elif i == 0:
                    #from hidden to input layer
                    hidden_errors = np.dot(weights[i+1], output_errors * derivative_output)
                    derivative_hidden = activation_derivative(output_list[i])  
                    tmp = hidden_errors * derivative_hidden
                    wih_update = self.learning_rate * np.dot(tmp, X_train_trans.T)
                    weights[i] += wih_update.T

                elif i > 0 and i < (len(weights)-1):
                   # hidden layers
                    hidden_errors = np.dot(weights[i+1], output_errors * derivative_output)
                    derivative_hidden = activation_derivative(output_list[i])  
                    tmp = hidden_errors * derivative_hidden
                    whh_update = self.learning_rate * np.dot(tmp, output_list[i-1].T)
                    weights[i] += whh_update.T
                
        return weights


    def run(self, X_test, weights):
        biases = self.f_bias()
        for i in range(len(weights)):
            if i == 0:
                input_vector = np.array(X_test, ndmin=2).T
                output_vector = np.dot(weights[0].T, input_vector) + biases[i]
                output_vector = activation_function(output_vector)
           
            else:
                output_vector = np.dot(weights[i].T, output_vector) + biases[i]
                output_vector = activation_function(output_vector)                
            y_hat = output_vector.T
        return y_hat

In [4]:
#introducing dataset and preparing for manipulation
fashion = fashion_mnist.load_data()

(X_train, y_train),(X_test, y_test) = fashion

In [5]:
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")

In [6]:
mean = np.mean (X_train, axis = (0,1,2))
std = np.std (X_train, axis = (0,1,2))

X_train = (X_train- mean)/(std+1e-7)
X_test = (X_test- mean)/(std+1e-7)

#reshaping datset input
X_train = X_train.reshape((X_train.shape[0], 28*28))
X_test = X_test.reshape((X_test.shape[0], 28*28))

#onehot encoding the output
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [7]:
activation_function = sigmoid
activation_derivative = d_sigmoid

In [8]:
model = NeuralNetwork([784, 32, 10], 0.001, 2)

In [9]:
new_weights = model.train(X_train, y_train)

In [10]:
y_hat = model.run(X_test, new_weights)

In [16]:
y_hat[y_hat>0.5]=1
y_hat[y_hat<0.5]=0
accuracy = np.mean(sum(y_hat==y_test)/len(y_hat))
print(f"Accuracy :  {accuracy}")

Accuracy :  0.8999
