In [1]:
import numpy as np
import matplotlib.pyplot as plt
import gzip
import shutil
import pandas as pd
import zipfile as zp
import pickle
import os
import random
from scipy.stats import truncnorm
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker

# with open('t10k-images-idx3-ubyte', 'rb') as f_in:
#     with gzip.open('t10k-images-idx3-ubyte.gz', 'wb') as f_out:
#         shutil.copyfileobj(f_in, f_out)

num_of_labels = 10

# Assign Zip Files to variable
zf = zp.ZipFile('fashion-mnist_train.zip')
zf1 = zp.ZipFile('fashion-mnist_test.zip')

# Load CSV files from Zip Files
train_data = np.loadtxt(zf.open('fashion-mnist_train.csv'), delimiter=',')
test_data = np.loadtxt(zf1.open('fashion-mnist_test.csv'), delimiter=',')

# Display images from 1 to 10
# for i in range(10):
#     img = train_imgs[i].reshape((28,28))
#     plt.imshow(img, cmap="Greys")
#     plt.show()

# Map image data values into intervals [0.01, 0.99]
fac = 0.99 / 255
add_fac = 0.01
train_imgs = np.asfarray(train_data[:, 1:], dtype='float') * fac + add_fac
test_imgs = np.asfarray(test_data[:, 1:], dtype='float') *fac + add_fac
train_labels = np.asfarray(train_data[:, :1], dtype='float')
test_labels = np.asfarray(test_data[:, :1], dtype='float')

lr = np.arange(num_of_labels)
# transform labels into one hot representation
train_labels_one_hot = (lr==train_labels).astype(float)
test_labels_one_hot = (lr==test_labels).astype(float)
# we don't want zeroes and ones in the labels neither:
train_labels_one_hot[train_labels_one_hot==0] = 0.01
train_labels_one_hot[train_labels_one_hot==1] = 0.99
test_labels_one_hot[test_labels_one_hot==0] = 0.01
test_labels_one_hot[test_labels_one_hot==1] = 0.99

# Create Pickle file from previous data
with open(os.path.join(".","pkl_fashionmnist.pkl"), "bw") as fh:
    data = (train_imgs, 
            test_imgs, 
            train_labels,
            test_labels,
            train_labels_one_hot,
            test_labels_one_hot)
    pickle.dump(data, fh)

def relu(x):
    return np.maximum(0.0, x)

def drelu(x):
    row = len(x)
    column = len(x[0])
    
    for r in range(row):
        for c in range(column):
            if x[r, c]:
                return 0
            else:
                return 1
            
def softmax(x):
    assert len(x.shape) == 2
    s = np.max(x, axis=1)
    s = s[:, np.newaxis] # necessary step to do broadcasting
    e_x = np.exp(x - s)
    div = np.sum(e_x, axis=1)
    div = div[:, np.newaxis] # dito
    return e_x / div

def leaky_relu(x):
    _x = x.copy()
    _x[x < 0] = _x[x < 0] * 0.01
    return _x

def dleaky_relu(x):
    out = np.ones_like(x)
    out[x < 0] *= 0.01
    return out

def dlrelu(x, alpha=0.01):
    dx = np.ones_like(x)
    dx[x < 0] = alpha
    return dx
            
@np.vectorize
def sigmoid(x):
    return 1 / (1 + np.e ** -x)

def dsigmoid(x):
    output = 1/(1+np.e ** -x)
    return output * (1 - output)

def truncated_normal(mean=0, sd=1, low=0, upp=10):
    return truncnorm((low - mean) / sd, 
                     (upp - mean) / sd, 
                     loc=mean, 
                     scale=sd)
class NeuralNetwork:
    
    def __init__(self, 
                 no_of_in_nodes, 
                 no_of_out_nodes, 
                 no_of_hidden_nodes,
                 activation_function,
                 learning_rate):
        self.no_of_in_nodes = no_of_in_nodes
        self.no_of_out_nodes = no_of_out_nodes
        self.no_of_hidden_nodes = no_of_hidden_nodes
        self.learning_rate = learning_rate 
        self.create_weight_matrices()
        
        if activation_function == 'sigmoid':
            self.activation = sigmoid
            self.dactivation = dsigmoid
            
        if activation_function == 'softmax':
            self.activation = sigmoid
            self.dactivation = softmax
        
        if activation_function == 'relu':
            self.activation = relu
            self.dactivation = drelu
            
        if activation_function == 'leakyrelu':
            self.activation = leaky_relu
            self.dactivation = softmax
        
        
    def create_weight_matrices(self):
        """ A method to initialize the weight matrices of the neural network"""
        rad = 1 / np.sqrt(self.no_of_in_nodes)
        X = truncated_normal(mean=0, 
                             sd=1, 
                             low=-rad, 
                             upp=rad)
        self.wih = X.rvs((self.no_of_hidden_nodes, 
                                       self.no_of_in_nodes))
        rad = 1 / np.sqrt(self.no_of_hidden_nodes)
        X = truncated_normal(mean=0, 
                             sd=1, 
                             low=-rad, 
                             upp=rad)
        self.who = X.rvs((self.no_of_out_nodes, 
                                        self.no_of_hidden_nodes))
        
    
    def train_single(self, input_vector, target_vector):
        """
        input_vector and target_vector can be tuple, 
        list or ndarray
        """
        
        output_vectors = []
        input_vector = np.array(input_vector, ndmin=2).T
        target_vector = np.array(target_vector, ndmin=2).T
        
        output_vector1 = np.dot(self.wih, 
                                input_vector)
        output_hidden = self.activation(output_vector1)
        
        output_vector2 = np.dot(self.who, 
                                output_hidden)
        
        output_network = self.dactivation(output_vector2)
        
        output_errors = target_vector - output_network
        # update the weights:
        tmp = output_errors * output_network * \
              (1.0 - output_network)     
        tmp = self.learning_rate  * np.dot(tmp, 
                                           output_hidden.T)
        self.who += tmp
        # calculate hidden errors:
        hidden_errors = np.dot(self.who.T, 
                               output_errors)
        # update the weights:
        tmp = hidden_errors * output_hidden * (1.0 - output_hidden)
        self.wih += self.learning_rate * np.dot(tmp, input_vector.T)
        
    def train(self, data_array, 
              labels_one_hot_array,
              epochs=1,
              intermediate_results=False):
        intermediate_weights = []
        for epoch in range(epochs):  
            print("*", end="")
            for i in range(len(data_array)):
                self.train_single(data_array[i], 
                                  labels_one_hot_array[i])
            if intermediate_results:
                intermediate_weights.append((self.wih.copy(), 
                                             self.who.copy()))
        return intermediate_weights        
            
    def confusion_matrix(self, data_array, labels):
        cm = {}
        for i in range(len(data_array)):
            res = self.run(data_array[i])
            res_max = res.argmax()
            target = labels[i][0]
            if (target, res_max) in cm:
                cm[(target, res_max)] += 1
            else:
                cm[(target, res_max)] = 1
        return cm
        
    
    def run(self, input_vector):
        """ input_vector can be tuple, list or ndarray """
        
        input_vector = np.array(input_vector, ndmin=2).T
        output_vector = np.dot(self.wih, 
                               input_vector)
        output_vector = self.activation(output_vector)
        
        output_vector = np.dot(self.who, 
                               output_vector)
        output_vector = self.activation(output_vector)
    
        return output_vector
    
    def evaluate(self, data, labels):
        corrects, wrongs = 0, 0
        for i in range(len(data)):
            res = self.run(data[i])
            res_max = res.argmax()
            if res_max == labels[i]:
                corrects += 1
            else:
                wrongs += 1
        return corrects, wrongs
    

In [2]:
# Load data from Pickle file
with open(os.path.join(".","pkl_fashionmnist.pkl"), "br") as fh:
    data = pickle.load(fh)
train_imgs = data[0]
test_imgs = data[1]
train_labels = data[2]
test_labels = data[3]
train_labels_one_hot = data[4]
test_labels_one_hot = data[5]

img_size = 28 # dimensions
num_of_labels = 10 # 0, 1, 2, ... 9
image_pixels = img_size * img_size

epochs = 10

ANN = NeuralNetwork(no_of_in_nodes = image_pixels, 
                    no_of_out_nodes = 10, 
                    no_of_hidden_nodes = 100,
                    activation_function = 'relu',
                    learning_rate = 0.15)
    
    
 
weights = ANN.train(train_imgs, 
                    train_labels_one_hot, 
                    epochs=epochs, 
                    intermediate_results=True)

for i in range(epochs):  
    print("epoch: ", i)
    ANN.wih = weights[i][0]
    ANN.who = weights[i][1]
   
    corrects, wrongs = ANN.evaluate(train_imgs, train_labels)
    print("accuracy train: ", corrects / ( corrects + wrongs))
    corrects, wrongs = ANN.evaluate(test_imgs, test_labels)
    print("accuracy test: ", corrects / ( corrects + wrongs))

**********epoch:  0
accuracy train:  0.29861666666666664
accuracy test:  0.2947
epoch:  1
accuracy train:  0.31995
accuracy test:  0.3149
epoch:  2
accuracy train:  0.23391666666666666
accuracy test:  0.23
epoch:  3
accuracy train:  0.17453333333333335
accuracy test:  0.1714
epoch:  4
accuracy train:  0.1683
accuracy test:  0.1646
epoch:  5
accuracy train:  0.09906666666666666
accuracy test:  0.0995
epoch:  6
accuracy train:  0.09978333333333333
accuracy test:  0.1002
epoch:  7
accuracy train:  0.10015
accuracy test:  0.1007
epoch:  8
accuracy train:  0.10556666666666667
accuracy test:  0.1048
epoch:  9
accuracy train:  0.10351666666666667
accuracy test:  0.1029
