In [1]:
import sys
import time
import pickle
import gzip
from random import randint
from scipy import misc
from scipy import special
import scipy.ndimage
from scipy.sparse import csc_matrix, issparse
import numpy as np
import datetime as dt
from sklearn.cluster import KMeans,MiniBatchKMeans
import matplotlib.pyplot as plt
import json
import csv
import collections
import math
import sys

In [2]:
DATA_PATH = 'data/mnist/'

IMAGES_TRAIN = 'data_training'
IMAGES_TEST = 'data_testing'

RANDOM_SEED = 42
N_CLASSES = 10
N_FEATURES = 28 * 28

In [3]:
data_training = DATA_PATH+IMAGES_TRAIN
data_testing = DATA_PATH+IMAGES_TEST
ft = gzip.open(data_training, 'rb')
TRAINING = pickle.load(ft)
ft.close()
ft = gzip.open(data_testing, 'rb')
TESTING = pickle.load(ft)
ft.close()

# Neural Network

In [4]:
class Neural_Network:
    
    def __init__(self, neurons, batchsize, stop_function, stop_parameter):
        self.input_size = N_FEATURES
        self.output_size = N_CLASSES
        self.neurons = neurons
        self.batchsize = batchsize
        self.stop_f = stop_function
        self.stop_p = stop_parameter
        self.best = 0.
        self.same = 0
        self.iteration = 0
        
        # Standardize random weights
        np.random.seed(RANDOM_SEED)
        hidden_layer = np.random.rand(self.neurons, self.input_size + 1) / self.neurons
        output_layer = np.random.rand(self.output_size, self.neurons + 1) / self.output_size
        self.layers = [hidden_layer, output_layer]

    def train(self, training, testing):
        
        accu_train = [0.,0.]
        
        # Batch Setting
        len_batch_train = len(training[0])
        len_batch_test = len(testing[0])
        if(self.batchsize > 0 and self.batchsize <= 1):
            len_batch_train = int(np.ceil(len_batch_train * self.batchsize))
            len_batch_test = int(np.ceil(len_batch_test * self.batchsize))
        
        # Start prints 
        self.start_time = dt.datetime.now()
        print('-- Training Session Start (%s) --' % (self.start_time))
        typeTrainingPrint = "Stop Function: "    
        if self.stop_f == 0:
            typeTrainingPrint += str(self.stop_p)+" epochs"
        elif self.stop_f == 1:
            typeTrainingPrint += str(self.stop_p)+" epoch(s) w/o improvements"
        elif self.stop_f == 2:
            typeTrainingPrint += "improvements below "+str(self.stop_p)+"%"
        print('\nNeurons: %d\nBatch Train: %d\nBatch Test: %d\n%s\n' % (self.neurons,len_batch_train,len_batch_test,typeTrainingPrint))
        
        # Divide training and testing batches
        test_output = testing[0:len_batch_test][0:len_batch_test]
        test_input = training[0:len_batch_train][0:len_batch_train]
        inputs = training[0][0:len_batch_train]
        targets = np.zeros((len_batch_train, 10))
        for i in range(len_batch_train):
            targets[i, training[1][i]] = 1

        # Performs iterations
        while not self.is_stop_function_enabled(accu_train[1]):
            
            self.iteration += 1
            
            for input_vector, target_vector in zip(inputs, targets):
                self.backpropagate(input_vector, target_vector)
            
            # Accuracy
            accu_test = self.accu(test_output)
            accu_train = self.accu(test_input)
            
            # Messages
            if (self.iteration == 1 or self.iteration % 10 == 0):
                self.print_message_iter(self.iteration,accu_test,accu_train,self.ETAepoch(self.start_time))
                
        # Print last epoch
        if (self.iteration % 10 != 0):
            self.print_message_iter(self.iteration,accu_test,accu_train,self.ETAepoch(self.start_time))

        # Final message
        print('\n-- Training Session End (%s) --' % (dt.datetime.now()))

    def feed_forward(self, input_vector):
        outputs = []
        for layer in self.layers:
            input_with_bias = np.append(input_vector, 1)   # Ajout constante
            output = np.inner(layer, input_with_bias)
            output = special.expit(output)
            outputs.append(output)
            # The output is the input of the next layer
            input_vector = output
        return outputs

    def backpropagate(self, input_vector, target):
        c = 10**(-4) + 10**(-1)/math.sqrt(self.iteration)  # Learning coefficient
        hidden_outputs, outputs = self.feed_forward(input_vector)

        # Calculation of partial derivatives for the output layer and subtraction
        output_deltas = outputs * (1 - outputs) * (outputs - target)
        self.layers[-1] -= c*np.outer(output_deltas, np.append(hidden_outputs, 1))

        # Calculation of partial derivatives for the hidden layer and subtraction
        hidden_deltas = hidden_outputs * (1 - hidden_outputs) * np.dot(np.delete(self.layers[-1], self.neurons, 1).T, output_deltas)
        self.layers[0] -= c*np.outer(hidden_deltas, np.append(input_vector, 1))

    def predict(self, input_vector):
        return self.feed_forward(input_vector)[-1]

    def predict_one(self, input_vector):
        return np.argmax(self.feed_forward(input_vector)[-1])

    def accu(self, testing_batch):
        res = np.zeros((10, 2))
        for k in range(len(testing_batch[1])):
            if self.predict_one(testing_batch[0][k]) == testing_batch[1][k]:
                res[testing_batch[1][k]] += 1
            else:
                res[testing_batch[1][k]][1] += 1
        total = np.sum(res, axis=0)
        each = [res[k][0]/res[k][1] for k in range(len(res))]
        min_c = sorted(range(len(each)), key=lambda k: each[k])[0]
        return np.round([each[min_c]*100, total[0]/total[1]*100, min_c], 2)
    
    def is_stop_function_enabled(self,accuracy):
        if self.stop_f == 0:
            if self.iteration < self.stop_p:
                return False
            else:
                return True
        elif self.stop_f == 1:
            if accuracy > self.best or self.iteration == 0:
                self.same = 0
                self.best = accuracy
                return False
            else:
                self.same += 1
                if self.same < self.stop_p:
                    return False
                else:
                    return True
        elif self.stop_f == 2:
            if accuracy > self.best + self.stop_p or self.iteration == 0:
                self.best = accuracy
                return False
            else:
                return True
    
    def print_message_iter(self,iteration,accu_test,accu_train,eta):
        len_eta = len(eta)
        space_fill = 6 - len_eta
        eta = "("+eta+")"
        for _ in range(space_fill):
            eta += " "
        message = 'Epoch '+str(self.iteration).zfill(3) + " "+eta+" "
        message += 'Accuracy TRAIN: '+str(accu_train[1]).zfill(4)+'%\t'
        message += 'Accuracy TEST: '+str(accu_test[1]).zfill(4)+'%\t'
        message += 'Min: '+ str(accu_test[0]).zfill(4)+ '% ('+str(int(accu_test[2]))+')'
        print(message)
    
    def ETAepoch(self,start_time):
        diff = dt.datetime.now() - self.start_time
        eta = divmod(diff.days * 86400 + diff.seconds, 60)
        if eta[0] != 0:
            ret = str(eta[0])+"m"
        else:
            ret = ""
        ret += str(eta[1])+"s"
        return ret
        
    def getWeights(self):
        return self.layers

In [5]:
nn = Neural_Network(neurons=300,batchsize=1,stop_function=2,stop_parameter=0.01)
nn.train(TRAINING,TESTING)

-- Training Session Start (2019-01-25 18:20:26.504873) --

Neurons: 300
Batch Train: 60000
Batch Test: 10000
Stop Function: improvements below 0.01%

Epoch 001 (40s)    Accuracy TRAIN: 91.11%	Accuracy TEST: 91.53%	Min: 78.59% (5)
Epoch 010 (6m43s)  Accuracy TRAIN: 97.74%	Accuracy TEST: 97.0%	Min: 94.85% (9)
Epoch 020 (13m27s) Accuracy TRAIN: 98.6%	Accuracy TEST: 97.63%	Min: 96.3% (7)
Epoch 030 (20m13s) Accuracy TRAIN: 98.93%	Accuracy TEST: 97.78%	Min: 96.63% (9)
Epoch 033 (22m14s) Accuracy TRAIN: 98.99%	Accuracy TEST: 97.8%	Min: 96.63% (9)

-- Training Session End (2019-01-25 18:42:40.660350) --


# Neural Network with Clustering

In [6]:
def nearest_centroid_index(centers,value):
    centers = np.asarray(centers)
    idx = (np.abs(centers - value)).argmin()
    return idx

def build_clusters(cluster,weights):
    kmeans = MiniBatchKMeans(n_clusters=cluster,random_state=RANDOM_SEED,init_size=3*cluster)
    kmeans.fit(np.hstack(weights).reshape(-1,1))
    return kmeans.cluster_centers_

def redefine_weights(weights,centers):
    arr_ret = np.empty_like(weights).astype(np.int16)
    for i, row in enumerate(weights):
        for j, col in enumerate(row):
            arr_ret[i,j] = nearest_centroid_index(centers,weights[i,j])
    return arr_ret

def idx_matrix_to_matrix(idx_matrix,centers,shape):
    return centers[idx_matrix.reshape(-1,1)].reshape(shape)

def centroid_gradient_matrix(idx_matrix,gradient,cluster):
    return scipy.ndimage.sum(gradient,idx_matrix,index=range(cluster))

In [7]:
class Neural_Network_KM:

    def __init__(self, neurons, batchsize, cluster, pre_weights, stop_function, stop_parameter):
        
        start_setting_time = dt.datetime.now()
        
        self.input_size = N_FEATURES
        self.output_size = N_CLASSES
        self.neurons = neurons
        self.batchsize = batchsize
        self.cluster = cluster
        self.iteration = 0
        self.stop_f = stop_function
        self.stop_p = stop_parameter
        self.best = 0.
        self.same = 0
        
        # Variable for shape
        shape_hidden = (self.neurons,self.input_size+1)
        shape_output = (self.output_size,self.neurons+1)
        self.layers_shape = [shape_hidden,shape_output]
            
        # Initialize cluster for pre-trained weights (dict with centers)
        c_hidden = build_clusters(self.cluster,pre_weights[0])
        c_output = build_clusters(self.cluster,pre_weights[-1])
        self.centers = [c_hidden,c_output]
        
        # Initialize index matrix for pre-trained weights
        idx_hidden = redefine_weights(pre_weights[0],self.centers[0])
        idx_output = redefine_weights(pre_weights[-1],self.centers[-1])
        self.idx_layers = [idx_hidden,idx_output]
        
        # Setting time print    
        end_setting_time = dt.datetime.now() - start_setting_time
        eta = divmod(end_setting_time.days * 86400 + end_setting_time.seconds, 60)
        self.eta_print_setting = str(eta[0])+"m"+str(eta[1])+"s"
    
 

    def train(self, training, testing):
        
        accu_train = [0.,0.]
        
        # Batch Setting
        len_batch_train = len(training[0])
        len_batch_test = len(testing[0])
        if(self.batchsize > 0 and self.batchsize <= 1):
            len_batch_train = int(np.ceil(len_batch_train * self.batchsize))
            len_batch_test = int(np.ceil(len_batch_test * self.batchsize))
        
        # Divide training and testing batches
        test_output = testing[0:len_batch_test][0:len_batch_test]
        test_input = training[0:len_batch_train][0:len_batch_train]
        inputs = training[0][0:len_batch_train]
        targets = np.zeros((len_batch_train, 10))
        for i in range(len_batch_train):
            targets[i, training[1][i]] = 1
        
        # Start prints 
        self.start_time = dt.datetime.now()
        print('-- Training Session Start (%s) --' % (self.start_time))
        typeTrainingPrint = "Stop Function: "    
        if self.stop_f == 0:
            typeTrainingPrint += str(self.stop_p)+" epochs"
        elif self.stop_f == 1:
            typeTrainingPrint += str(self.stop_p)+" epoch(s) w/o improvements"
        elif self.stop_f == 2:
            typeTrainingPrint += "improvements below "+str(self.stop_p)+"%"
        print('\nNeurons: %d\nClusters: %d\nBatch Train: %d (%d%%)\nBatch Test: %d (%d%%)\n%s\n' % (self.neurons,self.cluster,len_batch_train,self.batchsize*100,len_batch_test,self.batchsize*100,typeTrainingPrint))
        
        # Performs iterations
        while not self.is_stop_function_enabled(accu_train[1]):
            
            self.iteration += 1
            
            # Backpropagate with feed forward
            for input_vector, target_vector in zip(inputs, targets):
                weights = []
                for i,c,s in zip(self.idx_layers,self.centers,self.layers_shape):
                    w = idx_matrix_to_matrix(i,c,s)
                    weights.append(w)
                self.backpropagate(input_vector, target_vector, weights)
                
            # Accuracy
            accu_test = self.accu(test_output,weights)
            accu_train = self.accu(test_input,weights)
            
            # Messages
            self.print_message_iter(self.iteration,accu_test,accu_train,self.ETAepoch(self.start_time))
                      
        # Final message
        print('\n-- Training Session End (%s) --' % (dt.datetime.now()))
        print('-------------------------')
        print(self.printLineCSV(self.cluster,self.ETAepoch(self.start_time),accu_train,accu_test))
        print('-------------------------\n')

    def feed_forward(self, input_vector, weights):
        outputs = []
        for w in weights:
            input_with_bias = np.append(input_vector, 1)   # Ajout constante
            output = np.inner(w, input_with_bias)
            output = special.expit(output) # Sigmoid function
            outputs.append(output)
            # The output is the input of the next layer
            input_vector = output
        return outputs

    def backpropagate(self, input_vector, target, weights):
        c = 10**(-4) + (10**(-1))/math.sqrt(self.iteration)  # Learning coefficient
        hidden_outputs, outputs = self.feed_forward(input_vector, weights)

        # Calculation of partial derivatives for the output layer and subtraction
        output_deltas = outputs * (1 - outputs) * (outputs - target)
        gradient = np.outer(output_deltas, np.append(hidden_outputs, 1))
        cg = centroid_gradient_matrix(self.idx_layers[-1],gradient,self.cluster)
        self.centers[-1] = self.centers[-1] - c * np.array(cg).reshape(self.cluster,1)

        # Calculation of partial derivatives for the hidden layer and subtraction
        hidden_deltas = hidden_outputs * (1 - hidden_outputs) * np.dot(np.delete(weights[-1], self.neurons, 1).T, output_deltas)
        gradient = np.outer(hidden_deltas, np.append(input_vector, 1))
        cg = centroid_gradient_matrix(self.idx_layers[0],gradient,self.cluster)
        self.centers[0] = self.centers[0] - c * np.array(cg).reshape(self.cluster,1)
        
    
    
    def predict(self, input_vector, weights):
        return self.feed_forward(input_vector,weights)[-1]

    def predict_one(self, input_vector, weights):
        return np.argmax(self.feed_forward(input_vector,weights)[-1])

    def accu(self, testing, weights):
        res = np.zeros((10, 2))
        for k in range(len(testing[1])):
            if self.predict_one(testing[0][k], weights) == testing[1][k]:
                res[testing[1][k]] += 1
            else:
                res[testing[1][k]][1] += 1
        total = np.sum(res, axis=0)
        each = [res[k][0]/res[k][1] for k in range(len(res))]
        min_c = sorted(range(len(each)), key=lambda k: each[k])[0]
        return np.round([each[min_c]*100, total[0]/total[1]*100, min_c], 2)
    
    
    def is_stop_function_enabled(self,accuracy):
        if self.stop_f == 0:
            if self.iteration < self.stop_p:
                return False
            else:
                return True
        elif self.stop_f == 1:
            if accuracy > self.best or self.iteration == 0:
                self.same = 0
                self.best = accuracy
                return False
            else:
                self.same += 1
                if self.same < self.stop_p:
                    return False
                else:
                    return True
        elif self.stop_f == 2:
            if accuracy > self.best + self.stop_p or self.iteration == 0:
                self.best = accuracy
                return False
            else:
                return True
    
    def print_message_iter(self,iteration,accu_test,accu_train,eta):
        len_eta = len(eta)
        space_fill = 6 - len_eta
        eta = "("+eta+")"
        for _ in range(space_fill):
            eta += " "
        message = 'Epoch '+str(self.iteration).zfill(3) + " "+eta+" "
        message += 'Accuracy TRAIN: '+str(accu_train[1]).zfill(4)+'%\t'
        message += 'Accuracy TEST: '+str(accu_test[1]).zfill(4)+'%\t'
        message += 'Min: '+ str(accu_test[0]).zfill(4)+ '% ('+str(int(accu_test[2]))+')'
        print(message)
        
    def getWeights(self):
        return self.layers
    
    def minsec2sec(self,time):
        if 'm' in time:
            splitted = time.split('m')
            return int(splitted[0]) * 60 + int(splitted[1][:-1])
        else:
            return int(time[:-1])
    
    def ETAepoch(self,start_time):
        diff = dt.datetime.now() - self.start_time
        eta = divmod(diff.days * 86400 + diff.seconds, 60)
        if eta[0] != 0:
            ret = str(eta[0])+"m"
        else:
            ret = ""
        ret += str(eta[1])+"s"
        return ret
    
    def printLineCSV(self,cluster,time,a_train,a_test):
        cr = round((784*300)*64/((784*300)*math.log(cluster,2) + cluster*64),3)
        return str(cluster)+','+str(cr)+','+time+','+str(a_train[1])+','+str(a_test[1])

In [8]:
pre_trained_weights = nn.getWeights()
nn_km = Neural_Network_KM(neurons=300,batchsize=1,cluster=256,pre_weights=pre_trained_weights,stop_function=2,stop_parameter=0.01)
nn_km.train(TRAINING,TESTING)

-- Training Session Start (2019-01-25 18:42:43.840234) --

Neurons: 300
Clusters: 256
Batch Train: 60000 (100%)
Batch Test: 10000 (100%)
Stop Function: improvements below 0.01%

Epoch 001 (2m49s)  Accuracy TRAIN: 98.89%	Accuracy TEST: 97.54%	Min: 95.43% (7)
Epoch 002 (5m35s)  Accuracy TRAIN: 98.83%	Accuracy TEST: 97.48%	Min: 94.45% (9)

-- Training Session End (2019-01-25 18:48:18.846351) --
-------------------------
256,7.931,5m35s,98.83,97.48
-------------------------



# Neural Network with Pruning

In [9]:
def pruning_matrix(mat,percentage,method='out'):
    threshold = (100-percentage)
    
    if method == 'inout':
        threshold /= 4
        perc_up,perc_down,perc_mid_up,perc_mid_down = 100 - threshold, threshold, 50 + threshold, 50 - threshold
        percentile_up = np.percentile(mat,perc_up)
        percentile_down = np.percentile(mat,perc_down)
        percentile_mid_up = np.percentile(mat,perc_mid_up)
        percentile_mid_down = np.percentile(mat,perc_mid_down)
    else:
        threshold /= 2
        if method == 'in': perc_up, perc_down = 50 + threshold, 50 - threshold
        elif method == 'out': perc_up, perc_down = 100 - threshold, threshold
        percentile_up = np.percentile(mat,perc_up)
        percentile_down = np.percentile(mat,perc_down)
        
    w_pruned = np.copy(mat)
    for i,row in enumerate(mat):
        for j,_ in enumerate(row):
            if method == 'in':
                if mat[i,j] > percentile_down and mat[i,j] < percentile_up:
                    w_pruned[i,j] = 0
            elif method == 'out':
                if mat[i,j] < percentile_down or mat[i,j] > percentile_up:
                    w_pruned[i,j] = 0
            elif method == 'inout':
                if mat[i,j] < percentile_down or mat[i,j] > percentile_up or (mat[i,j] > percentile_mid_down and mat[i,j] < percentile_mid_up):
                    w_pruned[i,j] = 0
    return w_pruned

# in place
def sparse_sub_dense(sparse,dense,mask):
    sparse.data -= dense.T[mask.T]
    
def delete_last_row(csc):
    i = csc.indptr[-1]
    indptr = csc.indptr[:-1]
    data = csc.data[:i]
    indices = csc.indices[:i]
    return csc_matrix((data,indices,indptr))

In [10]:
class Neural_Network_PR_CSC:
    
    def __init__(self, neurons, batchsize, stop_function, stop_parameter, weights=None, pruning=None, pruning_method=None):
        self.input_size = N_FEATURES
        self.output_size = N_CLASSES
        self.neurons = neurons
        self.batchsize = batchsize
        self.stop_f = stop_function
        self.stop_p = stop_parameter
        self.pre_trained_weights = weights
        self.pruning = pruning
        self.pruning_method = pruning_method
        self.best = 0.
        self.same = 0
        self.iteration = 0
        
        if weights == None:
            # Standardize random weights
            np.random.seed(RANDOM_SEED)
            hidden_layer = np.random.rand(self.neurons, self.input_size + 1) / self.neurons
            output_layer = np.random.rand(self.output_size, self.neurons + 1) / self.output_size
            self.layers = [hidden_layer, output_layer]
        else:
            # Pruning weights
            pw_hidden = csc_matrix(pruning_matrix(pre_trained_weights[0],self.pruning,self.pruning_method))
            pw_output = csc_matrix(pruning_matrix(pre_trained_weights[1],self.pruning,self.pruning_method))
            self.layers = [pw_hidden, pw_output]
            # Matrix mask
            mask_hidden = pw_hidden.A != 0 
            mask_output = pw_output.A != 0
            self.masks = [mask_hidden,mask_output]


    def train(self, training, testing):
        
        accu_train = [0.,0.]
        
        # Batch Setting
        len_batch_train = len(training[0])
        len_batch_test = len(testing[0])
        if(self.batchsize > 0 and self.batchsize <= 1):
            len_batch_train = int(np.ceil(len_batch_train * self.batchsize))
            len_batch_test = int(np.ceil(len_batch_test * self.batchsize))
        
        # Start prints 
        self.start_time = dt.datetime.now()
        print('-- Training Session Start (%s) --' % (self.start_time))
        typeTrainingPrint = "Stop Function: "    
        if self.stop_f == 0:
            typeTrainingPrint += str(self.stop_p)+" epochs"
        elif self.stop_f == 1:
            typeTrainingPrint += str(self.stop_p)+" epoch(s) w/o improvements"
        elif self.stop_f == 2:
            typeTrainingPrint += "improvements below "+str(self.stop_p)+"%"
        print('\nNeurons: %d\nBatch Train: %d (%d%%)\nBatch Test: %d (%d%%)\nPruning: %d%% (%s)\n%s\n' % (self.neurons,len_batch_train,self.batchsize*100,len_batch_test,self.batchsize*100,self.pruning,self.pruning_method,typeTrainingPrint))
        
        # Divide training and testing batches
        test_output = testing[0:len_batch_test][0:len_batch_test]
        test_input = training[0:len_batch_train][0:len_batch_train]
        inputs = training[0][0:len_batch_train]
        targets = np.zeros((len_batch_train, 10))
        for i in range(len_batch_train):
            targets[i, training[1][i]] = 1

        # Performs iterations
        while not self.is_stop_function_enabled(accu_train[1]):
            
            self.iteration += 1
            
            for input_vector, target_vector in zip(inputs, targets):
                self.backpropagate(input_vector, target_vector)
            
            # Accuracy
            accu_test = self.accu(test_output)
            accu_train = self.accu(test_input)
            
            # Messages
            self.print_message_iter(self.iteration,accu_test,accu_train,self.ETAepoch(self.start_time))
    
        # Final message
        print('\n-- Training Session End (%s) --' % (dt.datetime.now()))
        print('-------------------------')
        print(self.printLineCSV(self.pruning,self.ETAepoch(self.start_time),accu_train,accu_test))
        print('-------------------------\n')

    def feed_forward(self, input_vector):
        outputs = []
        for layer in self.layers:
            input_with_bias = np.append(input_vector, 1)   # Ajout constante
            output = layer * input_with_bias
            output = special.expit(output)
            outputs.append(output)
            # The output is the input of the next layer
            input_vector = output
        return outputs

    def backpropagate(self, input_vector, target):
        c = 10**(-4) + (10**(-1))/math.sqrt(self.iteration)  # Learning coefficient
        hidden_outputs, outputs = self.feed_forward(input_vector)
        
        # Calculation of partial derivatives for the output layer and subtraction
        output_deltas = outputs * (1 - outputs) * (outputs - target)
        sparse_sub_dense(self.layers[-1],c*np.outer(output_deltas, np.append(hidden_outputs, 1)),self.masks[-1])
        
        # Calculation of partial derivatives for the hidden layer and subtraction
        hidden_deltas = hidden_outputs * (1 - hidden_outputs) * (delete_last_row(self.layers[-1]).T * output_deltas)
        sparse_sub_dense(self.layers[0],c*np.outer(hidden_deltas, np.append(input_vector, 1)),self.masks[0])

    def predict(self, input_vector):
        return self.feed_forward(input_vector)[-1]

    def predict_one(self, input_vector):
        return np.argmax(self.feed_forward(input_vector)[-1])

    def accu(self, testing_batch):
        res = np.zeros((10, 2))
        for k in range(len(testing_batch[1])):
            if self.predict_one(testing_batch[0][k]) == testing_batch[1][k]:
                res[testing_batch[1][k]] += 1
            else:
                res[testing_batch[1][k]][1] += 1
        total = np.sum(res, axis=0)
        each = [res[k][0]/res[k][1] for k in range(len(res))]
        min_c = sorted(range(len(each)), key=lambda k: each[k])[0]
        return np.round([each[min_c]*100, total[0]/total[1]*100, min_c], 2)
    
    def is_stop_function_enabled(self,accuracy):
        if self.stop_f == 0:
            if self.iteration < self.stop_p:
                return False
            else:
                return True
        elif self.stop_f == 1:
            if accuracy > self.best or self.iteration == 0:
                self.same = 0
                self.best = accuracy
                return False
            else:
                self.same += 1
                if self.same < self.stop_p:
                    return False
                else:
                    return True
        elif self.stop_f == 2:
            if accuracy > self.best + self.stop_p or self.iteration == 0:
                self.best = accuracy
                return False
            else:
                return True
    
    def print_message_iter(self,iteration,accu_test,accu_train,eta):
        len_eta = len(eta)
        space_fill = 6 - len_eta
        eta = "("+eta+")"
        for _ in range(space_fill):
            eta += " "
        message = 'Epoch '+str(self.iteration).zfill(3) + " "+eta+" "
        message += 'Accuracy TRAIN: '+str(accu_train[1]).zfill(4)+'%\t'
        message += 'Accuracy TEST: '+str(accu_test[1]).zfill(4)+'%\t'
        message += 'Min: '+ str(accu_test[0]).zfill(4)+ '% ('+str(int(accu_test[2]))+')'
        print(message)
    
    def ETAepoch(self,start_time):
        diff = dt.datetime.now() - self.start_time
        eta = divmod(diff.days * 86400 + diff.seconds, 60)
        if eta[0] != 0:
            ret = str(eta[0])+"m"
        else:
            ret = ""
        ret += str(eta[1])+"s"
        return ret
        
    def getWeights(self):
        return self.layers
    
    def printLineCSV(self,pruning,time,a_train,a_test):
        pr = pruning/100
        cr = round((784*300)/(784*300*pr),3)
        return str(pr)+','+str(cr)+','+time+','+str(a_train[1])+','+str(a_test[1])

In [11]:
pre_trained_weights = nn.getWeights()
nn_pr_csc = Neural_Network_PR_CSC(neurons=300,batchsize=1,weights=pre_trained_weights,pruning=60,pruning_method='in',stop_function=2,stop_parameter=0.01)
nn_pr_csc.train(TRAINING,TESTING)

-- Training Session Start (2019-01-25 18:48:19.919443) --

Neurons: 300
Batch Train: 60000 (100%)
Batch Test: 10000 (100%)
Pruning: 60% (in)
Stop Function: improvements below 0.01%

Epoch 001 (1m49s)  Accuracy TRAIN: 98.82%	Accuracy TEST: 97.56%	Min: 94.94% (7)
Epoch 002 (3m43s)  Accuracy TRAIN: 99.03%	Accuracy TEST: 97.78%	Min: 96.69% (7)
Epoch 003 (5m30s)  Accuracy TRAIN: 99.11%	Accuracy TEST: 97.87%	Min: 96.89% (7)
Epoch 004 (7m16s)  Accuracy TRAIN: 99.17%	Accuracy TEST: 97.92%	Min: 97.08% (7)
Epoch 005 (9m3s)   Accuracy TRAIN: 99.21%	Accuracy TEST: 97.98%	Min: 97.13% (9)
Epoch 006 (10m50s) Accuracy TRAIN: 99.25%	Accuracy TEST: 98.0%	Min: 96.93% (9)
Epoch 007 (12m38s) Accuracy TRAIN: 99.27%	Accuracy TEST: 98.02%	Min: 96.83% (9)
Epoch 008 (14m24s) Accuracy TRAIN: 99.3%	Accuracy TEST: 98.0%	Min: 96.73% (9)
Epoch 009 (16m12s) Accuracy TRAIN: 99.32%	Accuracy TEST: 98.0%	Min: 96.63% (9)
Epoch 010 (17m59s) Accuracy TRAIN: 99.35%	Accuracy TEST: 97.98%	Min: 96.43% (9)
Epoch 011 (19m46s) Acc

# Neural Network with Clustering Pruned

In [12]:
def nearest_centroid_index(centers,value):
    centers = np.asarray(centers)
    idx = (np.abs(centers - value)).argmin()
    return idx

def build_clusters_pruning(cluster,weights):
    kmeans = MiniBatchKMeans(n_clusters=cluster,random_state=RANDOM_SEED,init_size=3*cluster)
    kmeans.fit(weights.data.reshape(-1,1))
    return kmeans.cluster_centers_

def redefine_weights_pruning(weights,centers):
    new_data_idx = [nearest_centroid_index(centers,w) for w in weights.data]
    return csc_matrix((new_data_idx,weights.indices,weights.indptr))

def idx_matrix_to_matrix_pruning(idx_matrix,centers):
    return csc_matrix((centers[idx_matrix.data].reshape(-1,),idx_matrix.indices,idx_matrix.indptr))

def centroid_gradient_pruning(idx_matrix,gradient,mask,cluster):
    gradient += 0.000000001
    gradient[mask] = 0
    return scipy.ndimage.sum(csc_matrix(gradient).data,idx_matrix.data,index=range(cluster))

In [13]:
class Neural_Network_PR_KM:

    def __init__(self, neurons, batchsize, cluster, pruning, pre_weights, stop_function, stop_parameter):
        
        start_setting_time = dt.datetime.now()
        
        self.input_size = N_FEATURES
        self.output_size = N_CLASSES
        self.neurons = neurons
        self.batchsize = batchsize
        self.pruning = pruning
        self.cluster = cluster
        self.iteration = 0
        self.stop_f = stop_function
        self.stop_p = stop_parameter
        self.best = 0.
        self.same = 0
        
        # Matrix mask
        mask_hidden = pre_weights[0].A == 0 
        mask_output = pre_weights[-1].A == 0
        self.masks = [mask_hidden,mask_output]
        
        # Variable for shape
        shape_hidden = (self.neurons,self.input_size+1)
        shape_output = (self.output_size,self.neurons+1)
        self.layers_shape = [shape_hidden,shape_output]
            
        # Initialize cluster for pre-trained weights (dict with centers)
        c_hidden = build_clusters_pruning(self.cluster,pre_weights[0])
        c_output = build_clusters_pruning(self.cluster,pre_weights[-1])
        self.centers = [c_hidden,c_output]
        
        # Initialize index matrix for pre-trained weights
        idx_hidden = redefine_weights_pruning(pre_weights[0],self.centers[0])
        idx_output = redefine_weights_pruning(pre_weights[-1],self.centers[-1])
        self.idx_layers = [idx_hidden,idx_output]
        
        # Setting time print    
        end_setting_time = dt.datetime.now() - start_setting_time
        eta = divmod(end_setting_time.days * 86400 + end_setting_time.seconds, 60)
        self.eta_print_setting = str(eta[0])+"m"+str(eta[1])+"s"
    
 

    def train(self, training, testing):
        
        accu_train = [0.,0.]
        
        # Batch Setting
        len_batch_train = len(training[0])
        len_batch_test = len(testing[0])
        if(self.batchsize > 0 and self.batchsize <= 1):
            len_batch_train = int(np.ceil(len_batch_train * self.batchsize))
            len_batch_test = int(np.ceil(len_batch_test * self.batchsize))
        
        # Divide training and testing batches
        test_output = testing[0:len_batch_test][0:len_batch_test]
        test_input = training[0:len_batch_train][0:len_batch_train]
        inputs = training[0][0:len_batch_train]
        targets = np.zeros((len_batch_train, 10))
        for i in range(len_batch_train):
            targets[i, training[1][i]] = 1
        
        # Start prints 
        self.start_time = dt.datetime.now()
        print('-- Training Session Start (%s) --' % (self.start_time))
        typeTrainingPrint = "Stop Function: "    
        if self.stop_f == 0:
            typeTrainingPrint += str(self.stop_p)+" epochs"
        elif self.stop_f == 1:
            typeTrainingPrint += str(self.stop_p)+" epoch(s) w/o improvements"
        elif self.stop_f == 2:
            typeTrainingPrint += "improvements below "+str(self.stop_p)+"%"
        print('\nNeurons: %d\nPruning: %.2f\nClusters: %d\nBatch Train: %d (%d%%)\nBatch Test: %d (%d%%)\n%s\n' % (self.neurons,self.pruning/100,self.cluster,len_batch_train,self.batchsize*100,len_batch_test,self.batchsize*100,typeTrainingPrint))
        
        # Performs iterations
        while not self.is_stop_function_enabled(accu_train[1]):
            
            self.iteration += 1
            
            # Backpropagate with feed forward
            for input_vector, target_vector in zip(inputs, targets):
                weights = []
                for i,c in zip(self.idx_layers,self.centers):
                    w = idx_matrix_to_matrix_pruning(i,c)
                    weights.append(w)
                self.backpropagate(input_vector, target_vector, weights)
                
            # Accuracy
            accu_test = self.accu(test_output,weights)
            accu_train = self.accu(test_input,weights)
            
            # Messages
            self.print_message_iter(self.iteration,accu_test,accu_train,self.ETAepoch(self.start_time))
                      
        # Final message
        print('\n-- Training Session End (%s) --' % (dt.datetime.now()))
        print('-------------------------')
        print(self.printLineCSV(self.pruning,self.cluster,self.ETAepoch(self.start_time),accu_train,accu_test))
        print('-------------------------\n')

    def feed_forward(self, input_vector, weights):
        outputs = []
        for w in weights:
            input_with_bias = np.append(input_vector, 1)   # Ajout constante
            output = w * input_with_bias
            output = special.expit(output)
            outputs.append(output)
            # The output is the input of the next layer
            input_vector = output
        return outputs                            
                                  
    def backpropagate(self, input_vector, target, weights):
        c = 10**(-4) + (10**(-1))/math.sqrt(self.iteration)  # Learning coefficient
        hidden_outputs, outputs = self.feed_forward(input_vector, weights)
        
        # Calculation of partial derivatives for the output layer and subtraction
        output_deltas = outputs * (1 - outputs) * (outputs - target)
        gradient = np.outer(output_deltas, np.append(hidden_outputs, 1))
        cg = centroid_gradient_pruning(self.idx_layers[-1],gradient,self.masks[-1],self.cluster)
        self.centers[-1] = self.centers[-1] - (c * np.array(cg)).reshape(-1,1)
        
        # Calculation of partial derivatives for the hidden layer and subtraction
        hidden_deltas = hidden_outputs * (1 - hidden_outputs) * (delete_last_row(weights[-1]).T * output_deltas)
        gradient = np.outer(hidden_deltas, np.append(input_vector, 1))
        cg = centroid_gradient_pruning(self.idx_layers[0],gradient,self.masks[0],self.cluster)
        self.centers[0] = self.centers[0] - (c * np.array(cg)).reshape(-1,1)
        
    
    def predict(self, input_vector, weights):
        return self.feed_forward(input_vector,weights)[-1]

    def predict_one(self, input_vector, weights):
        return np.argmax(self.feed_forward(input_vector,weights)[-1])

    def accu(self, testing, weights):
        res = np.zeros((10, 2))
        for k in range(len(testing[1])):
            if self.predict_one(testing[0][k], weights) == testing[1][k]:
                res[testing[1][k]] += 1
            else:
                res[testing[1][k]][1] += 1
        total = np.sum(res, axis=0)
        each = [res[k][0]/res[k][1] for k in range(len(res))]
        min_c = sorted(range(len(each)), key=lambda k: each[k])[0]
        return np.round([each[min_c]*100, total[0]/total[1]*100, min_c], 2)
    
    
    def is_stop_function_enabled(self,accuracy):
        if self.stop_f == 0:
            if self.iteration < self.stop_p:
                return False
            else:
                return True
        elif self.stop_f == 1:
            if accuracy > self.best or self.iteration == 0:
                self.same = 0
                self.best = accuracy
                return False
            else:
                self.same += 1
                if self.same < self.stop_p:
                    return False
                else:
                    return True
        elif self.stop_f == 2:
            if accuracy > self.best + self.stop_p or self.iteration == 0:
                self.best = accuracy
                return False
            else:
                return True
    
    def print_message_iter(self,iteration,accu_test,accu_train,eta):
        len_eta = len(eta)
        space_fill = 6 - len_eta
        eta = "("+eta+")"
        for _ in range(space_fill):
            eta += " "
        message = 'Epoch '+str(self.iteration).zfill(3) + " "+eta+" "
        message += 'Accuracy TRAIN: '+str(accu_train[1]).zfill(4)+'%\t'
        message += 'Accuracy TEST: '+str(accu_test[1]).zfill(4)+'%\t'
        message += 'Min: '+ str(accu_test[0]).zfill(4)+ '% ('+str(int(accu_test[2]))+')'
        print(message)
        
    def getWeights(self):
        return self.idx_layers,self.centers
    
    def minsec2sec(self,time):
        if 'm' in time:
            splitted = time.split('m')
            return int(splitted[0]) * 60 + int(splitted[1][:-1])
        else:
            return int(time[:-1])
    
    def ETAepoch(self,start_time):
        diff = dt.datetime.now() - self.start_time
        eta = divmod(diff.days * 86400 + diff.seconds, 60)
        if eta[0] != 0:
            ret = str(eta[0])+"m"
        else:
            ret = ""
        ret += str(eta[1])+"s"
        return ret
    
    def printLineCSV(self,pruning,cluster,time,a_train,a_test):
        pr = pruning/100
        cr = round((784*300)*64/((784*300*pr)*math.log(cluster,2) + cluster*64),3)
        return str(pr)+','+str(cluster)+','+str(cr)+','+time+','+str(a_train[1])+','+str(a_test[1])

In [14]:
pruned_weights = nn_pr_csc.getWeights()
nn_pr_km = Neural_Network_PR_KM(neurons=300,batchsize=1,cluster=512,pruning=60,pre_weights=pruned_weights,stop_function=2,stop_parameter=0.01)
nn_pr_km.train(TRAINING,TESTING)

-- Training Session Start (2019-01-25 19:09:57.241629) --

Neurons: 300
Pruning: 0.60
Clusters: 512
Batch Train: 60000 (100%)
Batch Test: 10000 (100%)
Stop Function: improvements below 0.01%

Epoch 001 (8m40s)  Accuracy TRAIN: 99.42%	Accuracy TEST: 98.13%	Min: 97.13% (9)
Epoch 002 (17m17s) Accuracy TRAIN: 99.44%	Accuracy TEST: 97.93%	Min: 95.94% (9)
Epoch 003 (25m55s) Accuracy TRAIN: 99.44%	Accuracy TEST: 97.98%	Min: 95.84% (9)

-- Training Session End (2019-01-25 19:35:52.278423) --
-------------------------
0.6,512,11.554,25m55s,99.44,97.98
-------------------------

