# Introduction

In this notebook I built a small Neural Network from scratch with the help of the book :

Neural Network From Scratch, written by Harrison Kinsley & Daniel Kukieła

# Import data and libraries

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pickle
import copy

In [2]:
data_dir = 'data'

categories = ['happy', 'sad', 'angry']

image_ext = ['jpeg', 'jpg', 'png']

Data Augmentation

In [22]:
from PIL import Image
import os
import numpy as np

def augment_data(image_path, output_folder, prefix, rotations=[-15, 15], flips=[True, False]):
    # Charger l'image
    img = Image.open(image_path)

    # Appliquer différentes transformations et sauvegarder les nouvelles images
    for rotation in rotations:
        for flip in flips:
            # Rotation
            rotated_img = img.rotate(rotation)

            # Retournement horizontal
            if flip:
                rotated_img = rotated_img.transpose(Image.FLIP_LEFT_RIGHT)

            # Sauvegarde de l'image augmentée
            new_image_path = os.path.join(output_folder, f"{prefix}_r{rotation}_f{flip}.png")
            rotated_img.save(new_image_path)

# Chemin vers le dossier contenant vos images
input_folder = r"C:\Users\anton\OneDrive\Desktop\NN_scratch\data"

# Parcourir les dossiers d'émotions
for emotion_folder in os.listdir(input_folder):
    emotion_path = os.path.join(input_folder, emotion_folder)

    if os.path.isdir(emotion_path):
        # Parcourir les fichiers dans chaque dossier d'émotion
        for filename in os.listdir(emotion_path):
            img_path = os.path.join(emotion_path, filename)

            # Créer un dossier pour chaque émotion dans le dossier de sortie
            output_emotion_folder = os.path.join(input_folder, emotion_folder)

            # Appliquer l'augmentation de données
            augment_data(img_path, output_emotion_folder, filename.split('.')[0])


Resize images and create

In [39]:
import cv2
import os

# Fonction pour redimensionner une image
def resize_image(img_path, output_folder, new_size=(224, 224)):
    
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)

    if img is not None and not img.size == 0:
        resized_img = cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)
        output_path = os.path.join(output_folder, os.path.basename(img_path))
        cv2.imwrite(output_path, resized_img)
    else:
        print(f"Erreur lors de la lecture de l'image : {img_path}")

# ...


# Chemin vers le dossier contenant vos images
input_folder = r"C:\Users\anton\OneDrive\Desktop\NN_scratch\data"

# Parcourir les dossiers d'émotions
for emotion_folder in os.listdir(input_folder):
    emotion_path = os.path.join(input_folder, emotion_folder)

    if os.path.isdir(emotion_path):
        # Parcourir les fichiers dans chaque dossier d'émotion
        for filename in os.listdir(emotion_path):
            img_path = os.path.join(emotion_path, filename)

            # Redimensionner l'image
            resize_image(img_path, emotion_path)

In [3]:
images = []
labels = []

for emotion_folder in os.listdir('data'):
    emotion_path = os.path.join('data', emotion_folder)

    
    for filename in os.listdir(emotion_path):
        img_path = os.path.join(emotion_path, filename)

        if emotion_folder.lower() == 'happy':
            label = 1
        elif emotion_folder.lower() == 'sad':
            label = 2
        else:
            label = 3
            
        images.append(img_path)
        labels.append(label)

In [4]:
# Mélanger les images et les labels de manière cohérente
random_seed = 42  # Utilisez une valeur de graine aléatoire pour une reproductibilité
np.random.seed(random_seed)

# Obtenez les indices mélangés
shuffled_indices = np.random.permutation(len(images))

# Utilisez numpy.take pour obtenir les éléments mélangés
shuffled_images = np.take(images, shuffled_indices)
shuffled_labels = np.take(labels, shuffled_indices)

In [5]:
print(shuffled_images[39])

data\Angry\angry (220).png


# Layer_Dense

In [80]:
class Layer_Dense:

    def __init__(self, n_inputs, n_neurons, weight_regularizer_L1 = 0, weight_regularizer_L2 = 0, bias_regularizer_L1=0, bias_regularizer_L2 = 0):

        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

        self.weight_regularizer_l1 = weight_regularizer_L1
        self.weight_regularizer_l2 = weight_regularizer_L2
        self.bias_regularizer_l1 = bias_regularizer_L1
        self.bias_regularizer_l2 = bias_regularizer_L2

    def forward(self, inputs, training):
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights)+self.biases
    
    def backward(self, dvalues):
        # Gradients on parameters
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis = 0 , keepdims = True )
        
        # Gradients on regularization
        # L1 on weights
        if self.weight_regularizer_l1 > 0 :
            dL1 = np.ones_like(self.weights)
            dL1[self.weights < 0 ] = - 1
            self.dweights += self.weight_regularizer_l1 * dL1

        # L2 on weights
        
        if self.weight_regularizer_l2 > 0 :
            self.dweights += 2 * self.weight_regularizer_l2 * self.weights
        # L1 on biases
        if self.bias_regularizer_l1 > 0 :
            dL1 = np.ones_like(self.biases)
            dL1[self.biases < 0 ] = - 1
            self.dbiases += self.bias_regularizer_l1 * dL1
        # L2 on biases
        if self.bias_regularizer_l2 > 0 :
            self.dbiases += 2 * self.bias_regularizer_l2 * self.biases
        # Gradient on values
        self.dinputs = np.dot(dvalues, self.weights.T)

    def get_parameters(self):
        return self.weights, self.biases
    
    def set_parameters(self, weights, biases):
        self.weights
        self.biases


# Activation function

In [44]:
class Activation_ReLU :

    def forward(self, inputs, training):
        self.inputs = inputs
        self.output = np.maximum(0,inputs)
    
    def backward(self, dvalues):

        self.dinputs = dvalues.copy()

        self.dinputs[self.inputs <= 0] = 0
    
    def predictions (self, outputs):
        return outputs

In [57]:
class Activation_Softmax:

    def forward(self, inputs, training):

        self.inputs = inputs

        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))

        probabilities = exp_values/np.sum(exp_values, axis=1, keepdims=True)

        self.output = probabilities

    def backward(self, dvalues):
        
        self.dinputs = np.empty_like(dvalues)

        for index, (single_output, single_dvalues) in enumerate(zip(self.output, dvalues)):

            single_output = single_output.reshape(-1,1)

            jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)

            self.dinputs[index] = np.dot(jacobian_matrix, single_dvalues)
    
    def predictions(self, outputs):
        return np.argmax(outputs, axis = 1)

In [None]:
class Activation_Sigmoid:

    def predictions (self, outputs):
        return (outputs > 0.5) * 1

    def forward(self, inputs):

        self.inputs = inputs
        self.output = 1/(1+np.exp(-inputs))
    
    def backward(self, dvalues):

        self.dinputs = dvalues * (1-self.output) * self.output

In [None]:
class Activation_Linear:

    def forward(self, inputs):
        self.inputs = inputs
        self.output = inputs
    
    def backward (self, dvalues):

        self.dinputs = dvalues.copy()
    
    def predictions (self, outputs):

        return outputs

# Loss

In [62]:
class Loss:

    def remember_trainable_layers(self, trainable_layers):

        self.trainable_layers = trainable_layers


    def calculate(self, output, y, include_regularization=False):

        sample_losses = self.forward(output, y)

        data_loss = np.mean(sample_losses)

        self.accumulated_sum+=np.sum(sample_losses)
        self.accumulated_count += len(sample_losses)

        if not include_regularization:
            
            return data_loss

        return data_loss, self.regularization_loss()
    
    def calculate_accumulated(self, include_regularization=False):

        data_loss = self.accumulated_sum/self.accumulated_count

        if not include_regularization:
            return data_loss
        
        return data_loss, self.regularization_loss()
    
    def new_pass(self):
        self.accumulated_sum = 0
        self.accumulated_count = 0


    def regularization_loss(self):

        regularization_loss = 0

        for layer in self.trainable_layers:

            if layer.weight_regularizer_l1 > 0:
                regularization_loss += layer.weight_regularizer_l1 * np.sum(np.abs(layer.weights))

            if layer.weight_regularizer_l2 > 0:
                regularization_loss += layer.weight_regularizer_l2 * np.sum(np.abs(layer.weights))

            if layer.bias_regularizer_l1 > 0:
                regularization_loss += layer.bias_regularizer_l1 * np.sum(np.abs(layer.biases))

            if layer.bias_regularizer_l2 > 0:
                regularization_loss += layer.bias_regularizer_l2 * np.sum(np.abs(layer.biases))
            
        return regularization_loss

In [49]:
class Loss_CategoricalCrossentropy(Loss):

    def forward(self, y_pred, y_true):

        samples = len(y_pred)

        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if len(y_true.shape)==1:
            correct_confidences = y_pred_clipped[range(samples), y_true]

        elif len(y_true.shape)==2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        negative_log_likelihoods = -np.log(correct_confidences)

        return negative_log_likelihoods
    
    def backward(self, dvalues, y_true):

        samples = len(dvalues)
        labels = len(dvalues[0])

        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]
        
        self.dinputs = -y_true/dvalues
        self.dinputs = self.dinputs/samples

In [11]:
class Loss_BinaryCrossentropy(Loss):

    # Forward pass
    def forward ( self , y_pred , y_true ):
        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7 , 1 - 1e-7 )
        # Calculate sample-wise loss
        sample_losses = - (y_true * np.log(y_pred_clipped) + ( 1 - y_true) * np.log( 1 - y_pred_clipped))
        sample_losses = np.mean(sample_losses, axis =- 1 )
        # Return losses
        return sample_losses
    # Backward pass
    def backward ( self , dvalues , y_true ):
        # Number of samples
        samples = len (dvalues)
        # Number of outputs in every sample
            # We'll use the first sample to count them
        outputs = len (dvalues[ 0 ])
        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        clipped_dvalues = np.clip(dvalues, 1e-7 , 1 - 1e-7 )
        # Calculate gradient
        self.dinputs = - (y_true / clipped_dvalues - ( 1 - y_true) / ( 1 - clipped_dvalues)) / outputs
        # Normalize gradient
        self.dinputs = self.dinputs / samples



In [12]:
class Loss_MeanSquaredError(Loss):

    def forward(self, y_pred, y_true):

        sample_losses = np.mean((y_true - y_pred)**2, axis=-1)
        return sample_losses
    
    def backward(self, dvalues, y_true):

        samples = len(dvalues)

        outputs = len(dvalues[0])

        self.dinputs = -2 * (y_true - dvalues) / outputs

        self.dinputs = self.dinputs / samples

In [13]:
class Loss_MeanAbsoluteError(Loss):

    def forward (self, y_pred, y_true):

        sample_losses = np.mean(np.abs(y_true - y_pred), axis = -1)

        return sample_losses
    
    def backward (self, dvalues, y_true):

        samples = len(dvalues)

        outputs = len(dvalues[0])

        self.dinputs = np.sign(y_true - dvalues)/outputs

        self.dinputs = self.dinputs / samples

# Common Categorical Cross-Entropy and softmax derivative

In [12]:
class Activation_Softmax_Loss_CategoricalCrossEntropy():
    
    def backward(self, dvalues, y_true):

        samples = len(dvalues)

        if len(y_true.shape)==2:
            y_true = np.argmax(y_true, axis=1)
        
        self.dinputs = dvalues.copy()

        self.dinputs[range(samples), y_true] -=1

        self.inputs = self.dinputs/samples

# Optimizers

## SGD

In [15]:
class Optimizer_SGD:

    def __init__(self, learning_rate=1.0, decay = 0):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0

    def pre_update_params(self):
        if self.decay:
            self.current_learning_rate = self.learning_rate * (1 / (1+self.decay*self.iterations))

    def update_params(self, layer):

        if self.momentum:

            if not hasattr(layer, 'weight_momentums'):
                layer.weight_momentums = np.zeros_like(layer.weights)
                layer.bias_momentums = np.zeros_like(layer.biases)

            weight_updates = self.momentum * layer.weight_momentums - self.current_learning_rate*layer.dweights

            layer.weight_momentums = weight_updates

            bias_updates = self.momentum * layer.bias_momentums - self.current_learning_rate * layer.dbiases

            layer.bias_momentums = bias_updates

        else :
            weight_updates = -self.current_learning_rate * layer.dweights
            bias_updates = -self.current_learning_rate * layer.dbiases
            
        layer.weights += -self.learning_rate * layer.dweights
        layer.biases += -self.learning_rate * layer.dbiases

    def post_update_params (self):
        self.iterations +=1

## AdaGrad

In [16]:
class Optimizer_AdaGrad:

    def __init__(self, learning_rate=1.0, decay = 0, epsilon = 1e-7):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon

    def pre_update_params(self):
        if self.decay:
            self.current_learning_rate = self.learning_rate * (1 / (1+self.decay*self.iterations))

    def update_params(self, layer):

        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)
        
        layer.weight_cache += layer.dweights**2
        layer.bias_cache += layer.dbiases**2

        layer.weights += -self.current_learning_rate * layer.dweights/(np.sqrt(layer.weight_cache) + self.epsilon)

        layer.biases += -self.current_learning_rate * layer.dbiases / (np.sqrt(layer.bias_cache)+self.epsilon)


    def post_update_params (self):
        self.iterations +=1

## RMSProp

In [17]:
class Optimizer_RMSprop:

    def __init__(self, learning_rate=1.0, decay = 0, epsilon = 1e-7, rho):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.rho = rho

    def pre_update_params(self):
        if self.decay:
            self.current_learning_rate = self.learning_rate * (1 / (1+self.decay*self.iterations))

    def update_params(self, layer):

        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)
        
        layer.weight_cache = self.rho * layer.weight_cache + (1 - self.rho) * layer.dweights**2
        layer.bias_cache = self.rho * layer.bias_cache + (1 - self.rho) * layer.dbiases**2

        layer.weights += -self.current_learning_rate * layer.dweights/(np.sqrt(layer.weight_cache) + self.epsilon)

        layer.biases += -self.current_learning_rate * layer.dbiases / (np.sqrt(layer.bias_cache)+self.epsilon)


    def post_update_params (self):
        self.iterations +=1

SyntaxError: parameter without a default follows parameter with a default (1724351955.py, line 3)

## Adam

In [66]:
class Optimizer_Adam:
    def __init__(self, learning_rate=0.001, decay = 0, epsilon = 1e-7, beta_1=0.9, beta_2 = 0.999):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.beta_1 = beta_1
        self.beta_2 = beta_2

    def pre_update_params(self):
        if self.decay:
            self.current_learning_rate = self.learning_rate * (1 / (1+self.decay*self.iterations))

    def update_params(self, layer):

        if not hasattr(layer, 'weight_cache'):
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_momentums = np.zeros_like(layer.biases)
            layer.bias_cache = np.zeros_like(layer.biases)

        # Update momentum with current gradients
        layer.weight_momentums = self.beta_1 * layer.weight_momentums + ( 1 - self.beta_1) * layer.dweights
        layer.bias_momentums = self.beta_1 * layer.bias_momentums + ( 1 - self.beta_1) * layer.dbiases
        
        weight_momentums_corrected = layer.weight_momentums / ( 1 - self.beta_1 ** (self.iterations + 1 ))
        bias_momentums_corrected = layer.bias_momentums / ( 1 - self.beta_1 ** (self.iterations + 1 ))
# Update cache with squared current gradients
        layer.weight_cache = self.beta_2 * layer.weight_cache + ( 1 - self.beta_2) * layer.dweights ** 2
        layer.bias_cache = self.beta_2 * layer.bias_cache + ( 1 - self.beta_2) * layer.dbiases ** 2
# Get corrected cache
        weight_cache_corrected = layer.weight_cache / ( 1 - self.beta_2 ** (self.iterations + 1 ))
        bias_cache_corrected = layer.bias_cache / ( 1 - self.beta_2 ** (self.iterations + 1 ))

        layer.weights += -self.current_learning_rate * weight_momentums_corrected/(np.sqrt(weight_cache_corrected) + self.epsilon)

        layer.biases += -self.current_learning_rate * bias_momentums_corrected / (np.sqrt(bias_cache_corrected)+self.epsilon)


    def post_update_params (self):
        self.iterations +=1
        

# Dropout

In [41]:
class Layer_Dropout:

    def __init__(self,rate):
        self.rate = 1 - rate

    def forward(self, inputs, training):

        self.inputs = inputs

        if not training:
            self.output = inputs.copy()
            return

        self.binary_mask = np.random.binomial(1, self.rate, size=inputs.shape)/self.rate

        self.output = inputs * self.binary_mask
    
    def backward(self, dvalues):
        self.dinputs = dvalues * self.binary_mask

# Create train and test sets

In [15]:
# Chemin vers le dossier contenant vos images
input_folder = r"C:\Users\anton\OneDrive\Desktop\NN_scratch\data"

# Mapping des émotions aux labels
emotion_mapping = {"Happy": 0, "Sad": 1, "Angry": 2}

# Stockage des données et des labels
X_train = []
Y_train = []
X_test = []
Y_test = []

# Parcourir les dossiers d'émotions
for emotion_folder in os.listdir(input_folder):
    emotion_path = os.path.join(input_folder, emotion_folder)

    if os.path.isdir(emotion_path):
        # Obtenir le label associé à l'émotion
        label = emotion_mapping[emotion_folder]

        # Parcourir les fichiers dans chaque dossier d'émotion
        image_paths = [os.path.join(emotion_path, filename) for filename in os.listdir(emotion_path)]
        np.random.shuffle(image_paths)  # Mélanger les chemins d'images

        # Déterminer le nombre d'images à placer dans l'ensemble de test
        num_test_samples = int(0.2 * len(image_paths))  # 20% pour l'ensemble de test

        # Placer les images dans l'ensemble de test
        for i in range(num_test_samples):
            img_path = image_paths[i]
            X_test.append(img_path)
            Y_test.append(label)

        # Placer le reste des images dans l'ensemble d'entraînement
        for i in range(num_test_samples, len(image_paths)):
            img_path = image_paths[i]
            X_train.append(img_path)
            Y_train.append(label)

X_train = [cv2.imread(img_path) for img_path in X_train]
X_test = [cv2.imread(img_path) for img_path in X_test]

X_test = np.array(X_test)
Y_test = np.array(Y_test)
X_train = np.array(X_train)
Y_train = np.array(Y_train)



# Afficher les dimensions des ensembles
print("Dimensions de l'ensemble d'entraînement (X_train, y_train):", X_train.shape, Y_train.shape)
print("Dimensions de l'ensemble de test (X_test, y_test):", X_test.shape, Y_test.shape)


Dimensions de l'ensemble d'entraînement (X_train, y_train): (660, 224, 224, 3) (660,)
Dimensions de l'ensemble de test (X_test, y_test): (162, 224, 224, 3) (162,)


In [16]:
import random

# Associer X_train et Y_train en tant que tuples (image, label)
train_data = list(zip(X_train, Y_train))
test_data = list(zip(X_test, Y_test))

# Mélanger les données
random.shuffle(train_data)
random.shuffle(test_data)

# Séparer à nouveau les données mélangées en X_train et Y_train
X_train, Y_train = zip(*train_data)
X_test, Y_test = zip(*train_data)

# Convertir en tableaux NumPy
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test= np.array(X_test)
Y_test = np.array(Y_test)


Scale the data between -1 and 1

In [17]:
X_train = (X_train / 127.5) - 1
X_test = (X_test / 127.5) - 1

In [18]:
X_train.min()

-1.0

In [19]:
# Aplatir les images en une seule dimension
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)


In [20]:
X_train.shape

(660, 150528)

# Model

In [50]:
class Layer_Input:

    def forward (self, inputs, training):
        self.output = inputs

In [87]:
class Model:

    def __init__(self):

        self.layers = []

        self.softmax_classifier_output = None

    def add(self, layer):

        self.layers.append(layer)
    
    def set(self, loss=None, optimizer=None, accuracy=None):

        if loss is not None:
            self.loss = loss
        if optimizer is not None:
            self.optimizer = optimizer
        if accuracy is not None:
            self.accuracy = accuracy
    
    def finalize(self):

        self.input_layer = Layer_Input()

        layer_count = len(self.layers)

        self.trainable_layers=[]

        for i in range(layer_count):

            if i ==0 :
                self.layers[i].prev = self.input_layer
                self.layers[i].next = self.layers[i+1]
            
            # All layers except for the first and the last
            elif i < layer_count - 1 :
                self.layers[i].prev = self.layers[i - 1 ]
                self.layers[i].next = self.layers[i + 1 ]
            # The last layer - the next object is the loss
            else :
                self.layers[i].prev = self.layers[i - 1 ]
                self.layers[i].next = self.loss
                self.output_layer_activation = self.layers[i]
            
            if hasattr(self.layers[i], 'weights'):
                self.trainable_layers.append(self.layers[i])
            
            if self.loss is not None:
                
                self.loss.remember_trainable_layers(self.trainable_layers)
        
        if isinstance(self.layers[-1], Activation_Softmax) and isinstance(self.loss, Loss_CategoricalCrossentropy):
            self.softmax_classifier_output = Activation_Softmax_Loss_CategoricalCrossEntropy()
    
    def train(self, X,y, epochs = 1, batch_size = None, print_every = 1, validation_data = None):

        self.accuracy.init(y)

        train_steps = 1

        if validation_data is not None:
            validation_steps = 1

            X_val, y_val = validation_data
        
        if batch_size is not None:
            train_steps = len(X) // batch_size

            if train_steps * batch_size < len(X):
                train_steps += 1
            
            if validation_data is not None:

                validation_steps = len(X_val) // batch_size

                if validation_steps * batch_size < len(X_val):
                    validation_steps+=1

        for epoch in range (1, epochs+1):

            print(f'epoch : {epoch}')

            self.loss.new_pass()
            self.accuracy.new_pass()

            for step in range(train_steps):

                if batch_size is None:
                    batch_X = X
                    batch_y = y
                else:
                    batch_X=X[step*batch_size : (step+1)*batch_size]
                    batch_y = y[step*batch_size : (step+1)*batch_size]
    
                output = self.forward(batch_X, training = True)
                data_loss, regularization_loss = self.loss.calculate(output, batch_y, include_regularization=True)
                loss = data_loss + regularization_loss
        
                predictions = self.output_layer_activation.predictions(output)
                accuracy = self.accuracy.calculate(predictions,batch_y)
                self.backward(output, batch_y)

                self.optimizer.pre_update_params()
                for layer in self.trainable_layers:
                    self.optimizer.update_params(layer)
                self.optimizer.post_update_params()

                if not epoch % print_every or step == train_steps -1:

                    print ( f'step: {step},' +
                            f'acc: {accuracy:.3f},' +
                            f'loss: {loss:.3f} (' +
                            f'data_loss: {data_loss:.3f} , ' +
                            f'reg_loss: {regularization_loss:.3f} ), ' +
                            f'lr: {self.optimizer.current_learning_rate} ' )
            
            epoch_data_loss, epoch_regularization_loss = self.loss.calculate_accumulated(include_regularization=True)
            epoch_loss = epoch_data_loss + epoch_regularization_loss
            epoch_accuracy = self.accuracy.calculate_accumulated()

            print ( f'training, ' +
                    f'acc: {epoch_accuracy:.3f} , ' +
                    f'loss: {epoch_loss:.3f} (' +
                    f'data_loss: {epoch_data_loss:.3f} , ' +
                    f'reg_loss: {epoch_regularization_loss:.3f} ), ' +
                    f'lr: {self.optimizer.current_learning_rate} ' )


        if validation_data is not None:

            self.loss.new_pass()
            self.accuracy.new_pass()

            for step in range(validation_steps):
                if batch_size is None:
                    batch_X = X_val
                    batch_y = y_val
                
                else:
                    batch_X = X_val[step*batch_size:(step+1)*batch_size]
                    batch_y = y_val[step*batch_size:(step+1)*batch_size]


            output = self.forward(batch_X, training = False)

            loss=self.loss.calculate(output, batch_y)

            predictions = self.output_layer_activation.predictions(output)

            accuracy = self.accuracy.calculate(predictions, batch_y)

            validation_loss = self.loss.calculate_accumulated()
            validation_accuracy = self.accuracy.calculate_accumulated()


            print ( f'validation, ' +
                    f'acc: {validation_accuracy :.3f} , ' +
                    f'loss: {validation_loss :.3f} ' )
    
    def forward(self, X, training):

        self.input_layer.forward(X, training)

        for layer in self.layers:
            layer.forward(layer.prev.output, training)
        
        return layer.output
    
    def backward(self, output, y):

        self.loss.backward(output, y)

        for layer in reversed(self.layers):
            layer.backward(layer.next.dinputs)
    
    def get_parameters(self):

        parameters = []

        for layer in self.trainable_layers:
            parameters.append(layer.get_parameters())
        
        return parameters
    
    def set_parameters(self, parameters):

        for parameter_set, layer in zip(parameters, self.trainable_layers):

            layer.set_parameters(*parameter_set)
    
    def save_parameters(self, path):

        with open(path, 'wb') as f:
            pickle.dump(self.get_parameters(),f)
    
    def load_parameters(self, path):

        with open(path,'rb') as f:
            self.set_parameters(pickle.load(f))
    
    def save(self, path):

        model = copy.deepcopy(self)

        model.loss.new_pass()
        model.accuracy.new_pass()

        model.input_layer.__dict__.pop('output', None)
        model.loss.__dict__.pop('dinputs', None)


        for layer in model.layers:
            for property in ['input', 'output', 'dinputs', 'dweights', 'dbiases']:
                layer.__dict__.pop(property, None)
        
        with open(path, 'wb') as f:
            pickle.dump(model,f)

    def load(path):

        with open(path, 'rb') as f:
            model = pickle.load(f)
        
        return model
    
    def predict(self, X, batch_size=None):
        prediction_steps = 1

        if batch_size is not None:
            prediction_steps = len(X) // batch_size

            if prediction_steps * batch_size < len(X):

                prediction_steps +=1
        
        output = []

        for step in range(prediction_steps):

            if batch_size is None:
                batch_X = X
            
            else:
                batch_X = X[step*batch_size:(step+1)*batch_size]
            
            batch_output = self.forward(batch_X, training=False)

            output.append(batch_output)

        return np.vstack(output)

# Accuracy

In [63]:
class Accuracy : 

    def calculate(self, predictions, y):

        comparisons = self.compare(predictions, y)

        accuracy = np.mean(comparisons)

        self.accumulated_sum += np.sum(comparisons)
        self.accumulated_count += len(comparisons)

        return accuracy
    
    def calculate_accumulated(self):

        accuracy = self.accumulated_sum/self.accumulated_count

        return accuracy
    
    def new_pass(self):
        self.accumulated_sum = 0
        self.accumulated_count = 0

In [64]:
# Accuracy calculation for classification model
class Accuracy_Categorical(Accuracy):

    # No initialization is needed
    def init(self, y):
        pass

    # Compares predictions to the ground truth values
    def compare(self, predictions, y):
        if len(y.shape) == 2:
            y = np.argmax(y, axis=1)
        return predictions == y

In [29]:
class Accuracy_Regression(Accuracy):

    def __init__(self):
        self.precision = None
    
    def init(self, y, reinit=False):

        if self.precision is None or reinit:
            self.precision = np.std(y)/250
    
    def compare(self, predictions, y):

        return np.absolute(predictions -y) < self.precision
        

# Training

In [83]:
model = Model()

model.add(Layer_Dense(X_train.shape[1], 128))
model.add(Layer_Dropout(0.5))
model.add(Activation_ReLU())
model.add(Layer_Dense(128, 3))
model.add(Activation_Softmax())

model.set(loss=Loss_CategoricalCrossentropy(), optimizer=Optimizer_Adam(decay=5e-5), accuracy=Accuracy_Categorical())

model.finalize()

model.train(X_train, Y_train, validation_data=(X_test, Y_test), epochs = 20, batch_size=8, print_every=10)

epoch : 1
step: 82,acc: 0.750,loss: 1.949 (data_loss: 1.949 , reg_loss: 0.000 ), lr: 0.0009959167413604224 
training, acc: 0.597 , loss: 2.801 (data_loss: 2.801 , reg_loss: 0.000 ), lr: 0.0009959167413604224 
epoch : 2
step: 82,acc: 0.500,loss: 0.903 (data_loss: 0.903 , reg_loss: 0.000 ), lr: 0.0009918175055789735 
training, acc: 0.656 , loss: 1.549 (data_loss: 1.549 , reg_loss: 0.000 ), lr: 0.0009918175055789735 
epoch : 3
step: 82,acc: 0.750,loss: 0.529 (data_loss: 0.529 , reg_loss: 0.000 ), lr: 0.0009877518767285659 
training, acc: 0.653 , loss: 1.615 (data_loss: 1.615 , reg_loss: 0.000 ), lr: 0.0009877518767285659 
epoch : 4
step: 82,acc: 0.500,loss: 0.983 (data_loss: 0.983 , reg_loss: 0.000 ), lr: 0.000983719443214795 
training, acc: 0.673 , loss: 1.841 (data_loss: 1.841 , reg_loss: 0.000 ), lr: 0.000983719443214795 
epoch : 5
step: 82,acc: 0.500,loss: 0.924 (data_loss: 0.924 , reg_loss: 0.000 ), lr: 0.000979719800137161 
training, acc: 0.742 , loss: 1.148 (data_loss: 1.148 , reg_

In [84]:
parameters = model.get_parameters()

# Saving parameters

In [88]:
model.save_parameters('emotion.parms')
model.save('emotion.model')

AttributeError: 'Model' object has no attribute 'save_parameters'

# Loading parameters

In [None]:
model = Model.load('emotion.model')

In [None]:
X, y, X_test, y_test = create_data_emotion('emotion_image')

keys = np.array(range(X.shape[0]))
np.random.shuffle(keys)
X= X[keys]
y=y[keys]

X = (X.reshape(X.shape[ 0 ], - 1 ).astype(np.float32) - 127.5 ) / 127.5
X_test = (X_test.reshape(X_test.shape[ 0 ], - 1 ).astype(np.float32) -
127.5 ) / 127.5
# Load the model
model = Model.load( 'fashion_mnist.model' )
# Evaluate the model
model.evaluate(X_test, y_test)
