In [1]:
import numpy as np
import mnist

In [2]:
def _(x):
    print(x)
    print("")

In [3]:
class Neuron(object):
    """A simple feed-forward artificial neuron.
    Args:
        num_inputs (int): The input vector size / number of input values.
        activation_fn (callable): The activation function.
    Attributes:
        W (ndarray): The weight values for each input.
        b (float): The bias value, added to the weighted sum.
        activation_fn (callable): The activation function.
    """
    def __init__(self, num_inputs, activation_fn):
        super().__init__()
        # Randomly initializing the weight vector and bias value:
        self.W = np.random.rand(num_inputs)
        self.b = np.random.rand(1)
        self.activation_fn = activation_fn
        
    def forward(self, x):
        """Forward the input signal through the neuron."""
        z = np.dot(x, self.W) + self.b
        return self.activation_fn(z)

In [4]:
# Fixing the random number generator's seed, for reproducible results:
np.random.seed(42)
# Random input column array of 3 values (shape = `(1, 3)`)
x = np.random.rand(3).reshape(1, 3)
_(x)
# > [[0.37454012 0.95071431 0.73199394]]
# Instantiating a Perceptron (simple neuron with step function):
step_fn = lambda y: 0 if y <= 0 else 1
perceptron = Neuron(num_inputs=x.size, activation_fn=step_fn)
_(perceptron.W)
_(perceptron.b)
out = perceptron.forward(x)
_(out)

[[0.37454012 0.95071431 0.73199394]]

[0.59865848 0.15601864 0.15599452]

[0.05808361]

1



In [5]:
class FullyConnectedLayer(object):
    """A simple fully-connected NN layer.
    Args:
        num_inputs (int): The input vector size/number of input values.
        layer_size (int): The output vector size/number of neurons.
        activation_fn (callable): The activation function for this layer.
    Attributes:
        W (ndarray): The weight values for each input.
        b (ndarray): The bias value, added to the weighted sum.
        size (int): The layer size/number of neurons.
        activation_fn (callable): The neurons' activation function.
    """
    
    def __init__(self, num_inputs, layer_size, activation_fn, d_activation_fn=None):
        super().__init__()
        # Randomly initializing the parameters (using a normal distribution this time):
        self.W = np.random.standard_normal((num_inputs, layer_size))#Size of matrix is num_x of rows and num_layer of rows 
        self.b = np.random.standard_normal(layer_size)
        self.size = layer_size
        
        self.activation_fn = activation_fn
        self.d_activation_fn = d_activation_fn # Deriv. activation function
        self.x, self.y, self.dL_dW, self.dL_db = 0, 0, 0, 0 # Storage attr.
        
    def forward(self, x):
        """Forward the input signal through the layer."""
        z = np.dot(x, self.W) + self.b
        self.y = self.activation_fn(z)
        self.x = x # we store values for back-propagation
        return self.y #Output will be a verctor
    
    def backward(self, dL_dy):
        """Back-propagate the loss."""
        dy_dz = self.d_activation_fn(self.y)  # = f'
        dL_dz = (dL_dy * dy_dz) # dL/dz = dL/dy * dy/dz = l'_{k+1} * f'
        dz_dw = self.x.T
        dz_dx = self.W.T
        dz_db = np.ones(dL_dy.shape[0]) # dz/db = d(W.x + b)/db = 0 + db/db = "ones"-vector

        # Computing the derivatives with respect to the layer's parameters, and storing them for opt. optimization:
        self.dL_dW = np.dot(dz_dw, dL_dz)
        self.dL_db = np.dot(dz_db, dL_dz)

        # Computing the derivative with respect to the input, to be passed to the previous layers (their `dL_dy`):
        dL_dx = np.dot(dL_dz, dz_dx)
        return dL_dx
    
    def optimize(self, epsilon):
        """Optimize the layer's parameters w.r.t. the derivative values."""
        self.W -= epsilon * self.dL_dW
        self.b -= epsilon * self.dL_db
        

In [6]:
np.random.seed(42)
# Random input column-vectors of 2 values (shape = `(1, 2)`):
x1 = np.random.uniform(-1, 1, 2).reshape(1, 2)
_(x1)
x2 = np.random.uniform(-1, 1, 2).reshape(1, 2)
_(x2)

relu_fn = lambda y: np.maximum(y, 0)    #Defining our activation function
layer = FullyConnectedLayer(2, 3, relu_fn)

# Our layer can process x1 and x2 separately...
out1 = layer.forward(x1)
_(out1)

out2 = layer.forward(x2)
_(out2)

#And together:
x12 = np.concatenate((x1, x2))# stack of input vectors, of shape `(2, 2)`
_(x12)
out12 = layer.forward(x12)
_(out12)

[[-0.25091976  0.90142861]]

[[0.46398788 0.19731697]]

[[0.28712364 0.         0.33478571]]

[[0.         0.         1.08175419]]

[[-0.25091976  0.90142861]
 [ 0.46398788  0.19731697]]

[[0.28712364 0.         0.33478571]
 [0.         0.         1.08175419]]



In [7]:
# Loading the training and testing data:
X_train, y_train = mnist.train_images(), mnist.train_labels()
X_test, y_test = mnist.test_images(), mnist.train_labels()
num_classes = 10    # classes are digits from 0 to 9

# We transform the images into column vectors(as inputs for our NN):
X_train, X_test = X_train.reshape(-1, 28*28), X_test.reshape(-1, 28*28)
# We "one-hot" the labels (as targets for our NN), for instance, transform label `4` into vector `[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]`:
y_train = np.eye(num_classes)[y_train]

In [8]:
def sigmoid(x): # Apply the sigmoid function to the elements of x.
    return 1 / (1 + np.exp(-x)) # y

def derivated_sigmoid(y):   # sigmoid derivative function
    return y * (1 - y)

def loss_L2(pred, target):  # L2 loss function
    return np.sum(np.square(pred - target)) / pred.shape[0] # opt. for results not depending on the batch size (pred.shape[0]) , we divide the loss by it

def derivated_loss_L2(pred, target):   # L2 derivative function
    return 2 * (pred - target) # we could add the batch size division here too, but it wouldn't really affect the training (just scaling down the derivatives).

def binary_cross_entropy(pred, target):            # cross-entropy loss function
    return -np.mean(np.multiply(np.log(pred), target) + np.multiply(np.log(1 - pred), (1 - target)))


def derivated_binary_cross_entropy(pred, target):  # cross-entropy derivative function
    return (pred - target) / (pred * (1 - pred))

class SimpleNetwork(object):
    """A simple fully-connected NN.
    Args:
        num_inputs (int): The input vector size / number of input values. results not depending on the batch size (pred.shape[0]), we divide the loss by it
        num_outputs (int): The output vector size.
        hidden_layers_sizes (list): A list of sizes for each hidden layer to be added to the network
    Attributes:
        layers (list): The list of layers forming this simple network.
    """
    
    def __init__(self, num_inputs, num_outputs, hidden_layers_sizes=(64,32),
                 loss_fn = loss_L2, d_loss_fn=derivated_loss_L2):
        super().__init__()
        # We build the list of layers composing the network:
        sizes = [num_inputs, *hidden_layers_sizes, num_outputs]
        self.layers = [
            FullyConnectedLayer(sizes[i], sizes[i+1], sigmoid, derivated_sigmoid)
            for i in range(len(sizes) -1)]
        self.loss_fn, self.d_loss_fn = loss_fn, d_loss_fn
    
    
    def forward(self, x):
        """Forward the input vector `x` through the layers."""
        for layer in self.layers: #From the input layer to the output one
            x = layer.forward(x)
        return x
    
    def predict(self, x):
        """Compute the output corresponding to `x`, and return the index of the largest output value"""
        estimations = self.forward(x)
        best_class = np.argmax(estimations)
        return best_class
    
    def evaluate_accuracy(self, X_val, y_val):
        """Evaluate the network's accuracy on a validate dataset."""
        num_corrects = 0
        for i in range(len(X_val)):
            if self.predict(X_val[i]) == y_val[i]:
                num_corrects += 1
        return num_corrects / len(X_val)
    
    def backward(self, dL_dy):
        """Back-propagate the loss derivative from last to 1st layer."""
        for layer in reversed(self.layers):
            dL_dy = layer.backward(dL_dy)
        return dL_dy
    
    def optimize(self, epsilon):
        """Optimize the parameters according to the stored gradients."""
        for layer in self.layers:
            layer.optimize(epsilon)
    
    def train(self, X_train, y_train, X_val, y_val, batch_size=32, num_epochs=5, learning_rate=5e-3):
        """Train (and evaluate) the network on the provided dataset."""
        num_batches_per_epoch = len(X_train) // batch_size
        loss, accuracy = [], []
        for i in range(num_epochs) : # for each training epoch
            epoch_loss = 0
            for b in range(num_batches_per_epoch): # for each batch
                # Get batch:
                b_idx = b * batch_size
                b_idx_e = b_idx + batch_size
                b_idx_e = b_idx + batch_size
                x, y_true = X_train[b_idx:b_idx_e], y_train[b_idx:b_idx_e]
                # Optimixe on batch:
                y = self.forward(x) # forward pass
                epoch_loss += self.loss_fn(y, y_true) # LOSS
                dL_dy = self.d_loss_fn(y, y_true) # loss derivation
                self.backward(dL_dy) # back-propogation pass
                self.optimize(learning_rate) # optimization
                
            loss.append(epoch_loss / num_batches_per_epoch)
            # After each epoch, we "validate" our network, i.e., we measure its accuracy over the test/validation set:
            accuracy.append(self.evaluate_accuracy(X_val, y_val))
            print("Epoch {:4d}: training loss = {:.6f} | val accuracy = {:.2f}%".format(i, loss[i], accuracy[i] * 100))


In [9]:
X_train, y_train = mnist.train_images(), mnist.train_labels()
X_test,  y_test  = mnist.test_images(), mnist.test_labels()
num_classes = 10    # classes are the digits from 0 to 9

In [10]:
X_train, X_test = X_train.reshape(-1, 28 * 28), X_test.reshape(-1, 28 * 28)


In [11]:
X_train, X_test = X_train / 255., X_test / 255.
print("Normalized pixel values between {} and {}".format(X_train.min(), X_train.max()))

Normalized pixel values between 0.0 and 1.0


In [12]:
y_train = np.eye(num_classes)[y_train]


In [13]:

mnist_classifier = SimpleNetwork(num_inputs=X_train.shape[1], 
                                 num_outputs=num_classes, hidden_layers_sizes=[64, 32])

In [14]:
predictions = mnist_classifier.forward(X_train)                         # forward pass
loss_untrained = mnist_classifier.loss_fn(predictions, y_train)   # loss computation

accuracy_untrained = mnist_classifier.evaluate_accuracy(X_test, y_test)  # Accuracy
print("Untrained : training loss = {:.6f} | val accuracy = {:.2f}%".format(
    loss_untrained, accuracy_untrained * 100))

Untrained : training loss = 3.654294 | val accuracy = 10.54%


In [15]:
losses, accuracies = mnist_classifier.train(X_train, y_train, X_test, y_test, 
                                            batch_size=30, num_epochs=500)

Epoch    0: training loss = 0.739994 | val accuracy = 61.10%
Epoch    1: training loss = 0.442242 | val accuracy = 76.49%


KeyboardInterrupt: 

# Binary Classification

In [26]:
X_train1, y_train1 = mnist.train_images(), mnist.train_labels()
X_test1,  y_test1  = mnist.test_images(), mnist.test_labels()
num_classes = 10    # classes are the digits from 0 to 9

In [27]:
y_train_binary = (y_train1 == 5).astype(np.int)
y_test_binary = (y_test1 == 5).astype(np.int)

In [28]:
y_train_binary = np.eye(2)[y_train_binary]


In [29]:
y_train_binary

array([[0., 1.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [1., 0.],
       [1., 0.]])

In [40]:
mnist_classifier_binary = SimpleNetwork(num_inputs=X_train.shape[1], 
                                 num_outputs=2, hidden_layers_sizes=[784, 512, 256, 128, 64, 32])

In [41]:
predictions1 = mnist_classifier_binary.forward(X_train)                         # forward pass
loss_untrained1 = mnist_classifier_binary.loss_fn(predictions1, y_train_binary)   # loss computation

accuracy_untrained1 = mnist_classifier_binary.evaluate_accuracy(X_test, y_test)  # Accuracy
print("Untrained : training loss = {:.6f} | val accuracy = {:.2f}%".format(
    loss_untrained1, accuracy_untrained1 * 100))

Untrained : training loss = 0.708148 | val accuracy = 9.08%


In [43]:
losses1, accuracies1 = mnist_classifier_binary.train(X_train, y_train_binary, X_test, y_test_binary, 
                                            batch_size=30, num_epochs=500)

Epoch    0: training loss = 0.163492 | val accuracy = 91.08%
Epoch    1: training loss = 0.128874 | val accuracy = 93.83%


KeyboardInterrupt: 