1 - Imports

In [17]:
import numpy as np
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')
from itertools import zip_longest

2 - Extract data

In [22]:
def extract_data(data_file, label_file):
    # Create dataframe
    data = []
    label = []

    # For each line in the two files
    for line_data, line_label in zip_longest(open(data_file), open(label_file)):
        # Add data to our array
        data.append(list(map(float, line_data.split(','))))
        
        # Add label to our array
        label.append(int(line_label))
    
    return np.array(data), np.array(label)

X, y = extract_data("image_0.txt", "label.txt")
y[y == 10.] = 0.

3 - Separate data between training and testing data

In [24]:
def train_test_split(X, y, percentage=0.8):
    nb_train_lines = round(500 * percentage)
    nb_test_lines = 500 - nb_train_lines
    
    X_train = np.empty((10 * nb_train_lines, 400))
    y_train = np.empty((10 * nb_train_lines))
    X_test = np.empty((10 * nb_test_lines, 400))
    y_test = np.empty((10 * nb_test_lines))
    
    for i in range(0, 10):        
        X_train[i * nb_train_lines:(i + 1) * nb_train_lines] = X[i * 500:(i * 500) + nb_train_lines]
        y_train[i * nb_train_lines:(i + 1) * nb_train_lines] = y[i * 500:(i * 500) + nb_train_lines]
        X_test[i * nb_test_lines:(i + 1) * nb_test_lines] = X[(i * 500) + nb_train_lines:(i + 1) * 500]
        y_test[i * nb_test_lines:(i + 1) * nb_test_lines] = y[(i * 500) + nb_train_lines:(i + 1) * 500]
    
    return X_train, y_train, X_test, y_test

X_train, y_train, X_test, y_test = train_test_split(X, y)

4 - Implement the feed-forward neural network

In [81]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

class FFNN:
    """ Feed-Forward Neural Network with one hidden layer. """
    
    def __init__(self, input_size, hidden_layer_size):
        self.nb_layers = 3
        self.hidden_layer_size = hidden_layer_size
        self.input_size = input_size
        
        # Input with intercept
        self.X_intercept = np.empty(0)
        
        # First level of weights
        self.w1 = np.random.randn(input_size + 1, self.hidden_layer_size)
        
        # First layer output
        self.fst_out = np.empty(0)
    
        # Second level of weights
        self.w2 = np.random.randn(self.hidden_layer_size + 1, 10)
        
        # Second layer output
        self.snd_out = np.empty(0)
        
        # Real output
        self.Y = np.empty(0)
        
    def forward(self, X, Y):
        self.Y = Y
        
        # Add intercept column (column of ones)
        self.X_intercept = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
        
        # Multiply by the weights
        self.fst_out = np.dot(self.X_intercept, self.w1)
        
        # Go through the first activation (sigmoid)
        self.fst_out = sigmoid(self.fst_out)
        
        # Add intercept column (column of ones)
        self.fst_out = np.concatenate((np.ones((self.fst_out.shape[0], 1)), self.fst_out), axis=1)
        
        # Multiply by the weights
        self.snd_out = np.dot(self.fst_out, self.w2)
        
        # Go through the second activation (sigmoid)
        self.snd_out = sigmoid(self.snd_out)
    
    def accuracy(self):
        nb_right = 0
        
        for pred, y in zip(self.snd_out, self.Y):
            if y[np.argmax(pred)] == 1:
                nb_right += 1
        
        return 100 * nb_right / self.Y.shape[0]
    
    def backward(self):
        learning_rate = 0.1
        
        # For all the weights of the first layer
        for n in range(self.input_size + 1):
            for k in range(1, self.hidden_layer_size):
                # First layer weights update
                derror_w1 = (self.snd_out - self.Y) * self.snd_out * (1 - self.snd_out)
                
                derror_w1 *= self.w2[k]
                
                f = np.delete(self.fst_out, 0, axis=1) # we delete the intercept column
                derror_w1 *= (f[:, k] * (1 - f[:, k]) * self.X_intercept[:, n])[:, np.newaxis]
                
                self.w1[n][k] -= learning_rate * derror_w1.sum()
                
        # For all the weights of the first layer
        for k in range(self.hidden_layer_size):
            for j in range(1, self.snd_out.shape[1]):
                # Second layer weights update
                derror_w2 = (self.snd_out - self.Y) * self.snd_out * (1 - self.snd_out)
                
                derror_w2 *= self.fst_out[:, k][:, np.newaxis]
                
                self.w2[k][j] -= learning_rate * derror_w2.sum()
        
        

ffnn = FFNN(X_train.shape[1], 15)


In [59]:
Y_train = np.zeros((len(y_train), 10))

for i in range(len(y_train)):
    Y_train[i][int(y_train[i])] = 1

5 - Train the model

In [70]:
def shuffle(X, Y):
    rng_state = np.random.get_state()
    np.random.shuffle(X)
    np.random.set_state(rng_state)
    np.random.shuffle(Y)
    
    return X, Y

In [82]:
nb_epochs = 1000
nb_batch = 5
nb_rows_in_batch = int(nb_epochs / nb_batch)

for i in range(nb_epochs):
    X, Y = shuffle(X_train, Y_train)
    
    for j in range(nb_batch):
        X_batch = X[:(j + 1) * nb_rows_in_batch]
        Y_batch = Y[:(j + 1) * nb_rows_in_batch]
        
        ffnn.forward(X_batch, Y_batch)
        
        ffnn.backward()
    
    print(i, ffnn.accuracy(), '%')
    

0 9.6 %
1 9.7 %
2 9.4 %
3 7.7 %
4 10.6 %
5 10.7 %
6 10.1 %
7 10.9 %
8 10.7 %
9 10.2 %
10 10.3 %
11 10.5 %
12 10.3 %
13 8.9 %
14 10.6 %
15 10.6 %
16 8.2 %
17 10.2 %
18 10.9 %
19 9.5 %
20 10.6 %
21 10.5 %
22 9.7 %
23 9.6 %
24 9.7 %
25 9.6 %
26 9.7 %
27 9.7 %
28 9.0 %
29 10.6 %
30 9.7 %
31 10.2 %
32 8.9 %
33 10.8 %
34 10.0 %
35 9.7 %
36 11.8 %
37 9.9 %
38 10.0 %
39 9.6 %
40 9.0 %
41 8.5 %
42 10.2 %
43 11.4 %
44 9.4 %
45 11.0 %
46 11.8 %
47 11.0 %
48 9.8 %
49 9.4 %
50 9.8 %
51 10.9 %
52 10.2 %
53 9.6 %
54 11.0 %
55 9.6 %
56 10.7 %
57 9.9 %
58 8.4 %
59 10.0 %
60 8.9 %
61 10.5 %
62 11.6 %
63 10.0 %
64 10.9 %
65 10.8 %
66 10.0 %
67 10.1 %
68 9.4 %
69 9.8 %
70 10.9 %
71 9.3 %
72 7.9 %
73 8.8 %
74 10.0 %
75 9.7 %
76 9.5 %
77 9.6 %
78 9.8 %
79 11.1 %
80 12.6 %
81 10.3 %
82 10.6 %
83 10.0 %
84 11.5 %
85 9.9 %
86 9.3 %
87 8.7 %
88 9.6 %
89 11.0 %
90 9.4 %
91 10.6 %
92 9.5 %
93 9.4 %
94 9.6 %
95 9.3 %
96 10.7 %
97 10.1 %
98 9.0 %
99 10.3 %
100 10.2 %
101 9.1 %
102 9.7 %
103 10.7 %
104 9.0 %
105 11.

KeyboardInterrupt: 