In [19]:
# Packages
import numpy as np
import pickle


In [32]:
# Data
training = np.genfromtxt('training_set.csv', delimiter = ',')
validation = np.genfromtxt('validation_set.csv', delimiter = ',')


(10000, 3)


In [62]:
m1 = 8
m2 = 4

class TwoLayerPerceptron():
    def __init__(self, training, validation, learning_rate = 0.02, epochs = 1000):
        self.training = training
        self.validation = validation
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.w1 = self._initialize_weights(m1, 2)
        self.w2 = self._initialize_weights(m2, m1)
        self.w3 = self._initialize_weights(1, m2)
        self.t1 = self._initialize_thresholds(m1, 1)
        self.t2 = self._initialize_thresholds(m2, 1)
        self.t3 = self._initialize_thresholds(1, 1)

    def _initialize_weights(self, m, n, mu = 0, sigma = 1):
        size = (m, n)
        w = np.random.normal(mu, sigma, size = size)
        return w

    def _initialize_thresholds(self, m, n):
        size = (m, n)
        t = np.zeros(size)
        return t

    def feed_forward(self, inputs):
        output1 = np.tanh(self.w1 @ np.transpose(inputs) - self.t1)
        output2 = np.tanh(self.w2 @ output1 - self.t2)
        output3 = np.tanh(self.w3 @ output2 - self.t3)
        return output1, output2, output3

    def propagate_back(self, inputs, target, output1, output2, output3):
        error3 = (target - output3) * (1 - output3 ** 2)
        error2 = np.multiply((np.transpose(self.w3) @ error3), (1 - output2 ** 2))
        error1 = np.multiply((np.transpose(self.w2) @ error2), (1 - output1 ** 2))
        dw3 = -1 * self.learning_rate * (-1 * error3 * np.transpose(output2))
        dw2 = -1 * self.learning_rate * np.multiply(-1 * error2, np.transpose(output1))
        dw1 = -1 * self.learning_rate * np.multiply(-1 * error1, inputs)
        dt3 = -1 * self.learning_rate * error3
        dt2 = -1 * self.learning_rate * error2
        dt1 = -1 * self.learning_rate * error1
        return dw1, dw2, dw3, dt1, dt2, dt3

    def _update(self):
        training = np.array(self.training.copy())
        np.random.shuffle(training)
        for pattern in training:
            inputs = pattern[:-1].reshape(1,2)
            target = pattern[-1].reshape(1,1)
            output1, output2, output3 = self.feed_forward(inputs)
            dw1, dw2, dw3, dt1, dt2, dt3 = self.propagate_back(inputs, target, output1, output2, output3)
            self.w1 += dw1
            self.w2 += dw2
            self.w3 += dw3
            self.t1 += dt1
            self.t2 += dt2
            self.t3 += dt3       

    def _classification_error(self):
        validation = np.array(self.validation.copy())
        length = validation.shape[0]
        inputs = validation[:,:-1].reshape(length, 2)
        targets = validation[:,-1].reshape(length, 1)
        output3 = self.feed_forward(inputs)[-1]
        errors = np.sum(np.abs(np.sign(output3) - np.transpose(targets)))
        return 0.5 * errors / length

    def train(self):
        for epoch in range(self.epochs):
            self._update()
            error = self._classification_error()
            print(f"Epoch {epoch}: {error}")
            if error < 0.12:
                print(f"Converged after {epoch} iterations...")
                return True
        print("\nNo convergence...\n")
        return False

    def print(self):
        print(f"w1: {self.w1}")
        print(f"w2: {self.w2}")
        print(f"w3: {self.w3}")
        print(f"t1: {self.t1}")
        print(f"t2: {self.t2}")
        print(f"t3: {self.t3}")

def main():
    network = TwoLayerPerceptron(training, validation)
    network.train()
    np.savetxt("w1.csv", network.w1, delimiter=",")
    np.savetxt("w2.csv", network.w2, delimiter=",")
    np.savetxt("w3.csv", network.w3, delimiter=",")
    np.savetxt("t1.csv", network.t1, delimiter=",")
    np.savetxt("t2.csv", network.t2, delimiter=",")
    np.savetxt("t3.csv", network.t3, delimiter=",")

if __name__ == "__main__":
    main()

Epoch 0: 0.1516
Epoch 1: 0.1516
Epoch 2: 0.1516
Epoch 3: 0.1364
Epoch 4: 0.1366
Epoch 5: 0.1348
Epoch 6: 0.1366
Epoch 7: 0.1326
Epoch 8: 0.1346
Epoch 9: 0.131
Epoch 10: 0.1274
Epoch 11: 0.1324
Epoch 12: 0.1292
Epoch 13: 0.1262
Epoch 14: 0.1262
Epoch 15: 0.1214
Epoch 16: 0.1262
Epoch 17: 0.1252
Epoch 18: 0.1226
Epoch 19: 0.1226
Epoch 20: 0.125
Epoch 21: 0.1334
Epoch 22: 0.1208
Epoch 23: 0.1242
Epoch 24: 0.1266
Epoch 25: 0.1242
Epoch 26: 0.122
Epoch 27: 0.1226
Epoch 28: 0.1204
Epoch 29: 0.1194
Converged after 29 iterations...
w1: [[-1.66246179 -2.63131697]
 [ 6.09365405  0.64582226]
 [ 1.52910986  3.12039609]
 [ 3.09183992  0.2790167 ]
 [ 2.51355266 -0.65955321]
 [-4.41854119  4.65764226]
 [-1.51286511  1.57901761]
 [ 2.0221333   6.78273928]]
w2: [[ 0.12298495 -0.7820567  -0.60282765  0.54845405  1.63406847  0.95654461
   0.38979667  0.6601195 ]
 [ 0.35742738  1.95164394 -0.68943284 -1.44686468  0.73545639  2.75914139
   2.45752393  3.37214771]
 [-1.72354926  1.19807265  0.85418766  2.69