In [9]:
import numpy as np

# Activation Class

In [10]:
class Activation:
    @staticmethod
    def tanh(x):
        return np.tanh(x)

    @staticmethod
    def softmax(x):
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum(axis=0)

    @staticmethod
    def derivative_tanh(x):
        return 1 - np.power(np.tanh(x), 2)

# Parameters Class

In [11]:
class Parameters:
    def __init__(self, n_x, n_h, n_y):
        self.w1 = np.random.randn(n_h, n_x) * 0.01
        self.b1 = np.zeros((n_h, 1))
        self.w2 = np.random.randn(n_y, n_h) * 0.01
        self.b2 = np.zeros((n_y, 1))

# Forward Propagation

In [12]:
class ForwardProp:
    @staticmethod
    def forward(x, parameters):
        z1 = np.dot(parameters.w1, x) + parameters.b1
        a1 = Activation.tanh(z1)
        z2 = np.dot(parameters.w2, a1) + parameters.b2
        a2 = Activation.softmax(z2)

        cache = {"z1": z1, "a1": a1, "z2": z2, "a2": a2}
        return cache

# Loss Function Class

In [13]:
class LossFunction:
    @staticmethod
    def compute_cost(a2, y):
        m = y.shape[1]
        cost = -(1 / m) * np.sum(y * np.log(a2))
        return cost


# Back Propagation

In [14]:
class BackProp:
    @staticmethod
    def backward(x, y, parameters, cache):
        m = x.shape[1]
        dz2 = cache['a2'] - y
        dw2 = (1 / m) * np.dot(dz2, cache['a1'].T)
        db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)
        dz1 = (1 / m) * np.dot(parameters.w2.T, dz2) * Activation.derivative_tanh(cache['a1'])
        dw1 = (1 / m) * np.dot(dz1, x.T)
        db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)

        gradients = {"dw1": dw1, "db1": db1, "dw2": dw2, "db2": db2}
        return gradients

# Gradient Descent

In [15]:
class GradDescent:
    @staticmethod
    def update(parameters, gradients, learning_rate):
        parameters.w1 -= learning_rate * gradients['dw1']
        parameters.b1 -= learning_rate * gradients['db1']
        parameters.w2 -= learning_rate * gradients['dw2']
        parameters.b2 -= learning_rate * gradients['db2']
        return parameters

# Training Class

In [16]:
class Training:
    def __init__(self, n_h, learning_rate, iterations):
        self.n_h = n_h
        self.learning_rate = learning_rate
        self.iterations = iterations

    def train(self, x, y):
        n_x, n_y = x.shape[0], y.shape[0]
        parameters = Parameters(n_x, self.n_h, n_y)
        cost_list = []

        for i in range(self.iterations):
            cache = ForwardProp.forward(x, parameters)
            cost = LossFunction.compute_cost(cache['a2'], y)
            gradients = BackProp.backward(x, y, parameters, cache)
            parameters = GradDescent.update(parameters, gradients, self.learning_rate)

            cost_list.append(cost)
            if i % (self.iterations / 10) == 0:
                print("Cost after", i, "iterations is:", cost)

        return parameters, cost_list

# Model Class

In [17]:
class Model:
    def __init__(self, n_h, learning_rate, iterations):
        self.training = Training(n_h, learning_rate, iterations)

    def fit(self, X_train, Y_train):
        return self.training.train(X_train, Y_train)

# Loading Training Data

In [18]:
X_train = np.loadtxt('train_X.csv', delimiter = ',').T
Y_train = np.loadtxt('train_label.csv', delimiter = ',').T

In [19]:
# Create an instance of the Model class
neural_network_model = Model(n_h=n_h, learning_rate=learning_rate, iterations=iterations)

# Train the model using the training data
parameters, cost_list = neural_network_model.fit(X_train, Y_train)


Cost after 0 iterations is: 2.4056438351896285
Cost after 10 iterations is: 2.2953041782224792
Cost after 20 iterations is: 2.195767668320458
Cost after 30 iterations is: 2.1046961664419057
Cost after 40 iterations is: 2.0206753250765153
Cost after 50 iterations is: 1.9427901334494948
Cost after 60 iterations is: 1.8703902153163086
Cost after 70 iterations is: 1.8029667162184726
Cost after 80 iterations is: 1.7400911359001228
Cost after 90 iterations is: 1.6813861619228583
