In [None]:
class FCNN():
    """Fully connected classifier: two sigmoid hidden layers and a softmax output.

    The network is trained with minibatch gradient descent on the
    cross-entropy loss.  Each epoch runs a forward pass over the whole
    dataset (recording loss and accuracy) and then one parameter update
    computed from a randomly drawn minibatch of the cached activations.

    Args:
        x: 2-D array of inputs, one sample per row.
        y: 2-D array of one-hot labels with shape ``(n_samples, classes)``.
        classes: number of output classes.
        minibatch: number of samples drawn per update; capped at the
            dataset size when the dataset is smaller.
        h1: number of units in the first hidden layer.
        h2: number of units in the second hidden layer.

    Raises:
        ValueError: if ``y`` does not have shape ``(n_samples, classes)``.
    """

    def __init__(self, x, y, classes=2, minibatch=280, h1=20, h2=10):
        # data parameters
        self.x, self.y = np.asarray(x), np.asarray(y)
        self.size = self.x.shape[0]  # dataset size
        self.D = self.x.shape[1]  # dataset features (dimension)
        self.classes = classes  # number of classes
        self.minibatch = minibatch

        if self.y.shape != (self.size, classes):
            raise ValueError(
                'y must be one-hot with shape (%d, %d), got %s'
                % (self.size, classes, self.y.shape)
            )

        # hidden layer parameters
        self.h1, self.h2 = h1, h2  # units in first and second hidden layers
        # Weights start at small random values: all-zero weights keep every
        # unit of a layer identical forever (the gradients are symmetric), so
        # the network could never use more than one effective unit per layer.
        self.W0 = self._init_weights(self.D, self.h1)
        self.W1 = self._init_weights(self.h1, self.h2)
        self.Wout = self._init_weights(self.h2, classes)

        self.b0 = np.zeros((1, h1))
        self.b1 = np.zeros((1, h2))
        self.bout = np.zeros((1, classes))

        # performance parameters
        self.eta = 0.0025  # learning rate
        self.label_class = np.argmax(self.y, axis=1)
        self.loss, self.precision = [], []

    @staticmethod
    def _init_weights(fan_in, fan_out):
        """Return a (fan_in, fan_out) matrix of N(0, 1/fan_in) samples.

        Uses NumPy's global random state so ``np.random.seed`` controls it.
        """
        scale = 1.0 / np.sqrt(max(fan_in, 1))
        return np.random.normal(0.0, scale, (fan_in, fan_out))

    # hidden layer operation
    def Hidden_layer(self, x, w, b):
        """Return the affine transform ``x @ w + b``."""
        return np.dot(x, w) + b

    # activation function of hidden layer
    def Sigmoid(self, x):
        """Return the element-wise logistic sigmoid of ``x``."""
        return 1 / (1 + np.exp(-x))

    # softmax function for output layer
    # z is the unnormalized probability
    def Softmax(self, z):
        """Return the row-wise softmax of the 2-D logits ``z``."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # exp() from overflowing on large logits.
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    # calculate cross-entropy loss and save it during training
    def Cross_entropy(self):  # y_pred has already passed through softmax
        """Append the summed base-2 cross-entropy of ``self.y_pred`` to ``self.loss``.

        The sum runs over every sample and every class.
        """
        # Clip so an exact-zero probability gives a large finite loss rather
        # than NaN (0 * -inf) or inf.
        safe_pred = np.clip(self.y_pred, 1e-12, 1.0)
        self.loss.append(float(-np.sum(self.y * np.log2(safe_pred))))

    # calculate the classification accuracy
    def Precision(self):
        """Append the fraction of correctly classified samples to ``self.precision``."""
        # convert predicted probabilities to class indices
        y_pred_class = np.argmax(self.y_pred, axis=1)
        self.precision.append(float(np.mean(y_pred_class == self.label_class)))

    # gradient of (softmax + cross-entropy) loss
    def Softmax_CrossEntropy_Derivative(self, y_pred, y):
        """Return the loss gradient with respect to the output-layer logits."""
        return y_pred - y

    # activation function derivative
    def Sigmoid_Derivative(self, x):
        """Return the sigmoid derivative evaluated at pre-activation ``x``."""
        s = self.Sigmoid(x)
        return s * (1 - s)

    # weight derivative
    def Weight_Gradient(self, a, y):
        """Return ``a.T @ y``: layer input ``a`` against upstream gradient ``y``."""
        return np.dot(a.T, y)

    # bias derivative
    def Bias_Gradient(self, y):
        """Return the upstream gradient ``y`` summed over the sample axis."""
        return np.sum(y, axis=0)

    # forward-propagation
    def Forward(self):
        """Run the whole dataset through the network and record loss/accuracy.

        Pre-activations (``a0``, ``a1``, ``aout``) and activations (``y0``,
        ``y1``, ``y_pred``) are cached on the instance for ``Backward``.
        """
        # first hidden layer
        self.a0 = self.Hidden_layer(self.x, self.W0, self.b0)
        self.y0 = self.Sigmoid(self.a0)

        # second hidden layer
        self.a1 = self.Hidden_layer(self.y0, self.W1, self.b1)
        self.y1 = self.Sigmoid(self.a1)

        # output layer
        self.aout = self.Hidden_layer(self.y1, self.Wout, self.bout)
        self.y_pred = self.Softmax(self.aout)

        self.Cross_entropy()
        self.Precision()

    def Derivative_sigmoid(self, x):
        """Alias of ``Sigmoid_Derivative``, kept for existing callers."""
        return self.Sigmoid_Derivative(x)

    # backward-propagation
    def Backward(self):
        """Update all weights and biases from one random minibatch.

        Must be called after ``Forward``, whose cached activations it reads.
        Gradients are summed (not averaged) over the minibatch.
        """
        # Sampling without replacement cannot draw more rows than exist, so
        # cap the batch at the dataset size.
        batch = min(self.minibatch, self.size)
        rand_index = np.random.choice(self.size, batch, replace=False)

        # gather the minibatch rows of the cached forward pass
        sgd_x = self.x[rand_index]
        sgd_a0, sgd_y0 = self.a0[rand_index], self.y0[rand_index]
        sgd_a1, sgd_y1 = self.a1[rand_index], self.y1[rand_index]
        sgd_y_pred, sgd_y = self.y_pred[rand_index], self.y[rand_index]

        # output layer
        grad_y_pred = self.Softmax_CrossEntropy_Derivative(sgd_y_pred, sgd_y)  # batch * classes
        grad_wout = self.Weight_Gradient(sgd_y1, grad_y_pred)  # H2 * classes
        grad_bout = self.Bias_Gradient(grad_y_pred)  # classes

        # second hidden layer
        grad_y1 = np.dot(grad_y_pred, self.Wout.T) * self.Sigmoid_Derivative(sgd_a1)  # batch * H2
        grad_w1 = self.Weight_Gradient(sgd_y0, grad_y1)  # H1 * H2
        grad_b1 = self.Bias_Gradient(grad_y1)  # H2

        # first hidden layer
        grad_y0 = np.dot(grad_y1, self.W1.T) * self.Sigmoid_Derivative(sgd_a0)  # batch * H1
        grad_w0 = self.Weight_Gradient(sgd_x, grad_y0)  # D * H1
        grad_b0 = self.Bias_Gradient(grad_y0)  # H1

        # update weights and bias
        self.W0 -= grad_w0 * self.eta
        self.b0 -= grad_b0 * self.eta

        self.W1 -= grad_w1 * self.eta
        self.b1 -= grad_b1 * self.eta

        self.Wout -= grad_wout * self.eta
        self.bout -= grad_bout * self.eta

    # plot learning curve
    def Plot(self):
        """Plot the recorded training loss and accuracy side by side."""
        # The x axes follow the recorded histories rather than self.epoch, so
        # the plot stays valid when Training has been called more than once.
        # plot learning curve
        plt.subplot(1, 2, 1)
        plt.plot(list(range(len(self.loss))), self.loss, color='blue', label='training loss')
        plt.legend()

        # plot precision curve
        plt.subplot(1, 2, 2)
        percentage = [round(i, 2) * 100 for i in self.precision]
        plt.plot(list(range(len(percentage))), percentage, color='blue', label='training accuracy')
        plt.legend()

        plt.show()

    # output final loss and accuracy
    def Final_Result(self):
        """Print the most recently recorded training loss and accuracy."""
        if not self.loss or not self.precision:
            print('No training results recorded yet.')
            return
        # [-1] is the latest entry even after repeated Training calls.
        print('Final training   loss   = ' + str(self.loss[-1]))
        percentage = round(self.precision[-1], 4) * 100
        print('Final training accuracy = ' + str(percentage) + ' %')

    # training main function
    def Training(self, epoch):
        """Train for ``epoch`` iterations, then plot and print the results.

        Each iteration records loss/accuracy in ``Forward`` before the
        parameter update in ``Backward``, so the final recorded values
        describe the parameters before the last update.
        """
        self.epoch = epoch
        self.count = 0
        # training
        for _ in range(self.epoch):
            self.count += 1
            self.Forward()
            self.Backward()

        # plot loss and accuracy
        self.Plot()
        self.Final_Result()