In [None]:
class neural_network:
    """Two-layer fully connected classifier (ReLU hidden layer, softmax output).

    All helpers are static: the class is only a namespace and the parameters
    (W1, b1, W2, b2) are passed around explicitly. The code multiplies
    ``W1.dot(X)``, so ``X`` must have one column per sample and one row per
    input feature; labels ``Y`` are used as integer indices.
    """

    @staticmethod
    def init_params(nodes, X, num_classes=10):
        """Create randomly initialised weights and biases.

        Args:
            nodes: Number of units in the hidden layer.
            X: Input data; only its first dimension (rows) is used to size W1.
            num_classes: Number of output units (defaults to 10).

        Returns:
            Tuple ``(W1, b1, W2, b2)`` with values drawn uniformly from
            [-0.5, 0.5).
        """
        X = np.asarray(X)
        n_features = X.shape[0]

        W1 = np.random.rand(nodes, n_features) - 0.5
        b1 = np.random.rand(nodes, 1) - 0.5
        W2 = np.random.rand(num_classes, nodes) - 0.5
        b2 = np.random.rand(num_classes, 1) - 0.5
        return W1, b1, W2, b2

    @staticmethod
    def ReLU(Z):
        """Return Z where it is positive and 0 elsewhere (element-wise)."""
        return np.maximum(Z, 0)

    @staticmethod
    def softmax(Z):
        """Column-wise softmax: e^Z / sum(e^Z) along axis 0.

        The per-column maximum is subtracted before exponentiating. This does
        not change the result mathematically but prevents overflow in np.exp
        for large logits.
        """
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        exp_Z = np.exp(shifted)
        return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

    @staticmethod
    def ReLU_deriv(Z):
        """Derivative of ReLU: 1 where Z > 0, otherwise 0.

        Returns a boolean array; True/False act as 1/0 when multiplied.
        """
        return Z > 0

    @staticmethod
    def one_hot(Y, num_classes=None):
        """One-hot encode integer labels.

        Args:
            Y: 1-D integer array of labels.
            num_classes: Number of rows in the result. When omitted it is
                inferred as ``Y.max() + 1``.

        Returns:
            Array of shape ``(num_classes, Y.size)`` with a single 1 per column.
        """
        if num_classes is None:
            num_classes = Y.max() + 1
        one_hot_Y = np.zeros((Y.size, num_classes))
        one_hot_Y[np.arange(Y.size), Y] = 1
        return one_hot_Y.T

    @staticmethod
    def forward_prop(W1, b1, W2, b2, X):
        """Run the forward pass and return ``(Z1, A1, Z2, A2)``."""
        Z1 = W1.dot(X) + b1  # matrix product of W1 and the input layer
        A1 = neural_network.ReLU(Z1)
        Z2 = W2.dot(A1) + b2
        A2 = neural_network.softmax(Z2)
        return Z1, A1, Z2, A2

    @staticmethod
    def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
        """Compute averaged gradients for all parameters.

        ``Z2`` and ``W1`` are accepted for signature symmetry with
        ``forward_prop`` but are not needed by the computation.

        Returns:
            Tuple ``(dW1, db1, dW2, db2)``; each gradient has the same shape
            as the parameter it belongs to.
        """
        # Number of samples comes from the activations themselves rather than
        # from a variable defined outside this function.
        m = A2.shape[1]
        # Size the one-hot matrix from the output layer so it always matches
        # A2, even when the highest class is absent from Y.
        one_hot_Y = neural_network.one_hot(Y, A2.shape[0])

        dZ2 = A2 - one_hot_Y
        dW2 = dZ2.dot(A1.T) / m
        # Sum over samples only (axis 1): one bias gradient per output unit.
        db2 = np.sum(dZ2, axis=1, keepdims=True) / m
        dZ1 = W2.T.dot(dZ2) * neural_network.ReLU_deriv(Z1)
        dW1 = dZ1.dot(X.T) / m
        db1 = np.sum(dZ1, axis=1, keepdims=True) / m
        return dW1, db1, dW2, db2

    @staticmethod
    def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
        """Apply one gradient-descent step with learning rate ``alpha``."""
        W1 = W1 - alpha * dW1
        b1 = b1 - alpha * db1
        W2 = W2 - alpha * dW2
        b2 = b2 - alpha * db2
        return W1, b1, W2, b2

    @staticmethod
    def get_predictions(A2):
        """Return the index of the largest output per column."""
        return np.argmax(A2, 0)

    @staticmethod
    def predict(X, model):
        """Predict labels for ``X`` using ``model = (W1, b1, W2, b2)``."""
        _, _, _, A2 = neural_network.forward_prop(
            model[0], model[1], model[2], model[3], X
        )
        return neural_network.get_predictions(A2)

    @staticmethod
    def get_accuracy(predictions, Y):
        """Return the fraction of predictions equal to the labels ``Y``."""
        return np.sum(predictions == Y) / Y.size

    @staticmethod
    def train(X, Y, alpha, iterations, nodes, num_classes=10):
        """Train the network with full-batch gradient descent.

        Args:
            X: Input data, one column per sample.
            Y: Integer labels, one per sample.
            alpha: Learning rate.
            iterations: Number of gradient-descent steps.
            nodes: Number of hidden units.
            num_classes: Number of output units (defaults to 10).

        Returns:
            The trained parameters ``(W1, b1, W2, b2)``.
        """
        # Create the weight and bias parameters.
        W1, b1, W2, b2 = neural_network.init_params(nodes, X, num_classes)
        for i in range(iterations):
            Z1, A1, Z2, A2 = neural_network.forward_prop(W1, b1, W2, b2, X)
            dW1, db1, dW2, db2 = neural_network.backward_prop(
                Z1, A1, Z2, A2, W1, W2, X, Y
            )
            W1, b1, W2, b2 = neural_network.update_params(
                W1, b1, W2, b2, dW1, db1, dW2, db2, alpha
            )
            if i % 10 == 0:  # report progress every 10 iterations
                print("Iteration: ", i)
                predictions = neural_network.get_predictions(A2)
                print(neural_network.get_accuracy(predictions, Y))
        return W1, b1, W2, b2