In [27]:
import numpy as np
import pandas as pd
import math
from sklearn.model_selection import train_test_split
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

from matplotlib.colors import ListedColormap
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
%matplotlib inline
from sklearn.metrics import accuracy_score

import sklearn.linear_model

In [28]:
class NeuralNet:
    """Fully connected multi-class classifier trained with batch gradient descent.

    Hidden layers use ReLU activations and the output layer uses softmax.
    Public methods take data with one sample per row; internally the data is
    transposed so that every sample is a column.

    Parameters
    ----------
    layers_sizes : sequence of int
        Number of units in each hidden layer. The input and output sizes are
        derived from the data passed to ``fit``. The sequence is copied, so
        the caller's object is never modified.
    normalize : bool
        If true, each feature is standardised with the mean and standard
        deviation computed in ``fit``.
    learning_rate : float
        Step size of the gradient-descent update.
    num_iter : int
        Maximum number of gradient-descent iterations.
    costs_iters : int
        Look-back window (in iterations) used by the early-stopping check.
    tol : float
        Training stops once the cost changed by less than ``tol`` over the
        last ``costs_iters`` iterations.

    Attributes set by ``fit``
    -------------------------
    parameters : dict
        ``{"W": [...], "b": [...]}`` with one weight matrix / bias column per
        layer, ordered from the first hidden layer to the output layer.
    enc : OneHotEncoder
        Encoder fitted on the training labels; used to map predictions back
        to the original label values.
    """

    def __init__(self, layers_sizes, normalize = True, learning_rate = 0.01, num_iter = 15000, costs_iters=300, tol=1e-7):
        self.learning_rate = learning_rate
        self.num_iter = num_iter
        self.normalize = normalize
        # Copy: fit() must not alter the caller's list, and a tuple is accepted too.
        self.layers_sizes = list(layers_sizes)
        self.tol = tol
        # Number of weight layers: every hidden layer plus the output layer.
        self.layers = len(self.layers_sizes) + 1
        self.costs_iters = costs_iters

    def __normalize(self, X, mean = None, std = None):
        """Standardise ``X`` (features in rows, samples in columns).

        When ``mean`` / ``std`` are omitted they are computed per feature from
        ``X``. Returns ``(X_normalised, mean, std)`` so the statistics can be
        reused on new data.
        """
        n = X.shape[0]
        m = mean
        if m is None:
            m = np.mean(X, axis=1).reshape((n, 1))
        s = std
        if s is None:
            s = np.std(X, axis=1).reshape((n, 1))
            # A constant feature has zero spread; dividing by 1 keeps it at 0
            # instead of producing nan/inf.
            s = np.where(s == 0, 1.0, s)
        X_new = (X - m) / s
        return X_new, m, s

    def __softmax(self, Z):
        """Column-wise softmax of ``Z``."""
        # Subtracting the column maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        e = np.exp(shifted)
        return e / np.sum(e, axis=0, keepdims=True)

    def __relu(self, Z):
        """Element-wise ``max(0, Z)``."""
        return np.maximum(0, Z)

    def __relu_derivative(self, A):
        """Derivative of ReLU evaluated from its *output* ``A``.

        ``A`` is never negative, so the comparison has to be strict: a unit
        whose output is 0 is inactive and must not pass any gradient.
        """
        return np.greater(A, 0).astype(int)

    def __initialize_parameters(self, sizes):
        """Create small random weights and zero biases for consecutive ``sizes``.

        ``sizes`` lists the unit count of every layer including input and
        output. The resulting shapes are printed.
        """
        W, b = [], []

        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            W.append(np.random.randn(n_out, n_in) * 0.01)
            b.append(np.zeros((n_out, 1)))

        self.parameters = {"W" : W, "b" : b}

        for key, arrays in self.parameters.items():
            for i, arr in enumerate(arrays, start=1):
                print("{}{}: {}".format(key, i, arr.shape))

    def __forward_propagation(self, X):
        """Run the network on ``X`` (samples in columns).

        Returns the softmax output and ``(Z, A)``, where ``Z`` holds the
        pre-activations of every layer and ``A`` the activations, with
        ``A[0]`` being ``X`` itself.
        """
        W = self.parameters["W"]
        b = self.parameters["b"]
        Z, A = [], [X]

        last = len(W) - 1
        for l in range(len(W)):
            Z_l = np.dot(W[l], A[l]) + b[l]
            # Only the final layer is softmax; all earlier ones are ReLU.
            A_l = self.__softmax(Z_l) if l == last else self.__relu(Z_l)
            Z.append(Z_l)
            A.append(A_l)

        cache = (Z, A)
        return A[-1], cache

    def compute_cost(self, A, Y):
        """Mean categorical cross-entropy between probabilities ``A`` and one-hot ``Y``.

        Both arrays have one column per sample. This is the loss whose
        gradient with respect to the softmax input is ``A - Y``, i.e. the one
        the backward pass actually minimises.
        """
        m = Y.shape[1]
        # Clip so that a probability of exactly 0 cannot produce log(0).
        A_safe = np.clip(A, 1e-15, 1.0)
        J = -(1 / m) * np.sum(Y * np.log(A_safe))
        return J

    def __backward_propagation(self, X, Y, cache):
        """Compute gradients of the cost for every layer.

        Returns ``{"dZ": [...], "dW": [...], "db": [...]}`` with the lists
        ordered like ``self.parameters["W"]``.
        """
        m = X.shape[1]

        W = self.parameters["W"]
        (Z, A) = cache

        dZ, dW, db = [], [], []

        # Output layer: softmax combined with cross-entropy gives A - Y.
        dZ_l = A[-1] - Y
        dW_l = 1. / m * np.dot(dZ_l, A[-2].T)
        db_l = 1. / m * np.sum(dZ_l, axis = 1, keepdims = True)

        dZ.insert(0, dZ_l)
        dW.insert(0, dW_l)
        db.insert(0, db_l)

        # Walk back through the hidden layers. W[l] maps A[l] to the next
        # layer, so W[l].T carries the error back onto A[l].
        for l in range(len(W) - 1, 0, -1):
            dA_l = np.dot(W[l].T, dZ_l)
            dZ_l = np.multiply(dA_l, self.__relu_derivative(A[l]))
            dW_l = 1. / m * np.dot(dZ_l, A[l-1].T)
            db_l = 1. / m * np.sum(dZ_l, axis = 1, keepdims = True)
            dZ.insert(0, dZ_l)
            dW.insert(0, dW_l)
            db.insert(0, db_l)

        grads = {"dZ": dZ, "dW": dW, "db": db}
        return grads

    def __update_parameters(self, grads):
        """Apply one gradient-descent step to all weights and biases."""
        W = self.parameters["W"]
        b = self.parameters["b"]
        dW = grads["dW"]
        db = grads["db"]

        for l in range(len(W)):
            W[l] = W[l] - self.learning_rate * dW[l]
            b[l] = b[l] - self.learning_rate * db[l]

        self.parameters["W"] = W
        self.parameters["b"] = b

    def fit(self, X_vert, Y_vert, print_cost = True):
        """Train the network.

        Parameters
        ----------
        X_vert : array-like, one sample per row.
        Y_vert : array-like of class labels, one per sample.
        print_cost : bool
            If true, print the cost every 1000 iterations and plot the
            recorded costs when training ends.

        ``self.layers_sizes`` is left untouched, so ``fit`` can be called
        repeatedly on the same instance.
        """
        # Local import: the notebook's import cell does not provide this name.
        from sklearn.preprocessing import OneHotEncoder

        X = np.asarray(X_vert).T
        # One label per row, whatever 1-D / column layout was passed in.
        Y = np.asarray(Y_vert).reshape((-1, 1))
        enc = OneHotEncoder(handle_unknown='ignore')
        self.enc = enc
        Y = enc.fit_transform(Y).toarray().astype(int).T

        if self.normalize:
            X, self.__mean, self.__std = self.__normalize(X)

        costs = []
        costs_imp = []

        n_x = X.shape[0]
        C = Y.shape[0]

        # Full architecture = input size, hidden sizes, number of classes.
        sizes = [n_x] + self.layers_sizes + [C]
        self.__initialize_parameters(sizes)

        for i in range(self.num_iter):
            A, cache = self.__forward_propagation(X)

            cost = self.compute_cost(A, Y)

            grads = self.__backward_propagation(X, Y, cache)

            self.__update_parameters(grads)

            if print_cost and i % 1000 == 0:
                print("{}-th iteration: {}".format(i, cost))

            if i % 1000 == 0:
                costs.append(cost)

            # Early stopping: compare against the cost costs_iters steps ago.
            costs_imp.append(cost)
            if i > self.costs_iters:
                if abs(costs_imp[-1] - costs_imp[-1 - self.costs_iters]) < self.tol:
                    print("Stop iteration: {}".format(i))
                    break

        if print_cost:
            plt.plot(costs)
            plt.ylabel("Cost")
            plt.xlabel("Iteration, *1000")
            plt.show()

    def predict_proba(self, X_vert):
        """Return class probabilities, one row per sample of ``X_vert``.

        Columns follow the category order of the fitted encoder.
        """
        X = np.asarray(X_vert).T
        if self.normalize:
            X, _, _ = self.__normalize(X, self.__mean, self.__std)

        probs = self.__forward_propagation(X)[0].T
        return probs

    def predict(self, X_vert):
        """Return the most probable original label for every row of ``X_vert``."""
        probs = self.predict_proba(X_vert)
        # argmax picks exactly one class per sample (first one on ties).
        best = np.argmax(probs, axis=1)
        return self.enc.categories_[0][best]

In [29]:
# Load the Iris dataset; return_X_y=True yields the (features, labels) pair directly.
from sklearn.datasets import load_iris
X, Y = load_iris(return_X_y = True)


In [30]:
# Reference model: scikit-learn MLP with one hidden layer of 20 units, trained with Adam.
# NOTE(review): no random_state is passed, so results may vary between runs.
from sklearn.neural_network import MLPClassifier
cls = MLPClassifier(hidden_layer_sizes = (20,), max_iter = 10000,  solver = 'adam') 

In [31]:
# Fit on the complete dataset; no train/test split is applied here.
cls.fit(X, Y)

MLPClassifier(hidden_layer_sizes=(20,), max_iter=10000)

In [32]:
# Per-class probabilities for the same samples the model was fitted on.
Y_prob = cls.predict_proba(X)
Y_prob

array([[9.94816668e-01, 5.18333190e-03, 1.81558657e-11],
       [9.86417498e-01, 1.35825012e-02, 3.04844803e-10],
       [9.91840080e-01, 8.15991962e-03, 1.08001932e-10],
       [9.85078002e-01, 1.49219979e-02, 5.90771453e-10],
       [9.95299464e-01, 4.70053597e-03, 1.56830124e-11],
       [9.93107367e-01, 6.89263329e-03, 2.31303688e-11],
       [9.91052341e-01, 8.94765932e-03, 1.54881766e-10],
       [9.92423559e-01, 7.57644053e-03, 5.62439925e-11],
       [9.81337890e-01, 1.86621083e-02, 1.44349963e-09],
       [9.89481312e-01, 1.05186881e-02, 1.55663804e-10],
       [9.96000122e-01, 3.99987759e-03, 5.90239803e-12],
       [9.90159916e-01, 9.84008354e-03, 1.46111434e-10],
       [9.89275573e-01, 1.07244264e-02, 1.90474211e-10],
       [9.90151910e-01, 9.84809001e-03, 2.57641641e-10],
       [9.98302641e-01, 1.69735923e-03, 2.89725910e-13],
       [9.97918070e-01, 2.08192977e-03, 5.47342165e-13],
       [9.96594629e-01, 3.40537072e-03, 3.65437588e-12],
       [9.93505288e-01, 6.49471

In [33]:
# Hard class predictions for the same samples.
Y_hat = cls.predict(X)
Y_hat

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [34]:
# Y_hat was predicted on the data used for fitting, so this is a training-set
# accuracy, not a held-out estimate.
accuracy_score(Y, Y_hat)

0.98