# Multilayer Neural Network

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.utils as skutils
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler

In [14]:
class Multilayer:
    """Fully connected feed-forward network for binary classification.

    The network has ``l_in`` inputs, any number of ReLU hidden layers and a
    single sigmoid output unit. It is trained with mini-batch gradient
    descent on the summed binary cross-entropy, with an optional L2 penalty
    on the weights.

    State is kept in dictionaries keyed by layer index (0 is the input):

    * ``m`` -- list with the number of neurons in each layer
    * ``L`` -- number of layers, input layer included
    * ``W[l]`` / ``B[l]`` -- weights ``(m[l-1], m[l])`` and biases ``(m[l],)``
    * ``Z[l]`` / ``A[l]`` -- weighted inputs and activations of the last pass
    * ``C[epoch]`` -- cross-entropy on the training data after each epoch
    * ``y`` -- targets of the current batch while fitting; after ``predict``
      it holds the thresholded predictions instead
    """

    def __init__(self, l_in, l_hidden, seed=None, alpha=0.01, l2_lambda=0):
        """Create the layers and draw the initial parameters.

        Args:
            l_in: Number of input features.
            l_hidden: Iterable with the size of every hidden layer.
            seed: Value passed to ``np.random.seed`` before the parameters
                are drawn. ``None`` (the default) leaves NumPy's global
                generator untouched, so each instance gets different
                parameters.
            alpha: Learning rate.
            l2_lambda: L2 regularisation strength (0 disables it).
        """
        self.alpha = alpha
        self.l2_lambda = l2_lambda
        self.m = [l_in] + list(l_hidden) + [1]  # neurons per layer
        self.L = len(self.m)  # number of layers, input layer included

        self.fa = {}  # activation function per layer
        self.fd = {}  # activation derivative per layer

        # Hidden layers use ReLU, the output layer uses the sigmoid.
        for l in range(1, self.L - 1):
            self.fa[l] = Multilayer.a_relu
            self.fd[l] = Multilayer.d_relu
        self.fa[self.L - 1] = Multilayer.a_sigmoid
        self.fd[self.L - 1] = Multilayer.d_sigmoid

        self.A = {}  # activation values per layer
        self.Z = {}  # weighted inputs per layer
        self.W = {}  # weights per layer (one column per neuron)
        self.B = {}  # biases per layer
        self.C = {}  # cost per epoch

        # The seed is resolved here rather than in the signature: a default
        # expression in the signature is evaluated only once, which would
        # give every default-seeded instance the same seed.
        if seed is not None:
            np.random.seed(seed)
        for l in range(1, self.L):
            self.W[l] = np.random.normal(
                loc=0, scale=0.01, size=(self.m[l - 1], self.m[l])
            )
            self.B[l] = np.random.normal(loc=0, scale=0.01, size=self.m[l])

    def fit(self, X, y, max_epoch=1000, batch_size=None, gradient_check=False):
        """Train with mini-batch gradient descent.

        Args:
            X: Array-like of shape ``(n_samples, l_in)``.
            y: Array-like with ``n_samples`` binary targets.
            max_epoch: Number of passes over the data.
            batch_size: Samples per mini-batch; ``None`` uses one batch
                holding all the data.
            gradient_check: When true, ``gradient_check`` is called after
                every parameter update.

        Returns:
            The fitted model (``self``).
        """
        # Coerce to float so mixed bool/float tables do not end up as
        # object arrays, which NumPy's ufuncs cannot process.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)

        for epoch in range(1, max_epoch + 1):
            for Xb, yb in Multilayer.create_batches(X, y, batch_size):
                self.A[0] = Xb
                self.y = yb
                self.Z, self.A = self.forward_pass()
                self.W, self.B = self.backward_pass()
                if gradient_check:
                    self.gradient_check()
            # Only the value after the last batch of an epoch is kept, so the
            # full-dataset cost is evaluated once per epoch, not per batch.
            self.C[epoch] = self.cost(X, y)

        return self

    def forward_pass(self, W=None, B=None):
        """Propagate ``self.A[0]`` through the network.

        Args:
            W: Weights to use; defaults to ``self.W``.
            B: Biases to use; defaults to ``self.B``.

        Returns:
            Tuple ``(Z, A)`` of dictionaries with the weighted inputs and
            the activations of every layer.
        """
        W = self.W if W is None else W
        B = self.B if B is None else B
        Z = self.Z.copy()
        A = self.A.copy()

        for l in range(1, self.L):
            Z[l] = A[l - 1].dot(W[l]) + B[l]
            A[l] = self.fa[l](Z[l])

        return Z, A

    def backward_pass(self):
        """Backpropagate the error of the last forward pass.

        Reads ``self.A``, ``self.Z`` and ``self.y`` and leaves ``self.W`` and
        ``self.B`` untouched.

        Returns:
            Tuple ``(W, B)`` of dictionaries with the parameters after one
            gradient-descent step.
        """
        delta_Z, delta_W, delta_B = {}, {}, {}
        last = self.L - 1

        for l in range(last, 0, -1):
            if l == last:
                # Sigmoid output + cross-entropy: dC/dZ reduces to A - y, so
                # the division by A * (1 - A) is never evaluated.
                delta_Z[l] = self.A[l] - self.y
            else:
                delta_A = delta_Z[l + 1].dot(self.W[l + 1].T)
                delta_Z[l] = delta_A * self.fd[l](self.Z[l])

            # Gradient of the penalty (lambda / 2) * W ** 2 is lambda * W.
            delta_W[l] = self.A[l - 1].T.dot(delta_Z[l]) + self.l2_lambda * self.W[l]
            # One gradient per neuron, summed over the batch like delta_W.
            delta_B[l] = delta_Z[l].sum(axis=0)

        W = {l: self.W[l] - self.alpha * delta_W[l] for l in range(1, self.L)}
        B = {l: self.B[l] - self.alpha * delta_B[l] for l in range(1, self.L)}

        return W, B

    def predict(self, X, threshold=0.5):
        """Run the network on ``X`` and store the results on the model.

        The probabilities are left in ``self.A[self.L - 1]`` and the class
        labels (probability >= ``threshold`` gives 1) in ``self.y``, both of
        shape ``(n_samples, 1)``.

        Returns:
            The model (``self``), so the results can be read fluently.
        """
        self.A[0] = np.asarray(X, dtype=float)
        self.Z, self.A = self.forward_pass()
        self.y = Multilayer.q_sigmoid(self.A[self.L - 1], threshold)
        return self

    def score(self, X, y, threshold=0.5):
        """Return the accuracy of the thresholded predictions on ``X``."""
        predicted = self.predict(X, threshold).y.ravel()
        return accuracy_score(np.asarray(y).ravel(), predicted)

    def cost(self, X, y):
        """Return the summed binary cross-entropy of the model on ``X``.

        The L2 penalty is not included. Like ``predict``, this overwrites
        ``self.A``, ``self.Z`` and ``self.y``.
        """
        # A column vector keeps the product below element-wise; a 1-D ``y``
        # would broadcast against the (n, 1) predictions into an (n, n) matrix.
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        # Keep saturated probabilities away from 0 and 1 so the logarithms
        # stay finite.
        eps = 1e-12
        y_cap = np.clip(self.predict(X).A[self.L - 1], eps, 1 - eps)
        return -((y * np.log(y_cap)) + (1 - y) * np.log(1 - y_cap)).sum()

    def gradient_check(self):
        """Placeholder: gradient checking is not implemented (no-op)."""
        return None

    @staticmethod
    def create_batches(X, y, batch_size):
        """Shuffle ``X`` and ``y`` together and split them into mini-batches.

        Args:
            X: Sample array.
            y: Target array with the same length as ``X``.
            batch_size: Approximate samples per batch; ``None`` or 0 gives a
                single batch.

        Returns:
            Iterator of ``(X_batch, y_batch)`` pairs.
        """
        Xb, yb = skutils.shuffle(X, y)
        # At least one batch, even when batch_size exceeds the sample count.
        batches = max(1, int(len(X) / (batch_size or len(X))))
        Xb = np.array_split(Xb, batches)
        yb = np.array_split(yb, batches)
        return zip(Xb, yb)

    @staticmethod
    def q_sigmoid(a, threshold=0.5):
        """Quantise probabilities: 0 below ``threshold``, otherwise 1."""
        return np.where(a < threshold, 0, 1)

    @staticmethod
    def a_sigmoid(z):
        """Logistic function."""
        # Clipping keeps np.exp from overflowing for large negative z.
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    @staticmethod
    def d_sigmoid(z):
        """Derivative of the logistic function."""
        a = Multilayer.a_sigmoid(z)
        return a * (1 - a)

    @staticmethod
    def a_relu(z):
        """Rectified linear unit."""
        return np.where(z > 0, z, 0)

    @staticmethod
    def d_relu(z):
        """Derivative of the rectified linear unit (0 at z == 0)."""
        return np.where(z > 0, 1, 0)

## XOR Dataset

In [15]:
# 500 points drawn from a standard normal in the plane. A point is labelled 1
# when exactly one of its two coordinates is positive (XOR of the signs).
X = np.random.randn(500, 2)
first_positive = X[:, 0] > 0
second_positive = X[:, 1] > 0
y = np.logical_xor(first_positive, second_positive).astype(int)

In [16]:
# 2 inputs, one hidden layer of 3 units; 40 epochs of mini-batches of 20 samples.
model = Multilayer(l_in=2, l_hidden=[3], seed=1, alpha=0.01).fit(X, y, max_epoch=40, batch_size=20)

### Predict

In [None]:
# Scatter the samples, coloured by the model's thresholded prediction
# (`predict` stores the labels as a column, hence the `[:, 0]`).
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=model.predict(X).y[:,0])

### Cost

In [None]:
# `model.C` maps epoch number -> cost, so this draws the training curve.
sns.lineplot(x=list(model.C.keys()), y=list(model.C.values()))

### Accuracy

In [None]:
# Accuracy on the same samples the model was fitted on (no held-out data here).
model.score(X, y)

## Circles Dataset

In [None]:
# 1000 noisy samples from scikit-learn's synthetic "circles" generator;
# the returned pair is (features, labels).
noisy_circles = datasets.make_circles(n_samples=1000, factor=0.5, noise=0.05)
X, y = noisy_circles

In [None]:
# 2 inputs, one hidden layer of 5 units; 10 epochs of mini-batches of 20 samples.
model = Multilayer(l_in=2, l_hidden=[5], seed=1, alpha=0.05).fit(X, y, max_epoch=10, batch_size=20)

### Predict

In [None]:
# Scatter the samples, coloured by the model's thresholded prediction
# (`predict` stores the labels as a column, hence the `[:, 0]`).
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=model.predict(X).y[:,0])

### Cost

In [None]:
# `model.C` maps epoch number -> cost, so this draws the training curve.
sns.lineplot(x=list(model.C.keys()), y=list(model.C.values()))

### Accuracy

In [None]:
# Accuracy on the same samples the model was fitted on (no held-out data here).
model.score(X, y)

## Moons Dataset

In [None]:
# 1000 noisy samples from scikit-learn's synthetic "moons" generator;
# the returned pair is (features, labels).
noisy_moons = datasets.make_moons(n_samples=1000, noise=0.05)
X, y = noisy_moons

In [None]:
# 2 inputs, one hidden layer of 6 units; 40 epochs of mini-batches of 20 samples.
model = Multilayer(l_in=2, l_hidden=[6], seed=0, alpha=0.05).fit(X, y, max_epoch=40, batch_size=20)

### Predict

In [None]:
# Scatter the samples, coloured by the model's thresholded prediction
# (`predict` stores the labels as a column, hence the `[:, 0]`).
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=model.predict(X).y[:,0])

### Cost

In [None]:
# `model.C` maps epoch number -> cost, so this draws the training curve.
sns.lineplot(x=list(model.C.keys()), y=list(model.C.values()))

### Accuracy

In [None]:
# Accuracy on the same samples the model was fitted on (no held-out data here).
model.score(X, y)

## Heart Disease

In [None]:
# The data file has no header row, so the 14 column names are supplied here;
# the last one, 'num', is the target.
columns = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num',
]
df = pd.read_csv(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/heart/heart.dat',
    sep=' ',
    names=columns,
)

### Preprocessing

In [None]:
# Positions in the `columns` list, grouped by how each attribute is encoded.
real_attributes = [
    0,   # age
    3,   # trestbps
    4,   # chol
    7,   # thalach
    9,   # oldpeak
    11,  # ca
]
ordered_attributes = [
    10,  # slope
]
binary_attributes = [
    1,  # sex
    5,  # fbs
    8,  # exang
]
# Order matters: it fixes the order of the one-hot columns created later.
nominal_attributes = [
    6,   # restecg
    2,   # cp
    12,  # thal
]
target_attribute = 'num'

In [None]:
# Recode the target to 0/1: the value 1 becomes 0, every other value becomes 1.
is_not_one = df[target_attribute] != 1
df[target_attribute] = is_not_one.astype(int)

In [None]:
# Standardise the real-valued columns (zero mean, unit variance per column).
df.iloc[:, real_attributes] = StandardScaler().fit_transform(df.iloc[:, real_attributes])
# Rescale the ordinal column to [0, 1] per column. Normalizer is the wrong
# tool here: it rescales each *row* to unit norm, so applied to a single
# column it turns every non-zero value into 1.0 and erases the feature.
df.iloc[:, ordered_attributes] = MinMaxScaler().fit_transform(df.iloc[:, ordered_attributes])
# One-hot encode the nominal columns, dropping the first level of each.
# dtype=float keeps the whole feature table numeric instead of mixing
# bool and float columns.
df = pd.get_dummies(df, columns=df.columns[nominal_attributes], drop_first=True, dtype=float)

In [None]:
# Feature column names: every column of `df` except the target.
all_fields = df.columns.drop(target_attribute).tolist()

In [None]:
# Preview the first rows of the preprocessed table.
df.head()

### Train

In [None]:
# Hold out 20% of the rows for testing.
# NOTE(review): no `random_state` is passed, so this call does not pin the split.
X_train, X_test, y_train, y_test = train_test_split(df[all_fields], np.array(df[target_attribute]), test_size=0.2)

In [None]:
# `len(X_train.T)` is the number of feature columns. One hidden layer of
# 3 units, L2 strength 0.05; 200 epochs of mini-batches of 20 samples.
model = Multilayer(l_in=len(X_train.T), l_hidden=[3], seed=0, alpha=0.01, l2_lambda=0.05).fit(X_train, y_train, max_epoch=200, batch_size=20)

### Cost

In [None]:
# `model.C` maps epoch number -> cost, so this draws the training curve.
sns.lineplot(x=list(model.C.keys()), y=list(model.C.values()))

### Accuracy

In [None]:
# Compare accuracy on the training split with accuracy on the held-out split.
print('train set', model.score(X_train, y_train))
print('test set', model.score(X_test, y_test))