# Multi-Layer Perceptron

In [10]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [11]:
# UCI Iris data set. The file has no header row, so pandas numbers the columns 0-4.
IRIS_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
df = pd.read_csv(IRIS_URL, header=None)
df

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
def relu(x):
    """Rectified linear unit: keep entries of ``x`` that are >= 0, replace the rest with 0."""
    keep = x >= 0
    return tf.where(keep, x, 0)

class MLP():
    """Minimal fully connected network trained with full-batch gradient descent.

    ``self.W`` is a flat list that interleaves the parameters layer by layer:
    ``[W0, b0, W1, b1, ...]``. ``self.activation[k]`` is applied to the output
    of layer ``k``; a ``None`` entry leaves that layer linear.
    """

    def __init__(self, neurons=(1, 100, 100, 1), activation=(relu, relu, None), seed=None):
        """Create the weight matrices and bias vectors.

        Args:
            neurons: Layer widths, input size first and output size last.
                ``len(neurons) - 1`` layers are created.
            activation: One callable (or ``None`` for no activation) per layer.
                Extra trailing entries are ignored.
            seed: Optional seed for the weight initialisation. When ``None``
                the global NumPy random state is used.

        Raises:
            ValueError: If fewer activations than layers are supplied.
        """
        n_layers = len(neurons) - 1
        if len(activation) < n_layers:
            raise ValueError(
                "expected at least %d activation entries (one per layer), got %d"
                % (n_layers, len(activation))
            )

        # A private RandomState keeps a seeded model from touching the global RNG.
        rng = np.random if seed is None else np.random.RandomState(seed)

        self.W = []
        # Copy so later changes to the caller's sequence do not affect the model.
        self.activation = list(activation)

        for n_in, n_out in zip(neurons[:-1], neurons[1:]):
            self.W.append(tf.Variable(rng.randn(n_in, n_out)))  # weight matrix
            self.W.append(tf.Variable(rng.randn(n_out)))  # bias vector

    def __call__(self, x):
        """Run a forward pass and return the output of the last layer.

        ``x`` is cast to the dtype of the parameters so that inputs of another
        numeric dtype do not fail in the matrix product.
        """
        if self.W:
            x = tf.cast(x, self.W[0].dtype)

        # Parameters are stored as (weight, bias) pairs, hence len(self.W) // 2 layers.
        for layer in range(len(self.W) // 2):
            weight = self.W[2 * layer]
            bias = self.W[2 * layer + 1]
            x = x @ weight + bias
            act = self.activation[layer]
            if act is not None:
                x = act(x)
        return x

    def fit(self, X, Y, lr=0.0001, epochs=2000, log_every=1000):
        """Minimise the mean squared error between ``self(X)`` and ``Y``.

        Every epoch uses all of ``X`` and ``Y`` for a single gradient step.

        Args:
            X: Input samples.
            Y: Targets, broadcastable against ``self(X)``.
            lr: Step size of the gradient update.
            epochs: Number of gradient steps.
            log_every: Print ``epoch`` and the loss every this many epochs;
                ``0`` or ``None`` disables printing.
        """
        if self.W:
            # Convert once, outside the loop, instead of on every epoch.
            dtype = self.W[0].dtype
            X = tf.cast(X, dtype)
            Y = tf.cast(Y, dtype)

        for epoch in range(epochs):
            with tf.GradientTape() as tape:
                loss = tf.reduce_mean((self(X) - Y) ** 2)
            grads = tape.gradient(loss, self.W)

            for param, grad in zip(self.W, grads):
                param.assign_sub(lr * grad)

            if log_every and epoch % log_every == 0:
                print(epoch, loss.numpy())

In [54]:
# Per-class split: within each class the first `split` fraction of rows
# (in file order) goes to training and the remaining rows go to testing.
X = df.iloc[:, :4].values   # first four columns: features
L = df.iloc[:, -1].values   # last column: class label
classes = np.unique(L)
split = 0.5

itrain = []
itest = []
Y = []
for c in classes:
    class_mask = L == c                  # True for the rows that belong to class c
    class_rows = np.where(class_mask)[0]  # np.where returns a tuple; [0] is the index array
    n_train = int(split * len(class_rows))
    itrain.extend(class_rows[:n_train])
    itest.extend(class_rows[n_train:])
    # The builtin `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
    Y.append(class_mask.astype(int))
Y = np.array(Y).T  # one column per class, so each row is a one-hot vector
print(Y)

[[1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 

In [83]:
# MLP draws its initial weights from NumPy's global RNG; seed it so that
# re-running this cell reproduces the same training run.
np.random.seed(0)
model = MLP([4, 100, 50, 3], [tf.sigmoid, tf.sigmoid, tf.sigmoid])
model.fit(X[itrain], Y[itrain], lr=0.1, epochs=5000)

0 0.5712578502472352
1000 0.020232045539494665
2000 0.013399382038791567
3000 0.01044589410039163
4000 0.008723198627146635


In [84]:
# Forward pass on the held-out rows: Z holds one row of class scores per test sample
X_test = X[itest]
Z = model(X_test)

In [85]:
# Predicted class index per test sample (the column with the highest score)
predicted_classes = tf.argmax(Z, axis=1)
predicted_classes

<tf.Tensor: id=2922511, shape=(75,), dtype=int64, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)>

In [86]:
# True class index per test sample, recovered from the one-hot rows of Y
true_classes = tf.argmax(Y[itest], axis=1)
true_classes

<tf.Tensor: id=2922514, shape=(75,), dtype=int64, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)>

In [82]:
# Test accuracy: fraction of test samples whose predicted class equals the true class
expected = tf.argmax(Y[itest], axis=1)
predicted = tf.argmax(Z, axis=1)
n_correct = np.sum(expected == predicted)
n_correct / len(itest)

0.96