# Import Libraries

In [264]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
from collections import Counter

# Import Dataset

In [243]:
IRIS = load_iris()

data = IRIS['data']
labels = IRIS['target']

feature_names = IRIS['feature_names']
target_names  = IRIS['target_names']

dataset = pd.DataFrame({feature_names[0]:data[:,0],
                        feature_names[1]:data[:,1],
                        feature_names[2]:data[:,2],
                        feature_names[3]:data[:,3],
                        'type': target_names[labels]})

#print(IRIS['DESCR'])

## Train Test split

In [244]:
x_train,x_test, y_train, y_test=train_test_split(data,labels,test_size=0.2)

# MLP

https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65

In [245]:
class FCLayer(Layer):
    # input_size = number of input neurons
    # output_size = number of output neurons
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dBias = output_error

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error

In [246]:
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    # returns the activated input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    # Returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there is no "learnable" parameters.
    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error

In [247]:
# Activation Functions 

def tanh(x):
    return np.tanh(x);

def tanh_prime(x):
    return 1-np.tanh(x)**2;



In [248]:
# Loss functions

def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2));

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size;

In [249]:
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        samples = len(input_data)
        result = []

        # run network over all samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)

        return result

    # train the network
    def fit(self, x_train, y_train, epochs, learning_rate):
        # sample dimension first
        samples = len(x_train)

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

In [250]:
# One hot encoding  for labels

y_train_encoded = OneHotEncoder().fit_transform(y_train.reshape(-1,1))

# reshape x_train and x_test so that each sample is transposed

x_train_encoded = x_train.reshape(x_train.shape[0] ,1 , x_train.shape[1])
x_test_encoded = x_test.reshape(x_test.shape[0] ,1 , x_test.shape[1])

In [251]:
net = Network()
net.add(FCLayer(4, 5))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(5, 10))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(10, 6))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(6, 3))
net.add(ActivationLayer(tanh, tanh_prime))

net.use(mse, mse_prime)
net.fit(x_train_encoded, y_train_encoded.toarray(), epochs=35, learning_rate=0.1)

epoch 1/35   error=0.200398
epoch 2/35   error=0.119874
epoch 3/35   error=0.115739
epoch 4/35   error=0.112972
epoch 5/35   error=0.108534
epoch 6/35   error=0.105060
epoch 7/35   error=0.097884
epoch 8/35   error=0.090345
epoch 9/35   error=0.095897
epoch 10/35   error=0.083151
epoch 11/35   error=0.084270
epoch 12/35   error=0.087546
epoch 13/35   error=0.088629
epoch 14/35   error=0.073346
epoch 15/35   error=0.057947
epoch 16/35   error=0.075914
epoch 17/35   error=0.056034
epoch 18/35   error=0.081103
epoch 19/35   error=0.083187
epoch 20/35   error=0.062784
epoch 21/35   error=0.109191
epoch 22/35   error=0.077263
epoch 23/35   error=0.071554
epoch 24/35   error=0.079289
epoch 25/35   error=0.082914
epoch 26/35   error=0.075938
epoch 27/35   error=0.075040
epoch 28/35   error=0.078888
epoch 29/35   error=0.076598
epoch 30/35   error=0.084942
epoch 31/35   error=0.072208
epoch 32/35   error=0.069539
epoch 33/35   error=0.069491
epoch 34/35   error=0.073091
epoch 35/35   error=0.0

In [252]:
out_train = net.predict(x_train_encoded)
out_test  = net.predict(x_test_encoded) 

In [258]:
out_train_labels_MLP = []
out_test_labels_MLP  = []

for output in out_train:
    out_train_labels_MLP.append(np.argmax(output[0]))
    
for output in out_test:
    out_test_labels_MLP.append(np.argmax(output[0]))
    
train_accuracy_score = accuracy_score(y_train,out_train_labels_MLP)
test_accuracy_score  = accuracy_score(y_test,out_test_labels_MLP)

print(f'Train accuracy_score : {train_accuracy_score}\nTest accuracy_score : {test_accuracy_score}')
    

Train F1-Score : 0.9416666666666667
Test F1-Score : 1.0


# SVM 

In [259]:
SVM = SVC()
SVM.fit(x_train,y_train)

out_train_labels_SVM = SVM.predict(x_train) 
out_test_labels_SVM  = SVM.predict(x_test)

train_accuracy_score = accuracy_score(y_train,out_train_labels_SVM)
test_accuracy_score  = accuracy_score(y_test,out_test_labels_SVM)

print(f'Train accuracy_score : {train_accuracy_score}\nTest accuracy_score : {test_accuracy_score}')


Train accuracy_score : 0.9583333333333334
Test accuracy_score : 0.9333333333333333


# Decision Tree

https://betterdatascience.com/mml-decision-trees/

In [260]:
class Node:
    '''
    Helper class which implements a single tree node.
    '''
    def __init__(self, feature=None, threshold=None, data_left=None, data_right=None, gain=None, value=None):
        self.feature = feature
        self.threshold = threshold
        self.data_left = data_left
        self.data_right = data_right
        self.gain = gain
        self.value = value

In [261]:
class DecisionTree:
    '''
    Class which implements a decision tree classifier algorithm.
    '''
    def __init__(self, min_samples_split=2, max_depth=5):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.root = None
        
    @staticmethod
    def _entropy(s):
        '''
        Helper function, calculates entropy from an array of integer values.
        
        :param s: list
        :return: float, entropy value
        '''
        # Convert to integers to avoid runtime errors
        counts = np.bincount(np.array(s, dtype=np.int64))
        # Probabilities of each class label
        percentages = counts / len(s)

        # Caclulate entropy
        entropy = 0
        for pct in percentages:
            if pct > 0:
                entropy += pct * np.log2(pct)
        return -entropy
    
    def _information_gain(self, parent, left_child, right_child):
        '''
        Helper function, calculates information gain from a parent and two child nodes.
        
        :param parent: list, the parent node
        :param left_child: list, left child of a parent
        :param right_child: list, right child of a parent
        :return: float, information gain
        '''
        num_left = len(left_child) / len(parent)
        num_right = len(right_child) / len(parent)
        
        # One-liner which implements the previously discussed formula
        return self._entropy(parent) - (num_left * self._entropy(left_child) + num_right * self._entropy(right_child))
    
    def _best_split(self, X, y):
        '''
        Helper function, calculates the best split for given features and target
        
        :param X: np.array, features
        :param y: np.array or list, target
        :return: dict
        '''
        best_split = {}
        best_info_gain = -1
        n_rows, n_cols = X.shape
        
        # For every dataset feature
        for f_idx in range(n_cols):
            X_curr = X[:, f_idx]
            # For every unique value of that feature
            for threshold in np.unique(X_curr):
                # Construct a dataset and split it to the left and right parts
                # Left part includes records lower or equal to the threshold
                # Right part includes records higher than the threshold
                df = np.concatenate((X, y.reshape(1, -1).T), axis=1)
                df_left = np.array([row for row in df if row[f_idx] <= threshold])
                df_right = np.array([row for row in df if row[f_idx] > threshold])

                # Do the calculation only if there's data in both subsets
                if len(df_left) > 0 and len(df_right) > 0:
                    # Obtain the value of the target variable for subsets
                    y = df[:, -1]
                    y_left = df_left[:, -1]
                    y_right = df_right[:, -1]

                    # Caclulate the information gain and save the split parameters
                    # if the current split if better then the previous best
                    gain = self._information_gain(y, y_left, y_right)
                    if gain > best_info_gain:
                        best_split = {
                            'feature_index': f_idx,
                            'threshold': threshold,
                            'df_left': df_left,
                            'df_right': df_right,
                            'gain': gain
                        }
                        best_info_gain = gain
        return best_split
    
    def _build(self, X, y, depth=0):
        '''
        Helper recursive function, used to build a decision tree from the input data.
        
        :param X: np.array, features
        :param y: np.array or list, target
        :param depth: current depth of a tree, used as a stopping criteria
        :return: Node
        '''
        n_rows, n_cols = X.shape
        
        # Check to see if a node should be leaf node
        if n_rows >= self.min_samples_split and depth <= self.max_depth:
            # Get the best split
            best = self._best_split(X, y)
            # If the split isn't pure
            if best['gain'] > 0:
                # Build a tree on the left
                left = self._build(
                    X=best['df_left'][:, :-1], 
                    y=best['df_left'][:, -1], 
                    depth=depth + 1
                )
                right = self._build(
                    X=best['df_right'][:, :-1], 
                    y=best['df_right'][:, -1], 
                    depth=depth + 1
                )
                return Node(
                    feature=best['feature_index'], 
                    threshold=best['threshold'], 
                    data_left=left, 
                    data_right=right, 
                    gain=best['gain']
                )
        # Leaf node - value is the most common target value 
        return Node(
            value=Counter(y).most_common(1)[0][0]
        )
    
    def fit(self, X, y):
        '''
        Function used to train a decision tree classifier model.
        
        :param X: np.array, features
        :param y: np.array or list, target
        :return: None
        '''
        # Call a recursive function to build the tree
        self.root = self._build(X, y)
        
    def _predict(self, x, tree):
        '''
        Helper recursive function, used to predict a single instance (tree traversal).
        
        :param x: single observation
        :param tree: built tree
        :return: float, predicted class
        '''
        # Leaf node
        if tree.value != None:
            return tree.value
        feature_value = x[tree.feature]
        
        # Go to the left
        if feature_value <= tree.threshold:
            return self._predict(x=x, tree=tree.data_left)
        
        # Go to the right
        if feature_value > tree.threshold:
            return self._predict(x=x, tree=tree.data_right)
        
    def predict(self, X):
        '''
        Function used to classify new instances.
        
        :param X: np.array, features
        :return: np.array, predicted classes
        '''
        # Call the _predict() function for every observation
        return [self._predict(x, self.root) for x in X]

In [266]:
DT = DecisionTree()
DT.fit(x_train, y_train)


out_train_labels_DT = DT.predict(x_train) 
out_test_labels_DT  = DT.predict(x_test)

train_accuracy_score = accuracy_score(y_train,out_train_labels_DT)
test_accuracy_score  = accuracy_score(y_test,out_test_labels_DT)

print(f'Train accuracy_score : {train_accuracy_score}\nTest accuracy_score : {test_accuracy_score}')


Train accuracy_score : 1.0
Test accuracy_score : 0.9666666666666667
