In [25]:
import pandas as pd
import math
import random
from sklearn.preprocessing import MinMaxScaler
import time

In [26]:

def logistic_function(x):
    try:
        ans = 1.0 / (1 + math.exp(-x))
    except OverflowError:
        if x > 0:
            ans = 1
        elif x == 0:
            ans = 0.5
        else:
            ans = 0
    return ans


def logistic_function_bar(x):
    return logistic_function(x)*(1 - logistic_function(x))


def relu_func(x):
    if x < 0:
        return 0.01*x
    else:
        return x


def relu_func_bar(x):
    if x < 0:
        return 0.01
    else:
        return 1


def func3(x):
    return math.tanh(x/2.0)


def func3_bar(x):
    return 0.5*(1 - func3(x)*func3(x))


class MultiLayerPerceptron:
    def __init__(self, num_of_layers, node_counts, activation_function, activation_function_der, num_of_features):
        # num_of_layers = hidden + output
        # node_counts = number of nodes in every layer including output layer (num of classes)
        self.nof = num_of_features
        self.w = []
        self.nol = num_of_layers
        self.node_counts = node_counts
        self.af = activation_function
        self.daf = activation_function_der
        self.u = 1

        self.class_vectors = [[]]

        self.k = [num_of_features]

        for nc in node_counts:
            self.k.append(nc)

        self.w.append([])  # dummy append to start the index from 1

        for r in range(1, num_of_layers + 1):
            self.w.append([])
            self.w[r].append([])  # dummy append to start the index from 1
            for j in range(1, self.k[r] + 1):
                self.w[r].append([])
                for k in range(self.k[r - 1] + 1):
                    self.w[r][j].append(random.uniform(0, 1))

        for i in range(1, self.k[num_of_layers] + 1):
            self.class_vectors.append([0]*(self.k[num_of_layers] + 1))
            self.class_vectors[i][i] = 1
            
        print(self.w)

    def train(self, features, labels):
        N = features.shape[0]

        ym = []

        for i in range(0, N):
            ym.append(self.class_vectors[labels.iloc[i]])

        temp = [1]*N

        x = pd.concat([pd.DataFrame(temp), features], axis=1, ignore_index=True)

        v = []
        y = []
        delta = []

        for i in range(N):
            v.append([])
            y.append([])
            delta.append([])
            y[i].append(x.iloc[i, :].tolist())
            for r in range(self.nol + 1):
                v[i].append([])
                delta[i].append([])
                if r > 0:
                    y[i].append([])
                for j in range(self.k[r] + 1):
                    v[i][r].append(0)
                    delta[i][r].append(0)
                    if r > 0:
                        if j == 0:
                            y[i][r].append(1)
                        else:
                            y[i][r].append(0)
        
        iter_constraint = 0
        while True:
            iter_constraint += 1
            if iter_constraint > 50:
                break
            for i in range(0, N):
                for r in range(1, self.nol + 1):
                    for j in range(1, self.k[r] + 1):
                        v[i][r][j] = pd.Series(self.w[r][j]).dot(pd.Series(y[i][r - 1]))
                        y[i][r][j] = self.af(v[i][r][j])

                for j in range(1, self.k[self.nol] + 1):
                    err = y[i][self.nol][j] - ym[i][j]
                    delta[i][self.nol][j] = err*self.daf(v[i][self.nol][j])

                for r in range(self.nol, 1, -1):
                    for j in range(1, self.k[r - 1] + 1):
                        err = 0
                        for k in range(1, self.k[r] + 1):
                            err += delta[i][r][k]*self.w[r][k][j]
                        delta[i][r - 1][j] = err*self.daf(v[i][r - 1][j])

                for r in range(1, self.nol + 1):
                    for j in range(1, self.k[r] + 1):
                        update = pd.Series(y[i][r - 1]).multiply(delta[i][r][j])
                        update = update.multiply(-self.u)
                        w_new = pd.Series(self.w[r][j]).add(update)
                        self.w[r][j] = w_new.tolist()
            J = 0
            for i in range(0, N):
                ei = 0
                for j in range(1, self.k[self.nol] + 1):
                    ei += (ym[i][j] - y[i][self.nol][j])*(ym[i][j] - y[i][self.nol][j])
                ei *= 0.5
                J += ei
            print("Iteration " + str(iter_constraint) + ", Cost " + str(J))
            if J < 10:
                break

    def decide(self, x):
        x = pd.concat([pd.Series([1]), x], axis=0, ignore_index=True)
        y = [x.tolist()]

        for r in range(1, self.nol + 1):
            y.append([])
            y[r].append(1.0)
            for j in range(1, self.k[r] + 1):
                v = pd.Series(self.w[r][j]).dot(pd.Series(y[r - 1]))
                y[r].append(self.af(v))

        max_value_idx = y[self.nol][1:].index(max(y[self.nol][1:]))
        return max_value_idx + 1

        # for i in range(1, self.k[self.nol] + 1):
        #    if self.class_vectors[i][1:] == y[self.nol][1:]:
        #       return i


In [27]:

# df = pd.read_csv('trainNN.txt', delimiter='\s+', header=None)
df = pd.read_csv('trainNN.txt', delimiter='\s+', header=None)

X = df.iloc[:, :df.shape[1] - 1]
Y = df.iloc[:, df.shape[1] - 1]

min_max_scaler = MinMaxScaler()  # min max scaler
min_max_scaler.fit(X)
X = min_max_scaler.transform(X)

print(X)

num_of_class = len(Y.unique())

start = time.time()
mlp = MultiLayerPerceptron(2, [4, num_of_class], logistic_function, logistic_function_bar, X.shape[1])

mlp.train(pd.DataFrame(X), Y)

end = time.time()
print("Time to train " + str(end - start))

true_res = false_res = 0

for rn in range(X.shape[0]):
    res = mlp.decide(pd.Series(X[rn, :]))
    if res == Y.iloc[rn]:
        true_res += 1.0
    else:
        false_res += 1.0

accuracy = true_res/(true_res + false_res)
print("Accuracy of training data " + str(accuracy*100))

# test_df = pd.read_csv('testNN.txt', delimiter='\s+', header=None)
test_df = pd.read_csv('testNN.txt', delimiter='\s+', header=None)
X = test_df.iloc[:, :test_df.shape[1] - 1]
Y = test_df.iloc[:, test_df.shape[1] - 1]

X = min_max_scaler.transform(X)

for rn in range(X.shape[0]):
    res = mlp.decide(pd.Series(X[rn, :]))
    if res == Y.iloc[rn]:
        true_res += 1.0
    else:
        false_res += 1.0

accuracy = true_res/(true_res + false_res)
print("Accuracy of test data " + str(accuracy*100))


[[], [[], [0.9924272423050888, 0.6337283331673697, 0.30723257433384377, 0.9061253147332906, 0.3196133983831767], [0.727022649394206, 0.29037964469179844, 0.9324148738310891, 0.8873849336888244, 0.9731060758740194], [0.9390117532957908, 0.8107422433534368, 0.6359261099132527, 0.8579151182628039, 0.026354447515190893], [0.05577736068071382, 0.17055317678216142, 0.7928911407672176, 0.2903294662521586, 0.16798609836550804]], [[], [0.7099546894027847, 0.10751229730633871, 0.14741561627245825, 0.537829795744963, 0.06188648256547191], [0.7751017364347768, 0.5644801255864988, 0.8927831184201337, 0.1365774166974174, 0.9834889064656145], [0.05590394237656271, 0.8944986362641655, 0.5473837499024965, 0.57846067415853, 0.20012958882416054], [0.9620769885429875, 0.8058986010899408, 0.2721454353642462, 0.45768024826606835, 0.4901089555754927]]]
Iteration 1, Cost 167.29272206252105
Iteration 2, Cost 88.65448325908888
Iteration 3, Cost 48.05665779194374
Iteration 4, Cost 18.664746825607406
Iteration 5,