In [3]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import json
from activ_func import Activation_Function, reluVect, sigmoidVect, softmax
from backprop_func import delta_linear_output, delta_relu_output, delta_sigmoid_output, delta_softmax_output, delta_linear_hidden, delta_relu_hidden, delta_sigmoid_hidden, delta_softmax_hidden

ITER_LIMIT = 1000
MAX_SSE = 1e-7

class Layer:
    """One fully connected layer: a 2-D weight matrix plus its activation.

    Attributes:
        w: weight matrix with one row per input and one column per neuron.
            ``FFNN.addLayer`` expects the row count to include one extra row
            for the bias term.
        n_inputs: number of rows of ``w``.
        n_neurons: number of columns of ``w``.
        activ_func: activation applied to the layer's net input.
    """

    def __init__(self, w: np.ndarray, activ_func: "Activation_Function") -> None:
        """Store the weight matrix and activation of the layer.

        Args:
            w: 2-D weight matrix (array-likes are converted with
                ``np.asarray``).
            activ_func: member of ``Activation_Function`` for this layer.

        Raises:
            RuntimeError: if ``w`` is not 2-dimensional.
        """
        w = np.asarray(w)
        if w.ndim != 2:
            raise RuntimeError("Layer initialized with non 2-dimensional array")

        # Training updates the weights in place (``w += gradient``); NumPy
        # refuses to cast a float result into an integer array, so promote
        # non-float weights up front. Float arrays are kept as-is (no copy),
        # so a caller holding the same array still sees the trained weights.
        if not np.issubdtype(w.dtype, np.floating):
            w = w.astype(float)

        self.w = w
        self.n_inputs = w.shape[0]
        self.n_neurons = w.shape[1]
        self.activ_func = activ_func


class FFNN:
    """Feed-forward neural network trained with mini-batch gradient descent.

    Training samples are registered with ``addInput`` and layers with
    ``addLayer``; ``feed_forward`` then runs the training loop (forward pass,
    back-propagation and weight updates) and ``predict`` runs a forward pass
    for a single sample.
    """

    def __init__(self, n_inputs: int, n_classes: int, learning_rate: float, batch_size: int, max_iter: int, error_threshold: float, stopped_by: str) -> None:
        """Create an empty network.

        Args:
            n_inputs: number of attributes of every sample.
            n_classes: number of values in every target vector.
            learning_rate: factor applied to every gradient contribution.
            batch_size: number of samples accumulated between weight updates.
            max_iter: number of training iterations after which training
                stops (the module-level ``ITER_LIMIT`` is a hard cap on top).
            error_threshold: accumulated error of one iteration at or below
                which training stops; only checked when ``stopped_by`` is not
                ``"max_iteration"``.
            stopped_by: ``"max_iteration"`` disables the error-threshold
                check; any other value enables it.
        """
        self._n_inputs = n_inputs
        self._n_classes = n_classes
        self._batch_size = batch_size
        self._max_iter = max_iter
        self._error_threshold = error_threshold
        self._use_max_iter = stopped_by == "max_iteration"

        self._targets: list[list[float]] = []
        self._input: list[list[float]] = []
        self._layers: list[Layer] = []
        self._batch_grad: list[np.ndarray] = []

        # Output column of the most recent training forward pass; None until
        # feed_forward has processed a sample.
        self._current_output: np.ndarray = None

        self._learning_rate = learning_rate

    def init_batch_grad(self):
        """Reset the accumulated weight updates to zero for every layer."""
        self._batch_grad = [np.zeros(shape=(layer.n_inputs, layer.n_neurons)) for layer in self._layers]

    def get_output(self):
        """Return the output of the last training forward pass as nested lists."""
        return np.transpose(self._current_output).tolist()

    def addInput(self, newInput: list, target_output: list):
        """Register one training sample and its target vector.

        Raises:
            RuntimeError: if the sample does not have ``n_inputs`` values or
                the target does not have ``n_classes`` values.
        """
        if self._n_inputs != len(newInput):
            raise RuntimeError("Added input with incorrect number of attributes")
        if self._n_classes != len(target_output):
            raise RuntimeError("Added target with incorrect number of classes")

        self._input.append(newInput)
        self._targets.append(target_output)

    def addLayer(self, newLayer: "Layer"):
        """Append a layer after checking that it fits the previous one.

        The layer must have one weight row per output of the previous layer
        (or per network input for the first layer) plus one row for the bias.
        The expected width of the first layer comes from ``n_inputs``, so
        layers may be added before any training sample is registered.

        Raises:
            RuntimeError: if the row count of the layer does not match.
        """
        if self._layers:
            expected_rows = self._layers[-1].n_neurons + 1
        else:
            # addInput guarantees every sample has exactly _n_inputs values,
            # so there is no need to look at a stored sample here.
            expected_rows = self._n_inputs + 1

        if newLayer.n_inputs != expected_rows:
            raise RuntimeError(
                "Number of inputs in layer matrix does not match output from previous layer"
            )

        self._layers.append(newLayer)

    def calc_error(self, output: list, target: list, use_log: bool):
        """Return the error of one sample.

        Args:
            output: network output values.
            target: target values, indexed in parallel with ``output``.
            use_log: when true, return ``-log`` of the output at the first
                position whose target equals 1.0 (``inf`` if there is none);
                otherwise return half the sum of squared differences.
        """
        if use_log:
            for idx, o_val in enumerate(output):
                if target[idx] == 1.0:
                    return - np.log(o_val)
            return float('inf')

        err = 0.0
        for idx, o_val in enumerate(output):
            err += (target[idx] - o_val) ** 2
        return err / 2.0

    def _activate(self, layer, net: np.ndarray) -> np.ndarray:
        """Apply the activation of ``layer`` to its net input column."""
        if layer.activ_func == Activation_Function.RELU:
            return reluVect(net)
        if layer.activ_func == Activation_Function.SIGMOID:
            return sigmoidVect(net)
        if layer.activ_func == Activation_Function.SOFTMAX:
            return softmax(net)
        # Any other activation is treated as linear (identity).
        return net

    def _forward(self, sample: list):
        """Run one sample through every layer.

        Returns:
            A tuple ``(output, layer_inputs, layer_nets)``: the output column
            of the last layer, and for every layer its bias-augmented input
            and its net input (before activation), each stored as a
            single-row nested list.
        """
        current = np.transpose(np.array([sample]))
        bias = np.array([[1.0]])
        layer_inputs: list[list[float]] = []
        layer_nets: list[list[float]] = []

        for layer in self._layers:
            # The bias goes on top, matching the first weight row.
            current = np.concatenate((bias, current), axis=0)
            layer_inputs.append(current.transpose().tolist())

            current = np.transpose(layer.w) @ current
            layer_nets.append(current.transpose().tolist())

            current = self._activate(layer, current)

        return current, layer_inputs, layer_nets

    def feed_forward(self):
        """Train the network on the registered samples.

        Every iteration walks over all samples in insertion order: forward
        pass, error accumulation, back-propagation, and a weight update after
        every ``batch_size`` samples and after the last sample. Training
        stops after ``max_iter`` iterations, after ``ITER_LIMIT`` iterations,
        or - unless stopping by ``"max_iteration"`` was requested - as soon
        as the accumulated error of an iteration is at or below the error
        threshold.

        Raises:
            RuntimeError: if samples were registered but no layer was added.
        """
        if self._input and not self._layers:
            raise RuntimeError("Cannot train a network that has no layers")

        n_samples = len(self._input)
        # The log error is used when the output layer is a softmax layer.
        use_log = bool(self._layers) and self._layers[-1].activ_func == Activation_Function.SOFTMAX

        for epoch in range(ITER_LIMIT):
            if epoch == self._max_iter:
                return

            self.init_batch_grad()
            epoch_error = 0.0

            for idx, cur_input in enumerate(self._input):
                target = self._targets[idx]
                output, layer_inputs, layer_nets = self._forward(cur_input)

                epoch_error += self.calc_error(np.transpose(output).tolist()[0], target, use_log)
                # backwards_propagation reads the output from this attribute.
                self._current_output = output
                self.backwards_propagation(layer_inputs, layer_nets, target, epoch, idx)

                if (idx + 1) % self._batch_size == 0 or idx + 1 == n_samples:
                    self.update_weights()
                    self.init_batch_grad()

            if not self._use_max_iter and epoch_error <= self._error_threshold:
                return

        if not self._use_max_iter:
            print("Using error_threshold to stop but hit ITER_LIMIT to stop program to run indefinitely")

    def update_weights(self):
        """Add the accumulated batch updates to the layer weights in place."""
        for layer, grad in zip(self._layers, self._batch_grad):
            layer.w += grad

    def update_batch_grad(self, layer_idx: int, delta: np.ndarray, layer_input: np.ndarray, hidden: bool):
        """Accumulate the update of one sample for one layer.

        The contribution is ``layer_input * delta * learning_rate`` (NumPy
        broadcasting). ``hidden`` is accepted for interface compatibility but
        is not used.
        """
        grad = layer_input * delta * self._learning_rate
        self._batch_grad[layer_idx] += grad

    def backwards_propagation(self, layer_inputs: list[list[float]], layer_nets: list[list[float]], target: list[float], iter: int, input_idx: int):
        """Back-propagate the error of the current sample.

        Walks the layers from last to first, computing each layer's delta
        with the ``delta_*`` helper matching its activation, and accumulates
        the resulting weight updates through ``update_batch_grad``. The
        output of the forward pass is read from ``self._current_output``.

        Args:
            layer_inputs: bias-augmented input of every layer (single-row
                nested lists, as recorded by the forward pass).
            layer_nets: net input of every layer, same layout.
            target: target vector of the sample.
            iter: index of the training iteration (currently unused).
            input_idx: index of the sample (currently unused).
        """
        n_layers = len(self._layers)
        # Delta of the layer downstream of the one being processed.
        ds_delta: np.ndarray = None

        for idx, layer in enumerate(reversed(self._layers)):
            layer_idx = n_layers - 1 - idx
            nets = np.array(layer_nets[layer_idx]).transpose()

            if idx == 0:
                # Output layer: the delta comes from the output and target.
                target_mat = np.array(target).transpose()

                if layer.activ_func == Activation_Function.SOFTMAX:
                    ds_delta = delta_softmax_output(self._current_output, target_mat)
                elif layer.activ_func == Activation_Function.RELU:
                    ds_delta = delta_relu_output(self._current_output, target_mat, nets)
                elif layer.activ_func == Activation_Function.SIGMOID:
                    ds_delta = delta_sigmoid_output(self._current_output, target_mat)
                else:
                    ds_delta = delta_linear_output(self._current_output, target_mat)
            else:
                downstream_w = self._layers[layer_idx + 1].w
                # Outputs of this layer are the inputs of the next layer
                # without the leading bias entry.
                layer_outputs = np.array([layer_inputs[layer_idx + 1][0][1:]])

                if layer.activ_func == Activation_Function.SOFTMAX:
                    cur_delta = delta_softmax_hidden(layer_outputs, ds_delta, downstream_w)
                elif layer.activ_func == Activation_Function.RELU:
                    cur_delta = delta_relu_hidden(nets, ds_delta, downstream_w)
                elif layer.activ_func == Activation_Function.SIGMOID:
                    cur_delta = delta_sigmoid_hidden(layer_outputs, ds_delta, downstream_w)
                else:
                    cur_delta = delta_linear_hidden(ds_delta, downstream_w)

                # The downstream layer's update is accumulated only after its
                # delta has been used to compute this layer's delta.
                ds_layer_input = np.array(layer_inputs[layer_idx + 1]).transpose()
                self.update_batch_grad(layer_idx + 1, ds_delta, ds_layer_input, False)
                ds_delta = cur_delta

        # The first layer has no upstream layer, so its update is added here.
        ds_layer_input = np.array(layer_inputs[0]).transpose()
        self.update_batch_grad(0, ds_delta, ds_layer_input, True)

    def predict(self, input_data: list[float]):
        """Return the network output for one sample as a flat list.

        Raises:
            RuntimeError: if ``input_data`` does not have ``n_inputs`` values.
        """
        if len(input_data) != self._n_inputs:
            raise RuntimeError("Input to predict has incorrect number of attributes")

        output, _, _ = self._forward(input_data)
        return output.transpose().tolist()[0]

In [4]:
import csv

# Load the iris CSV and drop the header row.
with open('../data/iris.csv', newline='') as f:
    reader = csv.reader(f)
    iris_data = list(reader)[1:]

# Seed NumPy's global RNG so the initial weights are reproducible.
# (np.random.seed returns None, so its result must not be bound to a name.)
np.random.seed(0)

# Each matrix has 5 rows: 4 inputs plus 1 bias row. w_2 and w_3 are drawn but
# not added to the network below; they are kept so the random stream (and
# therefore w_final) is unchanged.
w_1 = np.random.uniform(low=-0.5, high=0.5, size=(5, 4))
w_2 = np.random.uniform(low=-0.5, high=0.5, size=(5, 4))
w_3 = np.random.uniform(low=-0.5, high=0.5, size=(5, 4))
w_final = np.random.uniform(low=-0.5, high=0.5, size=(5, 3))

input_size = 4
n_classes = 3
learning_rate = 0.2
batch_size = 1
max_iter = 2
error_threshold = 0.0
stopped_by = "max_iteration"

fnaf = FFNN(input_size, n_classes, learning_rate, batch_size, max_iter, error_threshold, stopped_by)

for row in iris_data:
    # The first column is skipped (presumably a row id - verify against the
    # CSV) and the last column is the species label.
    input_data = [float(x) for x in row[1:-1]]
    target_data = [0.0, 0.0, 0.0]

    # One-hot target; any label other than the first two maps to class 2.
    if row[-1] == "Iris-setosa":
        target_data[0] = 1.0
    elif row[-1] == "Iris-versicolor":
        target_data[1] = 1.0
    else:
        target_data[2] = 1.0

    fnaf.addInput(input_data, target_data)

fnaf.addLayer(Layer(w_1, Activation_Function.LINEAR))
fnaf.addLayer(Layer(w_final, Activation_Function.SOFTMAX))

# NOTE(review): the recorded run of this cell ended with overflow warnings
# from the softmax exp and a prediction of [nan, nan, nan]. Presumably the
# raw, unscaled features combined with learning_rate=0.2 make training
# diverge - consider standardizing the features or lowering the learning
# rate; confirm by re-running.
fnaf.feed_forward()

test_in = [float(x) for x in iris_data[0][1:-1]]
print(test_in)
fnaf.predict(test_in)

[5.1, 3.5, 1.4, 0.2]


  row_exp_sum = np.sum(np.exp(row))
  e_v = np.exp(v)
  return e_v / expSum
  outputs = ufunc(*inputs)


[nan, nan, nan]

In [5]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Baseline: scikit-learn MLP on the same iris CSV.
data = pd.read_csv("../data/iris.csv")
df_x = data.iloc[:, 1:5]  # columns 1-4: features (column 0 is skipped)
df_y = data.iloc[:, 5]    # column 5: label

# Encode target labels
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(df_y)

# Convert integers to one-hot encoding.
# The `sparse` keyword of OneHotEncoder was deprecated in scikit-learn 1.2 in
# favour of `sparse_output` and later removed; using the default (sparse)
# output and densifying it works on every version.
onehot_encoder = OneHotEncoder()
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded).toarray()

# Split the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(df_x, onehot_encoded, test_size=0.2, random_state=42)

# Standardize features (statistics are fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(x_train)
X_test_scaled = scaler.transform(x_test)

# Train MLPClassifier
# NOTE(review): with a 2-D one-hot target, MLPClassifier presumably treats
# the task as multilabel, and accuracy_score then counts a row as correct
# only if all three indicators match - confirm this is intended, or pass the
# 1-D integer labels instead.
mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=200, random_state=42, verbose=1)
mlp_classifier.fit(X_train_scaled, y_train)

# Predict on test set
y_pred = mlp_classifier.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Iteration 1, loss = 2.12135773
Iteration 2, loss = 2.09521495
Iteration 3, loss = 2.06943994
Iteration 4, loss = 2.04403624
Iteration 5, loss = 2.01906487
Iteration 6, loss = 1.99445333
Iteration 7, loss = 1.97021547
Iteration 8, loss = 1.94635058
Iteration 9, loss = 1.92284543
Iteration 10, loss = 1.89971574
Iteration 11, loss = 1.87699711
Iteration 12, loss = 1.85466862
Iteration 13, loss = 1.83269246
Iteration 14, loss = 1.81106442
Iteration 15, loss = 1.78978560
Iteration 16, loss = 1.76886166
Iteration 17, loss = 1.74829291
Iteration 18, loss = 1.72808279
Iteration 19, loss = 1.70820938
Iteration 20, loss = 1.68867652
Iteration 21, loss = 1.66948563
Iteration 22, loss = 1.65064000
Iteration 23, loss = 1.63211006
Iteration 24, loss = 1.61390315
Iteration 25, loss = 1.59600765
Iteration 26, loss = 1.57842538
Iteration 27, loss = 1.56114493
Iteration 28, loss = 1.54417867
Iteration 29, loss = 1.52752306
Iteration 30, loss = 1.51115987
Iteration 31, loss = 1.49509509
Iteration 32, los

