# Retrieve dataset

In [1]:
# Loading Data set from UC Irvine archive.
!wget https://archive.ics.uci.edu/static/public/53/iris.zip
!unzip -d iris -o iris.zip

--2024-03-20 17:43:57--  https://archive.ics.uci.edu/static/public/53/iris.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘iris.zip’

iris.zip                [ <=>                ]   3.65K  --.-KB/s    in 0s      

2024-03-20 17:43:57 (486 MB/s) - ‘iris.zip’ saved [3738]

Archive:  iris.zip
  inflating: iris/Index              
  inflating: iris/bezdekIris.data    
  inflating: iris/iris.data          
  inflating: iris/iris.names         


## Load dataset

Stats stored in `iris.names`

In [2]:
import pandas as pd

df = pd.read_csv('iris/iris.data', names=['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class'])
df

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


# Data pre-processing

## Generate a One-hot encoding of the output classes

In [3]:
unique_classes = list(set(df['class']))
df['class_encoded'] = df['class'].apply(lambda x : [(1 if x == c else 0) for c in unique_classes])
df

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class,class_encoded
0,5.1,3.5,1.4,0.2,Iris-setosa,"[0, 0, 1]"
1,4.9,3.0,1.4,0.2,Iris-setosa,"[0, 0, 1]"
2,4.7,3.2,1.3,0.2,Iris-setosa,"[0, 0, 1]"
3,4.6,3.1,1.5,0.2,Iris-setosa,"[0, 0, 1]"
4,5.0,3.6,1.4,0.2,Iris-setosa,"[0, 0, 1]"
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica,"[0, 1, 0]"
146,6.3,2.5,5.0,1.9,Iris-virginica,"[0, 1, 0]"
147,6.5,3.0,5.2,2.0,Iris-virginica,"[0, 1, 0]"
148,6.2,3.4,5.4,2.3,Iris-virginica,"[0, 1, 0]"


## Split data into X and y, representing the inputs and output

In [4]:
# Extract X and y (inputs and outputs)
X, y = df.iloc[:, 0:4], df.iloc[:, 5]
print(X)
print(y)

     sepal-length  sepal-width  petal-length  petal-width
0             5.1          3.5           1.4          0.2
1             4.9          3.0           1.4          0.2
2             4.7          3.2           1.3          0.2
3             4.6          3.1           1.5          0.2
4             5.0          3.6           1.4          0.2
..            ...          ...           ...          ...
145           6.7          3.0           5.2          2.3
146           6.3          2.5           5.0          1.9
147           6.5          3.0           5.2          2.0
148           6.2          3.4           5.4          2.3
149           5.9          3.0           5.1          1.8

[150 rows x 4 columns]
0      [0, 0, 1]
1      [0, 0, 1]
2      [0, 0, 1]
3      [0, 0, 1]
4      [0, 0, 1]
         ...    
145    [0, 1, 0]
146    [0, 1, 0]
147    [0, 1, 0]
148    [0, 1, 0]
149    [0, 1, 0]
Name: class_encoded, Length: 150, dtype: object


## Scale data

In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X = scaler.fit(X).transform(X)
X

array([[-9.00681170e-01,  1.03205722e+00, -1.34127240e+00,
        -1.31297673e+00],
       [-1.14301691e+00, -1.24957601e-01, -1.34127240e+00,
        -1.31297673e+00],
       [-1.38535265e+00,  3.37848329e-01, -1.39813811e+00,
        -1.31297673e+00],
       [-1.50652052e+00,  1.06445364e-01, -1.28440670e+00,
        -1.31297673e+00],
       [-1.02184904e+00,  1.26346019e+00, -1.34127240e+00,
        -1.31297673e+00],
       [-5.37177559e-01,  1.95766909e+00, -1.17067529e+00,
        -1.05003079e+00],
       [-1.50652052e+00,  8.00654259e-01, -1.34127240e+00,
        -1.18150376e+00],
       [-1.02184904e+00,  8.00654259e-01, -1.28440670e+00,
        -1.31297673e+00],
       [-1.74885626e+00, -3.56360566e-01, -1.34127240e+00,
        -1.31297673e+00],
       [-1.14301691e+00,  1.06445364e-01, -1.28440670e+00,
        -1.44444970e+00],
       [-5.37177559e-01,  1.49486315e+00, -1.28440670e+00,
        -1.31297673e+00],
       [-1.26418478e+00,  8.00654259e-01, -1.22754100e+00,
      

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

print(X_train)
print(y_train)
print()
print(X_test)
print(y_test)

[[ 7.95669016e-01 -1.24957601e-01  8.19624347e-01  1.05353673e+00]
 [ 1.15917263e+00 -5.87763531e-01  5.92161531e-01  2.64698913e-01]
 [-1.14301691e+00 -1.28197243e+00  4.21564419e-01  6.59117823e-01]
 [-5.25060772e-02 -5.87763531e-01  7.62758643e-01  1.57942861e+00]
 [ 2.24968346e+00 -1.05056946e+00  1.78634131e+00  1.44795564e+00]
 [-1.02184904e+00  1.03205722e+00 -1.39813811e+00 -1.18150376e+00]
 [-2.94841818e-01 -3.56360566e-01 -9.02269170e-02  1.33225943e-01]
 [-1.50652052e+00  8.00654259e-01 -1.34127240e+00 -1.18150376e+00]
 [ 1.03800476e+00  1.06445364e-01  5.35295827e-01  3.96171883e-01]
 [ 1.03800476e+00 -1.24957601e-01  7.05892939e-01  6.59117823e-01]
 [-4.16009689e-01 -1.74477836e+00  1.37235899e-01  1.33225943e-01]
 [-1.02184904e+00 -1.24957601e-01 -1.22754100e+00 -1.31297673e+00]
 [ 1.64384411e+00  1.26346019e+00  1.33141568e+00  1.71090158e+00]
 [ 1.15917263e+00  3.37848329e-01  1.21768427e+00  1.44795564e+00]
 [-1.02184904e+00 -2.43898725e+00 -1.47092621e-01 -2.61192967e

# Gradient Descent

## Representation of Network

Params:

* Hidden: int, number of hidden layers
* Layers: list (should be length N+2), number of nodes per layer (1st is input, last is output)

Data Structures:

* Inputs: List of nodes, each item (i) is a set containing the nodes that the ith node recieves input from
* Outputs: List of nodes, each item (i) is a set containing hte nodes that the ith node connects to
* Weights: Matrix of weights between node (i) to (j)
* Baises: List of floats, biases for each node

In [None]:
# 4 input -> 4 nodes -> 4 nodes -> 3 output
layers = [4, 4, 4, 3]
# layers = [3, 2, 1]

In [None]:
import numpy as np

inputs = []
outputs = []
weights = np.zeros((sum(layers), sum(layers)))
biases = []
n = 0 # Current num of nodes created

# Loop through all layers, where nodes = nodes in curr layer
for idx, nodes in enumerate(layers):

    # Create inputs and outputs for the current layer
    for i in range(n, n+nodes):

        # If we are in an input layer, no input connections duh
        # Otherwise, add all previous layer's nodes
        if idx == 0:
            inputs.append([])
        else:
            inputs.append(list(range(n-layers[idx-1], n)))

        # If we are in an output layer, no output connections duh
        # Otherwise, add all next layer's nodes
        if idx == len(layers) - 1:
            outputs.append([])
        else:
            outputs.append(list(range(n+nodes, n+nodes+layers[idx+1])))

        # Create a bias for each node (no bias for input nodes duh)
        if idx == 0:
            biases.append(0.0)
        else:
            biases.append(np.random.normal(scale=0.25))

    # Increment total num of nodes
    n += nodes

# Create a triangular matrix of weights
for i, row in enumerate(inputs):
    for cell in row:
        weights[cell][i] = np.random.normal(scale=0.25)

print(inputs)
print(outputs)
# print(f'weights:\n',weights)
# print(f'bias:\n',biases)

[[], [], [], [], [0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3], [4, 5, 6, 7], [4, 5, 6, 7], [4, 5, 6, 7], [4, 5, 6, 7], [8, 9, 10, 11], [8, 9, 10, 11], [8, 9, 10, 11]]
[[4, 5, 6, 7], [4, 5, 6, 7], [4, 5, 6, 7], [4, 5, 6, 7], [8, 9, 10, 11], [8, 9, 10, 11], [8, 9, 10, 11], [8, 9, 10, 11], [12, 13, 14], [12, 13, 14], [12, 13, 14], [12, 13, 14], [], [], []]


## TEST DATASET BC THIS SHIT AINT WORKING

In [None]:
# biases = [0, 0, 0, -0.4, 0.2, 0.1]
# weights = [
#     [0, 0, 0, 0.2, -0.3, 0],
#     [0, 0, 0, 0.4, 0.1, 0],
#     [0, 0, 0, -0.5, 0.2, 0],
#     [0, 0, 0, 0, 0, -0.3],
#     [0, 0, 0, 0, 0, -0.2],
#     [0, 0, 0, 0, 0, 0],
# ]
# X_train = [[1, 0, 1]]
# y_train = [[1]]

## Backpropogration Setup

Set up the error functions and activation functions for gradient descent

In [None]:
import math

def error(outputs):
    pass

# Define the activation functions
sigmoid = lambda x : 1 / (1 + math.e**(-x))
tanh = lambda x : (math.e**(x) - math.e**(-x)) / (math.e**(x) + math.e**(-x))
relu = lambda x : x if x > 0 else 0

# Derivatives of the activation functions
d_sigmoid = lambda x : x * (1 - x)
d_tanh = lambda x : 1 - x**2
d_relu = lambda x : 1 if x > 0 else 0

# Inputs to the algorithm (with X and y test/train splits)
act_func = sigmoid
d_act_func = d_sigmoid
learn_rate = 1
max_iter = 200
# learn_rate = 0.9
# max_iter = 1

## Backpropogation Algorithm

Define the backpropogation training function with gradient descent and specified activation function.

In [None]:
# Array for outputs (os), delta of each node (deltas),
# and total number of output nodes
os = [0] * n
deltas = [0] * n
output_nodes = layers[-1]
y_comp = [x for y in y_train for x in y] # Flattened output array

# Loop until MAX ITERATIONS
for i in range(max_iter):

    correct = 0
    all_test_os = [] # All example outputs

    # Loop through all training examples
    for j, input in enumerate(X_train):

        # 1. Compute the outputs of the network
        for k, val in enumerate(input):
            os[k] = val

        for k in range(len(input), n):
            curr_weights = [weights[x][k] for x in inputs[k]]
            os[k] = act_func(sum([x[0] * x[1] for x in zip(curr_weights, [os[y] for y in inputs[k]])]) + biases[k])

        # 2. Calculate the delta_k for output units
        for k in range(output_nodes):
            idx = n - output_nodes + k # Index of outputs/deltas array
            deltas[idx] = d_act_func(os[idx]) * (y_train[j][k] - os[idx])

        # 3. Calculate the delta_h for hidden units
        for k in range(layers[0], n - output_nodes):
            curr_outputs = outputs[k]
            curr_ws = [weights[k][x] for x in curr_outputs]
            curr_deltas = [deltas[o] for o in curr_outputs]
            deltas[k] = d_act_func(os[k]) * sum([x[0] * x[1] for x in zip(curr_ws, curr_deltas)])

        # 4. Update each weight
        for k, row in enumerate(inputs):
            for cell in row:
                # print(cell, k, learn_rate, deltas[k], os[cell])
                weights[cell][k] += learn_rate * deltas[k] * os[cell]
                # print(weights, '\n')

        # 4b. Update biases
        for k in range(layers[0], n):
            biases[k] += learn_rate * deltas[k]

        # Add final output to all test output array
        all_test_os.extend(os[-3:])
        correct += 1 if y_train[j].index(max(y_train[j])) == os[-3:].index(max(os[-3:])) else 0
    err = (1 / (2 * len(y_comp))) * sum([(a[0] - a[1])**2 for a in zip(y_comp, all_test_os)])
    print(f'Iter {i+1}: {err} | {correct}/{len(y_train)}')
    # print(weights)



In [None]:
# WHY IS THIS MODEL STILL STUPIDDDDDDDDDDDDDDDDDD

# Classes as notebook

In [7]:
class Model:
    def __init__(self, layers, activation='sigmoid', split_percent=0.1, learn_rate=0.1, iterations=100):
      self.split_percent = split_percent
      self.activation = activation

      # Setting activation
      valid_funcs = ['sigmoid', 'tanh', 'relu']
      if activation not in valid_funcs:
          raise Exception(f'{activation} not a valid activation function, must be one of [{", ".join(valid_funcs)}]')

      # Model Representation
      self.layers = layers
      self.inputs = []
      self.outputs = []
      self.weights = np.zeros((sum(self.layers), sum(self.layers)))
      self.biases = []
      self.n = 0 # Current num of nodes created

      # Loop through all layers, where nodes = nodes in curr layer
      for idx, nodes in enumerate(self.layers):

          # Create inputs and outputs for the current layer
          for i in range(self.n, self.n+nodes):

              # If we are in an input layer, no input connections duh
              # Otherwise, add all previous layer's nodes
              if idx == 0:
                  self.inputs.append([])
              else:
                  self.inputs.append(list(range(self.n-self.layers[idx-1], self.n)))

              # If we are in an output layer, no output connections duh
              # Otherwise, add all next layer's nodes
              if idx == len(self.layers) - 1:
                  self.outputs.append([])
              else:
                  self.outputs.append(list(range(self.n+nodes, self.n+nodes+self.layers[idx+1])))

              # Create a bias for each node (no bias for input nodes duh)
              if idx == 0:
                  self.biases.append(0.0)
              else:
                  self.biases.append(np.random.normal(scale=0.25))

          # Increment total num of nodes
          self.n += nodes

      # Create a triangular matrix of weights
      for i, row in enumerate(self.inputs):
          for cell in row:
              self.weights[cell][i] = np.random.normal(scale=0.25)

      # Backprop Setup
      # Define the activation functions
      self.sigmoid = lambda x : 1 / (1 + math.e**(-x))
      self.tanh = lambda x : (math.e**(x) - math.e**(-x)) / (math.e**(x) + math.e**(-x))
      self.relu = lambda x : x if x > 0 else 0

      # Derivatives of the activation functions
      self.d_sigmoid = lambda x : x * (1 - x)
      self.d_tanh = lambda x : 1 - x**2
      self.d_relu = lambda x : 1 if x > 0 else 0

      # Inputs to the algorithm (with X and y test/train splits)
      activations = [self.sigmoid, self.tanh, self.relu]
      d_activations = [self.d_sigmoid, self.d_tanh, self.d_relu]
      self.act_func = activations[valid_funcs.index(self.activation)]
      self.d_act_func = d_activations[valid_funcs.index(self.activation)]
      self.learn_rate = learn_rate
      self.max_iter = iterations

    def pre_process(self, df: pd.DataFrame):
      # One hot encoding for outputs
      unique_classes = list(set(df['class']))
      df['class_encoded'] = df['class'].apply(lambda x : [(1 if x == c else 0) for c in unique_classes])

      # Extract X and y (inputs and outputs)
      X, y = df.iloc[:, 0:4], df.iloc[:, 5]

      # Scale Data
      scaler = StandardScaler()
      X = scaler.fit(X).transform(X)
      return X, y

    def split_data(self,X,y):
      X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.split_percent)
      # Reset index after splitting
      y_train = y_train.reset_index(drop=True)
      y_test = y_test.reset_index(drop=True)
      return X_train, X_test, y_train, y_test

    def train(self, X_train, y_train):
      # Array for outputs (os), delta of each node (deltas),
      # and total number of output nodes
      os = [0] * self.n
      deltas = [0] * self.n
      output_nodes = self.layers[-1]
      y_comp = [x for y in y_train for x in y] # Flattened output array

      # Loop until MAX ITERATIONS
      for i in range(self.max_iter):

          correct = 0
          all_test_os = [] # All example outputs

          # Loop through all training examples
          for j, input in enumerate(X_train):

              # 1. Compute the outputs of the network
              for k, val in enumerate(input):
                  os[k] = val

              for k in range(len(input), self.n):
                  curr_weights = [self.weights[x][k] for x in self.inputs[k]]
                  os[k] = self.act_func(sum([x[0] * x[1] for x in zip(curr_weights, [os[y] for y in self.inputs[k]])]) + self.biases[k])

              # 2. Calculate the delta_k for output units
              for k in range(output_nodes):
                  idx = self.n - output_nodes + k # Index of outputs/deltas array
                  deltas[idx] = self.d_act_func(os[idx]) * (y_train[j][k] - os[idx])

              # 3. Calculate the delta_h for hidden units
              for k in range(self.layers[0], self.n - output_nodes):
                  curr_outputs = self.outputs[k]
                  curr_ws = [self.weights[k][x] for x in curr_outputs]
                  curr_deltas = [deltas[o] for o in curr_outputs]
                  deltas[k] = self.d_act_func(os[k]) * sum([x[0] * x[1] for x in zip(curr_ws, curr_deltas)])

              # 4. Update each weight
              for k, row in enumerate(self.inputs):
                  for cell in row:
                      # print(cell, k, learn_rate, deltas[k], os[cell])
                      self.weights[cell][k] += self.learn_rate * deltas[k] * os[cell]
                      # print(weights, '\n')

              # 4b. Update biases
              for k in range(self.layers[0], self.n):
                  self.biases[k] += self.learn_rate * deltas[k]

              # Add final output to all test output array
              all_test_os.extend(os[-3:])
              correct += 1 if y_train[j].index(max(y_train[j])) == os[-3:].index(max(os[-3:])) else 0
          err = (1 / (2 * len(y_comp))) * sum([(a[0] - a[1])**2 for a in zip(y_comp, all_test_os)])
          print(f'Epoch: {i+1}: {err} | {correct}/{len(y_train)}')
          # print(weights)
          self.train_size = [correct, len(y_train)]
          self.train_acc =  100*(correct/len(y_train)) # train accuracy

    def test(self,X_test, y_test):
      # take input of X_test push through network check with output
      correct = 0
      os = [0] * self.n
      check = []
      y_comp = [x for y in y_test for x in y]
      for j, input in enumerate(X_test):
        for k, val in enumerate(input):
          os[k] = val
        for k in range(len(input), self.n):
          curr_weights = [self.weights[x][k] for x in self.inputs[k]]
          os[k] = self.act_func(sum([x[0] * x[1] for x in zip(curr_weights, [os[y] for y in self.inputs[k]])]) + self.biases[k])
        check.extend(os[-3:])
        correct += 1 if y_test[j].index(max(y_test[j])) == os[-3:].index(max(os[-3:])) else 0
      err = (1 / (2 * len(y_comp))) * sum([(a[0] - a[1])**2 for a in zip(y_comp, check)])
      #print(f'Accuracy for {len(y_test)} cases: {100*(correct/len(y_test))}% | Passed: {correct}/{len(y_test)}')
      self.test_size = [correct, len(y_test)]
      self.test_acc = 100*(correct/len(y_test)) # test accuracy

    def tabulate(self):
      print(f"\nParameters\t\t Accuracy")
      print(f"Activation: {self.activation}\t Training Accuracy: {self.train_acc}% ({self.train_size[0]}/{self.train_size[1]})")
      print(f"Learning Rate: {self.learn_rate}\t Test Accuracy: {self.test_acc}% ({self.test_size[0]}/{self.test_size[1]})")
      print(f'Split Percent: {self.split_percent}\nEpochs: {self.max_iter}')
      for i, l in enumerate(self.layers):
        if i==0:
          print(f'Input Layer: {l}')
        elif i==len(self.layers)-1:
          print(f'Output Layer: {l}')
        else:
          print(f'Hidden Layer {i}: {l}')


In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import math

df = pd.read_csv('iris/iris.data', names=['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class'])
# parameters
model = Model(activation='sigmoid', split_percent=0.2, layers=[4,8,6,3], learn_rate=1, iterations=100)
X, y = model.pre_process(df)
X_train, X_test, y_train, y_test = model.split_data(X,y)
model.train(X_train, y_train)
model.test(X_test, y_test)
model.tabulate()

FileNotFoundError: [Errno 2] No such file or directory: 'iris/iris.data'

# README
## How to run
The code is presented as a python script (.py).
You will need the iris dataset downloaded and extracted.
Obtain the dataset from UCI ML Library with the following commands:
- `wget https://archive.ics.uci.edu/static/public/53/iris.zip`
- `unzip -d iris -o iris.zip`

You will need the following packages installed:
- Pandas (`pip install pandas`)
- Numpy (`pip install numpy`)
- scikit-learn (`pip install scikit-learn`)

After requirments are met you can adjust any of the hyper-parameters at the bottom of the file on ___line XX___.

Run the model with:
- `python3 model.py`

You will be presented in your terminal with the epochs and accuracy at each epoch as well as a tabular form of the overall accruacies and parameters.

## Train Test Split
We chose a train/test split of 10% since there is not a whole lot of data to train with.