In [128]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import numpy as np
import math

In [129]:
digits = load_digits()
minibatchsize = 5
target_size = len(digits.target_names)
print(target_size)

X = np.array(digits.data)
y = np.array(digits.target)

# scaling the numbers
X = (X - np.min(X))/(np.max(X)-np.min(X))

# float 64 shoudl be fine aswell
print(X.dtype)

# encoding into one hot vectors
y = np.eye(target_size)[y]

# helper function to confirm correct splitting
def show_img(digits):
  plt.gray()
  plt.matshow(digits.reshape(8, 8))
  plt.show()

# yield X, y randomly
def shuffle_generator(X, y):
  shuffler = np.random.permutation(len(X))
  X = X[shuffler]
  y = y[shuffler]
  idx = 0
  while idx < len(X):
    yield(X[idx], y[idx])
    idx += 1

10
float64


In [130]:
# ACTIVATION FUNCTIONS
def sigmoid(input):
  return 1/(1 + np.exp(-input))

def sigmoid_backwards(input):
  return sigmoid(input) * (1 - sigmoid(input))

def softmax(input):
  sum = np.sum(np.exp(input))
  out = np.exp(input) / sum
  return out


# LOSS FUNCTION
def cross_entropy(predictions, targets):
  return -np.sum(targets * np.log(predictions + 10**-100))

def cross_entropy_backwards(predictions, targets):
  return predictions - targets


In [147]:
from typing import List

class MLP_layer():
  def __init__(self,units, input_size, activation_function, activation_function_backwards = 0):
    self.activation_function = activation_function
    self.activation_function_backwards = activation_function_backwards
    self.units = units
    self.input_size = input_size

    self.input = np.zeros(input_size)
    self.preactivation = np.zeros(units)
    self.activation = np.zeros(units)
    self.weights = np.random.normal(0, 0.2, size=(units, input_size))
    self.bias = np.zeros(units)

  def forward(self, input : np.ndarray):
    self.input = input
    self.preactivation = (self.weights @ input) + self.bias
    self.activation = self.activation_function(self.preactivation)
    return self.activation

  def backwards(self, error_signal):
    error_signal = error_signal * self.activation_function_backwards(self.preactivation) # dL/a * da/dpre
    dLdW = np.outer(error_signal, self.input)       # to adjust the weights                                    dL/dpre * dpre/dW
    dLdinput = error_signal @ self.weights # error signal for the next layer                          dL/dpre * dpre/dinput
    return (dLdW, dLdinput)

  def backwards_output(self, targets):
    error_signal = cross_entropy_backwards(predictions=self.preactivation, targets=targets) #dL/dpre

    dLdW = np.outer(error_signal, self.input)       # to adjust the weights                          dL/dpre * dpre/dW
    dLdinput = self.weights.T @ error_signal # error signal for the next layer                dL/dpre * dpre/dinput
    return (dLdW, dLdinput)




In [161]:
class ANN():
  def __init__(self):
    self.layer1 = MLP_layer(32, 64, sigmoid, activation_function_backwards=sigmoid_backwards)
    self.layer2 = MLP_layer(16, 32, sigmoid, activation_function_backwards=sigmoid_backwards)
    self.layer3 = MLP_layer(10, 16, softmax)
    self.layers = [self.layer1, self.layer2, self.layer3]

    self.learning_rate = 0.03

  def forward(self, input):
    prediction = input
    for layer in self.layers:
      prediction = layer.forward(prediction)
    return prediction


  def backwards(self, targets):
    (dLdW, error_signal) = self.layers[-1].backwards_output(targets)
    self.layers[-1].weights -= self.learning_rate * dLdW
    for layer in reversed(self.layers[:-1]):
      (dLdW, error_signal) = layer.backwards(error_signal)
      layer.weights -= self.learning_rate * dLdW



In [162]:
ann = ANN()


# for _ in range(10):
#   X_temp, y_temp = next(gen)
#   print(X_temp, y_temp)
#   show_img(X_temp)


# X_temp, y_temp = next(gen)
# prediction = ann.forward(X_temp)
# loss = cross_entropy(prediction, y_temp)
# print(f"loss: {loss}")
# ann.backwards(y_temp)


def training(ANN, epochs=100):
  avg_losses = []

  for i in range(epochs):
    avg_loss = []
    gen = shuffle_generator(X, y)
    for (X_temp, y_temp) in gen:
      prediction = ANN.forward(X_temp)
      current_loss = cross_entropy(prediction, y_temp)
      avg_loss.append(current_loss)
      ANN.backwards(y_temp)
    print(sum(avg_loss) / len(avg_loss))
  return ann

ann = training(ann)

2.2675142575777687
2.1426873653964393
2.0223607624069553
1.9485592935479128
1.8988268126387287
1.867128062585637
1.8405047936764274
1.8207416494914448
1.8026220465783058
1.7838255051300052
1.76171053581809
1.7411156941849706
1.723834838416441
1.7043443413939023
1.6909074211644122
1.6792668346915178
1.668316701386275
1.6578133092799754
1.647781204870292
1.642434257091358
1.6341225445444885
1.62983343179348
1.6225871891742285
1.6177206102473114
1.6133300480251644
1.60931091763812
1.6054817618001833
1.6000078997514742
1.5980970583590886
1.5927229416847506
1.5899142903614123
1.5877691041731097
1.5849051956163944
1.5810831406444945
1.5796226092074703
1.5757616503905856
1.573909090295955
1.5697536588298246
1.5695215196590493
1.5665660086863185
1.5651275294903906
1.5614782696050433
1.5607472459057945
1.5582677452897784
1.5569534839451353
1.5552509287103617
1.5534748146329052
1.5516118492800302
1.5505800627803155
1.5483787203713104
1.5464691473505796
1.5466085454821936
1.544787025319486
1.5423

In [175]:
gen = shuffle_generator(X, y)
correct = 0
total = 0
for (X_temp, y_temp) in gen:
  total += 1
  prediction = ann.forward(X_temp)
  if np.argmax(prediction) == np.argmax(y_temp):
    correct += 1
accuracy = correct/total
print(f"correct: {correct}, total: {total}, accuracy: {accuracy}")



correct: 1784, total: 1797, accuracy: 0.9927657206455203
