# In [2]:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap

class Sigmoid():
  """Element-wise logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

  def __init__(this):
    # Tag inspected by Model_Base.optimize; anything other than "layer" is
    # skipped there, so this object is never asked to update parameters.
    this.layer_type= "activation"

  def forward(this, X):
    """Apply the sigmoid to every element of X.

    The result is cached in ``this.output`` because ``backward`` needs it.

    Args:
      X: array-like of pre-activation values.

    Returns:
      NumPy array with the same shape as X, holding values in [0, 1].
    """
    X = np.asarray(X)
    # exp() is only ever evaluated at non-positive arguments, so it cannot
    # overflow no matter how large |X| gets (the naive exp(-X) does for very
    # negative X).
    decay = np.exp(-np.abs(X))
    # X >= 0: 1 / (1 + exp(-X));  X < 0: exp(X) / (1 + exp(X)) -- same function.
    this.output = np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))
    return this.output

  def backward(this, gradient):
    """Back-propagate through the sigmoid.

    Must be called after ``forward``; uses the cached output ``s`` and the
    identity ``d sigmoid / dx = s * (1 - s)``.

    Args:
      gradient: gradient of the loss with respect to this activation's output.

    Returns:
      Gradient of the loss with respect to this activation's input.
    """
    this.gradient = this.output * (1 - this.output) * gradient
    return this.gradient


class Layer():
  """Fully connected layer computing ``X @ W + b``."""

  def __init__(this, input_size, layer_size, init="rand_zeros"):
    """Create the weight matrix and bias row.

    Args:
      input_size: number of input features (rows of W).
      layer_size: number of units in the layer (columns of W, width of b).
      init: initialisation scheme.
        "rand_zeros" -- W from np.random.rand (uniform on [0, 1)), b all zeros.
        "randn_rand" -- W from np.random.randn (standard normal), b from
        np.random.rand.

    Raises:
      ValueError: if ``init`` is not one of the schemes above.
    """
    if init == "rand_zeros":
      this.W = np.random.rand(input_size, layer_size)
      this.b = np.zeros((1, layer_size))
    elif init == "randn_rand":
      this.W = np.random.randn(input_size, layer_size)
      this.b = np.random.rand(1, layer_size)
    else:
      # Fail here rather than leaving W/b undefined and crashing later with an
      # AttributeError inside forward().
      raise ValueError(f'Unknown init "{init}"; expected "rand_zeros" or "randn_rand"')
    # Tag inspected by Model_Base.optimize to find layers with trainable parameters.
    this.layer_type= "layer"

  def forward(this, X):
    """Return ``X @ W + b``; X is cached because backward() needs it for dW."""
    this.input = X
    this.output = np.matmul(X, this.W) + this.b
    return this.output

  def backward(this, gradient):
    """Compute parameter gradients and the gradient passed to the previous layer.

    Must be called after ``forward``. Stores ``dW`` and ``db`` (both summed over
    the batch axis) for a later ``optimize`` call.

    Args:
      gradient: gradient of the loss with respect to this layer's output.

    Returns:
      Gradient of the loss with respect to this layer's input.
    """
    this.dW = np.matmul(this.input.T, gradient)
    # Shape (layer_size,); it broadcasts against the (1, layer_size) bias in optimize().
    this.db = np.sum(gradient, axis=0)
    this.gradient = np.matmul(gradient, this.W.T)
    return this.gradient

  def optimize(this, learning_rate):
    """Take one gradient-descent step using the gradients stored by backward()."""
    this.W = this.W - this.dW * learning_rate
    this.b = this.b - this.db * learning_rate

class MSE():
  """Squared-error loss, summed over the output axis for each sample."""

  def __init__(this):
    # Stateless until forward() is called.
    pass

  def forward(this, y_pred, y_true):
    """Return one loss value per row: the sum of squared differences along axis 1.

    The raw difference ``y_pred - y_true`` is kept in ``this.error`` so that
    backward() can hand it back without recomputing it.
    """
    residual = y_pred - y_true
    this.error = residual
    this.output = np.sum(np.square(residual), axis=1)
    return this.output

  def backward(this):
    """Return the difference stored by the latest forward() call.

    This is the derivative of the summed squared error up to the constant
    factor 2, which is not applied here.
    """
    return this.error


class Model_Base():
  """Sequential model trained with shuffled mini-batch gradient descent.

  Every element of ``sequential`` must provide ``forward(X)``,
  ``backward(gradient)`` and a ``layer_type`` attribute; elements whose
  ``layer_type`` is "layer" must also provide ``optimize(learning_rate)``.
  """

  def __init__(this, sequential):
    """Store the ordered layers and start with empty per-epoch metric lists."""
    this.sequential = sequential
    this.history = {"train_loss": [], "train_accuracy": [], "val_loss": [], "val_accuracy": []}

  def predict(this, X):
    """Feed X through every layer in order and return the final output."""
    for layer in this.sequential:
      X = layer.forward(X)
    return X

  def backward(this, gradient):
    """Propagate the loss gradient through the layers in reverse order."""
    for layer in reversed(this.sequential):
      gradient = layer.backward(gradient)

  def optimize(this, learning_rate):
    """Apply one parameter update to every trainable layer."""
    for layer in this.sequential:
      # Only entries tagged "layer" have an optimize() method.
      if layer.layer_type == "layer":
        layer.optimize(learning_rate)

  def _evaluate(this, X, y, loss_fn):
    """Return (mean per-sample loss, accuracy) of the current model on X, y."""
    y_pred = this.predict(X)
    loss = loss_fn.forward(y_pred, y)
    acc = accuracy(y_pred, y)
    return np.sum(loss) / len(loss), acc

  def fit(this, X, y, epochs, learning_rate, loss_fn, batch_size, val_data=None, print_info=False):
    """Train the model and record per-epoch metrics in ``this.history``.

    Each epoch shuffles the sample order, performs one forward/backward/update
    step per mini-batch, then evaluates on the full training set (and on
    ``val_data`` when given).

    Args:
      X: training inputs, indexable by an integer index array along axis 0.
      y: training targets, aligned with X.
      epochs: number of passes over the training data.
      learning_rate: step size forwarded to each layer's optimize().
      loss_fn: object with ``forward(y_pred, y_true)`` returning per-sample
        losses and ``backward()`` returning the gradient w.r.t. y_pred.
      batch_size: number of samples per mini-batch. Only full batches are used:
        the ``len(X) % batch_size`` samples left over after shuffling are
        skipped in that epoch.
      val_data: optional ``(X_val, y_val)`` pair evaluated after every epoch.
      print_info: when true, print one metrics line per epoch.
    """
    for i in range(epochs):
      # Fresh random permutation each epoch so batches differ between epochs.
      indeces = np.random.choice(len(X), len(X), replace=False)
      for j in range(len(X) // batch_size):
        # Slice exactly batch_size indices so batches are disjoint and
        # correctly sized for any batch_size.
        batch = indeces[j * batch_size : (j + 1) * batch_size]
        y_pred = this.predict(X[batch])
        # forward() is called for its side effect: it stores the state that
        # loss_fn.backward() reads.
        loss_fn.forward(y_pred, y[batch])
        this.backward(loss_fn.backward())
        this.optimize(learning_rate)

      # Saving and printing info
      train_loss, acc_train = this._evaluate(X, y, loss_fn)
      this.history["train_loss"].append(train_loss)
      this.history["train_accuracy"].append(acc_train)
      if val_data is not None:
        X_val, y_val = val_data
        val_loss, acc_val = this._evaluate(X_val, y_val, loss_fn)
        this.history["val_loss"].append(val_loss)
        this.history["val_accuracy"].append(acc_val)
        if print_info:
          print(f"Epoch: {i + 1} Train loss: {round(train_loss, 2)} Train accuracy: {round(acc_train * 100, 2)}% Validation loss: {round(val_loss, 2)} Validation accuracy: {round(acc_val * 100, 2)}%")
      elif print_info:
        print(f"Epoch: {i + 1} Train loss: {round(train_loss, 2)} Train accuracy: {round(acc_train * 100, 2)}%")


def accuracy(y_pred, y_true):
  """Return the fraction of rows where y_pred and y_true have the same argmax along axis 1."""
  predicted_classes = np.argmax(y_pred, axis=1)
  true_classes = np.argmax(y_true, axis=1)
  hits = np.sum(predicted_classes == true_classes)
  return hits / len(y_pred)