<a href="https://colab.research.google.com/github/AugustvonMackensen/AI_colab/blob/main/deeplearning_backpropagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[1] Import Packages

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris

[2] Define the data preparation function

In [None]:
def prepare_data(target):
  """Build a one-vs-rest binary classification set from the iris data.

  Args:
    target: Name of the iris class to treat as the positive class. It is
      compared against the entries of ``iris.target_names``.

  Returns:
    A tuple ``(X_tr, y_tr, legend)``:
      * ``X_tr``  - columns 2 and 3 of ``iris.data`` for every sample
        (petal length and petal width).
      * ``y_tr``  - int array, 1 where the sample's class name equals
        ``target`` and 0 otherwise.
      * ``legend`` - the two display labels
        ``['(1) ' + target, '(0) the others']``.
  """
  iris = load_iris()          # Read the iris dataset
  X_tr = iris.data[:, 2:]     # Keep the petal length and petal width columns
  labels = iris.target_names
  y = iris.target

  # Label every sample: 1 if its class name is the target, else 0.
  # Iterating over `y` itself avoids hard-coding the number of samples.
  y_tr = np.array([labels[k] == target for k in y], dtype=int)
  return X_tr, y_tr, ['(1) ' + target, '(0) the others']

[3] Sigmoid

In [None]:
def sigmoid(x):
  '''Logistic function 1 / (1 + exp(-x)), evaluated element-wise.

  x : numpy array
  '''
  denominator = 1 + np.exp(-x)
  return 1 / denominator

[4] Loss functions: MSE and Cross-Entropy

In [None]:
# Mean squared error loss
def loss_mse(y, y_hat):
  """Average, over the samples in y, of the squared error of each sample.

  For sample i the squared error is the dot product of
  (y_hat[i] - y[i]) with itself; the total is divided by len(y).
  """
  total = 0.0
  for i, target in enumerate(y):
    diff = y_hat[i] - target
    total += np.dot(diff, diff)
  return total / len(y)

# Cross-entropy loss
def loss_ce(y, y_hat):
  """Mean cross-entropy between targets y and predicted probabilities y_hat.

  Args:
    y: Targets. When y is 1-D or has a single column, the binary
      cross-entropy -(y*log(p) + (1-y)*log(1-p)) is used per sample.
      Otherwise each row is treated as a target distribution and the
      categorical form -sum(y*log(p)) is used.
    y_hat: Predicted probabilities, indexed per sample like y.

  Returns:
    The summed per-sample loss divided by len(y).
  """
  eps = 1e-12
  y = np.asarray(y)
  # Clip in float64: a saturated prediction (exactly 0 or 1) would otherwise
  # give log(0) = -inf and 0 * -inf = nan. float64 is required because
  # 1 - eps rounds back to 1.0 in float32.
  p = np.clip(np.asarray(y_hat, dtype=np.float64), eps, 1.0 - eps)

  binary = len(y.shape) == 1 or y.shape[1] == 1
  loss = 0.0
  for i in range(len(y)):
    if binary:
      loss += -(y[i] * np.log(p[i]) + (1 - y[i]) * np.log(1 - p[i])).sum()
    else:
      loss += -(y[i] * np.log(p[i])).sum()
  return loss / len(y)

[5] Dense Class

In [None]:
class Dense():
  """Fully connected layer with a sigmoid activation, trainable by backprop.

  Attributes:
    nIn, nOut: number of inputs / outputs of the layer.
    w: float32 weight matrix of shape (nOut, nIn).
    b: float32 bias vector of length nOut.
    in_vec, out_vec: input and output of the most recent `output` call,
      kept for the backward pass.
    velocity_w, velocity_b: previous update step, used for momentum.
  """

  def __init__(self, nIn, nOut, activation='sigmoid', loss='mse'):
    """Create the layer.

    Args:
      nIn: number of inputs.
      nOut: number of outputs.
      activation: activation name; only 'sigmoid' is implemented.
      loss: 'ce' selects the cross-entropy output delta, anything else
        selects the MSE output delta.

    Raises:
      ValueError: if `activation` is not 'sigmoid'.
    """
    self.nIn = nIn      # the number of inputs
    self.nOut = nOut    # the number of outputs

    # Fail fast: without this check an unknown activation would leave the
    # layer without `activation`, `dE_du` and `do_du`, and the error would
    # only surface later as an AttributeError during the forward pass.
    if activation != 'sigmoid':
      raise ValueError(
          "Unsupported activation {!r}; only 'sigmoid' is implemented".format(activation))

    # Initialize the weight and bias by using He normal
    rnd = np.random.default_rng()
    scale = np.sqrt(2.0 / self.nIn)
    self.w = rnd.normal(scale=scale, size=(self.nOut, self.nIn)).astype(np.float32)
    self.b = rnd.normal(scale=scale, size=self.nOut).astype(np.float32)

    # Set the activation function and the matching derivative helpers
    self.activation = sigmoid
    self.dE_du = self.dE_du_sigmoid_ce if loss == 'ce' else self.dE_du_sigmoid_mse
    self.do_du = self.do_du_sigmoid

    # Initial velocity for the momentum update
    self.velocity_w, self.velocity_b = 0.0, 0.0

  def output(self, X):
    """Forward pass for one input vector X of length nIn.

    Stores X in `in_vec` and the activation in `out_vec` so the backward
    pass can reuse them, and returns `out_vec`.
    """
    self.in_vec = X   # Store the input for bp
    # Weighted sum for all output units at once (matrix-vector product)
    u = (np.dot(self.w, X) + self.b).astype(np.float32)

    # Apply the activation and store the output for bp
    self.out_vec = self.activation(u)
    return self.out_vec

  def gd(self, dw, db, momentum=0):
    """Apply one gradient-descent step with optional momentum.

    `dw` and `db` are subtracted as given (no learning rate is applied
    here). The new step is `momentum * previous_step - dw` (resp. `db`),
    and it is added to `w` / `b` in place.
    """
    self.velocity_w = self.velocity_w * momentum - dw
    self.velocity_b = self.velocity_b * momentum - db
    self.w += self.velocity_w
    self.b += self.velocity_b

  def dE_du_sigmoid_mse(self, y):
    """Output delta for sigmoid + MSE: (out - y) * out * (1 - out)."""
    return (self.out_vec - y) * self.do_du_sigmoid()

  def dE_du_sigmoid_ce(self, y):
    """Output delta for sigmoid + cross-entropy: out - y."""
    return self.out_vec - y

  def do_du_sigmoid(self):
    """Derivative of the sigmoid at the stored output: out * (1 - out)."""
    return self.out_vec * (1 - self.out_vec)


[6] BP Model Class : FeedForward Model Class for BP learning

In [None]:
class BP_Model():
  """Feed-forward network of Dense layers trained by per-sample backprop.

  Attributes:
    nLayers: number of Dense layers (len(nUnitLst) - 1).
    layers: object array holding the Dense layers, input side first.
    ohe: identity matrix used to one-hot encode integer class labels.
    loss: loss function used for progress reporting (loss_ce or loss_mse).
  """

  def __init__(self, nUnitLst, loss='mse', activation_h='sigmoid', activation_o='sigmoid'):
    """Build the network.

    Args:
      nUnitLst: unit counts per level, e.g. [nInputs, hidden..., nOutputs].
      loss: 'ce' for cross-entropy, anything else for MSE.
      activation_h: activation name passed to the hidden layers.
      activation_o: activation name passed to the output layer.
    """
    layers = []
    self.nLayers = len(nUnitLst) - 1

    # Hidden layers
    for i in range(self.nLayers - 1):
      layers.append(Dense(nUnitLst[i], nUnitLst[i+1], activation=activation_h, loss=loss))

    # Output layer
    layers.append(Dense(nUnitLst[self.nLayers-1], nUnitLst[self.nLayers], activation=activation_o, loss=loss))
    self.layers = np.array(layers, dtype=object)
    self.ohe = np.identity(nUnitLst[-1])
    self.loss = loss_ce if loss == 'ce' else loss_mse

  def predict(self, x):
    """Run every sample in x through all layers; return the stacked outputs."""
    res = []
    for sample in x:
      out = sample
      for i in range(self.nLayers):
        out = self.layers[i].output(out)
      res.append(out)
    return np.array(res)

  def fit(self, X, y, N, epochs, eta=0.01, momentum=0):
    """Train with per-sample gradient descent.

    Args:
      X: training inputs, indexable by sample.
      y: integer class labels, indexable by sample. With a single output
        unit they are used as-is; otherwise they are one-hot encoded.
      N: number of leading samples of X / y to train on.
      epochs: number of passes over the N samples.
      eta: learning rate.
      momentum: momentum coefficient forwarded to each layer's update.

    Prints the loss after each of the first 10 epochs and after every
    100th epoch.
    """
    # Shuffle the index of learning samples randomly (once, before training)
    idx = list(range(N))
    np.random.shuffle(idx)
    X = np.array([X[k] for k in idx])
    if self.layers[self.nLayers-1].nOut == 1:
      y = np.array([[y[k]] for k in idx])
    else:
      y = np.array([self.ohe[y[k]] for k in idx])

    f = 'Epochs = {:4d}   Loss = {:8.5f}'
    # Per-layer update steps for the current sample
    dw = [None] * self.nLayers
    db = [None] * self.nLayers
    for n in range(epochs):
      for m in range(N):
        # Forward pass: caches in_vec / out_vec inside every layer
        self.predict([X[m]])

        # Output layer
        iCurrLayer = self.nLayers - 1
        currLayer = self.layers[iCurrLayer]
        delta = np.asarray(currLayer.dE_du(y[m]))
        dw[iCurrLayer] = (eta * np.outer(delta, currLayer.in_vec)).astype(np.float32)
        db[iCurrLayer] = (eta * delta).astype(np.float32)
        nextDelta, nextLayer = delta, currLayer

        # Hidden layers, walking back towards the input
        for iCurrLayer in range(self.nLayers-2, -1, -1):
          currLayer = self.layers[iCurrLayer]
          # dE/do of unit k = sum_j nextDelta[j] * nextLayer.w[j, k]
          dE_do = np.dot(nextLayer.w.T, nextDelta)
          delta = dE_do * currLayer.do_du()
          dw[iCurrLayer] = (eta * np.outer(delta, currLayer.in_vec)).astype(np.float32)
          db[iCurrLayer] = (eta * delta).astype(np.float32)
          nextDelta, nextLayer = delta, currLayer

        # Update only after every layer's gradient has been computed, so
        # the backward pass always sees the pre-update weights and each
        # layer is updated exactly once per sample (also when there is no
        # hidden layer). The momentum coefficient is forwarded here.
        for i in range(self.nLayers):
          self.layers[i].gd(dw[i], db[i], momentum)

      # Report progress once per epoch
      if n < 10 or (n+1) % 100 == 0:
        y_hat = self.predict(X)
        print(f.format(n+1, self.loss(y, y_hat)))

[7] Visualize the Model

In [None]:
def visualize(net, X, y, multi_class, labels, class_id, colors,
             xlabel, ylabel, legend_loc='lower right'):
  """Plot the model's predicted class regions together with the samples.

  A grid with spacing 0.05 is laid over the integer bounding box of the
  first two columns of X, `net.predict` is evaluated on every grid point,
  and grid points and samples are drawn class by class.

  Args:
    net: object with a `predict` method.
    X: sample matrix; columns 0 and 1 are the plotted coordinates.
    y: class value of each sample.
    multi_class: if true, the predicted class is the argmax of each
      prediction row; otherwise it is 1 where the prediction is >= 0.5.
    labels: class values, matched against y and the predicted classes.
    class_id: legend text for each entry of `labels`.
    colors: plot colour for each entry of `labels`.
    xlabel, ylabel: axis labels.
    legend_loc: legend position passed to `plt.legend`.
  """
  # Integer bounding box of the two plotted features
  first, second = X[:, 0], X[:, 1]
  x_min = np.floor(first.min()).astype(int)
  x_max = np.ceil(first.max()).astype(int)
  y_min = np.floor(second.min()).astype(int)
  y_max = np.ceil(second.max()).astype(int)

  # 20 steps per unit -> grid spacing of 0.05 along each axis
  xs = np.linspace(x_min, x_max, (x_max-x_min)*20+1)
  ys = np.linspace(y_min, y_max, (y_max-y_min)*20+1)
  grid_x, grid_y = np.meshgrid(xs, ys)

  # One row per grid point: (x, y)
  X_test = np.stack([grid_x.ravel(), grid_y.ravel()], axis=1)

  # Predicted class of every grid point
  raw = net.predict(X_test)
  if multi_class:
    y_hat = np.argmax(raw, axis=1).astype(int)
  else:
    thresholded = (raw >= 0.5).astype(int)
    y_hat = thresholded.reshape(len(thresholded))

  # Fix the visible range to the bounding box
  plt.xlim(x_min, x_max)
  plt.ylim(y_min, y_max)

  # Draw each class: faint grid points first, then the actual samples
  for color, value, name in zip(colors, labels, class_id):
    grid_mask = y_hat == value
    plt.scatter(X_test[grid_mask, 0], X_test[grid_mask, 1],
                c=color, s=5, alpha=0.3, edgecolors='none')
    sample_mask = y == value
    plt.scatter(X[sample_mask, 0], X[sample_mask, 1],
                c=color, s=20, label=name)

  # Legend, axis labels, and display
  plt.legend(loc=legend_loc)
  plt.xlabel(xlabel, size=12)
  plt.ylabel(ylabel, size=12)
  plt.show()

[8] Prepare the training data

In [None]:
target = 'versicolor' # Set the flower
X_tr, y_tr, labels = prepare_data(target)

# Take the sample count and input dimension from the prepared data instead
# of hard-coding them, so they cannot drift out of sync with the dataset.
nSamples = len(X_tr)
nDim = X_tr.shape[1]

[9] Build the BP_Model and train it

In [None]:
# Network layout [nDim, 4, 1]: nDim inputs, one hidden layer of 4 units and
# a single output unit; both layers use the sigmoid activation and the MSE
# loss.
bp_iris = BP_Model([nDim, 4, 1], loss='mse',
                   activation_h='sigmoid', activation_o='sigmoid')
# Train on the first nSamples samples for 1000 epochs; eta is the learning
# rate and momentum the momentum coefficient handed to fit. fit prints the
# loss as training progresses.
bp_iris.fit(X_tr, y_tr, nSamples, epochs=1000, eta=0.1, momentum=0.9)

[10] Visualize the decision regions

In [None]:
# visualize pairs `labels`, `class_id` and `colors` position by position:
# `labels` holds the numeric class values (1 = target, 0 = the others) and
# `class_id` receives the legend strings returned by prepare_data.
visualize(bp_iris, X_tr, y_tr, multi_class=False,
          class_id=labels, labels=[1,0], colors=['magenta', 'blue'],
          xlabel='petal length', ylabel='petal width')