In [1]:
### IMPORTS ###
from keras.datasets import cifar10
import numpy as np
import matplotlib.pyplot as plt
from keras.utils import to_categorical
import matplotlib.pyplot as plt

## Data Processing

In [2]:
### LOAD AND PREPARE DATA ###

#Load data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()


#Vectorize images
x_train = np.array([np.float64(x.flatten()) for x in x_train])
x_test = np.array([np.float64(x.flatten()) for x in x_test])


#Normalize images
x_train -= np.mean(x_train, axis = 0)
x_train /= np.std(x_train, axis = 0)
x_test -= np.mean(np.float64(x_test), axis = 0)
x_test /= np.std(x_test, axis = 0)

#One hot encoding of labels
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

x_train = np.array(x_train)
y_train = np.array(y_train)

x_test = np.array(x_test)
y_test = np.array(y_test)


#Insert '1' for bias
x_train = np.insert(x_train, 0, [1] * len(x_train), axis=1)
x_test = np.insert(x_test, 0, [1] * len(x_test), axis=1)


print('Train: X=%s, y=%s' % (x_train.shape, y_train.shape))
print('Test: X=%s, y=%s' % (x_test.shape, y_test.shape))

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Train: X=(50000, 3073), y=(50000, 10)
Test: X=(10000, 3073), y=(10000, 10)


# MultiLayer Perceptron Implementation

In [27]:
class MultiLayerPerceptron:

  def __init__(self, activation_function, num_hidden_layers, hidden_layers_width):
    self.activation_function = activation_function
    self.num_hidden_layers = num_hidden_layers
    self.hidden_layers_width = hidden_layers_width
    self.loss_per_epoch = []


    #Initialize weights with given number of hidden layers (0, 1 or 2)
    if num_hidden_layers == 0:
      self.w1 = np.random.rand(10, 3073)/100

    elif num_hidden_layers == 1:
      if len(hidden_layers_width) != 1:
        raise Exception("Invalid input: len(hidden_layers_width) != num_hidden_layers")
      self.w1 = np.random.rand(hidden_layers_width[0], 3073)/100
      self.w2 = np.random.rand(10, hidden_layers_width[0])/100

    elif num_hidden_layers == 2:
      if len(hidden_layers_width) != 2:
        raise Exception("Invalid input: len(hidden_layers_width) != num_hidden_layers")
      self.w1 = np.random.rand(hidden_layers_width[0], 3073)/100
      self.w2 = np.random.rand(hidden_layers_width[1], hidden_layers_width[0])/100
      self.w3 = np.random.rand(10, hidden_layers_width[1])/100
    else:
      raise Exception("Unsupported number of hidden layers")


  def fit(self, x, y, learning_rate, epsilon, max_iters, batch_size):

    num_of_batches = int(len(x)/batch_size)
    x_batches = np.array_split(x, num_of_batches)
    y_batches = np.array_split(y, num_of_batches)

    #Gradient descent
    norms = np.array([np.inf])
    t = 0
    print("Epochs: ")
    
    #RELU WITH 0 HIDDEN LAYERS
    if self.activation_function == self.relu and self.num_hidden_layers == 0:
      while np.any(norms > epsilon) and t < max_iters:
          for batch in range(num_of_batches):
            grad = self.relu_gradient(x_batches[batch], y_batches[batch])
            self.w1 -= learning_rate * grad #* (1/num_of_batches)
          t += 1
          norms = np.array([np.linalg.norm(g) for g in grad])
          print(t, end=' ')
      print("")
      print(f"{t} iterations performed")
      return

    #RELU WITH 1 HIDDEN LAYERS
    elif self.activation_function == self.relu and self.num_hidden_layers == 1:
      while np.any(norms > epsilon) and t < max_iters:
          for batch in range(num_of_batches):
            grad_w1, grad_w2 = self.relu_gradient(x_batches[batch], y_batches[batch])
            self.w1 -= learning_rate * grad_w1 #* (1/num_of_batches)
            self.w2 -= learning_rate * grad_w2 #* (1/num_of_batches)
          t += 1
          norms = np.array([np.linalg.norm(g) for g in grad])
          print(t, end=' ')
      print("")
      print(f"{t} iterations performed")
      return



  def relu_gradient(self, x, y):

    y_hat = self.predict(x)
    self.loss_per_epoch.append(MultiLayerPerceptron.total_loss(y, y_hat))

    if self.num_hidden_layers == 0:
      dy = y_hat - y
      #print(f"dy: {dy.shape} - x: {x.shape}")
      dw1 = np.matmul(np.transpose(dy), x)
      #print("dw1 shape before: ", dw1.shape)
      #dw1 = np.array([np.sum(dw1, axis=0)])
      return dw1

    elif self.num_hidden_layers == 1:
      dy = np.transpose(y_hat - y)
      dw2 = np.matmul(dy, np.transpose(self.z1_for_gradient))
      dw1 = np.matmul(np.transpose(dy), self.w2)
      derivative_q1 = (self.q1_for_gradient > 0).astype(int)
      print(f"shapes: {dw2.shape} -- {self.w2.shape}")
      dw1 = np.matmul(dw1, derivative_q1)
      dw1 = np.matmul(dw1, x)
      return dw1, dw2




  def predict(self, x):

    if self.num_hidden_layers == 0:
      return self.softmax(np.transpose(np.dot(self.w1, np.transpose(x))))

    elif self.num_hidden_layers == 1:
      q1 = np.matmul(self.w1, np.transpose(x))
      z1 = self.activation_function(q1)
      self.q1_for_gradient = q1
      self.z1_for_gradient = z1
      return self.softmax(np.transpose(np.matmul(self.w2, z1)))
    else:
      z1 = self.activation_function(np.matmul(self.w1, np.transpose(x)))
      z2 = self.activation_function(np.matmul(self.w2, z1))
      return self.softmax(np.transpose(np.matmul(self.w3, z2)))

  
  @staticmethod
  def relu(x):
    R, C = x.shape
    for i in range(R):
      for j in range(C):
        x[i][j] = max(x[i][j], 0)
    return x

  @staticmethod
  def softmax(x):
    R, C = x.shape
    for i in range(R):
      denominator = sum([np.exp(j) for j in x[i]])
      denominator = denominator if denominator > 0 else 1
      for j in range(C):
        x[i][j] = np.exp(x[i][j])/denominator
    return x

  @staticmethod
  def accuracy(y, y_hat):

    accurate_classifications = 0

    for i, y in enumerate(y):
      category = np.argmax(y)
      predicted_category = np.argmax(y_hat[i])

      if category == predicted_category:
        accurate_classifications += 1

    return accurate_classifications/len(y_hat)

  @staticmethod
  def total_loss(y, y_hat):

    loss = 0

    for i, y in enumerate(y):
      category = np.argmax(y)
      predicted_value = y_hat[i][category]
      loss += (-1 * np.log(predicted_value))

    return loss



In [12]:
a = np.array([[1, 2, 3], [4, 5, 6]])
print((a > 2).astype(int))

[[0 0 1]
 [1 1 1]]


In [28]:
mlp = MultiLayerPerceptron(MultiLayerPerceptron.relu, 1, [256])
mlp.fit(x_train, y_train, learning_rate=0.000001, epsilon=0.0000001, max_iters=3, batch_size=2048)
preds = mlp.predict(x_train)
print("Accuracy: ", MultiLayerPerceptron.accuracy(y_train, preds)*100, "%")
plt.plot(range(len(mlp.loss_per_epoch)), mlp.loss_per_epoch)
plt.show()

Epochs: 
shapes: (10, 256) -- (10, 256)


ValueError: ignored