# MLP from Scratch

## Dependencies

In [18]:
from sklearn.datasets import fetch_openml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Data

In [19]:
# Download the Kuzushiji-MNIST dataset from OpenML.
# NOTE: despite the `mnist` variable name, this is Kuzushiji-MNIST, not the
# classic handwritten-digit MNIST. `data_home` points the download location at ../data/.
mnist = fetch_openml(name='Kuzushiji-MNIST', parser="pandas", as_frame=True, data_home="../data/")

In [23]:
# Preview the first five rows of the feature table (one column per pixel).
mnist.data.head(5)

Unnamed: 0,pixel-0000,pixel-0001,pixel-0002,pixel-0003,pixel-0004,pixel-0005,pixel-0006,pixel-0007,pixel-0008,pixel-0009,...,pixel-0774,pixel-0775,pixel-0776,pixel-0777,pixel-0778,pixel-0779,pixel-0780,pixel-0781,pixel-0782,pixel-0783
0,0,0,0,0,0,0,0,0,0,0,...,122,255,90,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,32,164,...,255,64,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
# Preview the first three labels; note they are a categorical Series of
# string labels ('0'..'9'), not integers.
mnist.target.head(3)

0    8
1    7
2    0
Name: class, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

In [21]:
# Download the Fashion-MNIST dataset from OpenML as pandas objects;
# `data_home` points the download location at ../data/.
fashion_mnist = fetch_openml(name='Fashion-MNIST', parser="pandas", as_frame=True, data_home="../data/")


In [24]:
# Preview the first five rows of the feature table. Column names here are
# `pixel1`..`pixel784`, unlike the `pixel-0000`-style names of the first dataset.
fashion_mnist.data.head(5)

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,...,119,114,130,76,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,22,...,0,0,1,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,33,96,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
# Preview the first three labels; as above, a categorical Series of
# string labels ('0'..'9').
fashion_mnist.target.head(3)

0    9
1    0
2    0
Name: class, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

### Preprocessing

Normalization: values are rescaled to lie between 0 and 1.

Centering: the mean pixel value becomes 0, i.e., the distribution of pixel values is centered on 0.

Standardization: similar to a combination of the two above. It assumes that the distribution of the data is Gaussian and shifts and scales the data to have a mean of 0 and a standard deviation of 1.

In [35]:
def normalize(arr: np.ndarray) -> np.ndarray:
    """Scale a non-negative array into [0, 1] by dividing by its maximum.

    Parameters
    ----------
    arr : np.ndarray
        Array (any shape) containing only non-negative values. Anything
        accepted by ``np.asarray`` (e.g. nested lists) also works.

    Returns
    -------
    np.ndarray
        A new floating-point array with the same shape as ``arr`` in which
        the largest element equals 1. An all-zero input yields all zeros and
        an empty input yields an empty array. The input is never modified
        and never returned as-is.

    Raises
    ------
    AssertionError
        If ``arr`` contains a negative value.
    """
    arr = np.asarray(arr)

    if arr.size == 0:
        # np.min / np.max raise ValueError on zero-size arrays; there is
        # nothing to scale, so divide by 1 just to get a float result.
        divisor = 1
    else:
        assert np.min(arr) >= 0, "normalize only supports non-negative arrays"
        peak = np.max(arr)
        # Given the assert above, peak == 0 means every value is 0; divide by
        # 1 instead so the result is still a fresh float array (no 0/0).
        divisor = peak if peak != 0 else 1

    return arr / divisor

# Demonstrate normalize on the same eight values arranged as 1-D, 2-D and 3-D arrays.
flat_values = np.array([1, 2, 3, 4, 5, 6, 7, 8])
as_matrix = flat_values.reshape(2, 4)
as_cube = flat_values.reshape(2, 2, 2)

print(f"1D example: {normalize(flat_values)}")
print(f"2D example: {normalize(as_matrix)}")
print(f"3D example: {normalize(as_cube)}")

1D example: [0.125 0.25  0.375 0.5   0.625 0.75  0.875 1.   ]
2D example: [[0.125 0.25  0.375 0.5  ]
 [0.625 0.75  0.875 1.   ]]
3D example: [[[0.125 0.25 ]
  [0.375 0.5  ]]

 [[0.625 0.75 ]
  [0.875 1.   ]]]
