<a href="https://colab.research.google.com/github/Abh1-Shek/NeuralNetwork/blob/main/NN_MNIST_IRIS_CIFAR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random
import sklearn

In [None]:
# Element-wise arithmetic demo on two nested lists of shape (2, 2, 3).
a = [
    [[1, 2, 1], [1, 2, 1]],
    [[1, 2, 1], [0, 0, 0]],
]
b = [
    [[1, 2, 1], [1, 2, 1]],
    [[0, 0, 0], [1, 2, 1]],
]

# np.add / np.subtract accept the nested lists directly and work entry by entry.
for elementwise_op in (np.add, np.subtract):
    print(elementwise_op(a, b))

[[[2 4 2]
  [2 4 2]]

 [[1 2 1]
  [1 2 1]]]
[[[ 0  0  0]
  [ 0  0  0]]

 [[ 1  2  1]
  [-1 -2 -1]]]


In [None]:
class NN:
    """Fully connected feed-forward network with sigmoid activations.

    The network is trained with mini-batch stochastic gradient descent; the
    output error ``(a_L - y) * sigmoid'(z_L)`` used by backpropagation is the
    gradient of the quadratic cost ``0.5 * ||a_L - y||^2``.

    Inputs and targets are expected to be column vectors, i.e. arrays of
    shape ``(sizes[0], 1)`` and ``(sizes[-1], 1)`` respectively.

    Attributes:
        sizes: number of neurons per layer, input layer first.
        num_layers: ``len(sizes)``.
        weights: list with one ``(sizes[i], sizes[i - 1])`` matrix per
            non-input layer.
        biases: list with one ``(sizes[i], 1)`` column per non-input layer.
    """

    def __init__(self, sizes):
        """Create a network with standard-normal random parameters.

        Args:
            sizes: layer widths; e.g. ``[1, 2, 3]`` means 1 input neuron,
                one hidden layer of 2 neurons and 3 output neurons.
        """
        self.sizes = sizes
        self.num_layers = len(sizes)

        # One weight matrix per layer, excluding the input layer.
        self.weights = [
            np.random.randn(n_out, n_in)
            for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]

        # One bias column per layer, excluding the input layer.
        self.biases = [np.random.randn(sz, 1) for sz in sizes[1:]]

    def feed_forward(self, a):
        """Return the output activation of the network for input ``a``."""
        for w, b in zip(self.weights, self.biases):
            a = self.sigmoid(np.dot(w, a) + b)
        return a

    def predict(self, x, single=True):
        """Return the index of the most active output neuron.

        Args:
            x: one input column vector, or an iterable of them when
                ``single`` is false.
            single: when true ``x`` is a single sample and one index is
                returned; otherwise a list with one index per sample.
        """
        if single:
            return np.argmax(self.feed_forward(x))
        return [np.argmax(self.feed_forward(xi)) for xi in x]

    def sigmoid(self, z):
        """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
        return 1.0 / (1.0 + np.exp(-z))

    def sigmoid_prime(self, z):
        """Derivative of :meth:`sigmoid` evaluated at ``z``."""
        s = self.sigmoid(z)  # evaluate once instead of twice
        return s * (1.0 - s)

    def evaluate(self, data, labels):
        """Return the classification accuracy in percent.

        Args:
            data: iterable of input column vectors.
            labels: matching iterable of one-hot target vectors; the
                position of the maximum is taken as the true class.

        Returns:
            Percentage (0-100) of samples whose predicted class equals the
            true class; ``0.0`` when ``data`` is empty.
        """
        total = len(data)
        if total == 0:
            # Nothing to score; avoid dividing by zero.
            return 0.0
        correct = sum(
            int(np.argmax(self.feed_forward(x)) == np.argmax(target))
            for x, target in zip(data, labels)
        )
        return (correct * 100) / total

    def get_batches(self, data, labels, batch_size):
        """Shuffle the samples and split them into mini-batches.

        Data and labels are shuffled together so that pairs stay aligned.
        The last batch may be smaller than ``batch_size``.

        Returns:
            ``(batches, batches_labels)``: two lists of equal length.
        """
        n = len(data)
        data, labels = sklearn.utils.shuffle(data, labels)
        starts = range(0, n, batch_size)
        batches = [data[i:i + batch_size] for i in starts]
        batches_labels = [labels[i:i + batch_size] for i in starts]
        return batches, batches_labels

    def SGD(self, data, labels, lr=0.01, epochs=10, batch_size=3,
            test_data=None, test_labels=None, test=False):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            data: training inputs (column vectors).
            labels: one-hot training targets (column vectors).
            lr: learning rate.
            epochs: number of passes over the training data.
            batch_size: number of samples per parameter update.
            test_data: inputs used for the per-epoch accuracy report.
            test_labels: targets used for the per-epoch accuracy report.
            test: when true, print the accuracy on the test set after
                every epoch; otherwise only report epoch completion.

        Raises:
            ValueError: if ``test`` is true but the test set is missing.
        """
        if test and (test_data is None or test_labels is None):
            raise ValueError("test=True requires both test_data and test_labels")

        for epoch in range(epochs):
            # Re-shuffle every epoch so the batches differ between passes.
            batches_data, batches_labels = self.get_batches(data, labels, batch_size)
            for batch, label in zip(batches_data, batches_labels):
                self.update_parameters(batch, label, lr)

            if test:
                print ("Epoch:", epoch, "accuracy:", self.evaluate(test_data, test_labels))
            else:
                print("Epoch:", epoch, "complete")

    def get_next(self, w, activation, b):
        """Return ``(z, sigmoid(z))`` for one layer, ``z = w . activation + b``."""
        z = np.dot(w, activation) + b
        return z, self.sigmoid(z)

    # BP1
    def get_del_L(self, a_L, y, z_L):
        """Error of the output layer."""
        return (a_L - y) * self.sigmoid_prime(z_L)

    # BP2
    def get_del_l(self, w_next, del_next, z_l):
        """Error of a layer, given the next layer's weights and error."""
        return np.dot(w_next.transpose(), del_next) * self.sigmoid_prime(z_l)

    # BP3
    def get_del_b(self, del_l):
        """Gradient of the cost with respect to a layer's biases."""
        return del_l

    # BP4
    def get_del_w(self, del_l, a_prev):
        """Gradient of the cost with respect to a layer's weights."""
        return np.dot(del_l, a_prev.transpose())

    def backpropagation(self, X, y):
        """Compute the cost gradient for a single training sample.

        Args:
            X: input column vector.
            y: target column vector.

        Returns:
            ``(grad_w, grad_b)``: two lists of arrays shaped like
            ``self.weights`` and ``self.biases``.
        """
        # Plain lists: the per-layer matrices have different shapes, so they
        # cannot be stacked into one regular NumPy array.
        grad_b = [np.zeros(b.shape) for b in self.biases]
        grad_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass, remembering every weighted input and activation.
        activation = X
        activations = [X]
        zs = []
        for w, b in zip(self.weights, self.biases):
            z, activation = self.get_next(w, activation, b)
            zs.append(z)
            activations.append(activation)

        # Output layer.
        del_l = self.get_del_L(activations[-1], y, zs[-1])
        grad_b[-1] = self.get_del_b(del_l)
        grad_w[-1] = self.get_del_w(del_l, activations[-2])

        # Walk backwards through the hidden layers. Negative indices count
        # from the output: offset 2 is the last hidden layer, and so on.
        # weights[-offset + 1] connects that layer to the one after it, and
        # activations[-offset - 1] is the input that layer received.
        for offset in range(2, self.num_layers):
            del_l = self.get_del_l(self.weights[-offset + 1], del_l, zs[-offset])
            grad_b[-offset] = self.get_del_b(del_l)
            grad_w[-offset] = self.get_del_w(del_l, activations[-offset - 1])

        return (grad_w, grad_b)

    def update_parameters(self, batch, label, lr):
        """Apply one gradient-descent step using the mean batch gradient.

        Args:
            batch: inputs of the mini-batch.
            label: matching targets of the mini-batch.
            lr: learning rate.
        """
        batch_len = len(batch)
        if batch_len == 0:
            # An empty batch carries no gradient information.
            return

        grad_w = [np.zeros(w.shape) for w in self.weights]
        grad_b = [np.zeros(b.shape) for b in self.biases]

        # Sum the per-sample gradients layer by layer.
        for x, target in zip(batch, label):
            del_w, del_b = self.backpropagation(x, target)
            for acc, delta in zip(grad_w, del_w):
                acc += delta
            for acc, delta in zip(grad_b, del_b):
                acc += delta

        self.weights = [
            w - lr * gw / batch_len for w, gw in zip(self.weights, grad_w)
        ]
        self.biases = [
            b - lr * gb / batch_len for b, gb in zip(self.biases, grad_b)
        ]
            

In [None]:
# Network later trained on Iris: 4 input neurons, one hidden layer of 20, 3 outputs.
myNN = NN([4, 20, 3])

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets

In [None]:
# Load the Iris dataset bundled with scikit-learn and inspect its contents.
iris = datasets.load_iris()
print(iris.keys())
print(iris.target_names)
print(iris.feature_names)

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
['setosa' 'versicolor' 'virginica']
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [None]:
def preprocessing(dataset):
    """Turn a three-class dataset into column-vector inputs and targets.

    The features in ``dataset.data`` are rescaled with ``MinMaxScaler`` and
    each sample is resized into an ``(n_features, 1)`` column. Each entry of
    ``dataset.target`` (expected to be 0, 1 or 2) selects the matching
    one-hot ``(3, 1)`` column.

    Returns:
        ``(X, y)`` as NumPy arrays, one column vector per sample in each.
    """
    scaled = MinMaxScaler().fit_transform(dataset.data)
    n_features = len(scaled[0])

    # One-hot column for each of the three class indices.
    one_hot_columns = [
        [[1], [0], [0]],
        [[0], [1], [0]],
        [[0], [0], [1]],
    ]

    features = [np.resize(sample, (n_features, 1)) for sample in scaled]
    targets = [one_hot_columns[class_index] for class_index in dataset.target]
    return np.array(features), np.array(targets)

In [None]:
# Scale the Iris features, one-hot encode the targets, and check the shapes.
X, y = preprocessing(iris)
print(X.shape)
print(y.shape)

(150, 4, 1)
(150, 3, 1)


In [None]:
# Train on Iris; accuracy is reported on the training data itself.
myNN.SGD(
    X, y,
    lr=0.1, epochs=5000,
    batch_size=50,
    test_data=X, test_labels=y, test=True,
)

In [None]:
# Show the shape of every layer's weight matrix and bias column.
for w, b in zip(myNN.weights, myNN.biases):
    print('w', w.shape)
    print('b', b.shape)

w (20, 4)
b (20, 1)
w (3, 20)
b (3, 1)


In [None]:
# single=False: predict one class index per sample in X.
res = myNN.predict(X, False)

In [None]:
# Fraction of predictions equal to the true label (X is the training data).
print(sum(res == iris.target) / len(res))

0.9266666666666666


In [None]:
# Predicted class index for every Iris sample, in dataset order.
print(res)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]


In [None]:
from sklearn import datasets

In [None]:
# Load scikit-learn's bundled digits dataset and inspect its size and labels.
digits = datasets.load_digits()
print(digits.data.shape)
print(digits.target)

(1797, 64)
[0 1 2 ... 8 9 8]


In [None]:
def preprocessing_mnist(dataset):
    """Turn a ten-class dataset into column-vector inputs and targets.

    In this file it is called with scikit-learn's ``load_digits()`` result.
    The features in ``dataset.data`` are rescaled with ``MinMaxScaler`` and
    every sample is resized from ``(n_features,)`` to ``(n_features, 1)``.
    Each label in ``dataset.target`` selects the matching one-hot
    ``(10, 1)`` column.

    Returns:
        ``(X, y)`` as NumPy arrays, one column vector per sample in each.
    """
    scaled = MinMaxScaler().fit_transform(dataset.data)
    n_features = len(scaled[0])
    num_classes = 10

    # one_hot[k] is a (num_classes, 1) column with a single 1 in row k.
    one_hot = [np.zeros((num_classes, 1)) for _ in range(num_classes)]
    for k, column in enumerate(one_hot):
        column[k][0] = 1

    # (n_features,) -> (n_features, 1) because the network works on column vectors.
    features = [np.resize(sample, (n_features, 1)) for sample in scaled]
    targets = [one_hot[dataset.target[i]] for i in range(len(scaled))]

    return np.array(features), np.array(targets)

In [None]:
# Column-vector inputs and one-hot targets for the digits dataset.
X_mnist, y_mnist = preprocessing_mnist(digits)

In [None]:
# 64 inputs (one per feature in digits.data), 32 hidden neurons, 10 output classes.
NN_mnist = NN([64, 32, 10])

In [None]:
# Train on the digits data; accuracy is reported on the training data itself.
NN_mnist.SGD(X_mnist, y_mnist, 
             lr = 0.5, epochs = 5000,
             batch_size = 100, 
             test_data = X_mnist, test_labels = y_mnist, test = True)

In [None]:
# single=False: predict one class index per digit sample.
predictions_mnist = NN_mnist.predict(X_mnist, False)

In [None]:
# Percentage of predictions equal to the true digit (on the training data).
print(sum(predictions_mnist == digits.target) / len(predictions_mnist) * 100)

10.072342793544797


In [None]:
import tensorflow
from tensorflow.keras.datasets.cifar10 import load_data as cifar

In [None]:
# Load the CIFAR-10 train/test splits through the Keras dataset loader.
(x_train, y_train), (x_test, y_test) = cifar()

In [None]:
def preprocessing_cifar10(X_, y_, max_samples=3000):
    """Flatten images into column vectors and one-hot encode their labels.

    Args:
        X_: array-like of images, shape ``(n, ...)``; pixel values are
            divided by 255.
        y_: array-like of integer class labels with shape ``(n, 1)``;
            labels must lie in ``0..9``.
        max_samples: upper bound on the number of leading samples to
            convert. Clamped to the number of available samples; ``None``
            converts everything.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(count, n_features, 1)`` and
        ``y`` has shape ``(count, 10, 1)``.
    """
    X_ = np.asarray(X_)
    labels = np.asarray(y_)
    num_classes = 10

    # Never index past the end of the data when fewer samples are supplied.
    available = len(X_)
    count = available if max_samples is None else max(0, min(max_samples, available))

    # Every axis after the first is flattened into one feature axis.
    n_features = int(np.prod(X_.shape[1:]))
    X = X_[:count].reshape(count, n_features, 1) / 255

    # Row k of the identity matrix is the one-hot encoding of class k.
    y = np.eye(num_classes)[labels[:count, 0]].reshape(count, num_classes, 1)

    return X, y

In [None]:
# Convert the training and test splits; note that X and y are rebound here
# and no longer refer to the Iris data.
X, y = preprocessing_cifar10(x_train, y_train)
X_val, y_val = preprocessing_cifar10(x_test, y_test)

In [None]:
# Check the shapes of the converted CIFAR-10 inputs and targets.
print(X.shape)
print(y.shape)
# print(y[0:10])

(3000, 3072, 1)
(3000, 10, 1)


In [None]:
# 3072 inputs (flattened image), hidden layers of 256 and 128 neurons, 10 outputs.
NN_cifar = NN([3072, 256, 128, 10])

In [None]:
# batch_size = 1 means the parameters are updated after every single sample.
# Accuracy is reported on the training data (X, y); X_val / y_val are not used here.
NN_cifar.SGD(X, y,
             lr = 1, epochs = 10,
             batch_size = 1, 
             test_data = X, test_labels = y, test = True)



Epoch: 0 accuracy: 9.133333333333333
Epoch: 1 accuracy: 9.133333333333333
Epoch: 2 accuracy: 9.7
Epoch: 3 accuracy: 10.533333333333333
Epoch: 4 accuracy: 10.566666666666666
Epoch: 5 accuracy: 10.966666666666667
Epoch: 6 accuracy: 12.033333333333333
Epoch: 7 accuracy: 10.8
Epoch: 8 accuracy: 12.5
Epoch: 9 accuracy: 11.566666666666666
