[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QdTm4ljL99cbTU_CWkLmqaC424CGngOM?usp=sharing)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.datasets import load_boston
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from scipy.special import softmax
from jax import jit, grad
from jax.scipy.special import logsumexp
import jax.numpy as jnp
from functools import partial

In [5]:
class NeuralNetwork():
    def __init__(self,_type,activation,hidden):

        self.network = list()
        self.activation = activation
        self.type = _type
        self.X = None
        self.activation = activation
        self.y = None
        self.n_hidden = None
        self.hidden = hidden
        return

    def create_network(self,X):
        hidden = self.hidden
        self.n_hidden = len(hidden)

        for i in range(1,len(hidden)):
            w = np.random.randn(hidden[i],hidden[i-1])
            b = np.random.rand(hidden[i])
            # print(w.shape,b.shape)
            self.network.append([w,b])

        return


    def ReLU(self,Z):
        return jnp.maximum(Z, 0)

    def Sigmoid(self,Z):
        sig = 1/(1 + jnp.exp(Z))
        # print(Z.shape,sig.shape)
        return sig

    def Softmax(self,Z):
        soft = jnp.exp(Z)/sum(jnp.exp(Z))
        return soft

    def Identity(self,Z):
        return Z

    def forward_propogation(self,X,network):
        A = X
        activation = self.activation
        Z = 0
        for i in range(self.n_hidden-2):
      
            Z = jnp.dot(A,network[i][0].T) + network[i][1]
            if(activation[i]=="relu"):
                A = self.ReLU(Z)
            elif(activation[i]=="identity"):
                A = self.Identity(Z)
            elif(activation[i]=="sigmoid"):
                A = self.Sigmoid(Z)
            else:
                A = self.Softmax(Z)
        
        out = jnp.dot(A,network[-1][0].T) + network[-1][1]

        if(self.type==0):
            return out - logsumexp(out,axis=1,keepdims=True)
        else:
            return out

    def oneHot(self,y,classes = 10):
        
        one_hot = np.zeros((len(y), classes))
        for i in range(len(y)):
          one_hot[i][int(y[i])] = 1
        return one_hot
    
    def cost_function(self,network):
        X = self.X
        y = self.y
        # y = np.eye(len(np.unique(y)))[y]
        y = self.oneHot(y)
        self.y1 = y
        out = self.forward_propogation(X,network)
        cost = jnp.sum(out*y,axis=1)
        cost = -jnp.mean(cost)
        return cost

    def update_network(self,network,alpha):

        agrad = grad(self.cost_function)(network)

        for i in range(len(self.network)):
            network[i][0] -= alpha*agrad[i][0]
            network[i][1] -= alpha*agrad[i][1]

        return network

    # @partial(jit, static_argnums=(0,))
    def fit(self,X,y,alpha=0.02,n_iter=1000):
        self.X = X
        self.y = y
        self.create_network(X)
        for i in range(1,n_iter+1):
            self.network = self.update_network(self.network,alpha)

        return

    def accuracy(self,y_hat, y):
        return (np.sum(y_hat == y) / y.size)*100

    def predict(self,X):
        y_hat = jnp.argmax(self.forward_propogation(X,self.network),axis=1)
        return y_hat



In [6]:
digits = load_digits()
X = pd.DataFrame(digits.data)
y = pd.Series(digits['target'])
# print(X['target'].max())
X = X.to_numpy()
y = y.to_numpy()

X = X/255

cnt = 1
hidden = [64,256,128,10]
activation = ["sigmoid","sigmoid"]
NN = NeuralNetwork(0,activation,hidden)
NN.fit(X,y)
y_hat = NN.predict(X)
accu = NN.accuracy(y_hat,y)
accu


DeviceArray(80.57874, dtype=float32)

In [7]:
A = []
kf = KFold(n_splits=3)
for train_index, test_index in kf.split(X):
    # print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    NN = NeuralNetwork(0,activation,hidden)
    NN.fit(X_train,y_train)
    y_hat = NN.predict(X_test)
    accu = NN.accuracy(y_hat,y_test)
    A.append(accu)
    print(cnt,". Accuracy: ",A[cnt-1])
    cnt+=1
print("Overall/Average Accuracy: ",sum(A)/len(A))

1 . Accuracy:  70.78464
2 . Accuracy:  67.612686
3 . Accuracy:  70.951584
Overall/Average Accuracy:  69.782974


In [None]:
X, y = load_boston(return_X_y=True)

X = MinMaxScaler().fit_transform(X)

hidden = [64,128,128,1]
# hidden = [64, 128, 128, 64, 32, 10]
activation = ["sigmoid","sigmoid","sigmoid","sigmoid","sigmoid"]
NN = NeuralNetwork(1,activation,hidden)
accu = NN.fit(X_train,y_train)
accu

Ref: https://github.com/google/jax/blob/master/examples/mnist_classifier_fromscratch.py