In [3]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tqdm import tqdm
from tensorflow import keras
from tensorflow.keras.optimizers import Adam

In [4]:
# Load scikit-learn's bundled digits dataset (returned as a dict-like object).
digits = load_digits()

In [7]:
# Feature matrix: one row per sample (shape (1797, 64) per the output below).
X=digits['data']

In [9]:
# Min-max scale every sample (row) to [0, 1] independently.
# Vectorised over rows instead of looping over samples in Python.
X = np.asarray(X, dtype=float)
row_min = X.min(axis=1, keepdims=True)
row_span = X.max(axis=1, keepdims=True) - row_min
# A constant row has zero span; divide by 1 so it maps to all zeros
# instead of producing NaNs from 0/0.
row_span[row_span == 0] = 1
X = (X - row_min) / row_span

In [11]:
X.shape

(1797, 64)

In [13]:
X[0].shape

(64,)

In [15]:
# Integer class label for every sample.
y=digits['target']

In [19]:
# All-zero (n_samples, 10) integer matrix; the next cell fills in the one-hot entries.
y_one_hot = np.zeros((y.shape[0], 10), dtype=int)

In [17]:
# One-hot encode the labels: row k gets a single 1 in column y[k].
y_one_hot = np.zeros((y.shape[0], 10), dtype=int)
y_one_hot[np.arange(len(y)), y] = 1

In [117]:
# Split features, integer labels and one-hot labels in ONE call so the three
# arrays are shuffled with the same permutation by construction, rather than
# relying on two separately seeded calls happening to stay aligned.
(X_train, X_test,
 y_label_train, y_label_test,
 y_one_hot_label_train, y_one_hot_label_test) = train_test_split(
    X, y, y_one_hot, test_size=0.2, random_state=42
)

In [119]:
X_train.shape

(1437, 64)

In [121]:
X_test.shape

(360, 64)

In [123]:
y_one_hot_label_train.shape

(1437, 10)

In [125]:
y_one_hot_label_test.shape

(360, 10)

In [127]:
# Layer sizes of the hand-written network.
input_dim=64    # features per sample (X.shape is (1797, 64))
output_dim=10   # width of the one-hot label vectors
hidden_dim=100  # number of hidden units

In [129]:
# Input -> hidden weights, shape (hidden_dim, input_dim), small uniform init.
# NOTE(review): no random seed is set in the visible cells, so these values
# (and the training results) presumably differ between runs -- confirm.
W1 = np.random.uniform(-0.01, 0.01, size=(hidden_dim,input_dim))

In [131]:
# Hidden -> output weights, shape (output_dim, hidden_dim), small uniform init.
# NOTE(review): no random seed is set in the visible cells -- confirm whether
# reproducible initialisation is wanted.
W2 = np.random.uniform(-0.01, 0.01, size=(output_dim,hidden_dim))

In [133]:
def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``."""
    floor = 0
    return np.maximum(floor, x)

In [135]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    The input is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` cannot overflow for extreme values.
    """
    clipped = np.clip(np.array(x), -500, 500)
    denominator = 1 + np.exp(-clipped)
    return 1 / denominator

In [137]:
def softmax(x, axis=None):
    """Compute a numerically stable softmax.

    Parameters
    ----------
    x : array_like
        Input scores.
    axis : int or None, optional
        Axis along which to normalise. The default ``None`` normalises over
        every element of ``x`` (the original single-vector behaviour); pass
        e.g. ``axis=1`` to normalise each row of a batch independently.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``x`` whose entries along ``axis`` sum to 1.
    """
    x = np.asarray(x, dtype=float)
    # Subtract the maximum before exponentiating for numerical stability.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

In [139]:
def binary_crossentropy(y_true, y_pred):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Both inputs are converted to arrays and flattened, so any matching
    layout (row vector, column vector, ...) is accepted. Predictions are
    clipped to [1e-15, 1 - 1e-15] so the logarithms stay finite.
    """
    targets = np.array(y_true).flatten()
    probs = np.clip(np.array(y_pred).flatten(), 1e-15, 1 - 1e-15)

    log_likelihood = targets * np.log(probs) + (1 - targets) * np.log(1 - probs)
    return -np.mean(log_likelihood)

In [141]:
def model_forward(x,W1,W2,output_activation="sigmoid"):
    """Forward pass of the two-layer network for a single sample.

    Parameters
    ----------
    x : 1-D array_like
        One input sample; it is reshaped to a column vector ``(len(x), 1)``.
    W1 : ndarray
        Input -> hidden weights, multiplied as ``W1 @ x``.
    W2 : ndarray
        Hidden -> output weights, multiplied as ``W2 @ a1``.
    output_activation : {"sigmoid", "softmax"}
        Activation applied to the output pre-activations.

    Returns
    -------
    tuple
        ``(z1, a1, z2, a2)``: hidden pre-activation, hidden ReLU activation,
        output pre-activation and output activation, all column vectors.

    Raises
    ------
    ValueError
        If ``output_activation`` is not one of the supported names.
    """
    # Validate up front: an unknown name used to fall through `else: pass`
    # and crash at `return` with an UnboundLocalError on `a2`.
    if output_activation not in ("sigmoid", "softmax"):
        raise ValueError(
            "output_activation must be 'sigmoid' or 'softmax', got "
            + repr(output_activation)
        )

    x = np.reshape(x, (len(x), 1))
    z1 = W1 @ x
    a1 = relu(z1)
    z2 = W2 @ a1
    a2 = sigmoid(z2) if output_activation == "sigmoid" else softmax(z2)
    return z1, a1, z2, a2

In [143]:
# Smoke test: forward pass on the first training sample (default sigmoid output).
z1,a1,z2,a2=model_forward(X_train[0],W1,W2)

In [167]:
def model_backward(x,y_true,z1,a1,z2,a2,W1,W2,output_activation="sigmoid"):
    """Back-propagate one sample through the two-layer network.

    The chain rule is written out explicitly with Jacobian matrices. The loss
    derivative used is that of the element-wise binary cross-entropy summed
    over the outputs, i.e. ``len(y_true)`` times the gradient of the mean
    reported by ``binary_crossentropy``.

    Parameters
    ----------
    x : 1-D array_like
        Input sample (reshaped to a column vector).
    y_true : 1-D array_like
        Target vector (reshaped to a column vector).
    z1, a1 : ndarray
        Hidden pre-activation / activation column vectors from the forward pass.
    z2 : ndarray
        Output pre-activation; accepted for symmetry with the forward pass but
        not read.
    a2 : ndarray
        Output activation column vector (the prediction).
    W1 : ndarray
        Input -> hidden weights; accepted for symmetry but not read.
    W2 : ndarray
        Hidden -> output weights.
    output_activation : {"sigmoid", "softmax"}
        Must match the activation used in the forward pass.

    Returns
    -------
    tuple
        ``(dloss_dw1, dloss_dw2)`` with the same shapes as ``W1`` and ``W2``.

    Raises
    ------
    ValueError
        If ``output_activation`` is not one of the supported names.
    """
    # Previously an unknown name fell through `else: pass` and crashed later
    # with an UnboundLocalError on `dy_hat_dz2`.
    if output_activation not in ("sigmoid", "softmax"):
        raise ValueError(
            "output_activation must be 'sigmoid' or 'softmax', got "
            + repr(output_activation)
        )

    x = np.reshape(x, (len(x), 1))
    y_true = np.reshape(y_true, (len(y_true), 1))

    # Keep predictions strictly inside (0, 1) to avoid division by zero below.
    epsilon = 1e-10
    y_pred = np.clip(a2, epsilon, 1 - epsilon)

    # Layer sizes come from the arguments, not from notebook-level globals,
    # so the function works for any network dimensions.
    n_out = np.shape(y_pred)[0]
    n_hidden = np.shape(z1)[0]

    # d(loss)/d(y_hat) as a row vector.
    dloss_dy_hat = ((y_pred - y_true) / (y_pred * (1 - y_pred))).T

    # Jacobian of the output activation with respect to z2.
    if output_activation == "sigmoid":
        dy_hat_dz2 = np.eye(n_out) * (y_pred * (1 - y_pred))
    else:  # softmax
        dy_hat_dz2 = np.diagflat(y_pred) - y_pred @ y_pred.T

    dloss_dz2 = dloss_dy_hat @ dy_hat_dz2

    # z2 = W2 @ a1, so d(z2)/d(a1) is W2; the ReLU Jacobian is a 0/1 diagonal.
    dz2_da1 = W2
    da1_dz1 = np.eye(n_hidden) * (z1 > 0)

    dloss_dw1 = (dloss_dz2 @ dz2_da1 @ da1_dz1).T @ x.T
    dloss_dw2 = dloss_dz2.T @ a1.T

    return dloss_dw1, dloss_dw2

In [169]:
# Smoke test: gradients for the first training sample, using the forward-pass
# values computed above; the next cells check the gradient shapes.
dloss_dw1, dloss_dw2 = model_backward(X_train[0],y_one_hot_label_train[0],z1,a1,z2,a2,W1,W2)

In [171]:
dloss_dw1.shape

(100, 64)

In [173]:
dloss_dw2.shape

(10, 100)

In [175]:
# Per-sample gradient descent on the hand-written network.
# W1 / W2 are taken from the current kernel state and rebound after every
# sample, so re-running this cell continues training from the current weights.
alpha = 0.001        # learning rate
num_epochs = 100
loss_history = []    # mean per-sample loss, one entry per epoch

for epoch in range(num_epochs):
    epoch_loss = 0
    for i in tqdm(range(len(X_train))):
        # Forward pass and loss for sample i.
        z1, a1, z2, a2 = model_forward(X_train[i], W1, W2)
        loss = binary_crossentropy(y_one_hot_label_train[i], a2)
        epoch_loss += loss

        # Backward pass and weight update.
        gradW1, gradW2 = model_backward(
            X_train[i], y_one_hot_label_train[i], z1, a1, z2, a2, W1, W2
        )
        W1 = W1 - alpha * gradW1
        W2 = W2 - alpha * gradW2

    epoch_loss = epoch_loss / len(X_train)
    loss_history.append(epoch_loss)
    print(f"Loss for epoch {epoch+1} is {epoch_loss}")

100%|██████████| 1437/1437 [00:00<00:00, 7984.21it/s]


Loss for epoch 1 is 0.02445170098727777


100%|██████████| 1437/1437 [00:00<00:00, 11395.88it/s]


Loss for epoch 2 is 0.024219850264190056


100%|██████████| 1437/1437 [00:00<00:00, 10002.71it/s]


Loss for epoch 3 is 0.023993354893050378


100%|██████████| 1437/1437 [00:00<00:00, 10867.72it/s]


Loss for epoch 4 is 0.023769018555271047


100%|██████████| 1437/1437 [00:00<00:00, 10866.97it/s]


Loss for epoch 5 is 0.023550104913205683


100%|██████████| 1437/1437 [00:00<00:00, 10714.50it/s]


Loss for epoch 6 is 0.02333693456979466


100%|██████████| 1437/1437 [00:00<00:00, 10991.21it/s]


Loss for epoch 7 is 0.0231208647048313


100%|██████████| 1437/1437 [00:00<00:00, 10724.03it/s]


Loss for epoch 8 is 0.022911660295355563


100%|██████████| 1437/1437 [00:00<00:00, 10851.17it/s]


Loss for epoch 9 is 0.022707915292708918


100%|██████████| 1437/1437 [00:00<00:00, 11072.56it/s]


Loss for epoch 10 is 0.022505427268062043


100%|██████████| 1437/1437 [00:00<00:00, 10441.15it/s]


Loss for epoch 11 is 0.02231181559398882


100%|██████████| 1437/1437 [00:00<00:00, 10944.23it/s]


Loss for epoch 12 is 0.02211236349586238


100%|██████████| 1437/1437 [00:00<00:00, 11050.45it/s]


Loss for epoch 13 is 0.02191552775430234


100%|██████████| 1437/1437 [00:00<00:00, 10688.29it/s]


Loss for epoch 14 is 0.021727796994271038


100%|██████████| 1437/1437 [00:00<00:00, 11194.26it/s]


Loss for epoch 15 is 0.021538806598340607


100%|██████████| 1437/1437 [00:00<00:00, 10089.08it/s]


Loss for epoch 16 is 0.02135565982229097


100%|██████████| 1437/1437 [00:00<00:00, 11071.18it/s]


Loss for epoch 17 is 0.02117163707926778


100%|██████████| 1437/1437 [00:00<00:00, 10588.31it/s]


Loss for epoch 18 is 0.02099346894406436


100%|██████████| 1437/1437 [00:00<00:00, 10693.60it/s]


Loss for epoch 19 is 0.02081546508002228


100%|██████████| 1437/1437 [00:00<00:00, 11109.05it/s]


Loss for epoch 20 is 0.02064246589964841


100%|██████████| 1437/1437 [00:00<00:00, 8810.46it/s]


Loss for epoch 21 is 0.02046880099232588


100%|██████████| 1437/1437 [00:00<00:00, 10144.95it/s]


Loss for epoch 22 is 0.020301575843739785


100%|██████████| 1437/1437 [00:00<00:00, 10982.93it/s]


Loss for epoch 23 is 0.02013367263773939


100%|██████████| 1437/1437 [00:00<00:00, 10730.08it/s]


Loss for epoch 24 is 0.019970311438434866


100%|██████████| 1437/1437 [00:00<00:00, 11198.15it/s]


Loss for epoch 25 is 0.0198084920456011


100%|██████████| 1437/1437 [00:00<00:00, 11041.46it/s]


Loss for epoch 26 is 0.019648925638304494


100%|██████████| 1437/1437 [00:00<00:00, 11129.34it/s]


Loss for epoch 27 is 0.019492488234363444


100%|██████████| 1437/1437 [00:00<00:00, 11087.16it/s]


Loss for epoch 28 is 0.019336632126487466


100%|██████████| 1437/1437 [00:00<00:00, 11023.69it/s]


Loss for epoch 29 is 0.01918141351853605


100%|██████████| 1437/1437 [00:00<00:00, 10925.93it/s]


Loss for epoch 30 is 0.019031058276798823


100%|██████████| 1437/1437 [00:00<00:00, 10769.73it/s]


Loss for epoch 31 is 0.018881906942859697


100%|██████████| 1437/1437 [00:00<00:00, 11189.36it/s]


Loss for epoch 32 is 0.018736436619233906


100%|██████████| 1437/1437 [00:00<00:00, 10954.47it/s]


Loss for epoch 33 is 0.01859071657425211


100%|██████████| 1437/1437 [00:00<00:00, 11111.00it/s]


Loss for epoch 34 is 0.01844882801900131


100%|██████████| 1437/1437 [00:00<00:00, 11073.88it/s]


Loss for epoch 35 is 0.018307103585143106


100%|██████████| 1437/1437 [00:00<00:00, 11173.57it/s]


Loss for epoch 36 is 0.018172688376415527


100%|██████████| 1437/1437 [00:00<00:00, 9589.52it/s]


Loss for epoch 37 is 0.01803322075409507


100%|██████████| 1437/1437 [00:00<00:00, 9816.13it/s]


Loss for epoch 38 is 0.017900325377758777


100%|██████████| 1437/1437 [00:00<00:00, 9708.72it/s]


Loss for epoch 39 is 0.017767205792465005


100%|██████████| 1437/1437 [00:00<00:00, 9602.64it/s]


Loss for epoch 40 is 0.01763419468034306


100%|██████████| 1437/1437 [00:00<00:00, 9590.25it/s]


Loss for epoch 41 is 0.017506175293942498


100%|██████████| 1437/1437 [00:00<00:00, 10548.67it/s]


Loss for epoch 42 is 0.01737472901869863


100%|██████████| 1437/1437 [00:00<00:00, 10067.73it/s]


Loss for epoch 43 is 0.01724957983668822


100%|██████████| 1437/1437 [00:00<00:00, 9986.82it/s]


Loss for epoch 44 is 0.017124964070926393


100%|██████████| 1437/1437 [00:00<00:00, 10736.22it/s]


Loss for epoch 45 is 0.016999415979991883


100%|██████████| 1437/1437 [00:00<00:00, 9584.40it/s]


Loss for epoch 46 is 0.01687679676289709


100%|██████████| 1437/1437 [00:00<00:00, 8498.43it/s]


Loss for epoch 47 is 0.01675707572553615


100%|██████████| 1437/1437 [00:00<00:00, 9933.62it/s]


Loss for epoch 48 is 0.016640223704526075


100%|██████████| 1437/1437 [00:00<00:00, 10783.12it/s]


Loss for epoch 49 is 0.01651860027752052


100%|██████████| 1437/1437 [00:00<00:00, 9980.29it/s] 


Loss for epoch 50 is 0.016402478880635313


100%|██████████| 1437/1437 [00:00<00:00, 10481.00it/s]


Loss for epoch 51 is 0.01628819934080736


100%|██████████| 1437/1437 [00:00<00:00, 10085.50it/s]


Loss for epoch 52 is 0.016171307818628317


100%|██████████| 1437/1437 [00:00<00:00, 10284.23it/s]


Loss for epoch 53 is 0.01606083563493893


100%|██████████| 1437/1437 [00:00<00:00, 9841.73it/s]


Loss for epoch 54 is 0.01594744687921417


100%|██████████| 1437/1437 [00:00<00:00, 11134.87it/s]


Loss for epoch 55 is 0.01583758334853309


100%|██████████| 1437/1437 [00:00<00:00, 10919.58it/s]


Loss for epoch 56 is 0.01573114414641515


100%|██████████| 1437/1437 [00:00<00:00, 11077.20it/s]


Loss for epoch 57 is 0.01561862626273803


100%|██████████| 1437/1437 [00:00<00:00, 10717.11it/s]


Loss for epoch 58 is 0.015514596680331304


100%|██████████| 1437/1437 [00:00<00:00, 10778.07it/s]


Loss for epoch 59 is 0.015409747469511419


100%|██████████| 1437/1437 [00:00<00:00, 9619.18it/s]


Loss for epoch 60 is 0.015304841448055433


100%|██████████| 1437/1437 [00:00<00:00, 10543.04it/s]


Loss for epoch 61 is 0.015200644968394128


100%|██████████| 1437/1437 [00:00<00:00, 10764.19it/s]


Loss for epoch 62 is 0.015098550206353104


100%|██████████| 1437/1437 [00:00<00:00, 7766.10it/s]


Loss for epoch 63 is 0.014998168411664528


100%|██████████| 1437/1437 [00:00<00:00, 10569.72it/s]


Loss for epoch 64 is 0.01489697283577793


100%|██████████| 1437/1437 [00:00<00:00, 10878.74it/s]


Loss for epoch 65 is 0.014803504009427412


100%|██████████| 1437/1437 [00:00<00:00, 10938.45it/s]


Loss for epoch 66 is 0.014698202001794784


100%|██████████| 1437/1437 [00:00<00:00, 10203.34it/s]


Loss for epoch 67 is 0.014607998895587901


100%|██████████| 1437/1437 [00:00<00:00, 11066.70it/s]


Loss for epoch 68 is 0.014514859108372098


100%|██████████| 1437/1437 [00:00<00:00, 9661.17it/s]


Loss for epoch 69 is 0.014421595959448395


100%|██████████| 1437/1437 [00:00<00:00, 9904.87it/s]


Loss for epoch 70 is 0.014324618848302604


100%|██████████| 1437/1437 [00:00<00:00, 10580.17it/s]


Loss for epoch 71 is 0.014236192807983473


100%|██████████| 1437/1437 [00:00<00:00, 10704.33it/s]


Loss for epoch 72 is 0.014143110278463553


100%|██████████| 1437/1437 [00:00<00:00, 10382.46it/s]


Loss for epoch 73 is 0.014053778125458154


100%|██████████| 1437/1437 [00:00<00:00, 11224.41it/s]


Loss for epoch 74 is 0.013965044210888428


100%|██████████| 1437/1437 [00:00<00:00, 9623.31it/s]


Loss for epoch 75 is 0.013874632599133532


100%|██████████| 1437/1437 [00:00<00:00, 10761.31it/s]


Loss for epoch 76 is 0.013790854591861064


100%|██████████| 1437/1437 [00:00<00:00, 10485.58it/s]


Loss for epoch 77 is 0.013702602175171262


100%|██████████| 1437/1437 [00:00<00:00, 10467.28it/s]


Loss for epoch 78 is 0.013619688046564722


100%|██████████| 1437/1437 [00:00<00:00, 10782.87it/s]


Loss for epoch 79 is 0.013531131974840425


100%|██████████| 1437/1437 [00:00<00:00, 9854.62it/s]


Loss for epoch 80 is 0.01344731518952009


100%|██████████| 1437/1437 [00:00<00:00, 10681.10it/s]


Loss for epoch 81 is 0.013363101013370916


100%|██████████| 1437/1437 [00:00<00:00, 10347.83it/s]


Loss for epoch 82 is 0.013283976180606324


100%|██████████| 1437/1437 [00:00<00:00, 10773.58it/s]


Loss for epoch 83 is 0.013200883037572704


100%|██████████| 1437/1437 [00:00<00:00, 10580.93it/s]


Loss for epoch 84 is 0.013122778236610308


100%|██████████| 1437/1437 [00:00<00:00, 10862.74it/s]


Loss for epoch 85 is 0.013037171671042477


100%|██████████| 1437/1437 [00:00<00:00, 10613.67it/s]


Loss for epoch 86 is 0.012964063905894614


100%|██████████| 1437/1437 [00:00<00:00, 10772.48it/s]


Loss for epoch 87 is 0.01288256599279492


100%|██████████| 1437/1437 [00:00<00:00, 10913.39it/s]


Loss for epoch 88 is 0.012805876065408617


100%|██████████| 1437/1437 [00:00<00:00, 10941.94it/s]


Loss for epoch 89 is 0.01272595516924107


100%|██████████| 1437/1437 [00:00<00:00, 10873.33it/s]


Loss for epoch 90 is 0.012654328349897048


100%|██████████| 1437/1437 [00:00<00:00, 10851.28it/s]


Loss for epoch 91 is 0.012574557434963878


100%|██████████| 1437/1437 [00:00<00:00, 10850.97it/s]


Loss for epoch 92 is 0.012504225482030592


100%|██████████| 1437/1437 [00:00<00:00, 10513.40it/s]


Loss for epoch 93 is 0.012427941325789168


100%|██████████| 1437/1437 [00:00<00:00, 10596.82it/s]


Loss for epoch 94 is 0.012356064038984215


100%|██████████| 1437/1437 [00:00<00:00, 10219.17it/s]


Loss for epoch 95 is 0.012278452797192734


100%|██████████| 1437/1437 [00:00<00:00, 9557.30it/s]


Loss for epoch 96 is 0.012211524618361906


100%|██████████| 1437/1437 [00:00<00:00, 10409.34it/s]


Loss for epoch 97 is 0.01213686108873909


100%|██████████| 1437/1437 [00:00<00:00, 11016.74it/s]


Loss for epoch 98 is 0.012069334918025467


100%|██████████| 1437/1437 [00:00<00:00, 11179.89it/s]


Loss for epoch 99 is 0.011995663970131404


100%|██████████| 1437/1437 [00:00<00:00, 10946.69it/s]

Loss for epoch 100 is 0.011926086086425208





In [176]:
# One boolean per test sample: does the arg-max of the network output match
# the arg-max of the one-hot label?
predictions = [
    np.argmax(model_forward(sample, W1, W2)[3]) == np.argmax(target)
    for sample, target in zip(X_test, y_one_hot_label_test)
]

In [177]:
# Test accuracy of the NumPy network: fraction of True entries in `predictions`.
sum(np.array(predictions))/len(predictions)

0.975

### Neural Network Using Keras

In [179]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [180]:
# Keras functional-API model with the same layer sizes and activations as the
# NumPy network (Keras Dense layers also include a bias term by default).
input_layer = Input(shape=X_train.shape[1:])
hidden_layer = Dense(units=hidden_dim, activation='relu')(input_layer)
output_layer = Dense(units=output_dim, activation='sigmoid')(hidden_layer)

simple_one_hot_nn = Model(inputs=input_layer, outputs=output_layer)

In [181]:
simple_one_hot_nn.summary()

In [182]:
# Adam optimiser with element-wise binary cross-entropy on the one-hot targets.
simple_one_hot_nn.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
)

In [183]:
# Train for 100 epochs with batch_size=1 (one weight update per sample, like
# the manual loop above), reshuffling the training data every epoch.
history_oh_nn=simple_one_hot_nn.fit(X_train, y_one_hot_label_train, epochs=100, batch_size=1,shuffle=True)

Epoch 1/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 567us/step - loss: 0.4483
Epoch 2/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 498us/step - loss: 0.2703
Epoch 3/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 497us/step - loss: 0.2164
Epoch 4/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 497us/step - loss: 0.1718
Epoch 5/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 507us/step - loss: 0.1419
Epoch 6/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 489us/step - loss: 0.1200
Epoch 7/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 529us/step - loss: 0.1047
Epoch 8/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 617us/step - loss: 0.0935
Epoch 9/100
[1m1437/1437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541us/step - loss: 0.0802
Epoch 10/100
[1m1437/1437[0m [32m━━━━━━━━━━

In [192]:
# Per-class scores for every test sample. Note this rebinds `y_pred`, which
# the manual evaluation loop earlier used for a single class index.
y_pred=simple_one_hot_nn.predict(X_test)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [194]:
y_pred.shape

(360, 10)

In [196]:
# Test accuracy of the Keras model: share of samples whose highest-scoring
# class equals the position of the 1 in the one-hot label.
np.sum(np.argmax(y_pred,axis=1)==np.argmax(y_one_hot_label_test,axis=1))/len(y_pred)

0.9861111111111112