In [2]:
from typing import Dict, List, Optional, Tuple

import numpy as np
from numpy import ndarray

In [3]:
def tanh(S:ndarray)->ndarray:
    """Apply the hyperbolic tangent element-wise.

    Args:
        S: Array of pre-activation values.

    Returns:
        Array with the same shape as ``S`` holding ``tanh`` of every entry.
    """
    activated = np.tanh(S)
    return activated

In [4]:
def tanh_der(W:ndarray)->ndarray:
    """Element-wise derivative of tanh, evaluated at ``W``.

    Uses the identity d/dx tanh(x) = 1 - tanh(x)^2.

    Args:
        W: Array of points at which to evaluate the derivative.

    Returns:
        Array with the same shape as ``W``.
    """
    activation = np.tanh(W)
    squared = np.power(activation, 2)
    return 1 - squared

In [5]:
def exp(h:ndarray)->ndarray:
    """Element-wise natural exponential (thin wrapper around ``np.exp``).

    Args:
        h: Array of exponents.

    Returns:
        Array with the same shape as ``h`` holding ``e**h`` for every entry.
    """
    result = np.exp(h)
    return result

In [9]:
def Softm_1d(L:ndarray)->ndarray:
    """Softmax of a single 1-D vector of scores.

    The scores are shifted by their maximum before exponentiating. The shift
    cancels in the ratio, so the result is mathematically unchanged, but
    ``exp`` can no longer overflow to ``inf`` (which would give ``nan``).

    Args:
        L: 1-D array of scores (logits).

    Returns:
        1-D float array of the same length whose entries are positive and
        sum to 1. An empty input yields an empty array.
    """
    logits = np.asarray(L, dtype=float)
    if logits.size == 0:
        # np.max would raise on an empty vector; there is nothing to normalise.
        return np.zeros(logits.shape)
    shifted = logits - np.max(logits)
    unnormalised = np.exp(shifted)
    return unnormalised / np.sum(unnormalised)

In [8]:
def Softm_data(U:ndarray)->ndarray:
    """Column-wise softmax of a 2-D score matrix.

    Every column is treated as one vector of scores and normalised
    independently. The computation is vectorised, and each column is shifted
    by its own maximum so that ``exp`` cannot overflow; the shift cancels in
    the ratio and does not change the mathematical result.

    Args:
        U: 2-D array of scores, one vector per column.

    Returns:
        Float array with the same shape as ``U`` whose columns each sum to 1.
        An input with no elements yields an all-zero array of the same shape.
    """
    scores = np.asarray(U, dtype=float)
    if scores.size == 0:
        # Reducing over an empty axis would raise; nothing to normalise.
        return np.zeros(scores.shape)
    shifted = scores - scores.max(axis=0, keepdims=True)
    unnormalised = np.exp(shifted)
    return unnormalised / unnormalised.sum(axis=0, keepdims=True)

In [10]:
def Softm_Jac_1d(s:ndarray)->ndarray:
    """Jacobian of the softmax function at a single 1-D score vector.

    With ``p = softmax(s)`` the Jacobian is ``diag(p) - p p^T``: the diagonal
    entries are ``p_i * (1 - p_i)`` and the off-diagonal entries are
    ``-p_i * p_j``. The softmax is computed from max-shifted scores so that
    ``exp`` cannot overflow for large inputs.

    Args:
        s: 1-D array of scores (logits).

    Returns:
        Square float array of shape ``(len(s), len(s))``; it is symmetric and
        every row sums to zero.
    """
    logits = np.asarray(s, dtype=float)
    if logits.size == 0:
        return np.zeros((0, 0))
    unnormalised = np.exp(logits - logits.max())
    probs = unnormalised / unnormalised.sum()
    return np.diag(probs) - np.outer(probs, probs)

In [11]:
def sigmoid(W:ndarray)->ndarray:
    """Element-wise logistic sigmoid ``1 / (1 + e**-W)``.

    Evaluated as ``exp(-log(1 + e**-W))`` through ``np.logaddexp``, which
    never forms ``e**-W`` directly. The naive expression overflows (with a
    RuntimeWarning) for large negative inputs; this form does not.

    Args:
        W: Array of pre-activation values.

    Returns:
        Array with the same shape as ``W`` with entries in ``[0, 1]``.
    """
    # logaddexp(0, -W) == log(e**0 + e**-W) == log(1 + e**-W), computed stably.
    log_denominator = np.logaddexp(0, -W)
    return np.exp(-log_denominator)

In [12]:
def sigmoid_der(h:ndarray)->ndarray:
    """Element-wise derivative of the logistic sigmoid, evaluated at ``h``.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)). The sigmoid is
    evaluated once and reused rather than being recomputed for each factor.

    Args:
        h: Array of points at which to evaluate the derivative.

    Returns:
        Array with the same shape as ``h``.
    """
    activation = sigmoid(h)
    return activation * (1 - activation)

In [51]:
def log(n:ndarray)->ndarray:
    """Element-wise natural logarithm (thin wrapper around ``np.log``).

    Args:
        n: Array of values.

    Returns:
        Array with the same shape as ``n`` holding ``ln`` of every entry.
    """
    result = np.log(n)
    return result

In [62]:
def Cross_Entropy_2d(y_pred:ndarray,y_obs:ndarray)->float:
    """Element-wise binary cross-entropy summed over all entries.

    Computes ``-sum(y*log(p) + (1-y)*log(1-p))`` over every entry of the two
    matrices and divides by the number of columns of ``y_pred``.

    Predictions are clipped to ``[eps, 1 - eps]`` (machine epsilon) before the
    logarithm, so a prediction of exactly 0 or 1 gives a large finite loss
    instead of ``inf``/``nan``.

    Args:
        y_pred: 2-D array of predicted probabilities.
        y_obs: 2-D array of observed targets with the same shape as ``y_pred``.

    Returns:
        The loss as a Python float.

    Raises:
        ValueError: If ``y_pred`` and ``y_obs`` have different shapes.
    """
    pred = np.asarray(y_pred, dtype=float)
    obs = np.asarray(y_obs, dtype=float)
    if pred.shape != obs.shape:
        raise ValueError(
            "y_pred and y_obs must have the same shape, got %s and %s"
            % (pred.shape, obs.shape)
        )
    eps = np.finfo(float).eps
    # Keep both log arguments strictly positive.
    pred = np.clip(pred, eps, 1.0 - eps)
    total = np.sum(obs * np.log(pred) + (1.0 - obs) * np.log(1.0 - pred))
    return float(-total / pred.shape[1])

In [16]:
def Cross_Entropy_Grad(y_obs:ndarray,y_pred:ndarray)->ndarray:
    """Gradient of the binary cross-entropy loss with respect to the predictions.

    For every entry this is ``-(y/p + (y-1)/(1-p))`` divided by the number of
    columns of ``y_obs``. Note the argument order: observations first, then
    predictions.

    Predictions are clipped to ``[eps, 1 - eps]`` (machine epsilon) so that an
    entry of exactly 0 or 1 cannot cause a division by zero.

    Args:
        y_obs: 2-D array of observed targets.
        y_pred: 2-D array of predicted probabilities, same shape as ``y_obs``.

    Returns:
        Float array with the same shape as the inputs.

    Raises:
        ValueError: If ``y_obs`` and ``y_pred`` have different shapes.
    """
    obs = np.asarray(y_obs, dtype=float)
    pred = np.asarray(y_pred, dtype=float)
    if obs.shape != pred.shape:
        raise ValueError(
            "y_obs and y_pred must have the same shape, got %s and %s"
            % (obs.shape, pred.shape)
        )
    eps = np.finfo(float).eps
    # Keep both denominators away from zero.
    pred = np.clip(pred, eps, 1.0 - eps)
    per_entry = obs / pred + (obs - 1.0) / (1.0 - pred)
    return -per_entry / obs.shape[1]

In [85]:
def Cross_Entropy_tensor(y_pred:ndarray,y_obs:ndarray)->float:
    """Vectorised binary cross-entropy summed over all entries.

    Computes ``-sum(y*log(p) + (1-y)*log(1-p))`` over every entry and divides
    by the number of columns of ``y_obs``.

    Predictions are clipped to ``[eps, 1 - eps]`` (machine epsilon) before the
    logarithm, so a prediction of exactly 0 or 1 gives a large finite loss
    instead of ``inf``/``nan``.

    Args:
        y_pred: 2-D array of predicted probabilities.
        y_obs: 2-D array of observed targets, broadcast-compatible with
            ``y_pred``.

    Returns:
        The scalar loss.
    """
    eps = np.finfo(float).eps
    # Keep both log arguments strictly positive.
    pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1.0 - eps)
    log_likelihood = y_obs * np.log(pred) + (1 - y_obs) * np.log(1.0 - pred)
    return -np.sum(log_likelihood) / (y_obs.shape[1])

In [19]:
def initializing_weights(dim_inp:int,dim_hid_1:int,dim_hid_2:int,dim_out:int)->Dict[str,ndarray]:
    """Draw standard-normal initial parameters for a three-layer network.

    For layer ``k`` (1, 2, 3) the weight matrix ``W{k}`` has shape
    ``(fan_out, fan_in)`` and the bias ``B{k}`` has shape ``(fan_out, 1)``.
    Values come from ``np.random.randn``, drawn in the order
    W1, B1, W2, B2, W3, B3.

    Args:
        dim_inp: Number of input features.
        dim_hid_1: Width of the first hidden layer.
        dim_hid_2: Width of the second hidden layer.
        dim_out: Number of outputs.

    Returns:
        Dict with keys 'W1', 'B1', 'W2', 'B2', 'W3', 'B3'.
    """
    layer_sizes = [dim_inp, dim_hid_1, dim_hid_2, dim_out]
    params:Dict[str,ndarray] = {}
    for layer, (fan_in, fan_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:]), start=1):
        # Weight first, then bias, so the random stream is consumed in the documented order.
        params['W' + str(layer)] = np.random.randn(fan_out, fan_in)
        params['B' + str(layer)] = np.random.randn(fan_out, 1)
    return params

In [64]:
def L(S:ndarray,y:ndarray,weights:Dict[str,ndarray])->Tuple[float,Dict[str,ndarray]]:
    """Forward pass of the three-layer network, returning loss and intermediates.

    Pipeline: affine (W1, B1) -> tanh -> affine (W2, B2) -> sigmoid ->
    affine (W3, B3) -> column-wise softmax -> ``Cross_Entropy_tensor``.

    Args:
        S: Input matrix passed to the first ``np.dot`` as the right operand.
        y: Targets handed to ``Cross_Entropy_tensor`` together with the
            softmax output.
        weights: Dict with keys 'W1', 'B1', 'W2', 'B2', 'W3', 'B3'.

    Returns:
        ``(loss, data)`` where ``data`` maps 'S', 'y' and 'g1'..'g9' to the
        inputs and every intermediate value, in computation order.
    """
    W1, B1 = weights['W1'], weights['B1']
    W2, B2 = weights['W2'], weights['B2']
    W3, B3 = weights['W3'], weights['B3']

    # Layer 1: affine map followed by tanh.
    lin1 = np.dot(W1, S)
    pre1 = lin1 + B1
    act1 = tanh(pre1)

    # Layer 2: affine map followed by sigmoid.
    lin2 = np.dot(W2, act1)
    pre2 = lin2 + B2
    act2 = sigmoid(pre2)

    # Layer 3: affine map followed by a softmax over each column.
    lin3 = np.dot(W3, act2)
    pre3 = lin3 + B3
    probs = Softm_data(pre3)

    loss = Cross_Entropy_tensor(probs, y)

    data:Dict[str,ndarray] = dict(
        S=S, y=y,
        g1=lin1, g2=pre1, g3=act1,
        g4=lin2, g5=pre2, g6=act2,
        g7=lin3, g8=pre3, g9=probs,
    )
    return loss, data

In [95]:
def grads(data:Dict[str,ndarray],weights:Dict[str,ndarray])->Dict[str,ndarray]:
    """Backpropagate the loss through the three-layer network.

    Args:
        data: Intermediates produced by ``L``; the keys 'S', 'y', 'g2', 'g3',
            'g5', 'g6', 'g8' and 'g9' are read.
        weights: Current parameters; 'W2' and 'W3' are used in the chain rule,
            'B1'..'B3' only for their shapes.

    Returns:
        Dict with keys 'W1', 'B1', 'W2', 'B2', 'W3', 'B3' holding the gradient
        of the loss with respect to each parameter.
    """
    S, y = data['S'], data['y']
    pre1, act1 = data['g2'], data['g3']
    pre2, act2 = data['g5'], data['g6']
    pre3, probs = data['g8'], data['g9']
    W2, W3 = weights['W2'], weights['W3']
    B1, B2, B3 = weights['B1'], weights['B2'], weights['B3']

    # Multiplying by a vector of ones sums a gradient over the columns of the batch.
    ones = np.repeat([1], S.shape[1])

    # Loss -> softmax output -> softmax input, one column at a time.
    d_probs = Cross_Entropy_Grad(y, probs)
    d_pre3 = np.zeros([pre3.shape[0], pre3.shape[1]])
    for col in range(pre3.shape[1]):
        jacobian = Softm_Jac_1d(pre3[:, col])
        d_pre3[:, col] = np.dot(jacobian, d_probs[:, col])

    # Output layer parameters.
    d_W3 = np.dot(d_pre3, act2.transpose())
    d_B3 = np.dot(d_pre3, ones).reshape(B3.shape[0], B3.shape[1])

    # Back through the sigmoid layer.
    d_pre2 = np.dot(W3.transpose(), d_pre3) * sigmoid_der(pre2)
    d_W2 = np.dot(d_pre2, act1.transpose())
    d_B2 = np.dot(d_pre2, ones).reshape(B2.shape[0], B2.shape[1])

    # Back through the tanh layer.
    d_pre1 = np.dot(W2.transpose(), d_pre2) * tanh_der(pre1)
    d_W1 = np.dot(d_pre1, S.transpose())
    d_B1 = np.dot(d_pre1, ones).reshape(B1.shape[0], B1.shape[1])

    gradients:Dict[str,ndarray] = {
        'W1': d_W1, 'B1': d_B1,
        'W2': d_W2, 'B2': d_B2,
        'W3': d_W3, 'B3': d_B3,
    }
    return gradients
    
    

In [24]:
def predict(S:ndarray,weights:Dict[str,ndarray])->ndarray:
    """Run the forward pass and return the softmax output.

    Applies the same layer sequence as ``L`` (affine -> tanh -> affine ->
    sigmoid -> affine -> column-wise softmax) without computing a loss.

    Args:
        S: Input matrix passed to the first ``np.dot`` as the right operand.
        weights: Dict with keys 'W1', 'B1', 'W2', 'B2', 'W3', 'B3'.

    Returns:
        The output of ``Softm_data`` applied to the last affine layer.
    """
    hidden_1 = tanh(np.dot(weights['W1'], S) + weights['B1'])
    hidden_2 = sigmoid(np.dot(weights['W2'], hidden_1) + weights['B2'])
    scores = np.dot(weights['W3'], hidden_2) + weights['B3']
    probabilities = Softm_data(scores)
    return probabilities

In [25]:
def permute_data(S:ndarray, w:ndarray):
    """Shuffle the columns of two arrays with one shared random permutation.

    Args:
        S: 2-D array; its column count determines the permutation length.
        w: 2-D array whose columns are reordered with the same permutation.

    Returns:
        Tuple ``(S_shuffled, w_shuffled)``; the inputs are not modified.
    """
    n_columns = S.shape[1]
    order = np.random.permutation(n_columns)
    shuffled_S = S[:, order]
    shuffled_w = w[:, order]
    return shuffled_S, shuffled_w

In [86]:
def generate_batch(S:ndarray,y:ndarray,start:int=0, batch_size:int = 10)->Tuple[ndarray,ndarray]:
    """Slice one mini-batch of columns out of ``S`` and ``y``.

    Args:
        S: 2-D array; batches are taken along its second axis.
        y: 2-D array sliced with the same column range as ``S``.
        start: Index of the first column of the batch.
        batch_size: Requested number of columns. The batch is shortened when
            fewer than ``batch_size`` columns remain after ``start``.

    Returns:
        Tuple ``(S_batch, y_batch)``.

    Raises:
        AssertionError: If ``S`` or ``y`` is not 2-D.
    """
    assert S.ndim == 2 and len(y.shape) == 2

    # Never read past the last column of S.
    stop = min(start + batch_size, S.shape[1])
    columns = slice(start, stop)

    return S[:, columns], y[:, columns]

In [96]:
def train(S:ndarray,y:ndarray,n_iter:int,lr:float = .01,dim_hid_1:int=20,dim_hid_2:int=50,batch_size:int=100,
         return_obj:bool = False,return_weights:bool = True, seed:Optional[int]=91221)->Optional[Tuple[List[float],Dict[str,ndarray]]]:
    """Train the three-layer network with mini-batch gradient descent.

    Batches are consecutive column slices of ``S`` and ``y``. Once the end of
    the data is reached, the columns are reshuffled (on local copies; the
    caller's arrays are untouched) and slicing restarts from column 0.

    Args:
        S: Input matrix with one sample per column.
        y: Target matrix with one sample per column.
        n_iter: Number of gradient steps (one batch per step).
        lr: Learning rate.
        dim_hid_1: Width of the first hidden layer.
        dim_hid_2: Width of the second hidden layer.
        batch_size: Number of columns per batch; must be positive.
        return_obj: If True, record the loss of every batch.
        return_weights: If True, return ``(losses, weights)``; otherwise
            return None (any recorded losses are then discarded).
        seed: Seed for NumPy's global random state, or None to leave the
            state untouched. ``seed=0`` is honoured as a real seed.

    Returns:
        ``(losses, weights)`` when ``return_weights`` is True, where ``losses``
        is empty unless ``return_obj`` is True; otherwise None.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size < 1:
        # A non-positive batch size would feed empty batches forever.
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    # Compare against None so that seed=0 is not silently ignored.
    if seed is not None:
        np.random.seed(seed)

    weights = initializing_weights(S.shape[0], dim_hid_1, dim_hid_2, y.shape[0])

    losses:List[float] = []
    start = 0

    for _ in range(n_iter):
        if start >= S.shape[1]:
            # Epoch finished: reshuffle and start again from the first column.
            S, y = permute_data(S, y)
            start = 0
        S_batch, y_batch = generate_batch(S, y, start, batch_size)
        start += batch_size

        loss, forward_data = L(S_batch, y_batch, weights)
        if return_obj:
            losses.append(loss)

        loss_grads = grads(forward_data, weights)
        for key in weights:
            weights[key] -= lr * loss_grads[key]

    if return_weights:
        return losses, weights

    return None
        
        
        
        
        
        

In [69]:
from keras.datasets import mnist
from keras.utils import to_categorical

In [70]:
# Load MNIST through keras as (images, labels) pairs for the train and test splits.
(train_images,train_labels),(test_images,test_labels) = mnist.load_data()

In [71]:
train_images.shape

(60000, 28, 28)

In [72]:
test_labels.shape

(10000,)

In [73]:
train_images[1]

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,  51, 159, 253, 159,  50,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,  48, 238, 252, 252, 252, 237,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  

In [74]:
# Flatten every image into one row vector. The sample count is read from the
# array itself and -1 lets NumPy infer the pixel count, so this cell does not
# hard-code the dataset size or the image resolution.
train_images = train_images.reshape(train_images.shape[0], -1)
test_images = test_images.reshape(test_images.shape[0], -1)

In [75]:
train_images.shape

(60000, 784)

In [76]:
test_images.shape

(10000, 784)

In [77]:
# Convert pixels to float32 and divide by 255 (presumably mapping 8-bit intensities to [0, 1]).
# NOTE: this cell overwrites its own inputs, so re-running it divides by 255 again.
train_images = train_images.astype('float32')/255
test_images = test_images.astype('float32')/255

In [78]:
train_images[1]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [79]:
# One-hot encode the class labels with keras' to_categorical.
# NOTE: this cell overwrites its own inputs, so re-running it would encode the already-encoded labels.
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)   # one-hot encode the test labels as well

In [83]:
train_labels

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]], dtype=float32)

In [106]:
# Train the network. The transposes put one sample per column, which is the axis
# generate_batch slices along. return_obj=True also records the loss of every batch,
# so train_info is a (losses, weights) tuple.
train_info = train(train_images.transpose(),train_labels.transpose(),n_iter =500,lr =.001,dim_hid_1= 30,dim_hid_2 = 50,
                   batch_size = 20,return_obj=True)


In [107]:
train_info

([11.993847517921774,
  11.724371577440461,
  10.998954938252405,
  9.926526922282658,
  11.6563193853811,
  10.233073020457738,
  10.482377287865509,
  10.752293078746826,
  12.920441236486681,
  9.75720119448422,
  10.497141937813986,
  11.409257814228196,
  9.441994702199873,
  9.033995611605649,
  11.777250106131072,
  8.776204517980792,
  11.64661055776681,
  10.098857233915265,
  13.139866908381128,
  8.980653242061951,
  10.288383697961937,
  11.014968116020725,
  8.60360746454034,
  11.623421234926168,
  11.588687446295898,
  9.3235984087955,
  9.135908967437972,
  9.006939570532728,
  10.654254580495092,
  11.453033805242665,
  11.51503186628286,
  11.268722211181844,
  11.640782431530607,
  12.28142056710363,
  8.91849447028903,
  11.091497622694991,
  12.72355342511789,
  10.004439650347944,
  9.601734029232668,
  10.251854349562324,
  10.144816877686825,
  9.419644634588025,
  12.28535417482632,
  8.864348299216596,
  11.347693084515223,
  10.555161200798835,
  10.363127580

In [105]:
train_info[1]

{'W1': array([[ 5.57251073e-01, -1.94113741e+00,  2.40280228e+00, ...,
          1.41535720e-01, -1.58617228e+00,  4.57807598e-01],
        [ 8.23588838e-01, -1.00194735e-01,  1.12247437e+00, ...,
          1.13627858e+00, -1.19514034e+00,  9.68584608e-01],
        [-4.92055924e-01, -4.96350446e-01,  5.39519631e-01, ...,
          6.63408710e-01,  8.51959482e-02,  7.82261671e-01],
        ...,
        [ 1.27597126e-02, -6.40333103e-01, -1.45490272e+00, ...,
         -6.69001830e-01, -1.78408790e-03,  5.28270344e-01],
        [ 7.40276214e-01,  1.25463897e+00, -5.20816446e-01, ...,
          2.60405076e+00, -4.89253337e-01, -4.55959264e-01],
        [-5.14429570e-02, -5.65775487e-01,  9.59626778e-01, ...,
          1.58481440e+00, -1.62882174e+00, -1.62990769e+00]]),
 'B1': array([[ 1.49880645],
        [-0.77066253],
        [ 0.89259681],
        [-0.40950564],
        [-0.61664344],
        [ 0.0568528 ],
        [ 0.70998343],
        [-0.06835569],
        [ 0.58701007],
        [-

In [109]:
train_info[1]

{'W1': array([[ 5.57251073e-01, -1.94113741e+00,  2.40280228e+00, ...,
          1.41535720e-01, -1.58617228e+00,  4.57807598e-01],
        [ 8.23588838e-01, -1.00194735e-01,  1.12247437e+00, ...,
          1.13627858e+00, -1.19514034e+00,  9.68584608e-01],
        [-4.92055924e-01, -4.96350446e-01,  5.39519631e-01, ...,
          6.63408710e-01,  8.51959482e-02,  7.82261671e-01],
        ...,
        [ 1.27597126e-02, -6.40333103e-01, -1.45490272e+00, ...,
         -6.69001830e-01, -1.78408790e-03,  5.28270344e-01],
        [ 7.40276214e-01,  1.25463897e+00, -5.20816446e-01, ...,
          2.60405076e+00, -4.89253337e-01, -4.55959264e-01],
        [-5.14429570e-02, -5.65775487e-01,  9.59626778e-01, ...,
          1.58481440e+00, -1.62882174e+00, -1.62990769e+00]]),
 'B1': array([[ 1.49880645],
        [-0.77066253],
        [ 0.89259681],
        [-0.40950564],
        [-0.61664344],
        [ 0.0568528 ],
        [ 0.70998343],
        [-0.06835569],
        [ 0.58701007],
        [-

In [110]:
# train() returned (losses, weights); keep the trained weights dict for prediction.
weights = train_info[1]

In [112]:
# Softmax outputs for the training images, one column per sample.
y_train_pred = predict(train_images.transpose(),weights)

In [127]:
y_train_pred[:,1]

array([2.46619443e-03, 5.75795282e-02, 8.27493872e-02, 4.11248326e-02,
       2.50855930e-01, 3.55483605e-01, 1.26442374e-03, 5.24093536e-02,
       1.55883313e-01, 1.83432690e-04])

In [128]:
max(y_train_pred[:,1])

0.3554836052289853

In [129]:
np.argmax(y_train_pred[:,1])

5

In [133]:
# Softmax outputs for the test images, one column per sample.
y_test_pred = predict(test_images.transpose(),weights)

In [136]:
y_test_pred.shape


(10, 10000)

In [137]:
test_images.shape


(10000, 784)

In [144]:
# Loss on the test split; the labels are transposed to one column per sample to match y_test_pred.
Cross_Entropy_tensor(y_test_pred,test_labels.transpose())

6.932217526297114