In [None]:
!pip install idx2numpy




In [None]:
import numpy as np
import idx2numpy

# Load the MNIST IDX files and flatten each 28x28 image into a 784-long row.
train_data = idx2numpy.convert_from_file('/content/train-images-idx3-ubyte').reshape(60000, 28*28)
train_label = idx2numpy.convert_from_file('/content/train-labels-idx1-ubyte')
test_data = idx2numpy.convert_from_file('/content/t10k-images-idx3-ubyte').reshape(10000, 28*28)
test_label = idx2numpy.convert_from_file('/content/t10k-labels-idx1-ubyte')

# Sanity check: one shape per line, in train/test, data/label order.
print(train_data.shape, train_label.shape, test_data.shape, test_label.shape, sep='\n')

(60000, 784)
(60000,)
(10000, 784)
(10000,)


In [None]:
def ReLU(x):
  """Rectified linear unit of a single value: x when x is positive, otherwise 0."""
  if x > 0:
    return x
  return 0

In [None]:
def ReLUAll(a):
  """
  Element-wise ReLU of an array.
  a is a numpy array of any shape; it is left unmodified.
  returns a new array of the same shape and dtype with negative entries replaced by 0
  """
  # Vectorised: works for column vectors and for wider matrices alike, without
  # a Python-level loop over rows. NaN entries propagate to the result.
  return np.maximum(a, 0)

In [None]:
def softmax(v):
  """
  Numerically stable softmax along the first axis.
  v is a numpy array of scores (1-D vector or column(s)); it is left unmodified.
  returns an array of the same shape whose entries along axis 0 are positive and sum to 1
  """
  logits = np.asarray(v)
  if not np.issubdtype(logits.dtype, np.floating):
    # Integer inputs (e.g. uint8) would wrap around on the subtraction below.
    logits = logits.astype(float)
  # Subtracting the maximum leaves the result unchanged mathematically but keeps
  # np.exp from overflowing to inf (which would turn the output into nan).
  shifted = logits - np.max(logits, axis=0, keepdims=True)
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis=0, keepdims=True)

In [None]:
def genWB(size, n = 1):
  """
  generates the weights and biases for n hidden layers plus the final (output) layer.
  Each hidden layer is size x size; the output layer is 10 x size. The default n = 1
  gives one hidden layer; n = 0 gives a direct input-output mapping.
  Weights are drawn from a standard normal scaled by sqrt(1 / size); biases start at zero.
  returns w, b (weight, bias): 1-D object arrays of length n + 1 holding one matrix per layer
  """
  layer_shapes = [(size, size)] * n + [(10, size)]

  # The per-layer matrices have different shapes, so they are stored in explicit
  # object arrays. Calling np.array() on such a ragged list raises ValueError on
  # recent NumPy releases.
  w = np.empty(len(layer_shapes), dtype=object)
  b = np.empty(len(layer_shapes), dtype=object)
  for i, (rows, cols) in enumerate(layer_shapes):
    w[i] = np.random.randn(rows, cols) * np.sqrt(1.0 / cols)
    b[i] = np.zeros((rows, 1))
  return w, b

In [None]:
def CEE(y, target):
  """
  Cross-entropy error: the negative natural log of y, picked out at index `target`.
  """
  negative_log = np.negative(np.log(y))
  return negative_log[target]

In [None]:
def CELF(y, target):
  """
  Cross-entropy loss: adds up the entries that CEE returns for (y, target).
  """
  per_target_losses = CEE(y, target)
  return sum(per_target_losses, 0)

In [None]:
def forward(x, w, b):
  """
  Affine step of one layer: reshapes a copy of x into a column and returns w @ x + b.
  """
  column = x.copy().reshape(x.shape[0], 1)
  return w @ column + b

In [None]:
def activation(a, t):
  """
  Applies an activation to a. A truthy t selects ReLU, a falsy t selects softmax.
  """
  if not t:
    return softmax(a)
  return ReLUAll(a)

In [None]:
def forwardpass(x, w, b):
  """
  Runs x through every layer of the network.
  x is the input column; w and b hold one weight matrix / bias column per layer
  (hidden layers first, output layer last).
  Hidden layers use ReLU, the output layer uses softmax.
  returns z, a 1-D object array: z[0] is x, z[i] is the output of layer i, z[-1] is the softmax output
  """
  outputs = [x]
  current = x
  for i in range(w.shape[0] - 1):
    # Feed each hidden layer the previous layer's output, not the raw input.
    current = activation(forward(current, w[i], b[i]), 1)
    outputs.append(current)
  # With no hidden layers `current` is still x, so the output layer acts on the input directly.
  outputs.append(activation(forward(current, w[-1], b[-1]), 0))

  # Layer outputs differ in length, so store them in an explicit object array;
  # np.array() on a ragged list raises ValueError on recent NumPy releases.
  z = np.empty(len(outputs), dtype=object)
  for i, layer_out in enumerate(outputs):
    z[i] = layer_out
  return z

In [None]:
def genError(z, w, b, target):
  """
  Back-propagates the output error through the hidden layers.
  z is the per-layer output from the forward pass (z[0] input, z[-1] softmax output),
  w holds the per-layer weight matrices, target is the index of the correct class.
  b is accepted for signature compatibility but is not used.
  returns a 1-D object array of error columns ordered from the output layer back to
  the first hidden layer
  """
  deltas = []
  # Softmax + cross-entropy: the output error is (prediction - one-hot target).
  delta = z[-1].copy()
  delta[target] -= 1
  deltas.append(delta)
  for i in range(w.shape[0] - 1, 0, -1):
    # ReLU derivative is 1 where the layer output is positive and 0 elsewhere.
    # Multiplying by the activation value itself would scale the gradient by z[i].
    # NOTE: this changes the size of hidden-layer gradients, so a learning rate
    # tuned against the previous scaling may need revisiting.
    delta = (w[i].T @ delta) * (z[i] > 0)
    deltas.append(delta)

  # Error columns differ in length per layer, so store them in an explicit object
  # array; np.array() on a ragged list raises ValueError on recent NumPy releases.
  error = np.empty(len(deltas), dtype=object)
  for k, layer_delta in enumerate(deltas):
    error[k] = layer_delta
  return error

In [None]:
def gradient(error, z):
  """
  Turns per-layer errors into weight and bias gradients.
  error holds the error columns ordered from the output layer backwards (as produced
  by genError); z holds the per-layer outputs of the forward pass (z[0] is the input).
  returns gw, gb: 1-D object arrays ordered from the first layer to the output layer,
  where gw[i] = error_i @ z[i].T and gb[i] = error_i
  """
  n_layers = error.shape[0]
  # Per-layer gradients have different shapes, so store them in explicit object
  # arrays; np.array() on a ragged list raises ValueError on recent NumPy releases.
  gw = np.empty(n_layers, dtype=object)
  gb = np.empty(n_layers, dtype=object)
  for i in range(n_layers):
    # Walk `error` back to front along the layer axis only, so a stacked
    # (same-shape) error array is not reversed along its other axes as well.
    delta = error[n_layers - 1 - i]
    gw[i] = delta @ z[i].T
    gb[i] = delta
  return gw, gb

In [None]:
def gen_mBatches(X, Y, batch_size):
    """
    Shuffles the samples and splits them into mini-batches.
    X holds one sample per row, Y holds one label per sample (1-D or a single column).
    batch_size is the number of samples per batch; the last batch may be smaller.
    returns a list of (X_mini, Y_mini) tuples, with Y_mini reshaped to a column.
    Never returns an empty batch.
    raises ValueError if batch_size is smaller than 1
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    n_samples = X.shape[0]
    # Shuffle row indices rather than a stacked copy of (X, Y): this keeps rows
    # and labels paired while preserving each array's own dtype.
    order = np.random.permutation(n_samples)

    mini_batches = []
    # Stepping by batch_size stops exactly at n_samples, so no empty trailing
    # batch is produced when the sample count divides evenly.
    for start in range(0, n_samples, batch_size):
        picked = order[start:start + batch_size]
        X_mini = X[picked]
        Y_mini = Y[picked].reshape((-1, 1))
        mini_batches.append((X_mini, Y_mini))
    return mini_batches

In [None]:
def cycle(x, w, b, target):
  """
  One forward + backward pass for a single sample.
  Reshapes a copy of x into a column, runs forwardpass, derives the per-layer
  errors with genError and converts them with gradient.
  returns gw, gb (weight gradients, bias gradients)
  """
  column = x.copy().reshape(x.shape[0], 1)
  z = forwardpass(column, w, b)
  return gradient(genError(z, w, b, target), z)

In [None]:
def minisgd(x, y, w, b, epochnum, batchsize, learn_rate):
  """
  Mini-batch stochastic gradient descent.
  For each of epochnum epochs the data is split with gen_mBatches; within a batch the
  per-sample gradients from cycle are summed, then w and b are moved against the summed
  gradient scaled by learn_rate / (number of samples in the batch). Empty batches are skipped.
  returns w, b (the updated weights and biases)
  """
  # `w - w` / `b - b` produce zero-filled accumulators with the same layout as w / b.
  acc_w = w - w
  acc_b = b - b
  for _ in range(epochnum):
    for batch_x, batch_y in gen_mBatches(x, y, batchsize):
      n_in_batch = batch_x.shape[0]
      if n_in_batch == 0:
        continue
      for sample, label in zip(batch_x, batch_y):
        gw, gb = cycle(sample, w, b, label)
        acc_w = acc_w + gw
        acc_b = acc_b + gb
      step = learn_rate / n_in_batch
      w = w - (step * acc_w)
      b = b - (step * acc_b)
      # Reset the accumulators for the next batch.
      acc_w = w - w
      acc_b = b - b
  return w, b

In [None]:
# Train the network on the training set, then report accuracy on a slice of the test set.
if __name__ == '__main__':
  # One hidden layer whose width equals the length of a flattened input row.
  w, b = genWB(train_data[0].shape[0],1)
  # NOTE(review): very small learning rate — presumably tuned for the inputs as
  # loaded (no normalisation is applied in this cell); confirm before changing.
  lr = 0.0000001
  # Labels as a column (n, 1), the layout minisgd passes on to gen_mBatches.
  train_copy = train_label.copy()
  train_copy = train_copy.reshape(train_copy.shape[0], 1)
  # 2 epochs, mini-batches of 10 samples.
  fw, fb = minisgd(train_data, train_copy, w, b, 2, 10, lr)
  # Evaluate on the first 1000 test samples only.
  ppp = test_data[:1000]
  qqq = test_label[:1000]
  correct = 0
  tot = ppp.shape[0]
  for itm in range(tot):
    # Reshape the sample into a column before the forward pass.
    a = ppp[itm].copy()
    a = a.reshape(a.shape[0],1)
    p = forwardpass(a, fw, fb)
    # p[-1] is the final (softmax) output; its largest entry is the predicted class.
    choice = np.argmax(p[-1])
    if choice == qqq[itm]:
      correct += 1
  # Fraction of the evaluated samples classified correctly.
  print("==hit rate==")
  print(correct/tot)
  


  # This is added back by InteractiveShellApp.init_path()
  # Remove the CWD from sys.path while we load stuff.


==hit rate==
0.865
