In [3]:
import pickle
import gzip
import numpy as np
import matplotlib.pyplot as plt
import sys

In [4]:

def load_data():
    """Load the pickled MNIST splits from ``mnist.pkl.gz`` in the working directory.

    Returns:
        tuple: ``(training_data, validation_data, test_data)`` exactly as they
        were unpickled from the archive.

    Raises:
        FileNotFoundError: if ``mnist.pkl.gz`` does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load an archive obtained from a trusted source.
    # The context manager closes the handle even when unpickling/unpacking fails
    # (the previous explicit close() was skipped on any exception).
    with gzip.open('mnist.pkl.gz', 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    return (training_data, validation_data, test_data)

def load_data_wrapper(n_train=15000, n_test=3000):
    """Reshape the raw MNIST splits into 28x28 images paired with their labels.

    Args:
        n_train: number of leading training samples to keep (default 15000,
            the previously hard-coded value). ``None`` keeps all of them.
        n_test: number of leading test samples to keep (default 3000, the
            previously hard-coded value). ``None`` keeps all of them.

    Returns:
        tuple: ``(training_data, validation_data, test_data)``. Each element is
        a ``zip`` of ``(image, label)`` pairs, i.e. a single-pass iterator --
        wrap it in ``list(...)`` if it has to be traversed more than once.
        Training and test labels are one-hot columns from ``vectorized_result``;
        validation labels are left exactly as stored in the archive.
    """
    tr_d, va_d, te_d = load_data()

    training_inputs = [np.reshape(x, (28, 28)) for x in tr_d[0][:n_train]]
    print(f"train- {len(training_inputs)}")
    training_results = [vectorized_result(y) for y in tr_d[1][:n_train]]
    training_data = zip(training_inputs, training_results)

    # The validation split is used in full and keeps its raw labels.
    validation_inputs = [np.reshape(x, (28, 28)) for x in va_d[0]]
    print(f'valid- {len(validation_inputs)}')
    validation_data = zip(validation_inputs, va_d[1])

    test_inputs = [np.reshape(x, (28, 28)) for x in te_d[0][:n_test]]
    print(f"test- {len(test_inputs)}")
    test_results = [vectorized_result(y) for y in te_d[1][:n_test]]
    test_data = zip(test_inputs, test_results)

    return (training_data, validation_data, test_data)

def vectorized_result(j):
    """Return a (10, 1) float column holding 1.0 at row ``j`` and 0.0 elsewhere."""
    one_hot = np.zeros(shape=(10, 1))
    one_hot[j] = 1.0
    return one_hot

In [5]:
# Materialise only the third element (the test split) of the loader's result as a list.
a = list(load_data_wrapper()[2])

train- 15000
valid- 10000
test- 3000


In [15]:
import numpy as np
from scipy.signal import correlate2d
from scipy.signal import convolve

In [16]:
from sklearn import preprocessing as pp
class Convolution:
    """Single-channel 2-D cross-correlation layer that accumulates gradients per batch.

    Attributes:
        weights: array of shape (num_filters, size[0], size[1]).
        biases: array of shape (num_filters, out_h, out_w), one bias per output pixel.
        out_cache: result of the most recent forward pass (None before the first one).
        chg_wt, chg_bs: gradient sums accumulated by ``Bwd_Correlation`` and
            consumed (then zeroed) by ``Conv_update``.
        Vw, Vb, Sw, Sb: placeholders for momentum / RMSprop state; not used yet.
    """

    def __init__(self, num_filters: int, size: tuple, input_shape: tuple = (28, 28)):
        """Create the filters and biases.

        Args:
            num_filters: number of filters.
            size: (height, width) of each filter.
            input_shape: (height, width) of the 2-D input. The default (28, 28)
                with a 3x3 filter gives the previously hard-coded 26x26 bias maps.
        """
        limit = np.sqrt(2.0 / (1 + num_filters))
        # The random draw is kept so the RNG stream seen by later code is unchanged.
        self.weights = np.random.randn(num_filters, size[0], size[1]) * limit
        # NOTE(review): this overrides the random initialisation above with a
        # constant 1/9 in every position, so all filters start identical --
        # confirm this is intentional and not a debugging leftover.
        self.weights = np.ones_like(self.weights) / 9
        # 'valid' correlation shrinks each axis by (filter extent - 1).
        out_h = input_shape[0] - size[0] + 1
        out_w = input_shape[1] - size[1] + 1
        self.biases = np.random.randn(num_filters, out_h, out_w) * limit
        self.out_cache = None
        self.Vw = False
        self.Vb = False  # Momentum
        self.Sw = False
        self.Sb = False  # Rmsprop
        self.chg_wt = np.zeros_like(self.weights)
        self.chg_bs = np.zeros_like(self.biases)

    def Fwd_Correlation(self, x):
        """Correlate ``x`` with every filter, add the bias and cache the result.

        The output is stored in ``self.out_cache`` with shape
        (num_filters, out_h, out_w); nothing is returned.

        Args:
            x: 2-D input array.
        """
        std = pp.StandardScaler()
        out = []
        for kernel, bias in zip(self.weights, self.biases):
            # NOTE(review): fit_transform is applied to each 2-D map; per the
            # scikit-learn docs StandardScaler standardises column by column.
            # Bwd_Correlation does not back-propagate through this scaling.
            out.append(std.fit_transform(correlate2d(x, kernel, mode='valid') + bias))
        self.out_cache = np.array(out)
        return

    def Bwd_Correlation(self, err, x):
        """Accumulate weight and bias gradients for one sample.

        Args:
            err: gradient w.r.t. the layer output, indexed by filter along the
                first axis; each ``err[i]`` has the shape of one output map.
            x: the 2-D input that was passed to the forward pass.
        """
        for i in range(len(err)):
            # Forward is out[a, b] = sum_mn x[a+m, b+n] * w[m, n], hence
            # dL/dw[m, n] = sum_ab err[a, b] * x[a+m, b+n]: a cross-correlation
            # of x with err. (convolve would flip err and give a wrong gradient.)
            self.chg_wt[i] += correlate2d(x, err[i], mode='valid')
            self.chg_bs[i] += err[i]
        return

    def Conv_update(self, lr, b_mom, b_rms, eps, batch_size=50):
        """Apply one plain gradient-descent step and reset the accumulators.

        Args:
            lr: learning rate.
            b_mom, b_rms, eps: accepted for a future momentum / RMSprop update
                (state slots Vw/Vb/Sw/Sb); currently unused.
            batch_size: number of samples accumulated since the last update;
                the summed gradients are divided by it (default 50, the
                previously hard-coded value).
        """
        self.chg_bs /= batch_size
        self.chg_wt /= batch_size
        # TODO: momentum / RMSprop update is not implemented; plain SGD is used.
        self.weights -= lr * self.chg_wt
        self.biases -= lr * self.chg_bs
        self.chg_wt = np.zeros_like(self.weights)
        self.chg_bs = np.zeros_like(self.biases)
        return


In [17]:
import numpy as np

class Dense:
    """Fully connected layer that accumulates gradients over a mini-batch.

    Attributes:
        weights: array of shape (new_num, prev_num).
        biases: array of shape (new_num, 1).
        out_cache: output of the most recent forward pass.
        chg_wt, chg_bs: gradient sums since the last update (None when empty).
        Vw, Vb, Sw, Sb: placeholders for momentum / RMSprop state; not used yet.
    """

    def __init__(self, prev_num: int, new_num: int):
        """Initialise weights uniformly in +/- sqrt(6 / (prev_num + new_num)) and biases from N(0, 1)."""
        limit = np.sqrt(6 / (prev_num + new_num))
        self.weights = np.random.uniform(-limit, limit, size=(new_num, prev_num))
        self.biases = np.random.randn(new_num, 1)
        self.out_cache = None
        self.chg_wt = None
        self.chg_bs = None
        self.Vw = False
        self.Vb = False  # Momentum
        self.Sw = False
        self.Sb = False  # Rmsprop

    def Fwd_Dense(self, x):
        """Compute, cache and return ``weights @ x + biases``."""
        self.out_cache = np.dot(self.weights, x) + self.biases
        return self.out_cache

    def Bwd_Dense(self, err, dNdW):
        """Accumulate this sample's gradients and return the gradient for the layer input.

        Args:
            err: gradient w.r.t. this layer's output, shape (new_num, 1).
            dNdW: the input that was fed to the forward pass, shape (prev_num, 1).

        Returns:
            ``weights.T @ err``, the gradient w.r.t. the layer input.
        """
        grad_w = np.dot(err, dNdW.transpose())
        if self.chg_bs is None:
            # First sample of the batch: start the accumulators from copies.
            # (Previously the code initialised AND then added, so the first
            # sample of every batch was counted twice.)
            self.chg_bs = np.array(err, dtype=float)
            self.chg_wt = np.array(grad_w, dtype=float)
        else:
            self.chg_bs += err
            self.chg_wt += grad_w
        return np.dot(self.weights.transpose(), err)

    def Dense_update(self, lr, b_mom, b_rms, eps, batch_size=50):
        """Apply one plain gradient-descent step and clear the accumulators.

        Args:
            lr: learning rate.
            b_mom, b_rms, eps: accepted for a future momentum / RMSprop update
                (state slots Vw/Vb/Sw/Sb); currently unused.
            batch_size: number of samples accumulated since the last update;
                the summed gradients are divided by it (default 50, the
                previously hard-coded value).
        """
        if self.chg_bs is None or self.chg_wt is None:
            # Nothing was accumulated since the last update: nothing to apply.
            return
        self.chg_bs /= batch_size
        self.chg_wt /= batch_size
        # TODO: momentum / RMSprop update is not implemented; plain SGD is used.
        self.weights -= lr * self.chg_wt
        self.biases -= lr * self.chg_bs
        self.chg_bs = None
        self.chg_wt = None

In [18]:
class Maxpooling:
    """Max-pooling over square windows of a (channels, height, width) array.

    Attributes:
        size: window edge length (taken from ``size[0]``).
        stride: step between consecutive windows along both axes.
        coordinates: after a forward pass, a list with one (channel, row, col)
            tuple per pooled output, giving where its maximum was found. The
            list is in the same order as the flattened ``out_cache``.
        out_cache: pooled output of the most recent forward pass.
    """

    def __init__(self, size: tuple, stride: int) -> None:
        self.stride = stride
        self.size = size[0]
        self.coordinates = False
        self.out_cache = False

    def Fwd_Maxpool(self, ip):
        """Pool ``ip`` and cache both the result and the arg-max positions.

        Results are stored in ``self.out_cache`` and ``self.coordinates``;
        nothing is returned. Ties are resolved in favour of the first element
        in row-major order inside the window.

        Args:
            ip: array of shape (channels, height, width).
        """
        channels, height, width = ip.shape
        k, step = self.size, self.stride
        # Number of complete windows along each axis. The earlier code fixed the
        # window at 2x2 and mixed size/stride when computing the output shape;
        # for size == stride == 2 this formula gives the same shape.
        out_h = max((height - k) // step + 1, 0)
        out_w = max((width - k) // step + 1, 0)
        pooled = np.empty((channels, out_h, out_w), dtype=ip.dtype)
        crds = []
        for s in range(channels):
            for oi in range(out_h):
                top = oi * step
                for oj in range(out_w):
                    left = oj * step
                    window = ip[s, top:top + k, left:left + k]
                    # argmax works on the flattened window; divmod maps it back
                    # to (row, col) offsets inside the window.
                    r, c = divmod(int(np.argmax(window)), k)
                    pooled[s, oi, oj] = window[r, c]
                    crds.append((s, top + r, left + c))
        self.coordinates = crds
        self.out_cache = pooled
        return

    def Bwd_Maxpool(self, acvn_shp, flt2mat):
        """Scatter pooled-output gradients back to the positions of the maxima.

        Args:
            acvn_shp: shape of the array that was pooled in the forward pass.
            flt2mat: column of gradients, one row per pooled output, in the same
                order as ``self.coordinates``.

        Returns:
            Array of shape ``acvn_shp`` that is zero everywhere except at the
            recorded arg-max positions.
        """
        temp = np.zeros(acvn_shp)
        for idx in range(flt2mat.shape[0]):
            # Accumulate so that an element that is the maximum of several
            # overlapping windows (stride < size) receives every contribution.
            temp[self.coordinates[idx]] += flt2mat[idx][0]
        return temp

In [19]:
class Relu:
    """Rectified linear unit: caches max(0, x) and exposes its derivative."""

    def __init__(self):
        # Holds the latest forward result; False until Fwd_Relu has run.
        self.out_cache = False

    def Fwd_Relu(self, ip):
        """Store the element-wise maximum of 0 and ``ip`` in ``out_cache``; returns None."""
        rectified = np.maximum(0, ip)
        self.out_cache = rectified
        return None

    def Bwd_Relu(self, ip):
        """Return a float array that is 1.0 where ``ip`` > 0 and 0.0 elsewhere."""
        is_positive = ip > 0
        return np.array(is_positive, dtype=np.float64)

In [20]:
class Softmax:
    """Softmax output layer paired with a cross-entropy loss."""

    def __init__(self):
        # Holds the latest probabilities; False until Fwd_Softmax has run.
        self.out_cache = False

    def Fwd_Softmax(self, val):
        """Compute softmax along the first axis of ``val`` and cache it in ``out_cache``.

        The maximum of ``val`` is subtracted before exponentiation for numerical
        stability. ``val`` itself is left untouched (the earlier in-place
        ``val -= mx`` silently modified the caller's array). Returns None.
        """
        shifted = val - np.max(val)
        exps = np.exp(shifted)
        self.out_cache = exps / np.sum(exps, axis=0)
        return

    def Bwd_Softmax(self, y):
        """Return the gradient of the cross-entropy loss w.r.t. the softmax input.

        For softmax probabilities ``p`` and a one-hot target ``y`` the loss
        ``-sum(y * log(p))`` has gradient ``p - y`` with respect to the logits.
        The earlier implementation multiplied this by an additional
        probability-dependent factor, which is not the gradient of that loss.

        Args:
            y: one-hot target with the same shape as ``out_cache``.
        """
        return self.out_cache - y

In [21]:
class Sigmoid:
    """Logistic activation with a cached forward value."""

    def __init__(self):
        # Holds the latest forward result; False until sigmoid_Fwd has run.
        self.out_cache = False

    def sigmoid_Fwd(self, val):
        """Compute 1 / (1 + exp(-val)), cache it in ``out_cache`` and return it."""
        activated = 1.0 / (1.0 + np.exp(-val))
        self.out_cache = activated
        return activated

    def sigmoid_Bwd(self, val):
        """Return sigmoid(val) * (1 - sigmoid(val)); ``out_cache`` is refreshed with sigmoid(val)."""
        s = self.sigmoid_Fwd(val)
        return s * (1 - s)
        

TODO: implement batch normalization and dropout.

In [22]:
class Network:
    """Small CNN trained with mini-batch gradient descent.

    Architecture (built lazily on first use and stored in module-level globals
    ``Conv1, Relu1, Mxpl1, Dense1, Relu2, Dense2, Softmax1``):
    Convolution(16 filters, 3x3) -> ReLU -> MaxPool(2x2, stride 2)
    -> Dense(2704 -> 100) -> ReLU -> Dense(100 -> 10) -> Softmax.

    Per-sample losses of the current epoch are collected in the module-level
    list ``Loss``.
    """

    def __init__(self, train, valid, test):
        """Store the data splits and the hyper-parameters.

        Args:
            train: iterable of (image, one-hot label) pairs.
            valid: accepted but currently neither stored nor used.
            test: iterable of (image, one-hot label) pairs; may be empty.
        """
        self.train = list(train)
        self.test = list(test)
        # self.valid=list(valid)
        self.batch_size = 50
        self.epoch = 9
        self.learning_rate = 0.01
        self.beta_rms = 0.999
        self.beta_momentum = 0.9
        self.epsilon = 10**-8
        self.model_create = False

    def Start(self):
        """Train for ``self.epoch`` epochs, printing the mean loss and test accuracy after each."""
        global Loss
        # Ceiling division: a trailing partial batch still counts as a batch.
        # (Plain "/" made the progress line show a float such as 300.0.)
        n_batches = -(-len(self.train) // self.batch_size)
        for i in range(self.epoch):
            np.random.shuffle(self.train)
            batches = [self.train[k:k + self.batch_size]
                       for k in range(0, len(self.train), self.batch_size)]
            Loss = []
            for count, batch in enumerate(batches, start=1):
                # Write first, then flush, so the progress line appears immediately.
                sys.stdout.write(f"\r{self.batch_size} -> {count}/{n_batches}")
                sys.stdout.flush()
                self.MGD(batch)
            print(f"Loss -> {np.mean(np.array(Loss))}\n")
            if self.test:
                print(f"Epoch {i}-> {self.evall(self.test)} / {len(self.test)}")
            else:
                print(f"Epoch {i}-> complete")

    def MGD(self, x):
        """Run forward/backward on every sample of batch ``x``, then update all trainable layers.

        Each sample's loss is appended to the global ``Loss`` list, which
        ``Start`` creates; calling this method before ``Start`` has run raises
        NameError.
        """
        for i in x:
            loss = self.model(x=i[0], y=i[1])
            Loss.append(loss)
        # NOTE: as called here the layer updates average over 50 samples, which
        # matches self.batch_size only for full batches of the default size.
        Conv1.Conv_update(self.learning_rate, self.beta_momentum, self.beta_rms, self.epsilon)
        Dense1.Dense_update(self.learning_rate, self.beta_momentum, self.beta_rms, self.epsilon)
        Dense2.Dense_update(self.learning_rate, self.beta_momentum, self.beta_rms, self.epsilon)
        return

    def _build_layers(self):
        """Create the layers once and publish them as module-level globals."""
        global Conv1, Dense1, Dense2, Relu1, Relu2, Mxpl1, Softmax1
        Conv1 = Convolution(16, (3, 3))
        Relu1 = Relu()
        Mxpl1 = Maxpooling((2, 2), 2)
        # 2704 = 16 filters * 13 * 13 pooled outputs (32 filters would give 5408).
        Dense1 = Dense(2704, 100)
        Relu2 = Relu()
        # Relu2=Sigmoid()
        Dense2 = Dense(100, 10)
        Softmax1 = Softmax()
        self.model_create = True

    def _forward(self, x):
        """Run the forward pass for one image; return (softmax output, flattened pooled features)."""
        Conv1.Fwd_Correlation(x)
        Relu1.Fwd_Relu(Conv1.out_cache)
        Mxpl1.Fwd_Maxpool(Relu1.out_cache)
        flat = np.reshape(Mxpl1.out_cache.flatten(), (-1, 1))
        Dense1.Fwd_Dense(flat)
        Relu2.Fwd_Relu(Dense1.out_cache)
        Dense2.Fwd_Dense(Relu2.out_cache)
        Softmax1.Fwd_Softmax(Dense2.out_cache)
        return Softmax1.out_cache, flat

    def model(self, x, y):
        """Forward and backward pass for one sample; gradients are accumulated inside the layers.

        Args:
            x: input image.
            y: one-hot target column.

        Returns:
            The cross-entropy loss for this sample.
        """
        if not self.model_create:
            self._build_layers()

        probs, flat = self._forward(x)
        # The tiny constant keeps log() finite when a probability underflows to 0.
        loss = -sum(y * np.log(probs + 10**-100))

        grad = Softmax1.Bwd_Softmax(y)
        grad = Dense2.Bwd_Dense(grad, Relu2.out_cache)
        # Bwd_Relu(z) is the ReLU derivative at z (1 where z > 0, else 0), so it
        # must be evaluated at the pre-activation and multiplied into the
        # upstream gradient. Previously the upstream gradient itself was passed
        # in and its 0/1 mask was used as the gradient, discarding magnitudes.
        grad = grad * Relu2.Bwd_Relu(Dense1.out_cache)
        grad = Dense1.Bwd_Dense(grad, flat)
        grad = Mxpl1.Bwd_Maxpool(Relu1.out_cache.shape, grad)
        grad = grad * Relu1.Bwd_Relu(Conv1.out_cache)
        Conv1.Bwd_Correlation(grad, x)
        return loss

    def evall(self, sett):
        """Return how many (image, one-hot label) pairs in ``sett`` are classified correctly."""
        print('\nChecking...')
        if not self.model_create:
            # Allow evaluation before any training step has built the layers.
            self._build_layers()
        lt = 0
        for i, j in sett:
            probs, _ = self._forward(i)
            if np.argmax(probs) == np.argmax(j):
                lt += 1
        return lt

In [23]:
# Load the three splits and hand them, in order (train, valid, test), to the network.
dt = load_data_wrapper()
ob = Network(*dt)

train- 15000
valid- 10000
test- 3000


In [None]:
# Run the training loop: every epoch trains on shuffled mini-batches and prints
# the mean loss followed by the number of correctly classified test samples.
ob.Start()