In [None]:
# Class for Linear Layer (If you're stuck, you can refer to code of PyTorch/Tensorflow packages) 
import numpy as np

class LinearLayer:
    """Bias-free fully connected layer computing ``np.dot(input, weights)``.

    Attributes:
        weight_matrix: array of shape (input_layer_size, output_layer_size).
        input_data: argument of the most recent ``forward`` call.
        score: result of the most recent ``forward`` call.
        gradient_wrt_weights: weight gradient stored by the most recent
            ``backward`` call.
    """

    def __init__(self, input_layer_size, output_layer_size):
        """Initialise the weights with samples from N(0, (10**-3)**2)."""
        weight_shape = (input_layer_size, output_layer_size)
        self.weight_matrix = np.random.normal(loc=0, scale=10 ** (-3), size=weight_shape)

    def forward(self, input_data):
        """Multiply ``input_data`` by the weights and return the scores.

        The input is cached on the instance because ``backward`` needs it to
        compute the weight gradient.
        """
        self.input_data = input_data
        self.score = np.dot(input_data, self.weight_matrix)
        return self.score

    def backward(self, gradient_output):
        """Back-propagate ``gradient_output`` (gradient w.r.t. the scores).

        Stores the gradient w.r.t. the weights in ``gradient_wrt_weights`` and
        returns the gradient w.r.t. the input of the last ``forward`` call.
        """
        transposed_weights = self.weight_matrix.T
        self.gradient_wrt_weights = np.dot(self.input_data.T, gradient_output)
        return np.dot(gradient_output, transposed_weights)

In [None]:
# class for ReLU
class ReLU:
    """Element-wise rectified linear unit: ``max(0, x)``.

    Attributes:
        input_data: argument of the most recent ``forward`` call.
        out_data: result of the most recent ``forward`` call.
    """

    def forward(self, input_data):
        """Clamp negative entries to zero, caching input and output."""
        self.input_data = input_data
        rectified = np.maximum(0, input_data)
        self.out_data = rectified
        return rectified

    def backward(self, out_grad):
        """Pass ``out_grad`` through only where the cached input was positive."""
        positive_mask = self.input_data > 0
        return out_grad * positive_mask

In [None]:
# class for MLP
class MLP:
    """Two-layer perceptron: linear -> ReLU -> linear.

    Attributes:
        linear_layer1: first ``LinearLayer`` (input -> hidden).
        relu: ``ReLU`` activation applied to the hidden scores.
        linear_layer_2: second ``LinearLayer`` (hidden -> output).
    """

    def __init__(self, input_layer_size, hidden_layer_size, output_layer_size):
        """Build the three stages with the given layer widths."""
        self.linear_layer1 = LinearLayer(input_layer_size, hidden_layer_size)
        self.relu = ReLU()
        self.linear_layer_2 = LinearLayer(hidden_layer_size, output_layer_size)

    def forward(self, input_X):
        """Run ``input_X`` through every stage in order and return the final scores."""
        activations = input_X
        for stage in (self.linear_layer1, self.relu, self.linear_layer_2):
            activations = stage.forward(activations)
        return activations

    def backward(self, gradient_output):
        """Back-propagate through the stages in reverse order.

        Each stage's ``backward`` is called with the gradient produced by the
        stage after it; the gradient w.r.t. the network input is returned.
        """
        gradient = gradient_output
        for stage in (self.linear_layer_2, self.relu, self.linear_layer1):
            gradient = stage.backward(gradient)
        return gradient

In [None]:
# performing one hot encoding
def one_hot_encoding(y_labels,num_classes):
    """Convert integer class labels into a one-hot matrix.

    Args:
        y_labels: sequence of integer class indices, one per sample.
        num_classes: number of columns in the encoded matrix.

    Returns:
        Float array of shape (len(y_labels), num_classes) with a 1 in column
        ``y_labels[i]`` of row ``i`` and 0 elsewhere.
    """
    sample_count = len(y_labels)
    row_ids = np.arange(sample_count)
    encoded = np.zeros((sample_count, num_classes))
    # Fancy indexing pairs row i with column y_labels[i].
    encoded[row_ids, y_labels] = 1
    return encoded
    
def softmax1(scores):
    """Row-wise softmax of a 2-D score matrix.

    Args:
        scores: array-like of shape (num_samples, num_classes).

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Softmax is unchanged by subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so np.exp cannot overflow to inf
    # (which would otherwise produce inf/inf = nan for large scores).
    shifted_scores = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted_scores)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
def cross_entropy_loss(input_y,scores):
    """Mean softmax cross-entropy between one-hot targets and raw scores.

    Args:
        input_y: one-hot target matrix of shape (num_samples, num_classes).
        scores: unnormalised class scores (logits) of the same shape.

    Returns:
        Scalar: the cross-entropy averaged over the rows (samples).
    """
    scores = np.asarray(scores, dtype=float)
    # Compute log-softmax directly with the log-sum-exp trick instead of
    # log(softmax(scores)): the row-max shift prevents exp overflow, and
    # never forming the probabilities avoids log(0) = -inf (and the
    # resulting 0 * -inf = nan) when a probability underflows.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    log_softmax = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    return -np.mean(np.sum(input_y * log_softmax, axis=1))

In [None]:
# Histories appended to by linear_trainer at its logging points.
training_losses=[]
training_accuracy=[]
def linear_trainer(x_train,y_train,LEARNING_RATE=0.01,BATCH_SIZE=100,EPOCHS=10):
    """Train a 784-128-10 MLP with mini-batch gradient descent.

    Args:
        x_train: training inputs, forwarded to ``get_minibatch``.
        y_train: training labels, forwarded to ``get_minibatch``.
        LEARNING_RATE: step size of the weight update.
        BATCH_SIZE: mini-batch size requested from ``get_minibatch``; also
            used as the logging interval (statistics are recorded on every
            BATCH_SIZE-th mini-batch, as in the original code).
        EPOCHS: number of passes over the mini-batch iterator.

    Returns:
        The trained ``MLP`` instance.

    Side effects:
        Appends to the module-level ``training_losses`` (mean mini-batch loss
        since the previous logging point) and ``training_accuracy`` (value
        returned by ``accuracy`` times 100).
    """
    mlp=MLP(input_layer_size=784,hidden_layer_size=128,output_layer_size=10)
    for epoch in range(EPOCHS):
        running_loss=0.0
        running_batches=0
        miniBatch=get_minibatch(batch_size_param=BATCH_SIZE,training_x=x_train,training_y=y_train)
        for index,(input_x,input_y) in enumerate(miniBatch):
            # Infer the row count so a final mini-batch shorter than
            # BATCH_SIZE is still flattened correctly.
            input_x_reshaped=np.reshape(input_x,(-1,28*28))
            scores=mlp.forward(input_x_reshaped)

            input_y_one_hot_encoded=one_hot_encoding(input_y,num_classes=10)
            running_loss+=cross_entropy_loss(input_y_one_hot_encoded,scores)
            running_batches+=1

            # The gradient of softmax cross-entropy w.r.t. the scores is
            # softmax(scores) - one_hot, not scores - one_hot. The gradient is
            # left summed over the batch (not averaged), matching the scale of
            # the original update rule.
            mlp.backward(softmax1(scores)-input_y_one_hot_encoded)

            mlp.linear_layer1.weight_matrix -= LEARNING_RATE*mlp.linear_layer1.gradient_wrt_weights
            mlp.linear_layer_2.weight_matrix -= LEARNING_RATE*mlp.linear_layer_2.gradient_wrt_weights

            if index % BATCH_SIZE == 0:
                train_accuracy = accuracy(input_x, input_y_one_hot_encoded, mlp,BATCH_SIZE)
                # Log the average of the per-batch losses accumulated since the
                # last logging point, then restart the accumulation.
                training_losses.append(running_loss/running_batches)
                training_accuracy.append(train_accuracy*100)
                running_loss=0.0
                running_batches=0
    # Return only after every epoch has run (the original returned from
    # inside the epoch loop, ending training after the first epoch).
    return mlp

In [None]:
linear_classifier=linear_trainer(x_train=training_x,y_train=training_y)
# Report the mean of the logged training accuracies. `training_accuracy` is the
# module-level history filled by linear_trainer; its entries are already
# percentages, so no further scaling is applied. (`train_accuracy` is only a
# local variable inside linear_trainer in the visible code.)
print(f"{np.mean(training_accuracy)}%")