In [33]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv


In [34]:
import numpy as np
import pandas as pd
import pickle

In [35]:
class Linear:
    # initialisng weight and biases
    def __init__(self, in_features, out_features):
        self.weights = np.random.randn(in_features, out_features) * 0.01
        self.biases = np.zeros((1, out_features))
        self.input = None
        self.grad_weights = None
        self.grad_biases = None

    def forward(self, x): # for forward propogation
        self.input = x
        return np.dot(x, self.weights) + self.biases

    def backward(self, d_out): # for backward propogation
        self.grad_weights = np.dot(self.input.T, d_out)
        self.grad_biases = np.sum(d_out, axis=0, keepdims=True)
        return np.dot(d_out, self.weights.T)

In [36]:
# Activation classes

class ReLU: #Rectified Linear Unit activation
    def forward(self, x):
        self.input = x
        return np.maximum(0, x)

    def backward(self, d_out): # gradients for ReLU activation
        d_input = d_out.copy()
        d_input[self.input <= 0] = 0
        return d_input

class Softmax: #softmax activation
    def forward(self, x):
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        self.output = exps / np.sum(exps, axis=1, keepdims=True)
        return self.output

    def backward(self, d_out): # gradients for softmax activation
        return d_out


In [37]:
class CrossEntropyLoss: # calculating cross entropy loss
    def forward(self, y_pred, y_true):
        self.y_pred = y_pred
        self.y_true = y_true
        self.n = y_true.shape[0]
        return -np.sum(y_true * np.log(y_pred + 1e-10)) / self.n

    def backward(self): # computing gradient of cross entropy loss
        return (self.y_pred - self.y_true) / self.n

In [38]:
# Stochastic Gradient Descent 
class SGD:
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate

    def step(self, layer):
        if hasattr(layer, 'grad_weights'):
            layer.weights -= self.learning_rate * layer.grad_weights
            layer.biases -= self.learning_rate * layer.grad_biases

In [39]:
# Neural Network Model
class Model:
    
    def __init__(self): # initialising model
        self.layers = []
        self.loss_fn = None
        self.optimizer = None

    def add(self, layer): # adding layer to the neural network
        self.layers.append(layer)

    def compile(self, loss_fn, optimizer): # setting up loss and optimizer function
        self.loss_fn = loss_fn
        self.optimizer = optimizer

    def forward(self, x): # forward pass
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, loss_grad): # backward pass
        for layer in reversed(self.layers):
            loss_grad = layer.backward(loss_grad)

    def train(self, x_train, y_train, epochs, batch_size): #TRAINING THE MODEL

        for epoch in range(epochs):
            indices = np.arange(x_train.shape[0])
            #np.random.shuffle(indices)
            x_train, y_train = x_train[indices], y_train[indices]
            
            for start in range(0, x_train.shape[0], batch_size):
                end = min(start + batch_size, x_train.shape[0])
                x_batch, y_batch = x_train[start:end], y_train[start:end]
                
                predictions = self.forward(x_batch)
                loss = self.loss_fn.forward(predictions, y_batch)
                loss_grad = self.loss_fn.backward()
                self.backward(loss_grad)
                
                for layer in self.layers:
                    self.optimizer.step(layer)
                
            print(f'EPOCH {epoch + 1}/{epochs}, LOSS: {loss}')

    def evaluate(self, x_test, y_test): # Evaluation of the model
        predictions = self.forward(x_test)
        loss = self.loss_fn.forward(predictions, y_test)
        accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
        return loss, accuracy
    
    def save_weights(self, filename):
        # Save the weights and biases of the model to a file
        weights_dict = {}
        for i, layer in enumerate(self.layers):
            if hasattr(layer, 'weights'):
                weights_dict[f'layer_{i}_weights'] = layer.weights
                weights_dict[f'layer_{i}_biases'] = layer.biases
        
        with open(filename, 'wb') as f:
            pickle.dump(weights_dict, f)
        print(f"Model weights saved to {filename}")

    def load_weights(self, filename):
        # Loading the weights and biases from a file into the model
        with open(filename, 'rb') as f:
            weights_dict = pickle.load(f)
        
        for i, layer in enumerate(self.layers):
            if hasattr(layer, 'weights'):
                layer.weights = weights_dict[f'layer_{i}_weights']
                layer.biases = weights_dict[f'layer_{i}_biases']
        print(f"Model weights loaded from {filename}")

In [40]:

# Load and preprocess data
train_df = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
x = train_df.drop(columns='label').values / 255.0
y = pd.get_dummies(train_df['label']).values

# Define a simple neural network using the framework
model = Model()
model.add(Linear(784, 128))
model.add(ReLU())
model.add(Linear(128, 10))
model.add(Softmax())

# Compile the model with loss and optimizer
loss = CrossEntropyLoss()
optimizer = SGD(learning_rate=0.01)
model.compile(loss, optimizer)

# Train the model
model.train(x, y, epochs=20, batch_size=64)

# LOAD OR SAVE WEIGHTS HERE

# Evaluate the model
test_loss, test_accuracy = model.evaluate(x, y)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

EPOCH 1/20, LOSS: 1.428813159730113
EPOCH 2/20, LOSS: 0.5450430090265324
EPOCH 3/20, LOSS: 0.33174064850649326
EPOCH 4/20, LOSS: 0.2251559227225986
EPOCH 5/20, LOSS: 0.1660131002445489
EPOCH 6/20, LOSS: 0.12987812962966078
EPOCH 7/20, LOSS: 0.10587775656422505
EPOCH 8/20, LOSS: 0.08958536692777577
EPOCH 9/20, LOSS: 0.07785262203541149
EPOCH 10/20, LOSS: 0.0691116819314456
EPOCH 11/20, LOSS: 0.062434585860398156
EPOCH 12/20, LOSS: 0.05684364824083592
EPOCH 13/20, LOSS: 0.052093844696240614
EPOCH 14/20, LOSS: 0.048194298867363566
EPOCH 15/20, LOSS: 0.04504730712678991
EPOCH 16/20, LOSS: 0.04250104630567056
EPOCH 17/20, LOSS: 0.04015491065068518
EPOCH 18/20, LOSS: 0.038192599211573655
EPOCH 19/20, LOSS: 0.03639418585936896
EPOCH 20/20, LOSS: 0.03495184577546863
Test Loss: 0.2371432049869906, Test Accuracy: 0.9319285714285714
