In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the MNIST training CSV: column 0 is used as the label, every other
# column as a feature scaled by 1/255.
df = pd.read_csv("/content/drive/MyDrive/mnist_train.csv")
X_train = df.iloc[:, 1:] / 255.
y_train = df.iloc[:, 0]
X_train.shape, y_train.shape

((60000, 784), (60000,))

In [None]:
# One-hot encode the labels by picking rows of an identity matrix:
# row i of the result has a single 1.0 in column y_train[i].
# The layout stays (n_samples, n_classes), with n_classes = largest label + 1.
one_hot_Y = np.eye(y_train.max() + 1)[np.asarray(y_train)]
one_hot_Y

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [None]:
# X_train_img = X_train.reshape(19999, 28, 28)

In [None]:
# plt.imshow(X_train_img[1])
# plt.show()

In [None]:
class ANN:
    """Feed-forward classifier with one sigmoid hidden layer and a softmax output.

    Training is full-batch gradient descent on the categorical cross-entropy
    between the softmax output and one-hot targets.

    Note:
        Parameter updates use gradients *summed* over all samples of the batch
        (they are not divided by the batch size), so the effective step size
        grows with the number of rows passed to ``train``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01, seed=42):
        """Store the layer sizes and initialise weights and biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            learning_rate: Step size applied to every parameter update.
            seed: Seed for the weight initialisation. The default reproduces
                the weights obtained from ``np.random.seed(42)``; ``None``
                draws fresh entropy.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        # A private RandomState produces the same stream as seeding the global
        # generator, without resetting NumPy's global RNG for the whole process.
        rng = np.random.RandomState(seed)
        self.weights_hidden = rng.randn(input_size, hidden_size)
        self.biases_hidden = np.zeros((1, hidden_size))
        self.weights_output = rng.randn(hidden_size, output_size)
        self.biases_output = np.zeros((1, output_size))

    def sigmoid(self, x):
        """Return the element-wise logistic function 1 / (1 + exp(-x)).

        Evaluated as exp(-log(1 + exp(-x))) through ``np.logaddexp`` so that
        large negative inputs do not overflow inside ``exp``.
        """
        return np.exp(-np.logaddexp(0.0, -np.asarray(x, dtype=float)))

    def softmax(self, x):
        """Return the row-wise softmax of a 2-D array.

        The row maximum is subtracted before exponentiating to avoid overflow.
        """
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def cross_entropy_loss(self, y_true, y_pred):
        """Return the cross-entropy summed over all entries, divided by len(y_true).

        Args:
            y_true: One-hot targets, one row per sample.
            y_pred: Predicted probabilities with the same shape as ``y_true``.
        """
        # The small constant keeps log() finite when a probability is exactly 0.
        return -np.sum(y_true * np.log(y_pred + 1e-10)) / len(y_true)

    def forward_propagation(self, X):
        """Run the network on ``X`` (one row per sample).

        Returns:
            Tuple ``(hidden_layer_output, output_layer_output)``: the sigmoid
            activations of the hidden layer and the softmax probabilities.
        """
        # Accept any array-like (e.g. a DataFrame) and work on a plain ndarray.
        X = np.asarray(X, dtype=float)
        hidden_layer_input = np.dot(X, self.weights_hidden) + self.biases_hidden
        hidden_layer_output = self.sigmoid(hidden_layer_input)
        output_layer_input = np.dot(hidden_layer_output, self.weights_output) + self.biases_output
        output_layer_output = self.softmax(output_layer_input)

        return hidden_layer_output, output_layer_output

    def backward_propagation(self, X, y_true, hidden_layer_output, output_layer_output):
        """Apply one gradient-descent update to all weights and biases in place.

        Args:
            X: Inputs used for the forward pass, one row per sample.
            y_true: One-hot targets matching ``output_layer_output``.
            hidden_layer_output: Hidden activations from ``forward_propagation``.
            output_layer_output: Softmax probabilities from ``forward_propagation``.
        """
        X = np.asarray(X, dtype=float)
        y_true = np.asarray(y_true)

        # Softmax combined with cross-entropy: the gradient with respect to the
        # output pre-activations is (probabilities - targets).
        output_error = output_layer_output - y_true
        # Back-propagate through the output weights and the sigmoid derivative s * (1 - s).
        hidden_layer_error = np.dot(output_error, self.weights_output.T) * (hidden_layer_output * (1 - hidden_layer_output))

        # Gradients are summed over the batch, not averaged.
        self.weights_output -= self.learning_rate * np.dot(hidden_layer_output.T, output_error)
        self.biases_output -= self.learning_rate * np.sum(output_error, axis=0, keepdims=True)
        self.weights_hidden -= self.learning_rate * np.dot(X.T, hidden_layer_error)
        self.biases_hidden -= self.learning_rate * np.sum(hidden_layer_error, axis=0, keepdims=True)

    def train(self, X_train, y_train, epochs=20):
        """Train on the whole data set for ``epochs`` full-batch updates.

        After each forward pass the loss and accuracy are printed; both are
        measured before that epoch's parameter update is applied.

        Args:
            X_train: Training inputs, one row per sample.
            y_train: One-hot training targets.
            epochs: Number of full-batch updates to perform.
        """
        # Convert once so the loop never repeats the array-like -> ndarray copy.
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)

        for epoch in range(epochs):
            hidden_layer_output, output_layer_output = self.forward_propagation(X_train)

            loss = self.cross_entropy_loss(y_train, output_layer_output)
            self.backward_propagation(X_train, y_train, hidden_layer_output, output_layer_output)
            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss} , Accuracy: {self.evaluate(output_layer_output, y_train)}")

    def evaluate(self, output_layer_output_test, y_test):
        """Return the fraction of rows whose arg-max prediction matches the label.

        Args:
            output_layer_output_test: Predicted probabilities, one row per sample.
            y_test: Either one-hot targets (2-D) or integer class labels (1-D).
        """
        predicted_labels = np.argmax(output_layer_output_test, axis=1)
        y_test = np.asarray(y_test)
        # One-hot targets are reduced to class indices; 1-D labels are used as given.
        true_labels = np.argmax(y_test, axis=1) if y_test.ndim > 1 else y_test
        return np.mean(predicted_labels == true_labels)

    def predict(self, X_test):
        """Return the softmax class probabilities for ``X_test`` (one row per sample)."""
        _, output_layer_output_test = self.forward_propagation(X_test)
        return output_layer_output_test

In [None]:
# Network dimensions and optimiser settings for the MNIST run.
input_size = X_train.shape[1]  # one input unit per feature column
hidden_size = 100
output_size = 10
learning_rate = 0.0001

model = ANN(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    learning_rate=learning_rate,
)
model.train(X_train, one_hot_Y, epochs=100)

Epoch 1/100, Loss: 9.38119326197296 , Accuracy: 0.09163333333333333
Epoch 2/100, Loss: 11.731342468736669 , Accuracy: 0.22208333333333333
Epoch 3/100, Loss: 15.848748808551758 , Accuracy: 0.12208333333333334
Epoch 4/100, Loss: 12.605630702518045 , Accuracy: 0.22578333333333334
Epoch 5/100, Loss: 10.280657336251931 , Accuracy: 0.24006666666666668
Epoch 6/100, Loss: 7.538277120460236 , Accuracy: 0.2939833333333333
Epoch 7/100, Loss: 4.679571788606929 , Accuracy: 0.32961666666666667
Epoch 8/100, Loss: 2.4421282065612515 , Accuracy: 0.44376666666666664
Epoch 9/100, Loss: 1.5713168940806599 , Accuracy: 0.5191
Epoch 10/100, Loss: 1.271567373306016 , Accuracy: 0.6044833333333334
Epoch 11/100, Loss: 1.1446423617341086 , Accuracy: 0.63725
Epoch 12/100, Loss: 1.0730100446537478 , Accuracy: 0.6599666666666667
Epoch 13/100, Loss: 1.0178210669278682 , Accuracy: 0.6788833333333333
Epoch 14/100, Loss: 0.9727335687526253 , Accuracy: 0.6942333333333334
Epoch 15/100, Loss: 0.9343628560706294 , Accuracy:

In [None]:
# Read the MNIST test CSV: column 0 is used as the label, every other column
# as a feature scaled by 1/255. Both parts are converted to NumPy arrays.
df = pd.read_csv("/content/drive/MyDrive/mnist_test.csv")
X_test = df.iloc[:, 1:].to_numpy() / 255.
y_test = df.iloc[:, 0].to_numpy()
X_test.shape, y_test.shape

((10000, 784), (10000,))

In [None]:
# Softmax class probabilities for the test inputs, as returned by ANN.predict
# (one row per sample, one column per output class).
y_pred=model.predict(X_test)

In [None]:
def top_k_accuracy(y_true, y_pred_prob, k):
    """Return the fraction of samples whose true label is among the k highest scores.

    Args:
        y_true: 1-D array-like of integer class labels.
        y_pred_prob: 2-D array-like of scores, one row per sample and one
            column per class; column index is compared against the label.
        k: Number of top-scoring classes to consider (must be >= 1). If k
            exceeds the number of columns, every class counts as a candidate.

    Returns:
        Accuracy in [0, 1]; 0.0 when ``y_true`` is empty.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # A slice of [:, -0:] would silently select every class.
        raise ValueError(f"k must be at least 1, got {k}")

    y_true = np.asarray(y_true)
    y_pred_prob = np.asarray(y_pred_prob)

    total_samples = len(y_true)
    if total_samples == 0:
        return 0.0

    # argsort is ascending, so the last k columns hold the k best classes.
    top_k_classes = np.argsort(y_pred_prob, axis=1)[:, -k:]
    hits = np.any(top_k_classes == y_true[:, np.newaxis], axis=1)
    return np.sum(hits) / total_samples



def precision(y_true, y_pred, class_label=1):
    """Return the precision for one class: TP / (number of predictions of that class).

    Args:
        y_true: 1-D array-like of true labels.
        y_pred: 1-D array-like of predicted labels, same length as ``y_true``.
        class_label: The label treated as the positive class.

    Returns:
        The precision, or 0.0 when ``class_label`` is never predicted
        (instead of dividing by zero).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    predicted_positives = np.sum(y_pred == class_label)
    if predicted_positives == 0:
        return 0.0

    true_positives = np.sum((y_true == class_label) & (y_pred == class_label))
    return true_positives / predicted_positives

def recall(y_true, y_pred, class_label=1):
    """Return the recall for one class: TP / (number of true samples of that class).

    Args:
        y_true: 1-D array-like of true labels.
        y_pred: 1-D array-like of predicted labels, same length as ``y_true``.
        class_label: The label treated as the positive class.

    Returns:
        The recall, or 0.0 when ``class_label`` does not occur in ``y_true``
        (instead of dividing by zero).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    actual_positives = np.sum(y_true == class_label)
    if actual_positives == 0:
        return 0.0

    true_positives = np.sum((y_true == class_label) & (y_pred == class_label))
    return true_positives / actual_positives

def f1_score(y_true, y_pred, class_label=1):
    """Return the F1 score (harmonic mean of precision and recall) for one class.

    Args:
        y_true: 1-D array of true labels.
        y_pred: 1-D array of predicted labels, same length as ``y_true``.
        class_label: The label treated as the positive class.

    Returns:
        ``2 * precision * recall / (precision + recall)``, or 0.0 when that
        denominator is not a positive number.
    """
    prec = precision(y_true, y_pred, class_label)
    rec = recall(y_true, y_pred, class_label)

    denominator = prec + rec
    # "not > 0" is true for a zero sum and also for NaN, so neither case
    # reaches the division below.
    if not denominator > 0:
        return 0.0
    return 2 * (prec * rec) / denominator

def confusion_matrix_custom(y_true, y_pred):
    """Return the confusion matrix of two label sequences.

    Rows and columns are indexed by the sorted unique labels that occur in
    either input; entry ``[i, j]`` counts samples whose true label is the
    i-th unique label and whose predicted label is the j-th.

    Args:
        y_true: 1-D array-like of true labels.
        y_pred: 1-D array-like of predicted labels, same length as ``y_true``.

    Returns:
        Square integer ndarray with one row/column per unique label.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if len(y_true) != len(y_pred):
        raise ValueError(
            f"y_true and y_pred must have the same length, got {len(y_true)} and {len(y_pred)}"
        )

    # Map every label to its position among the sorted unique labels in one pass.
    unique_labels, positions = np.unique(np.concatenate([y_true, y_pred]), return_inverse=True)
    true_idx = positions[:len(y_true)]
    pred_idx = positions[len(y_true):]

    matrix = np.zeros((len(unique_labels), len(unique_labels)), dtype=int)
    # np.add.at accumulates repeated (row, col) pairs; plain fancy-index
    # assignment would count each pair only once.
    np.add.at(matrix, (true_idx, pred_idx), 1)
    return matrix

def classification_metric(y_test, y_pred):
    """Print top-5/top-1 accuracy, class 0 and class 1 metrics, and the confusion matrix.

    Args:
        y_test: 1-D array of true integer labels.
        y_pred: 2-D array of per-class scores, one row per sample; the
            arg-max of each row is used as the predicted label.
    """
    predicted_labels = np.argmax(y_pred, axis=1)

    top5 = top_k_accuracy(y_test, y_pred, 5)
    top1 = top_k_accuracy(y_test, y_pred, 1)

    prec0 = precision(y_test, predicted_labels, class_label=0)
    rec0 = recall(y_test, predicted_labels, class_label=0)
    f1_0 = f1_score(y_test, predicted_labels, class_label=0)

    prec1 = precision(y_test, predicted_labels, class_label=1)
    rec1 = recall(y_test, predicted_labels, class_label=1)
    f1_1 = f1_score(y_test, predicted_labels, class_label=1)

    matrix = confusion_matrix_custom(y_test, predicted_labels)

    # The '#' characters below are inside the string, so they are printed verbatim.
    print(f'''
Top 5 Accuracy :{top5:.3f}
Top 1 Accuracy :{top1:.3f}
# Classication Report:
# Precision(class 0): {prec0:.2f}
# Recall(class 0)   : {rec0:.2f}
# F1-score(class 0) : {f1_0:.2f}

# Precision(class 1): {prec1:.2f}
# Recall(class 1)   : {rec1:.2f}
# F1-score(class 1) : {f1_1:.2f}

Confusion Matrix:
{matrix}
''')
# Print the evaluation report for the test-set predictions.
classification_metric( y_test,y_pred)


Top 5 Accuracy :0.991
Top 1 Accuracy :0.881
# Classication Report:
# Precision(class 0): 0.92
# Recall(class 0)   : 0.94
# F1-score(class 0) : 0.93

# Precision(class 1): 0.97
# Recall(class 1)   : 0.97
# F1-score(class 1) : 0.97

Confusion Matrix:
[[ 924    0    4    3    3   13   17    6    9    1]
 [   0 1098    8    5    0    3    4    0   16    1]
 [  13    3  880   23   18   10   20   17   43    5]
 [   9    2   27  885    2   37    5    9   23   11]
 [   4    2    8    3  858    5   15    7   12   68]
 [  13    3    5   53    9  742   20    7   31    9]
 [  22    5   11    1   18   22  869    0   10    0]
 [   0   14   28    6    7    5    0  900    5   63]
 [  13    5   23   43   17   25   22   12  800   14]
 [  10    3    3   11   59   13    1   38   16  855]]

