In [None]:


import numpy as np 
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the digit-recognizer training CSV into a DataFrame.
train_csv_path = '/kaggle/input/digit-recognizer/train.csv'
data = pd.read_csv(train_csv_path)

In [None]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

In [None]:
import numpy as np



# Convert the frame loaded earlier into a plain NumPy array. The slicing below
# treats column 0 as the digit label and every remaining column as a feature.
data = np.array(data)

BOOSTED_CLASSES = (1, 3, 9)  # digits whose rows are repeated in the training set
BOOST_FACTOR = 3             # total number of copies of each boosted row
TRAIN_SIZE = 1000            # number of distinct rows reserved for training

np.random.seed(42)  # fixed seed so the split is reproducible

# Split BEFORE oversampling. Repeating rows first and splitting afterwards puts
# copies of the same image on both sides of the split, which leaks training
# examples into the evaluation set and inflates the reported test accuracy.
shuffled_rows = data[np.random.permutation(len(data))]
train_rows = shuffled_rows[:TRAIN_SIZE]
test_data = shuffled_rows[TRAIN_SIZE:]
assert len(train_rows) + len(test_data) == len(data)

# Oversample the chosen digits in the training rows only; the test rows keep
# the class balance of the original data.
is_boosted = np.isin(train_rows[:, 0], BOOSTED_CLASSES)
train_data = np.vstack((
    train_rows[~is_boosted],
    np.repeat(train_rows[is_boosted], BOOST_FACTOR, axis=0),
))
np.random.shuffle(train_data)

# Features are divided by 255.0 (presumably 0-255 pixel intensities, giving
# values in [0, 1] -- confirm against the CSV); labels are cast to int so they
# can be used as array indices later on.
x_train = train_data[:, 1:] / 255.0
y_train = train_data[:, 0].astype(int)

x_test = test_data[:, 1:] / 255.0
y_test = test_data[:, 0].astype(int)


In [None]:
# Show the shape of the training feature matrix. (The previous `x` was never
# defined at notebook level, so this cell failed on a fresh kernel.)
np.shape(x_train)

In [None]:
# Show the shape of the training label vector. (The previous `y` was never
# defined at notebook level, so this cell failed on a fresh kernel.)
np.shape(y_train)

In [None]:
import numpy as np

def init_params(input_size=784, hidden_size=10, output_size=10):
    """Draw standard-normal initial parameters for a two-layer network.

    Args:
        input_size: Number of input features (rows of ``w1``).
        hidden_size: Number of hidden units (columns of ``w1``, rows of ``w2``).
        output_size: Number of output units (columns of ``w2``).

    Returns:
        Tuple ``(w1, w2, b1, b2)`` with shapes ``(input_size, hidden_size)``,
        ``(hidden_size, output_size)``, ``(1, hidden_size)`` and
        ``(1, output_size)``.

    The defaults reproduce the original fixed 784-10-10 layout. Values come
    from NumPy's global random state, drawn in the order w1, w2, b1, b2.
    """
    w1 = np.random.randn(input_size, hidden_size)
    w2 = np.random.randn(hidden_size, output_size)
    b1 = np.random.randn(1, hidden_size)
    b2 = np.random.randn(1, output_size)
    return w1, w2, b1, b2

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    rectified = np.maximum(0, x)
    return rectified

def relu_deriv(x):
    """Element-wise ReLU gradient: 1 where ``x`` is strictly positive, else 0."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def softmax(x):
    """Softmax along axis 1.

    The maximum along axis 1 is subtracted before exponentiating so that large
    inputs do not overflow; this shift leaves the normalised result unchanged.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    unnormalised = np.exp(x - row_max)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total

def one_hot_encode(y, num_classes=None):
    """Turn integer class labels into a one-hot matrix.

    Args:
        y: Integer labels (array-like). Flattened to 1-D, so an ``(n, 1)``
            column is accepted as well as an ``(n,)`` vector.
        num_classes: Number of columns in the result. When omitted it is
            inferred as ``max(y) + 1``, which yields too few columns if the
            batch does not contain the highest class; pass it explicitly when
            the width must match a network's output layer.

    Returns:
        Float array of shape ``(len(y), num_classes)`` with a single 1 per row.

    Raises:
        ValueError: If a label is negative or not smaller than ``num_classes``.
    """
    labels = np.asarray(y).reshape(-1)
    if num_classes is None:
        # An empty batch has no maximum; fall back to zero columns.
        num_classes = int(labels.max()) + 1 if labels.size else 0

    one_hot = np.zeros((labels.size, num_classes))
    if labels.size:
        # Negative indices would silently wrap around to the last columns.
        if labels.min() < 0 or labels.max() >= num_classes:
            raise ValueError(
                f"labels must lie in [0, {num_classes}); "
                f"got range [{labels.min()}, {labels.max()}]"
            )
        one_hot[np.arange(labels.size), labels] = 1
    return one_hot

def forward_prop(x, w1, w2, b1, b2):
    """Forward pass of the two-layer network.

    Computes ``x @ w1 + b1``, applies ReLU, then ``hidden @ w2 + b2`` followed
    by softmax.

    Returns:
        Tuple ``(z1, A1, z2, A2)``: hidden pre-activation, hidden activation,
        output pre-activation, and softmax output, in that order.
    """
    hidden_pre = np.dot(x, w1) + b1
    hidden_act = relu(hidden_pre)

    output_pre = np.dot(hidden_act, w2) + b2
    output_probs = softmax(output_pre)

    return hidden_pre, hidden_act, output_pre, output_probs

def backprop(z1, A1, z2, A2, x, y, w2):
    """Gradients of the two-layer network, averaged over the batch.

    Args:
        z1: Hidden-layer pre-activations.
        A1: Hidden-layer activations.
        z2: Output pre-activations (unused; kept so the signature mirrors the
            values returned by ``forward_prop``).
        A2: Softmax outputs, one row per sample and one column per class.
        x: Input batch; ``x.shape[0]`` is used as the batch size.
        y: Integer class labels, one per sample.
        w2: Hidden-to-output weight matrix.

    Returns:
        Tuple ``(dw1, dw2, db1, db2)``.
    """
    batch_size = x.shape[0]

    # Build the one-hot targets with the same shape as the network output.
    # Inferring the width from max(y) + 1 breaks the subtraction below whenever
    # the batch happens not to contain the highest class.
    labels = np.asarray(y).reshape(-1)
    y_onehot = np.zeros_like(A2)
    y_onehot[np.arange(labels.size), labels] = 1

    # Output layer: the error term is (softmax output - one-hot target).
    dz2 = A2 - y_onehot
    dw2 = np.dot(A1.T, dz2) / batch_size
    db2 = np.mean(dz2, axis=0, keepdims=True)

    # Hidden layer: push the error back through w2 and mask by the ReLU slope.
    dz1 = np.dot(dz2, w2.T) * relu_deriv(z1)
    dw1 = np.dot(x.T, dz1) / batch_size
    db1 = np.mean(dz1, axis=0, keepdims=True)

    return dw1, dw2, db1, db2

def update(w1, w2, b1, b2, dw1, dw2, db1, db2, alpha):
    """Apply one gradient-descent step with learning rate ``alpha``.

    Each parameter is decremented in place by ``alpha`` times its gradient, so
    arrays passed in are modified; the same objects are also returned.
    """
    # Biases.
    b1 -= db1 * alpha
    b2 -= db2 * alpha
    # Weights.
    w1 -= dw1 * alpha
    w2 -= dw2 * alpha
    return w1, w2, b1, b2


In [None]:
def get_predictions(A2):
    """Return, for each row of ``A2``, the index of its largest entry."""
    predicted_classes = np.argmax(A2, axis=1)
    return predicted_classes

def get_accuracy(predictions, Y):
    """Print the first ten predictions and labels, then return the accuracy.

    The accuracy is the mean of the element-wise comparison
    ``predictions == Y``.
    """
    preview = slice(None, 10)
    print("Predictions:", predictions[preview])
    print("Actual     :", Y[preview])
    is_correct = predictions == Y
    return np.mean(is_correct)

def gradient_descent(X, Y, alpha, iterations, hidden_size=256, num_classes=10):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: Feature matrix, one row per sample; the input width is taken from
            ``X.shape[1]`` instead of being fixed at 784.
        Y: Integer class labels, one per row of ``X``.
        alpha: Learning rate.
        iterations: Number of gradient steps.
        hidden_size: Number of hidden units.
        num_classes: Number of output units (defaults to the original 10).

    Returns:
        Tuple ``(W1, W2, b1, b2)`` of trained parameters.

    Weights are drawn from NumPy's global random state and scaled by
    ``sqrt(2 / fan_in)``; biases start at zero.
    """
    n_features = X.shape[1]

    W1 = np.random.randn(n_features, hidden_size) * np.sqrt(2. / n_features)
    b1 = np.zeros((1, hidden_size))
    W2 = np.random.randn(hidden_size, num_classes) * np.sqrt(2. / hidden_size)
    b2 = np.zeros((1, num_classes))

    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(X, W1, W2, b1, b2)

        dW1, dW2, db1, db2 = backprop(Z1, A1, Z2, A2, X, Y, W2)

        W1, W2, b1, b2 = update(W1, W2, b1, b2, dW1, dW2, db1, db2, alpha)

        # Report every 10th step and the final one. A2 comes from the forward
        # pass above, so the figure reflects the parameters before this
        # iteration's update.
        if i % 10 == 0 or i == iterations - 1:
            predictions = get_predictions(A2)
            acc = get_accuracy(predictions, Y)
            print(f"Iteration {i}: Accuracy = {acc:.4f}")

    return W1, W2, b1, b2


In [None]:
# Train on the training split: learning rate 0.10 for 500 iterations.
W1, W2, b1, b2 = gradient_descent(
    x_train,
    y_train,
    alpha=0.10,
    iterations=500,
)



In [None]:
# Evaluate the trained parameters on the test split (x_test / y_test).
test_outputs = forward_prop(x_test, W1, W2, b1, b2)
A2 = test_outputs[-1]  # last element of the tuple is the softmax output
predictions = get_predictions(A2)
accuracy = get_accuracy(predictions, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")