In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
data=pd.read_csv('../Dataset/fashion-mnist_train.csv')


In [3]:
data.dtypes

label       int64
pixel1      int64
pixel2      int64
pixel3      int64
pixel4      int64
            ...  
pixel780    int64
pixel781    int64
pixel782    int64
pixel783    int64
pixel784    int64
Length: 785, dtype: object

In [4]:
X_train=data.drop(columns='label').astype('float32').values
y_train=data['label'].values

In [5]:

test_df  = pd.read_csv("../Dataset/fashion-mnist_test.csv")


X_test  = test_df.drop(columns='label').astype(np.float32)
y_test  = test_df['label']


In [6]:
def normalize(X):
    return X/255.0

In [7]:
def sigmoid(z):
    return (1/(1+np.exp(-z)))

In [8]:
X_train=normalize(X_train)
X_test=normalize(X_test)


In [9]:
def relu(z):
    return np.maximum(0,z)
 

In [10]:
def relu_derivative(Z):
    return (Z>0).astype('float')

In [11]:
def softmax(z):
    z = z - np.max(z, axis=1, keepdims=True)
    expz = np.exp(z)
    return expz / np.sum(expz, axis=1, keepdims=True)


In [12]:
np.random.seed(42)

W1 = np.random.randn(784, 128) * np.sqrt(2 / 784)
b1 = np.zeros((1, 128))

W2 = np.random.randn(128, 64) * np.sqrt(2 / 128)
b2 = np.zeros((1, 64))

W3 = np.random.randn(64, 10) * np.sqrt(2 / 64)
b3 = np.zeros((1, 10))


In [13]:
def Input_dense(a_in,w,b):
    Z=np.matmul(a_in,w)+b
    a_out=relu(Z)
    return a_out,Z

def Output_dense(a_in,w,b):
    Z=np.matmul(a_in,w)+b
    a_out=softmax(Z)
    return a_out,Z

    

In [14]:
def Seqential(X,W1,B1,W2,B2,W3,B3):
    A_1,Z1=Input_dense(X,W1,B1)
    A_2,Z2=Input_dense(A_1,W2,B2)
    A_OUT,Z3=Output_dense(A_2,W3,B3)
    CACHE=(X,Z1,A_1,Z2,A_2,Z3,A_OUT)
    return A_OUT,CACHE

In [15]:
def cross_entropy(y_hat,y_true):
    n=y_true.shape[0]
    return -np.mean(np.log(y_hat[np.arange(n),y_true]+1e-9))

In [16]:
def backward(cache,y_true):
    X, Z1, A1, Z2, A2, Z3, Y_hat = cache
    n=y_true.shape[0]
    Y=np.zeros_like(Y_hat)
    Y[np.arange(n),y_true]=1

    dZ3 = (Y_hat - Y) / n
    dw3=A2.T@dZ3
    db3=np.sum(dZ3,axis=0,keepdims=True)

    dA2=dZ3@W3.T
    dZ2 = dA2 * relu_derivative(Z2)

    dW2 = A1.T @ dZ2
    db2 = np.sum(dZ2, axis=0,keepdims=True)

    dA1 = dZ2 @ W2.T
    dZ1 = dA1 * relu_derivative(Z1)
    dW1 = X.T @ dZ1
    db1 = np.sum(dZ1, axis=0,keepdims=True)

    return dW1, db1, dW2, db2, dw3, db3


 

    






In [None]:
learning_rate = 0.001
epochs = 10
batch_size = 32

for epoch in range(epochs):

    perm = np.random.permutation(len(X_train))
    Xs = X_train[perm]
    ys = y_train[perm]

    for i in range(0, len(Xs), batch_size):
        X_batch = Xs[i:i+batch_size]
        y_batch = ys[i:i+batch_size]

        y_hat, cache = Seqential(X_batch, W1, b1, W2, b2, W3, b3)
        loss = cross_entropy(y_hat, y_batch)

        dW1, db1, dW2, db2, dW3, db3 = backward(cache, y_batch)

        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        W3 -= learning_rate * dW3
        b3 -= learning_rate * db3

    print(f"Epoch {epoch+1}, Loss: {loss:.4f}")


Epoch 1, Loss: 0.3155
Epoch 2, Loss: 0.2591
Epoch 3, Loss: 0.4314
Epoch 4, Loss: 0.1854
Epoch 5, Loss: 0.2592
Epoch 6, Loss: 0.5790
Epoch 7, Loss: 0.3288
Epoch 8, Loss: 0.3601
Epoch 9, Loss: 0.2457
Epoch 10, Loss: 0.3219


In [23]:
y_pred_probs,_ = Seqential(X_train, W1, b1, W2, b2, W3, b3)
y_pred = np.argmax(y_pred_probs, axis=1)

accuracy = np.mean(y_pred == y_train)
print("trained set accuracy:", accuracy)


trained set accuracy: 0.8925166666666666


In [24]:
y_test_pred, _ = Seqential(X_test.values, W1, b1, W2, b2, W3, b3)
y_test_labels = np.argmax(y_test_pred, axis=1)

accuracy = np.mean(y_test_labels == y_test.values)
print("Test Accuracy:", accuracy)


Test Accuracy: 0.8781
