In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('fashion-mnist_test.csv')

In [3]:
y = df.iloc[0:, 0].values.reshape(-1, 1) #starting from 1st row because csv first row is column names


In [4]:
X = df.iloc[0:, 1:].values
X = X.astype(float) / 255.0

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
X.shape

(10000, 784)

In [7]:
class NeuralNetworkSigmoid:
    def __init__(self, input_size, hidden_size, output_size):
        
        np.random.seed(42)
        # For Hidden Neurons
        self.w1 = np.random.randn(hidden_size, input_size)
        self.b1 = np.zeros((hidden_size, 1))

        # For Output Neurons
        self.w2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros((output_size, 1))

    def sigmoid(self, x): # For Hidden Layers
        return 1 / (1 + np.exp(-x)) 
    
    def sigmoid_derivative(self,x):
        s = self.sigmoid(x)
        return s * (1 - s)

    def softmax(self, x): # For Output Layer
        return np.exp(x) / sum (np.exp(x))
    
    def forward(self, X):
        # Forward pass
        self.z1 = np.dot(X, self.w1) + self.b1 #Z corresponds to pre activation value
        self.a1 = self.sigmoid(self.z1) #a corresponds to activation value

        self.z2 = np.dot(self.a1, self.w2) + self.b2 #Z corresponds to pre activation value
        self.a2 = self.softmax(self.z2)#a corresponds to activation value


        return self.a2

    def compute_loss(self, y_true, y_pred):
        loss = -np.sum(y_true * np.log(y_pred + 1e-10)) / len(y_true)
        return loss

    def backward(self, X, y_true, learning_rate):

        m = X.shape[0]
        
        self.dz2 = (self.a2 - y_true) #derivative of soft max 
        
        self.dw2 = (1/m)*np.dot(self.a1.T, self.dz2)
        
        self.db2 = (1/m)*np.sum(self.dz2, axis = 0, keepdims = True)
        self.dz1 = (1/m)*np.dot(self.w2, self.dz2.T).T*self.sigmoid_derivative(self.a1) #derivative of sigmoid
        
        self.dw1 = (1/m)*np.dot(X.T, self.dz1)
        self.db1 = (1/m)*np.sum(self.dz1, axis = 0, keepdims = True)

        self.w2 = self.w2 - learning_rate*self.dw2
        self.b2 = self.b2 - learning_rate*self.db2
        

        self.w1 = self.w1 - learning_rate*self.dw1
        self.b1 = self.b1 - learning_rate*self.db1

 
    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            predictions = self.forward(X)
            loss = self.compute_loss(y, predictions)
            self.backward(X, y, learning_rate)
            
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, X):
        return self.forward(X)

In [9]:
input_size = X_train.shape[1]
hidden_size = 30
output_size = 10

nns = NeuralNetworkSigmoid(input_size, hidden_size, output_size)

nns.train(X_train, y_train, epochs=100,learning_rate=0.003)

Epoch 0, Loss: 473.35119993700334
Epoch 10, Loss: 468.5098024071775
Epoch 20, Loss: 464.92716781644646
Epoch 30, Loss: 462.54821634979703
Epoch 40, Loss: 461.3177438759756
Epoch 50, Loss: 461.1815982915682
Epoch 60, Loss: 462.08788200502994
Epoch 70, Loss: 463.98782724228886
Epoch 80, Loss: 466.8361598834183
Epoch 90, Loss: 470.5909437537707


In [10]:
test_predictions = nns.forward(X_train)

# Convert one-hot encoded predictions back to class labels
predicted_labels = np.argmax(test_predictions, axis=1)

# Calculate accuracy
accuracy = np.mean(predicted_labels == np.argmax(y_train, axis=1))
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 5.09%
