In [162]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [163]:
df1 = pd.read_csv('fashion-mnist_train.csv')
df2=pd.read_csv('fashion-mnist_test.csv')

In [164]:
y_train = df1.iloc[0:, 0].values.reshape(-1, 1) #starting from 1st row because csv first row is column names
y_test = df2.iloc[0:, 0].values.reshape(-1, 1)

In [165]:
X_train = df1.iloc[0:, 1:].values.T
X_train = X_train.astype(float) / 255.0

X_test = df2.iloc[0:, 1:].values.T
X_test = X_test.astype(float) / 255.0

In [182]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # For Hidden Neurons
        self.w1 = np.random.randn(hidden_size, input_size)*0.01
        self.b1 = np.zeros((hidden_size, 1))

        # For Output Neurons
        self.w2 = np.random.randn(output_size, hidden_size)*0.01
        self.b2 = np.zeros((output_size, 1))


    def one_hot(self,Y):
        
        one_hot_Y = np.eye(10)[Y.flatten()]
        one_hot_Y=one_hot_Y.T
        return one_hot_Y


    def activation(self,x,alpha=00.1): #tanh
        # return np.tanh(x)
        return np.maximum(0, x)
    
    def activation_deriv(selfx,alpha=00.1):
        # tanh_x = self.activation(x)
        # return 1 - tanh_x**2

        # return x>0
        
        dx = np.ones_like(x)
        dx[x < 0] = alpha
        return dx
        

    def softmax(self, x):
        ex = np.exp(x)  
        return ex / np.sum(ex, axis=0)
        
    
    def forward(self, X):
        # Forward pass
        self.z1 = np.dot(self.w1, X) + self.b1 #Z corresponds to pre activation value
        self.a1 = self.activation(self.z1) #a corresponds to activation value

        self.z2 = np.dot(self.w2,self.a1 ) + self.b2 #Z corresponds to pre activation value

        self.a2 = self.softmax(self.z2)#a corresponds to activation value
        return self.a2

    def compute_loss(self, y, y_pred):
        m = y.shape[1]
        cost = -(1/m)*np.sum(y*np.log(y_pred))
        return cost

    def backward(self, X, y_true, learning_rate):

        m = X.shape[1]

        self.dz2 = (self.a2 - y_true) #derivative of soft max 
        self.dw2 = (1/m)*np.dot(self.dz2,self.a1.T )
        self.db2 = (1/m)*np.sum(self.dz2,axis=1,keepdims=True)


        self.dz1 = (1/m) * np.dot(self.w2.T, self.dz2) * self.activation_deriv(self.z1) #derivative of activation
        self.dw1 = (1/m)*np.dot(self.dz1,X.T )
        self.db1 = (1/m)*np.sum(self.dz1,axis=1,keepdims=True)

        self.w2 = self.w2 - learning_rate*self.dw2
        self.b2 = self.b2 - learning_rate*self.db2
        

        self.w1 = self.w1 - learning_rate*self.dw1
        self.b1 = self.b1 - learning_rate*self.db1


 
    def train(self, X, y, epochs, learning_rate):
        y=self.one_hot(y)
        loss_list=[]
        for epoch in range(epochs):
            predictions = self.forward(X)
            loss = self.compute_loss(y, predictions)
            self.backward(X, y, learning_rate)
            loss_list.append(loss)
            print(f"Epoch {epoch}, Loss: {loss}")
        plt.plot(range(1, len(loss_list) + 1), loss_list, marker='X')
        plt.xlabel('Epoch')
        plt.ylabel('Cost')
        plt.title('Cost vs Epoch')
        plt.show()

    def predict(self, X):
        return self.forward(X)

In [184]:
input_size = X_train.shape[0]
hidden_size = 1000
output_size = 10    

nn = NeuralNetwork(input_size, hidden_size, output_size)

nn.train(X_train, y_train, epochs=50,learning_rate=0.1)

Epoch 0, Loss: 2.306674038515013
Epoch 1, Loss: 2.2800487657465363
Epoch 2, Loss: 2.255329278479819
Epoch 3, Loss: 2.2321315765677063
Epoch 4, Loss: 2.210169626429108
Epoch 5, Loss: 2.189228142444228
Epoch 6, Loss: 2.169143631905517
Epoch 7, Loss: 2.149790919694031
Epoch 8, Loss: 2.131073412426159
Epoch 9, Loss: 2.1129159761164726
Epoch 10, Loss: 2.0952596757638706
Epoch 11, Loss: 2.07805786142032


In [None]:
test_predictions = nn.forward(X_test)

# Convert one-hot encoded predictions back to class labels
predicted_labels = np.argmax(test_predictions, axis=0)

# Calculate accuracy
correct_predictions = np.sum(predicted_labels.reshape(-1, 1) == y_test)
accuracy = correct_predictions / len(predicted_labels.reshape(-1, 1))
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 35.76%


In [177]:
train_predictions = nn.forward(X_train)

# Convert one-hot encoded predictions back to class labels
predicted_labels = np.argmax(train_predictions, axis=0)

# Calculate accuracy
correct_predictions = np.sum(predicted_labels.reshape(-1, 1) == y_train)
accuracy = correct_predictions / len(predicted_labels.reshape(-1, 1))
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 35.91%
