In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/fashionmnist/t10k-labels-idx1-ubyte
/kaggle/input/fashionmnist/t10k-images-idx3-ubyte
/kaggle/input/fashionmnist/fashion-mnist_test.csv
/kaggle/input/fashionmnist/fashion-mnist_train.csv
/kaggle/input/fashionmnist/train-labels-idx1-ubyte
/kaggle/input/fashionmnist/train-images-idx3-ubyte


In [2]:
import numpy as np
import random
import time
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.preprocessing import OneHotEncoder

In [8]:
class SingleLayerNN:
    def __init__(self, input_size, output_size, learning_rate=0.01):
        self.weights = np.random.randn(input_size, output_size) * 0.01  # Small random values
        self.bias = np.zeros((1, output_size))
        self.learning_rate = learning_rate

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)  # Derivative of sigmoid

    def forward(self, X):
        self.input = X
        self.z = np.dot(X, self.weights) + self.bias  # Linear transformation
        self.output = self.sigmoid(self.z)  # Apply activation
        return self.output

    def backward(self, y_true):
        error = self.output - y_true  # Error in prediction
        d_output = error * self.sigmoid_derivative(self.output)  # Delta (Gradient)
        
        d_weights = np.dot(self.input.T, d_output)  # Weight gradient
        d_bias = np.sum(d_output, axis=0, keepdims=True)  # Bias gradient

        # Update weights and bias using gradient descent
        self.weights -= self.learning_rate * d_weights
        self.bias -= self.learning_rate * d_bias

        loss = np.mean(error**2)  # Mean Squared Error
        return loss

    def train(self, X, y, epochs=100, X_test=None, y_test=None):
        start_time = time.time()
        
        for epoch in range(epochs):
            output = self.forward(X)
            loss = self.backward(y)
            
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
        
        end_time = time.time()
        print(f"Total Training Time: {end_time - start_time:.2f} seconds")
        
        if X_test is not None and y_test is not None:
            self.evaluate(X_test, y_test)

    def predict(self, X):
        return (self.forward(X) > 0.5).astype(int)  # Convert probabilities to class labels
    
    def evaluate(self, X_test, y_test):
        y_pred = self.predict(X_test)
        
        
        accuracy = accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1))
        f1 = f1_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1), average='macro')
        conf_matrix = confusion_matrix(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1))

        print(f"Accuracy: {accuracy:.4f}, F1-score: {f1:.4f}")
        print("Confusion Matrix:\n", conf_matrix)



In [9]:
train_data = pd.read_csv("/kaggle/input/fashionmnist/fashion-mnist_train.csv")
test_data = pd.read_csv("/kaggle/input/fashionmnist/fashion-mnist_test.csv")

# **2. Preprocess Data**
X_train = train_data.iloc[:, 1:].values / 255.0  
y_train = train_data.iloc[:, 0].values.reshape(-1, 1)  

X_test = test_data.iloc[:, 1:].values / 255.0  
y_test = test_data.iloc[:, 0].values.reshape(-1, 1)  

# **3. One-Hot Encoding for Labels**
encoder = OneHotEncoder(sparse=False)
y_train_onehot = encoder.fit_transform(y_train)
y_test_onehot = encoder.transform(y_test)

# **4. Initialize Model**
input_size = X_train.shape[1]  
output_size = y_train_onehot.shape[1]  
model = SingleLayerNN(input_size, output_size, learning_rate=0.1)

# **5. Train Model**
model.train(X_train, y_train_onehot, epochs=100, X_test=X_test, y_test=y_test_onehot)

# **6. Evaluate Model**
# y_pred = model.predict(X_test)
# y_pred = np.argmax(model.forward(X_test), axis=1)  # Convert one-hot predictions to class labels
# y_test = y_test.ravel()  # Ensure y_test is a 1D array

# accuracy = accuracy_score(y_test, y_pred)
# f1 = f1_score(y_test, y_pred, average='macro')
# #conf_matrix = confusion_matrix(y_test, y_pred)

# print(f"Accuracy: {accuracy:.4f}")
# print(f"F1 Score: {f1:.4f}")
#print("Confusion Matrix:")
#print(conf_matrix)



Epoch 0, Loss: 0.2479
Epoch 10, Loss: 0.1000
Epoch 20, Loss: 0.1000
Epoch 30, Loss: 0.1000
Epoch 40, Loss: 0.1000
Epoch 50, Loss: 0.1000
Epoch 60, Loss: 0.1000
Epoch 70, Loss: 0.1000
Epoch 80, Loss: 0.1000
Epoch 90, Loss: 0.1000
Total Training Time: 18.31 seconds
Accuracy: 0.1000, F1-score: 0.0182
Confusion Matrix:
 [[1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]]


In [10]:
class Multilayer_Perceptron(object):
    def __init__(self, sizes):
        self.layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y,1) for y in sizes[1:]]
        self.weights = [np.random.randn(y,x) for x, y in zip(sizes[:-1], sizes[1:])]


    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = self.sigmoid(np.dot(w,a)+b)
        return a

    def backpropagation(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        activation = x
        activation_list = [x]
        z_list = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            z_list.append(z)
            activation = self.sigmoid(z)
            activation_list.append(activation)

        delta = self.cost_derivative(activation_list[-1], y) * self.sigmoid_prime(z_list[-1]) 
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activation_list[-2].transpose())

        for layer in range(2, self.layers):
            z = z_list[-layer]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.weights[-layer+1].transpose(), delta)*sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta, activation_list[-layer-1].transpose())

        return (nabla_b, nabla_w)


    def update_mini_batch(self, mini_batch, eta):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backpropagation(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        self.weights = [w-(eta/len(mini_batch))*nw for w,nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]


    def Stochastic_Gradient_Descent(self, epochs, training_data, mini_batch_size, eta, test_data=None):
        n= len(training_data)
        
        if test_data:
            test_n = len(test_data)
            #n = len(training_data)
        start_time = time.time()
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size] for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)

            if test_data:
                #print('here')
                #print("Epoch{0}: {1}/{2}".format(j, self.evaluate(test_data), test_n))
                accuracy, f1, conf_matrix = self.evaluate(test_data)
                print(f"Epoch {j}: Accuracy = {accuracy:.4f}, F1-score = {f1:.4f}")
                print("Confusion Matrix:\n", conf_matrix)
       
            else:
                #print('code is here')
                print("Epoch{0} complete".format(j))
        end_time = time.time()
        total_time = end_time-start_time
        print(f"Total Training Time: {end_time - start_time:.2f} seconds")

    def cost_derivative(self, output_activations, y):
        return (output_activations-y)
    
    def sigmoid(self, z):
        return 1/(1+np.exp(-z))

    def sigmoid_prime(self, z):
        result = self.sigmoid(z)*(1-self.sigmoid(z))
        return result


    def evaluate(self, test_data):
        y_pred = []
        y_true = []
        for x, y in test_data:
            predicted = np.argmax(self.feedforward(x))
            actual = np.argmax(y)
            y_pred.append(predicted)
            y_true.append(actual)
        #test_results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in test_data]
        #return sum(int(x == y) for (x, y) in test_results)
        accuracy = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average="macro")
        conf_matrix = confusion_matrix(y_true, y_pred)

        return accuracy, f1, conf_matrix


        
        


In [11]:
training_data = pd.read_csv('/kaggle/input/fashionmnist/fashion-mnist_train.csv')
test_data = pd.read_csv('/kaggle/input/fashionmnist/fashion-mnist_test.csv')
y_train = training_data['label']
X_train = training_data.drop(['label'], axis=1)
y_test = test_data['label']
X_test = test_data.drop(['label'], axis=1)



In [12]:
X_train = X_train.values/255.0
X_test = X_test.values/255.0


# Convert labels to one-hot encoding
def one_hot_encode(y, num_classes=10):
    one_hot = np.zeros((len(y), num_classes))
    one_hot[np.arange(len(y)), y] = 1
    return one_hot

y_train_oh = one_hot_encode(y_train)
y_test_oh = one_hot_encode(y_test)

# Reshape inputs to column vectors
X_train = [x.reshape(-1, 1) for x in X_train]
y_train_oh = [y.reshape(-1, 1) for y in y_train_oh]
X_test = [x.reshape(-1, 1) for x in X_test]
y_test_oh = [y.reshape(-1, 1) for y in y_test_oh]

# Combine into tuples
training_data = list(zip(X_train, y_train_oh))
test_data = list(zip(X_test, y_test_oh))


In [13]:
# Define model architecture: [784 input nodes, 128 hidden nodes, 10 output nodes]
mlp = Multilayer_Perceptron([784, 16, 16, 10])

# Train the model
mlp.Stochastic_Gradient_Descent(
    epochs=10, 
    training_data=training_data, 
    mini_batch_size=32, 
    eta=0.1, 
    test_data=test_data
)


Epoch 0: Accuracy = 0.3332, F1-score = 0.2591
Confusion Matrix:
 [[ 21 203 255 186 214   0  78   7  35   1]
 [  9 859  31  38  24   0  27   6   6   0]
 [  0  81 138  69 597   0  89  10  13   3]
 [ 21 282  82 369 135   0  62   3  46   0]
 [  3  53 105 137 557   0  92  28  25   0]
 [ 17 128  24  51  32   0   7 577  87  77]
 [ 10 165 177 100 387   0 127  11  22   1]
 [  0  19   0   2   7   0   0 949  11  12]
 [  6 148  27  37 186   0  18 304 268   6]
 [  1  79  31  18 141   0  18 619  49  44]]
Epoch 1: Accuracy = 0.4888, F1-score = 0.4385
Confusion Matrix:
 [[609  66  49 119  87   0  39   1  28   2]
 [ 58 884  10  26   5   0  14   1   2   0]
 [ 48  26 192  38 599   0  79   2  10   6]
 [234 165  12 468  67   0  16   0  38   0]
 [131  25  81  88 589   0  50   2  29   5]
 [ 31  63  25  12  22   0   4 449  78 316]
 [254  58 147  64 354   0  86   1  33   3]
 [  0   4   0   0   3   0   0 837   3 153]
 [ 17  29  36  18 120   0  21  85 449 225]
 [  1   9  41   2  24   0   2 106  41 774]]
Epoch 2: