In [231]:
import numpy as np
from sklearn.metrics import f1_score

class Dense:
    def __init__(self, input_dim, output_dim):
        np.random.seed(37)
        self.weights = np.random.randn(input_dim, output_dim) * np.sqrt(2 / (input_dim + output_dim))
        self.bias = np.zeros((1, output_dim))
    
    def forward(self, x):
        self.input = x
        self.output = np.dot(x, self.weights) + self.bias
        return self.output

    def backward(self, d_out):
        d_weights = np.dot(self.input.T, d_out)
        d_bias = np.sum(d_out, axis=0, keepdims=True)
        d_input = np.dot(d_out, self.weights.T)
        # Store gradients for the optimizer
        self.d_weights = d_weights
        self.d_bias = d_bias
        return d_input

class ReLU:
    def forward(self, x):
        self.input = x
        return np.maximum(0, x)
    
    def backward(self, d_out):
        return d_out * (self.input > 0)

class Dropout:
    def __init__(self, rate):
        self.rate = rate
    
    def forward(self, x, training=True):
        #print(x.shape)
        if training:
            self.mask = (np.random.rand(*x.shape) > self.rate) / (1 - self.rate)
            return x * self.mask
        return x
    
    def backward(self, d_out):
        return d_out * self.mask

class Adam:
    def __init__(self, params, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        #print(params[0].weights.shape,params[0].bias.shape)
        self.m = [(np.zeros_like(param.weights),np.zeros_like(param.bias)) for param in params] 
        self.v = [(np.zeros_like(param.weights),np.zeros_like(param.bias)) for param in params]
        #print(self.m[1][1].shape)
        self.t = 0
    
    def update(self, params):
        self.t += 1
        #for i, param in enumerate(params):
        #    print(i,param.weights.shape,param.bias.shape)
        
        for i, param in enumerate(params):
            self.m[i]=list(self.m[i])
            self.v[i]=list(self.v[i])
            self.m[i][0] = self.beta1 * self.m[i][0] + (1 - self.beta1) * param.d_weights
            self.m[i][1] = self.beta1 * self.m[i][1] + (1 - self.beta1) * param.d_bias

            self.v[i][0] = self.beta2 * self.v[i][0] + (1 - self.beta2) * param.d_weights**2
            self.v[i][1] = self.beta2 * self.v[i][1] + (1 - self.beta2) * param.d_bias**2
            self.m[i]=tuple(self.m[i])
            self.v[i]=tuple(self.v[i])

            m_hat = self.m[i][0] / (1 - self.beta1**self.t)
            v_hat = self.v[i][0] / (1 - self.beta2**self.t)
            param.weights -= self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)

            m_hat = self.m[i][1] / (1 - self.beta1**self.t)
            v_hat = self.v[i][1] / (1 - self.beta2**self.t)
            param.bias -= self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)
            

class SoftmaxCrossEntropy:
    def forward(self, logits, labels):
        #print(labels.shape)
        exps = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        #exps = np.exp(logits)
        self.probs = exps / np.sum(exps, axis=1, keepdims=True)
        self.labels = labels
        return -np.mean(np.sum(labels * np.log(self.probs + 1e-9), axis=1))

    def backward(self):
        return (self.probs - self.labels) / self.labels.shape[0]

class Model:
    def __init__(self, layers, learning_rate=0.001):
        self.layers = layers
        self.optimizer = Adam(params=[layer for layer in layers if isinstance(layer, Dense)], lr=learning_rate)
        self.loss_fn = SoftmaxCrossEntropy()
    
    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x
    
    def backward(self, d_out):
        for layer in reversed(self.layers):
            d_out = layer.backward(d_out)

    def train_step(self, x, y):
        logits = self.forward(x)
        loss = self.loss_fn.forward(logits, y)
        d_out = self.loss_fn.backward()
        self.backward(d_out)
        self.optimizer.update(params=[layer for layer in self.layers if isinstance(layer, Dense)])
        return loss
    
    
    def get_scores(self, x, y,val=False):
        logits = self.forward(x)
        exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        #exp_logits=np.exp(logits)
        
        probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

        predictions = np.argmax(probs, axis=1)

        labels = y
        if len(labels.shape) == 1:
            num_classes = np.max(labels) + 1
            labels = np.eye(num_classes)[labels]
        loss = -np.mean(np.sum(labels * np.log(probs + 1e-9), axis=1))

        if len(labels.shape) != 1:
            labels=np.argmax(labels, axis=1)
        accuracy= np.mean(predictions == labels)*100
        
        f1_macro = f1_score(labels, predictions, average='macro')
        if(val):
             return {
            'Loss': loss,
            'Accuracy': accuracy,
            'F1 Score': f1_macro
            }
        else:
            return {
            'Loss': loss,
            'Accuracy': accuracy
            } 
        


In [232]:
import pickle

def save_model_weights(layers, filename="model_weights.pkl"):
    model_weights = {}
    for i, layer in enumerate(layers):
        model_weights[f'layer_{i}'] = {
            'weights': layer.weights,
            'bias': layer.bias
        }
    
    with open(filename, 'wb') as file:
        pickle.dump(model_weights, file)
    print(f"Model weights saved to {filename}")

def load_model_weights(layers, filename="model_weights.pkl"):
    with open(filename, 'rb') as file:
        model_weights = pickle.load(file)

    for i, layer in enumerate(layers):
        if f'layer_{i}' in model_weights:
            layer.weights = model_weights[f'layer_{i}']['weights']
            layer.bias = model_weights[f'layer_{i}']['bias']
    print(f"Model weights loaded from {filename}")


# After training your model
#save_model_weights(layers, "trained_model.pkl")
#load_model_weights(layers,"trained_model.pkl")




In [233]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

df = pd.read_csv('fashion-mnist_train.csv')
print(df.shape)
y = df['label']
num_classes = np.max(y) + 1
y = np.eye(num_classes)[y]
X = df.drop(columns=['label'])
scaler = StandardScaler()
X = scaler.fit_transform(X)
X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=37)

df_test=pd.read_csv('fashion-mnist_test.csv')
y_test = df_test['label']
X_test = df_test.drop(columns=['label'])
X_test=scaler.transform(X_test) #-------------------------------------------IMPORTANT

layers = [
    Dense(X.shape[1], 256),
    ReLU(),
    Dropout(0.1),
    Dense(256, 128),
    ReLU(),
    Dropout(0.1),
    Dense(128, 32),
    ReLU(),
    Dropout(0.1),
    Dense(32, 10),
]

model = Model(layers, learning_rate=0.001)

(60000, 785)


# If you want to load best model weights

In [234]:
'''
layers=[layer for layer in model.layers if isinstance(layer, Dense)]
load_model_weights(layers,"trained_model.pkl")

metrics_test=model.get_scores(X_test, y_test, val=True)

print(f"Test Accuracy: {metrics_test['Accuracy']:.2f}%, Test loss: {metrics_test['Loss']}, F1 score: {metrics_test['F1 Score']:.2f}")
'''

'\nlayers=[layer for layer in model.layers if isinstance(layer, Dense)]\nload_model_weights(layers,"trained_model.pkl")\n\nmetrics_test=model.get_scores(X_test, y_test, val=True)\n\nprint(f"Test Accuracy: {metrics_test[\'Accuracy\']:.2f}%, Test loss: {metrics_test[\'Loss\']}, F1 score: {metrics_test[\'F1 Score\']:.2f}")\n'

# Else if you want to train a new version

In [235]:
#hyperparameters
num_epochs = 10
batch_size = 50
num_batches = X.shape[0] // batch_size

#training loop
for epoch in range(num_epochs):
    epoch_loss = 0
    for i in range(num_batches):
        #minibatch
        start = i * batch_size
        end = start + batch_size
        X_batch = X[start:end]
        y_batch = y[start:end]
        
        #loss calculation
        logits = model.forward(X_batch)
        loss = model.loss_fn.forward(logits, y_batch)
        
        #full pass and optimization step
        model.train_step(X_batch, y_batch)
        
        epoch_loss += loss
    
    #average loss for the epoch
    avg_loss = epoch_loss / num_batches
    print(f"Epoch {epoch + 1}/{num_epochs}, avg Loss: {avg_loss:.4f}")
    #metrics after each epoch
    metrics_train=model.get_scores(X, y, val=False)
    metrics_val=model.get_scores(X_val, y_val,val=True)
    print(f"Training loss: {metrics_train['Loss']}, Training accuracy: {metrics_train['Accuracy']:.2f}%")
    print(f"Validation loss: {metrics_val['Loss']}, Validation accuracy: {metrics_val['Accuracy']:.2f}%, F1 score: {metrics_val['F1 Score']:.4f}")

    

# Testset
'''df_test=pd.read_csv('fashion-mnist_test.csv')
y_test = df_test['label']
X_test = df_test.drop(columns=['label'])
X_test=scaler.transform(X_test) #-------------------------------------------IMPORTANT'''
metrics_test=model.get_scores(X_test, y_test, val=True)

print(f"Test Accuracy: {metrics_test['Accuracy']:.2f}%, Test loss: {metrics_test['Loss']}, F1 score: {metrics_test['F1 Score']:.4f}")

Epoch 1/10, avg Loss: 0.5356
Training loss: 0.41274306099637836, Training accuracy: 84.92%
Validation loss: 0.44853934331198775, Validation accuracy: 83.95%, F1 score: 0.8385
Epoch 2/10, avg Loss: 0.3960
Training loss: 0.3597915720058951, Training accuracy: 86.96%
Validation loss: 0.40084167801076764, Validation accuracy: 85.93%, F1 score: 0.8579
Epoch 3/10, avg Loss: 0.3587
Training loss: 0.3264731810931201, Training accuracy: 88.18%
Validation loss: 0.37286984335779916, Validation accuracy: 86.63%, F1 score: 0.8652
Epoch 4/10, avg Loss: 0.3312
Training loss: 0.30859571039046435, Training accuracy: 88.77%
Validation loss: 0.37756188439351185, Validation accuracy: 86.73%, F1 score: 0.8659
Epoch 5/10, avg Loss: 0.3145
Training loss: 0.28867270425162345, Training accuracy: 89.47%
Validation loss: 0.35903098458913824, Validation accuracy: 87.62%, F1 score: 0.8747
Epoch 6/10, avg Loss: 0.2931
Training loss: 0.27402089866513735, Training accuracy: 89.85%
Validation loss: 0.3450726516525373,

# If you want to save newly trained model

In [None]:
'''
layers=[layer for layer in model.layers if isinstance(layer, Dense)]
save_model_weights(layers, "best.pkl")
'''

Model weights saved to best.pkl
