# Class Sample Visualization

In [None]:
import numpy as np
import wandb
from tensorflow.keras.datasets import fashion_mnist

# Open the W&B run that will receive the per-class sample images
wandb.init(project="fashion-mnist-dataset", name="class_samples")

# Load the Fashion-MNIST train/test splits
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Human-readable class names, positioned by their integer label (0-9)
class_labels = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

# For each label, pick the first training image carrying that label and
# wrap it as a captioned wandb.Image
images = []
for label_id, label_name in enumerate(class_labels):
    first_match = np.where(y_train == label_id)[0][0]
    images.append(wandb.Image(x_train[first_match], caption=label_name))

# Send all ten images to W&B under a single key
wandb.log({"fashion-mnist-visualization": images})

# Close the run
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mma23c014[0m ([33mma23c014-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


# Feedforward Neural Network

In [None]:
import argparse
import numpy as np
import wandb
from tensorflow.keras.datasets import fashion_mnist

# Activation Functions and Derivatives
def identity(x):
    """Pass-through activation: return ``x`` itself, unchanged."""
    return x


def identity_derivative(x):
    """Derivative of the identity activation: ones with the shape and dtype of ``x``."""
    ones = np.ones_like(x)
    return ones

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The textbook form calls ``exp(-x)``, which overflows for large negative
    ``x``. Here only ``exp(-|x|)`` is computed, which always lies in (0, 1].

    Args:
        x: NumPy array (or scalar) of pre-activations.

    Returns:
        Element-wise sigmoid of ``x``, same shape as ``x``.
    """
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x))        -> numerator 1
    # x <  0: exp(x) / (1 + exp(x))    -> numerator exp(x) == decay
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)


def sigmoid_derivative(x):
    """Derivative of the sigmoid with respect to ``x``: ``s * (1 - s)`` where ``s = sigmoid(x)``."""
    s = sigmoid(x)  # evaluate once and reuse
    return s * (1 - s)

def tanh(x):
    """Hyperbolic-tangent activation (delegates to ``np.tanh``)."""
    return np.tanh(x)


def tanh_derivative(x):
    """Derivative of tanh with respect to ``x``: ``1 - tanh(x)**2``."""
    t = np.tanh(x)
    return 1 - t ** 2

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified


def relu_derivative(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere (including at 0)."""
    positive_mask = x > 0
    return positive_mask.astype(float)

# Registry: activation name -> (function, derivative-of-function).
# FeedForwardNN looks its hidden-layer activation up here by name.
activation_functions = dict([
    ("identity", (identity, identity_derivative)),
    ("sigmoid", (sigmoid, sigmoid_derivative)),
    ("tanh", (tanh, tanh_derivative)),
    ("ReLU", (relu, relu_derivative)),
])

# Loss Functions
def mse(y_true, y_pred):
    """Mean squared error, averaged over every element of the arrays."""
    residual = y_true - y_pred
    return np.mean(residual ** 2)


def mse_derivative(y_true, y_pred):
    """Gradient of ``mse`` with respect to ``y_pred``.

    The result is already divided by the total element count
    (``y_true.size``).
    NOTE(review): FeedForwardNN.backward divides by the batch size again, so
    MSE gradients end up scaled down by that extra factor - confirm whether
    this is intended.
    """
    residual = y_pred - y_true
    return 2 * residual / y_true.size

def cross_entropy(y_true, y_pred):
    """Cross-entropy summed over all entries and divided by the number of rows.

    A constant 1e-8 is added to ``y_pred`` before the logarithm so that a
    predicted probability of exactly zero does not produce ``-inf``.
    """
    log_probs = np.log(y_pred + 1e-8)
    total = np.sum(y_true * log_probs)
    return -total / y_true.shape[0]


def cross_entropy_derivative(y_true, y_pred):
    """Element-wise gradient of the (un-averaged) cross-entropy w.r.t. ``y_pred``.

    Uses the same 1e-8 offset as ``cross_entropy`` to avoid division by zero.
    """
    safe_pred = y_pred + 1e-8
    return -y_true / safe_pred

# Registry: loss name -> (loss function, derivative w.r.t. the prediction).
# train() selects one of these pairs by name.
loss_functions = dict([
    ("mean_squared_error", (mse, mse_derivative)),
    ("cross_entropy", (cross_entropy, cross_entropy_derivative)),
])

# Neural Network Class
class FeedForwardNN:
    """Fully connected feed-forward classifier with a softmax output layer.

    The network has ``hidden_layers`` hidden layers of ``hidden_size`` units,
    all sharing one activation, followed by a linear layer whose outputs are
    passed through softmax. Training is plain gradient descent, applied
    in place by ``backward``.
    """

    def __init__(self, input_size, output_size, hidden_layers, hidden_size, activation, weight_init):
        """Create the layers and initialise their parameters.

        Args:
            input_size: Number of input features.
            output_size: Number of output classes.
            hidden_layers: Number of hidden layers.
            hidden_size: Units in every hidden layer.
            activation: Key into ``activation_functions`` for the hidden layers.
            weight_init: ``"random"`` (standard normal scaled by 0.01) or
                ``"Xavier"`` (standard normal scaled by ``sqrt(1 / fan_in)``).

        Raises:
            KeyError: If ``activation`` is not a registered activation name.
            ValueError: If ``weight_init`` is not a supported scheme.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers = hidden_layers
        self.hidden_size = hidden_size
        self.activation_func, self.activation_deriv = activation_functions[activation]

        # Fail loudly instead of leaving the model without weights/biases.
        if weight_init not in ("random", "Xavier"):
            raise ValueError(
                f"Unsupported weight_init {weight_init!r}; expected 'random' or 'Xavier'"
            )

        # Consecutive pairs of this list are the (fan_in, fan_out) of each layer.
        layer_sizes = [input_size] + [hidden_size] * hidden_layers + [output_size]
        self.weights = []
        self.biases = []
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            scale = 0.01 if weight_init == "random" else np.sqrt(1 / fan_in)
            self.weights.append(np.random.randn(fan_in, fan_out) * scale)
            self.biases.append(np.zeros((1, fan_out)))

    def forward(self, X):
        """Run a forward pass and cache the intermediate values for ``backward``.

        Args:
            X: 2-D array with one sample per row.

        Returns:
            The softmax outputs, one row per sample.
        """
        self.a = [X]   # activations; a[0] is the input
        self.z = []    # pre-activations of every layer
        for i in range(self.hidden_layers):
            self.z.append(self.a[-1] @ self.weights[i] + self.biases[i])
            self.a.append(self.activation_func(self.z[-1]))
        self.z.append(self.a[-1] @ self.weights[-1] + self.biases[-1])
        self.a.append(self.softmax(self.z[-1]))  # Output Layer (Softmax)
        return self.a[-1]

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted first for numerical stability."""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def backward(self, X, y, learning_rate, loss_derivative):
        """Back-propagate through the last ``forward`` pass and take one gradient step.

        ``forward`` must have been called on the same batch beforehand, since
        this method reads the cached ``self.a`` / ``self.z``.

        Args:
            X: The input batch; only its row count is used here.
            y: Targets with the same shape as the network output.
            learning_rate: Step size of the gradient-descent update.
            loss_derivative: Callable ``(y_true, y_pred)`` returning the
                gradient of the loss with respect to the softmax outputs.
                The gradients are divided by the batch size in this method.
        """
        m = X.shape[0]
        probs = self.a[-1]

        # Gradient w.r.t. the softmax *outputs* ...
        dA = loss_derivative(y, probs)
        # ... pushed through the softmax Jacobian to get the gradient w.r.t.
        # the logits: dZ_j = p_j * (dA_j - sum_k dA_k * p_k).
        dZ = probs * (dA - np.sum(dA * probs, axis=1, keepdims=True))

        # Compute every gradient first so that back-propagation always uses
        # the weights that produced the cached forward pass.
        n_layers = len(self.weights)
        grads_w = [None] * n_layers
        grads_b = [None] * n_layers
        for i in range(self.hidden_layers, -1, -1):
            grads_w[i] = self.a[i].T @ dZ / m
            grads_b[i] = np.sum(dZ, axis=0, keepdims=True) / m
            if i > 0:
                dZ = (dZ @ self.weights[i].T) * self.activation_deriv(self.z[i - 1])

        # Gradient-descent update, applied in place.
        for i in range(self.hidden_layers + 1):
            self.weights[i] -= learning_rate * grads_w[i]
            self.biases[i] -= learning_rate * grads_b[i]


# Training Function
def train(args):
    """Train a FeedForwardNN on Fashion-MNIST and log the per-epoch loss to W&B.

    Args:
        args: Namespace providing ``wandb_project``, ``wandb_entity``,
            ``epochs``, ``batch_size``, ``loss``, ``learning_rate``,
            ``num_layers``, ``hidden_size``, ``activation`` and
            ``weight_init``.

    Side effects:
        Starts a W&B run, prints and logs the training loss after every
        epoch, and finishes the run on exit (also when an error is raised).
    """
    wandb.init(project=args.wandb_project, entity=args.wandb_entity)
    try:
        # Load dataset; only the training split is used by this function.
        (X_train, y_train), _ = fashion_mnist.load_data()
        # Scale pixels to [0, 1] and flatten each 28x28 image into a 784-vector.
        X_train = (X_train / 255.0).reshape(-1, 28 * 28)

        # One-hot encode the integer labels (10 classes).
        y_train_one_hot = np.eye(10)[y_train]

        # Initialize Model
        model = FeedForwardNN(input_size=784, output_size=10,
                              hidden_layers=args.num_layers, hidden_size=args.hidden_size,
                              activation=args.activation, weight_init=args.weight_init)

        loss_fn, loss_deriv = loss_functions[args.loss]

        # Training Loop
        for epoch in range(args.epochs):
            for i in range(0, len(X_train), args.batch_size):
                X_batch = X_train[i:i + args.batch_size]
                y_batch = y_train_one_hot[i:i + args.batch_size]

                # forward() caches the activations that backward() consumes;
                # its return value is not needed here.
                model.forward(X_batch)
                model.backward(X_batch, y_batch, args.learning_rate, loss_deriv)

            # Loss over the whole training set after this epoch's updates.
            y_train_pred = model.forward(X_train)
            train_loss = loss_fn(y_train_one_hot, y_train_pred)

            print(f"Epoch {epoch + 1}/{args.epochs}, Loss: {train_loss:.4f}")
            wandb.log({"epoch": epoch + 1, "loss": train_loss})
    finally:
        # Always close the run so it is not left open in the notebook kernel.
        wandb.finish()
# Argument Parser
if __name__ == "__main__":
    # Command-line interface; every option has a default, so the cell also
    # runs without any arguments.
    parser = argparse.ArgumentParser()
    # Default project matches the one used by the class-sample cell
    # (the previous default carried a stray ")" in the name).
    parser.add_argument("-wp", "--wandb_project", type=str, default="fashion-mnist-dataset")
    parser.add_argument("-we", "--wandb_entity", type=str, default="ma23c014-indian-institute-of-technology-madras")
    parser.add_argument("-e", "--epochs", type=int, default=1)
    parser.add_argument("-b", "--batch_size", type=int, default=4)
    parser.add_argument("-l", "--loss", type=str, choices=["mean_squared_error", "cross_entropy"], default="cross_entropy")
    # Only "sgd" is accepted; train() does not read this value.
    parser.add_argument("-o", "--optimizer", type=str, choices=["sgd"], default="sgd")
    parser.add_argument("-lr", "--learning_rate", type=float, default=0.1)
    parser.add_argument("-nhl", "--num_layers", type=int, default=1)
    parser.add_argument("-sz", "--hidden_size", type=int, default=4)
    parser.add_argument("-a", "--activation", type=str, choices=["identity", "sigmoid", "tanh", "ReLU"], default="sigmoid")
    parser.add_argument("-w_i", "--weight_init", type=str, choices=["random", "Xavier"], default="random")

    # parse_known_args() collects unrecognised argv entries in `unknown`
    # instead of exiting with an error; they are ignored here.
    args, unknown = parser.parse_known_args()
    train(args)

Epoch 1/1, Loss: 16.5786
