In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Load dataset from CSV
def load_fashion_mnist(csv_path):
    data=pd.read_csv(csv_path).values  
    labels=data[:,0]  
    images=data[:,1:] 

    # Normalize pixel values to [0,1]
    images=images.astype(np.float32)/255.0

    return images,labels

In [4]:
# Load train and test datasets
train_images,train_labels=load_fashion_mnist("dataset\\fashion-mnist_train.csv")
test_images,test_labels=load_fashion_mnist("dataset\\fashion-mnist_test.csv")

# Split train into (train + validation)
num_train=int(0.8*train_images.shape[0])
val_images,val_labels=train_images[num_train:],train_labels[num_train:]
train_images,train_labels =train_images[:num_train],train_labels[:num_train]

print(f"Train:{train_images.shape},Validation:{val_images.shape},Test:{test_images.shape}")

Train:(48000, 784),Validation:(12000, 784),Test:(10000, 784)


In [10]:

X_train=train_images.reshape(-1,1,28,28)

X_val=val_images.reshape(-1,1,28,28)

print(type(X_train),type(X_val),type(train_labels),type(val_labels))

print(X_train.shape)






<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>
(48000, 1, 28, 28)


In [11]:
x_train=X_train.reshape(X_train.shape[0],-1)
x_test=X_val.reshape(X_val.shape[0],-1)

y_train=train_labels
y_test=val_labels

def one_hot_encode(y,num_classes=10):
    one_hot=np.zeros((y.shape[0],num_classes))
    one_hot[np.arange(y.shape[0]),y]=1
    return one_hot
y_train=one_hot_encode(y_train)
y_test=one_hot_encode(y_test)

input_size=784
hidden_size1=128
hidden_size2=64
output_size=10
learning_rate=0.03
epochs=30
batch_size=32
dropout_rate=0.2

np.random.seed(42)
W1=np.random.randn(input_size,hidden_size1)*0.01
b1=np.zeros((1,hidden_size1))
W2=np.random.randn(hidden_size1,hidden_size2)*0.01
b2=np.zeros((1,hidden_size2))
W3=np.random.randn(hidden_size2,output_size)*0.01
b3=np.zeros((1,output_size))

# Activation functions
def relu(Z):
    return np.maximum(0,Z)

def relu_derivative(Z):
    return np.where(Z>0,1,0)

def leaky_relu(Z,alpha=0.01):
    return np.where(Z>0,Z,alpha*Z)

def leaky_relu_derivative(Z,alpha=0.01):
    return np.where(Z>0,1,alpha)

def tanh(Z):
    return np.tanh(Z)

def tanh_derivative(Z):
    return 1-np.tanh(Z)**2

def gelu(Z):
    return 0.5*Z*(1+np.tanh(np.sqrt(2/np.pi)*(Z+0.044715*Z**3)))

def gelu_derivative(Z):
    return 0.5*(1+np.tanh(np.sqrt(2/np.pi)*(Z+0.044715*Z**3)))+(Z*(1-np.tanh(np.sqrt(2/np.pi)*(Z+0.044715*Z**3))**2)*(np.sqrt(2/np.pi)*(1+3*0.044715*Z**2))*0.5)

def softmax(Z):
    expZ=np.exp(Z-np.max(Z,axis=1,keepdims=True))
    return expZ/np.sum(expZ,axis=1,keepdims=True)

def cross_entropy_loss(Y_true,Y_pred):
    return -np.mean(np.sum(Y_true*np.log(Y_pred+1e-9),axis=1))

activation_function=relu
activation_derivative=relu_derivative

def forward_pass(X,dropout_rate=0.2):
    global mask1,mask2
    Z1=np.matmul(X,W1)+b1
    A1=activation_function(Z1)
    mask1=(np.random.rand(*A1.shape)>dropout_rate)
    A1*=mask1
    A1/=(1-dropout_rate)
    Z2=np.matmul(A1,W2)+b2
    A2=activation_function(Z2)
    mask2=(np.random.rand(*A2.shape)>dropout_rate)
    A2*=mask2
    A2/=(1-dropout_rate)
    Z3=np.matmul(A2,W3)+b3
    A3=softmax(Z3)
    return Z1,A1,Z2,A2,Z3,A3

def backward_pass(X,Y,Z1,A1,Z2,A2,Z3,A3,dropout_rate=0.2):
    global W1,b1,W2,b2,W3,b3
    dZ3=A3-Y
    dW3=np.matmul(A2.T,dZ3)/X.shape[0]
    db3=np.mean(dZ3, axis=0, keepdims=True)
    dA2=np.matmul(dZ3, W3.T)
    dA2*=mask2
    dA2/=(1-dropout_rate)
    dZ2=dA2*activation_derivative(Z2)
    dW2=np.matmul(A1.T,dZ2)/X.shape[0]
    db2=np.mean(dZ2,axis=0,keepdims=True)
    dA1=np.matmul(dZ2,W2.T)
    dA1*=mask1
    dA1/=(1-dropout_rate)
    dZ1=dA1*activation_derivative(Z1)
    dW1=np.matmul(X.T,dZ1)/X.shape[0]
    db1=np.mean(dZ1,axis=0,keepdims=True)
    W1-=learning_rate*dW1
    b1-=learning_rate*db1
    W2-=learning_rate*dW2
    b2-=learning_rate*db2
    W3-=learning_rate*dW3
    b3-=learning_rate*db3

loss_array=[]
prev_loss=float('inf')
accuracy_array=[]

def predict(X):
    _,_,_,_,_,A3=forward_pass(X,dropout_rate=0)
    return np.argmax(A3,axis=1)

for epoch in range(epochs):
    indices=np.random.permutation(x_train.shape[0])
    x_train_shuffled=x_train[indices]
    y_train_shuffled=y_train[indices]

    epoch_loss=0
    for i in range(0,x_train.shape[0],batch_size):
        X_batch=x_train_shuffled[i:i+batch_size]
        Y_batch=y_train_shuffled[i:i+batch_size]

        Z1,A1,Z2,A2,Z3,A3=forward_pass(X_batch,dropout_rate)
        loss=cross_entropy_loss(Y_batch,A3)
        backward_pass(X_batch,Y_batch,Z1,A1,Z2,A2,Z3,A3,dropout_rate)
        epoch_loss+=loss

    loss_array.append(epoch_loss)
    if epoch_loss>prev_loss:
        learning_rate/=2
        print("Learning decreased")
    prev_loss=epoch_loss
    if (epoch+1)%5==0:
        y_pred=predict(x_test)
        y_true=np.argmax(y_test,axis=1)
        accuracy=np.mean(y_pred==y_true)
        accuracy_array.append(accuracy)
        print(f"Epoch {epoch+1} of {epochs}, Loss: {epoch_loss:.4f}, Accuracy: {accuracy * 100:.2f}%")
    else:
        print(f"Epoch {epoch+1} of {epochs}, Loss: {epoch_loss:.4f}")



Epoch 1 of 30, Loss: 2391.4933
Epoch 2 of 30, Loss: 1216.3366
Epoch 3 of 30, Loss: 938.7772
Epoch 4 of 30, Loss: 810.1677
Epoch 5 of 30, Loss: 738.0521, Accuracy: 84.43%


KeyboardInterrupt: 

In [19]:
class CNNModel(nn.Module):
    def __init__(self, num_classes=10, pooling_type='max'):
        super(CNNModel, self).__init__()
        self.pooling_type = pooling_type

        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        
        self.feature_size = 64 * 7 * 7  
        
        self.fc = nn.Linear(self.feature_size, num_classes)  

    def forward(self, x, extract_features=False):
        x = F.relu(self.conv1(x))
        x = self._pooling_layer(x)

        x = F.relu(self.conv2(x))
        x = self._pooling_layer(x)

        x = x.view(x.size(0), -1) 

        if extract_features:
            return x  
        
        x = self.fc(x)  
        return x

    def _pooling_layer(self, x):
        return F.max_pool2d(x,2)

In [24]:
cnn_model = CNNModel(num_classes=10)
X_train_1=torch.from_numpy(X_train).float()
Y_train=torch.from_numpy(train_labels).long()
X_val_1=torch.from_numpy(X_val).float()
Y_val=torch.from_numpy(val_labels).long()

train_dataset = TensorDataset(X_train_1, Y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(cnn_model.parameters(),lr=0.001)

In [25]:



num_epochs = 5
for epoch in range(num_epochs):
    cnn_model.train()
    total_loss = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = cnn_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

print("Training complete!")



Epoch 1, Loss: 377.8107
Epoch 2, Loss: 252.2528
Epoch 3, Loss: 219.0842
Epoch 4, Loss: 196.9246
Epoch 5, Loss: 180.6882
Training complete!


In [26]:
cnn_model.eval()
outputs = cnn_model(X_val_1)
_, predictions = torch.max(outputs, 1)
accuracy = torch.sum(predictions == Y_val).item() / len(Y_val) * 100
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 90.39%


In [30]:
with torch.no_grad():
    features_train = cnn_model(X_train_1, extract_features=True)
    features_val = cnn_model(X_val_1, extract_features=True)

print("Extracted Features Shape:", features_train.shape)


Extracted Features Shape: torch.Size([48000, 3136])


In [31]:


print("Extracted features shape:",features_train.shape)

print(type(features_train))
print(type(features_val))
print(type(train_labels))
print(type(val_labels))


Extracted features shape: torch.Size([48000, 3136])
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [32]:
np_1,np_2=features_train,features_val
x_train,y_train,x_test,y_test=np_1.cpu().detach().numpy(),train_labels,np_2.cpu().detach().numpy(),val_labels
x_train,x_test =x_train/x_train.max(),x_test/x_test.max()
x_train=x_train.reshape(x_train.shape[0],-1)
x_test=x_test.reshape(x_test.shape[0],-1)

print(x_train.shape)

def one_hot_encode(y,num_classes=10):
    one_hot=np.zeros((y.shape[0],num_classes))
    one_hot[np.arange(y.shape[0]),y]=1
    return one_hot
y_train=one_hot_encode(y_train)
y_test=one_hot_encode(y_test)

input_size=3136
hidden_size1=128
hidden_size2=64
output_size=10
learning_rate=0.03
epochs=30
batch_size=32
dropout_rate=0.2

np.random.seed(42)
W1=np.random.randn(input_size,hidden_size1)*0.01
b1=np.zeros((1,hidden_size1))
W2=np.random.randn(hidden_size1,hidden_size2)*0.01
b2=np.zeros((1,hidden_size2))
W3=np.random.randn(hidden_size2,output_size)*0.01
b3=np.zeros((1,output_size))

# Activation functions
def relu(Z):
    return np.maximum(0,Z)

def relu_derivative(Z):
    return np.where(Z>0,1,0)

def leaky_relu(Z,alpha=0.01):
    return np.where(Z>0,Z,alpha*Z)

def leaky_relu_derivative(Z,alpha=0.01):
    return np.where(Z>0,1,alpha)

def tanh(Z):
    return np.tanh(Z)

def tanh_derivative(Z):
    return 1-np.tanh(Z)**2

def gelu(Z):
    return 0.5*Z*(1+np.tanh(np.sqrt(2/np.pi)*(Z+0.044715*Z**3)))

def gelu_derivative(Z):
    return 0.5*(1+np.tanh(np.sqrt(2/np.pi)*(Z+0.044715*Z**3)))+(Z*(1-np.tanh(np.sqrt(2/np.pi)*(Z+0.044715*Z**3))**2)*(np.sqrt(2/np.pi)*(1+3*0.044715*Z**2))*0.5)

def softmax(Z):
    expZ=np.exp(Z-np.max(Z,axis=1,keepdims=True))
    return expZ/np.sum(expZ,axis=1,keepdims=True)

def cross_entropy_loss(Y_true,Y_pred):
    return -np.mean(np.sum(Y_true*np.log(Y_pred+1e-9),axis=1))

activation_function=relu
activation_derivative=relu_derivative

def forward_pass(X,dropout_rate=0.2):
    global mask1,mask2
    Z1=np.matmul(X,W1)+b1
    A1=activation_function(Z1)
    mask1=(np.random.rand(*A1.shape)>dropout_rate)
    A1*=mask1
    A1/=(1-dropout_rate)
    Z2=np.matmul(A1,W2)+b2
    A2=activation_function(Z2)
    mask2=(np.random.rand(*A2.shape)>dropout_rate)
    A2*=mask2
    A2/=(1-dropout_rate)
    Z3=np.matmul(A2,W3)+b3
    A3=softmax(Z3)
    return Z1,A1,Z2,A2,Z3,A3

def backward_pass(X,Y,Z1,A1,Z2,A2,Z3,A3,dropout_rate=0.2):
    global W1,b1,W2,b2,W3,b3
    dZ3=A3-Y
    dW3=np.matmul(A2.T,dZ3)/X.shape[0]
    db3=np.mean(dZ3, axis=0, keepdims=True)
    dA2=np.matmul(dZ3, W3.T)
    dA2*=mask2
    dA2/=(1-dropout_rate)
    dZ2=dA2*activation_derivative(Z2)
    dW2=np.matmul(A1.T,dZ2)/X.shape[0]
    db2=np.mean(dZ2,axis=0,keepdims=True)
    dA1=np.matmul(dZ2,W2.T)
    dA1*=mask1
    dA1/=(1-dropout_rate)
    dZ1=dA1*activation_derivative(Z1)
    dW1=np.matmul(X.T,dZ1)/X.shape[0]
    db1=np.mean(dZ1,axis=0,keepdims=True)
    W1-=learning_rate*dW1
    b1-=learning_rate*db1
    W2-=learning_rate*dW2
    b2-=learning_rate*db2
    W3-=learning_rate*dW3
    b3-=learning_rate*db3

loss_array=[]
prev_loss=float('inf')
accuracy_array=[]

def predict(X):
    _,_,_,_,_,A3=forward_pass(X,dropout_rate=0)
    return np.argmax(A3,axis=1)

for epoch in range(epochs):
    indices=np.random.permutation(x_train.shape[0])
    x_train_shuffled=x_train[indices]
    y_train_shuffled=y_train[indices]

    epoch_loss=0
    for i in range(0,x_train.shape[0],batch_size):
        X_batch=x_train_shuffled[i:i+batch_size]
        Y_batch=y_train_shuffled[i:i+batch_size]

        Z1,A1,Z2,A2,Z3,A3=forward_pass(X_batch,dropout_rate)
        loss=cross_entropy_loss(Y_batch,A3)
        backward_pass(X_batch,Y_batch,Z1,A1,Z2,A2,Z3,A3,dropout_rate)
        epoch_loss+=loss

    loss_array.append(epoch_loss)
    if epoch_loss>prev_loss:
        learning_rate/=2
        print("Learning decreased")
    prev_loss=epoch_loss
    if (epoch+1)%5==0:
        y_pred=predict(x_test)
        y_true=np.argmax(y_test,axis=1)
        accuracy=np.mean(y_pred==y_true)
        accuracy_array.append(accuracy)
        print(f"Epoch {epoch+1} of {epochs}, Loss: {epoch_loss:.4f}, Accuracy: {accuracy * 100:.2f}%")
    else:
        print(f"Epoch {epoch+1} of {epochs}, Loss: {epoch_loss:.4f}")

# plt.figure(figsize=(10, 6))
# fig, ax1 = plt.subplots(figsize=(10, 6))

# ax1.plot(range(1, epochs + 1), loss_array, color='orange', linestyle='-', linewidth=1, label='Loss')
# ax1.set_xlabel('Epochs')
# ax1.set_ylabel('Loss', color='orange')
# ax1.tick_params(axis='y')

# ax2 = ax1.twinx()
# ax2.plot(range(5, epochs + 1, 5), accuracy_array, color='blue', linestyle='--', linewidth=1, label='Accuracy')
# ax2.set_ylabel('Accuracy', color='blue')
# ax2.tick_params(axis='y')

# plt.title('Training Loss and Test Accuracy')
# fig.tight_layout()
# plt.show()


(48000, 3136)
Epoch 1 of 30, Loss: 3282.1922
Epoch 2 of 30, Loss: 1420.9801
Epoch 3 of 30, Loss: 928.7048
Epoch 4 of 30, Loss: 771.3124
Epoch 5 of 30, Loss: 678.7016, Accuracy: 84.99%
Epoch 6 of 30, Loss: 618.2691
Epoch 7 of 30, Loss: 562.3737
Epoch 8 of 30, Loss: 503.3449
Epoch 9 of 30, Loss: 465.9221
Epoch 10 of 30, Loss: 432.4650, Accuracy: 90.11%
Epoch 11 of 30, Loss: 412.5715
Epoch 12 of 30, Loss: 395.5066
Epoch 13 of 30, Loss: 383.6852
Epoch 14 of 30, Loss: 369.9119
Epoch 15 of 30, Loss: 361.2814, Accuracy: 90.97%
Epoch 16 of 30, Loss: 347.2040
Epoch 17 of 30, Loss: 343.5088
Epoch 18 of 30, Loss: 330.4753
Epoch 19 of 30, Loss: 321.7928
Epoch 20 of 30, Loss: 317.0097, Accuracy: 91.27%
Epoch 21 of 30, Loss: 308.7700
Epoch 22 of 30, Loss: 302.9335
Epoch 23 of 30, Loss: 293.3309
Epoch 24 of 30, Loss: 290.5275
Epoch 25 of 30, Loss: 283.1849, Accuracy: 91.65%
Epoch 26 of 30, Loss: 277.3105
Epoch 27 of 30, Loss: 271.0014
Epoch 28 of 30, Loss: 267.5697
Epoch 29 of 30, Loss: 262.1845
Epoc