In [36]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as nn
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,TensorDataset, Dataset
import matplotlib.pyplot as plt
from tqdm import tqdm

In [37]:
df = pd.read_csv('/Users/abhishekwaghchaure/Desktop/Datasets/fashion MNIST/fashion-mnist_train.csv')

In [38]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 359.3 MB


In [39]:
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
X = df.drop(columns = ['label'], axis = 1).values
y = df['label'].values

In [41]:
X[0:2]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], shape=(2, 784))

In [42]:
y[0:2]

array([2, 9])

In [43]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [44]:
X_train = X_train/255.0
X_test = X_test/255.0

In [45]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32).reshape(-1,1,28,28)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
         return len(self.features)

    def __getitem__(self, index):
         return self.features[index], self.labels[index]

In [46]:
train_data = CustomDataset(X_train, y_train)
test_data = CustomDataset(X_test, y_test)

In [47]:
next(iter(train_data))

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

In [48]:
train_loader = DataLoader(train_data, batch_size = 32, shuffle = True)
test_loader = DataLoader(test_data, batch_size = 32, shuffle = False)

In [49]:
class MyCNN(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.convolution = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(3,3), padding='same'),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=(2,2), stride = (2,2)),

            nn.Conv2d(32, 64, kernel_size=(3,3), padding='same'),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=(2,2), stride = (2,2))
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7 , 128),
            nn.ReLU(),
            nn.Dropout(p = 0.4),

            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(p = 0.4),

            nn.Linear(64, num_features)
        )

    def forward(self, x):
        x = self.convolution(x)
        x = self.classifier(x)

        return x

In [50]:
leraning_rate = 0.01
epochs = 100

In [51]:
model = MyCNN(10)
model.to(device = 'mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = leraning_rate, weight_decay = 1e-4)

In [52]:
model.parameters

<bound method Module.parameters of MyCNN(
  (convolution): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): ReLU()
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=3136, out_features=128, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.4, inplace=False)
    (7): Linear(in_features=64, out_features=10, bias=True)
  )
)>

In [55]:
from torchsummary import summary
model = MyCNN(10)
summary(model, input_size=(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
              ReLU-2           [-1, 32, 28, 28]               0
       BatchNorm2d-3           [-1, 32, 28, 28]              64
         MaxPool2d-4           [-1, 32, 14, 14]               0
            Conv2d-5           [-1, 64, 14, 14]          18,496
              ReLU-6           [-1, 64, 14, 14]               0
       BatchNorm2d-7           [-1, 64, 14, 14]             128
         MaxPool2d-8             [-1, 64, 7, 7]               0
           Flatten-9                 [-1, 3136]               0
           Linear-10                  [-1, 128]         401,536
             ReLU-11                  [-1, 128]               0
          Dropout-12                  [-1, 128]               0
           Linear-13                   [-1, 64]           8,256
             ReLU-14                   

In [None]:
## training loop lets go !!!!!!!!
for epoch in range(epochs):
    total_epoch_loss = 0
    total_correct = 0
    total_samples = 0

    with tqdm(train_loader, unit = 'batch') as tepoch:
        tepoch.set_description(f'Epoch : {epoch + 1}/{epochs}')

        for features, labels in tepoch:
            y_hat = model(features)
            loss = criterion(y_hat, labels)

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()

            total_epoch_loss = total_epoch_loss + loss.item()
            # correct = y_hat.argmax(dim = 1) == labels.sum().item()
            correct = (y_hat.argmax(dim=1) == labels).sum().item()
            total_correct = total_correct + correct
            total_samples = total_samples + len(labels)

            batch_accuracy = correct/len(labels) * 100
            tepoch.set_postfix(batch_loss = loss.item(), batch_accuracy = batch_accuracy)

    epoch_loss = total_epoch_loss/len(train_loader)
    epoch_accuracy = total_correct/total_samples
    print(f"Epoch = {epoch + 1} ---- Epoch_Loss = {epoch_loss:.4f} ---- Epoch_Accuracy = {epoch_accuracy:.4f}")

Epoch : 1/100: 100%|██████████| 1500/1500 [00:26<00:00, 55.90batch/s, batch_accuracy=12.5, batch_loss=2.29]


Epoch = 1 ---- Epoch_Loss = 2.3114 ---- Epoch_Accuracy = 0.1075


Epoch : 2/100: 100%|██████████| 1500/1500 [00:27<00:00, 55.36batch/s, batch_accuracy=18.8, batch_loss=2.22]


Epoch = 2 ---- Epoch_Loss = 2.3105 ---- Epoch_Accuracy = 0.1077


Epoch : 3/100: 100%|██████████| 1500/1500 [00:27<00:00, 55.42batch/s, batch_accuracy=12.5, batch_loss=2.31]


Epoch = 3 ---- Epoch_Loss = 2.3121 ---- Epoch_Accuracy = 0.1073


Epoch : 4/100:  70%|██████▉   | 1047/1500 [00:18<00:08, 54.27batch/s, batch_accuracy=12.5, batch_loss=2.3] 