In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import optuna
import pandas as pd
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader, Dataset

In [2]:
# Use the GPU when PyTorch can see one; otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Seed PyTorch's global random number generator for reproducibility.
SEED = 42
torch.manual_seed(SEED)

<torch._C.Generator at 0x76eaebd97bd0>

In [4]:
# Directory holding the Fashion-MNIST CSV exports. Set the FASHION_MNIST_DIR
# environment variable to run the notebook on another machine; when it is
# unset the original location is used.
DATA_DIR = Path(os.environ.get("FASHION_MNIST_DIR", "/home/darshan39/Downloads/fashionmnist"))

# Read both CSV files and stack them into a single frame. ignore_index=True
# renumbers the rows so the two files do not contribute duplicate index values.
df1 = pd.read_csv(DATA_DIR / "fashion-mnist_train.csv")
df2 = pd.read_csv(DATA_DIR / "fashion-mnist_test.csv")
df = pd.concat([df1, df2], ignore_index=True)

In [5]:
# Print a summary of the combined frame: index range, column count, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70000 entries, 0 to 69999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 419.2 MB


In [6]:
# The first column ("label") is the target; every remaining column is a pixel value.
y = df.iloc[:, 0].to_numpy()
x = df.iloc[:, 1:].to_numpy()

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
split_arrays = train_test_split(x, y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split_arrays

In [8]:
# Cast both splits to float32 and divide by 255.0 to rescale the pixel values.
PIXEL_SCALE = 255.0
x_train, x_test = (
    pixels.astype("float32") / PIXEL_SCALE for pixels in (x_train, x_test)
)

In [9]:
# Display the scaled training array (NumPy abbreviates the repr and reports shape and dtype).
x_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], shape=(56000, 784), dtype=float32)

In [10]:
class FashionMNISTDataset(Dataset):
    """In-memory dataset of flat pixel rows reshaped into image tensors.

    Args:
        features: Array-like of shape (n_samples, n_pixels); converted to a
            float32 tensor and reshaped to ``(-1, *image_shape)``.
        labels: Array-like of n_samples integer class ids; converted to a
            ``torch.long`` tensor.
        image_shape: Per-sample tensor shape. Defaults to ``(1, 28, 28)``,
            i.e. one channel of 28x28 pixels.

    Raises:
        ValueError: If the number of reshaped images differs from the number
            of labels.
    """

    def __init__(self, features, labels, image_shape=(1, 28, 28)):
        images = torch.tensor(features, dtype=torch.float32).reshape(-1, *image_shape)
        targets = torch.tensor(labels, dtype=torch.long)

        # Fail early: __len__ is driven by the features, so a mismatch would
        # otherwise surface as an IndexError mid-epoch or as silently unused labels.
        if len(images) != len(targets):
            raise ValueError(
                f"features and labels disagree on sample count: "
                f"{len(images)} images vs {len(targets)} labels"
            )

        self.features = images
        self.labels = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

In [11]:
# Wrap the training split so it can be fed to a DataLoader.
train_dataset = FashionMNISTDataset(features=x_train, labels=y_train)

In [12]:
# Inspect the first (image, label) pair returned by the dataset's __getitem__.
train_dataset[0]

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0039, 0.0000, 0.0510,
           0.4627, 0.1843, 0.0000, 0.0000, 

In [13]:
# Wrap the held-out split so it can be fed to a DataLoader.
test_dataset = FashionMNISTDataset(features=x_test, labels=y_test)

In [14]:

# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# Pinned host memory only helps when batches are copied to a CUDA device. On a
# CPU-only machine it has no effect and DataLoader warns about it, so enable it
# only when the selected device is a GPU.
use_pinned_memory = device.type == "cuda"

# Only the training loader shuffles; the evaluation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=use_pinned_memory)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=use_pinned_memory)

In [15]:
class newNN(nn.Module):
    """Small convolutional classifier for 28x28 images.

    Two Conv -> ReLU -> BatchNorm -> MaxPool stages are followed by a
    three-layer fully connected head with dropout. Each 2x2 pooling halves
    the spatial size (28 -> 14 -> 7), which is why the first Linear layer
    expects ``64 * 7 * 7`` inputs; other input resolutions will not match.

    Args:
        input_size: Number of channels of the input images (passed to the
            first convolution as ``in_channels``).
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, input_size, num_classes=10):
        super().__init__()

        # Convolutional feature extractor; padding='same' keeps the spatial
        # size through each convolution, so only the pooling layers shrink it.
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=input_size, out_channels=32, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Fully connected head; the final layer emits raw logits (no softmax).
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        return self.classifier(self.features(x))
    

In [16]:
# Optimiser step size and the number of passes over the training loader.
learning_rate, epochs = 0.01, 100

In [17]:
# Build the single-channel network on the selected device, with cross-entropy
# loss and SGD (including weight decay) as the training objective and optimiser.
model = newNN(1).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)

In [18]:
for epoch in range(epochs):
    # Put Dropout and BatchNorm into training behaviour at the start of every
    # epoch, so re-running this cell after `model.eval()` was called elsewhere
    # in the notebook still trains correctly.
    model.train()
    total_loss = 0.0

    for batch_features, batch_labels in train_loader:
        # non_blocking lets the copy overlap with compute when the loader
        # yields pinned memory; otherwise it behaves like a normal copy.
        batch_features = batch_features.to(device, non_blocking=True)
        batch_labels = batch_labels.to(device, non_blocking=True)

        # Forward pass and loss for this mini-batch.
        outputs = model(batch_features)
        loss = criterion(outputs, batch_labels)

        # Clear old gradients, backpropagate, and apply the update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    epoch_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

Epoch 1, Loss: 0.6091
Epoch 2, Loss: 0.3687
Epoch 3, Loss: 0.3087
Epoch 4, Loss: 0.2770
Epoch 5, Loss: 0.2513
Epoch 6, Loss: 0.2359
Epoch 7, Loss: 0.2205
Epoch 8, Loss: 0.2060
Epoch 9, Loss: 0.1931
Epoch 10, Loss: 0.1837
Epoch 11, Loss: 0.1709
Epoch 12, Loss: 0.1653
Epoch 13, Loss: 0.1545
Epoch 14, Loss: 0.1452
Epoch 15, Loss: 0.1383
Epoch 16, Loss: 0.1304
Epoch 17, Loss: 0.1224
Epoch 18, Loss: 0.1174
Epoch 19, Loss: 0.1134
Epoch 20, Loss: 0.1062
Epoch 21, Loss: 0.0996
Epoch 22, Loss: 0.0976
Epoch 23, Loss: 0.0910
Epoch 24, Loss: 0.0892
Epoch 25, Loss: 0.0827
Epoch 26, Loss: 0.0790
Epoch 27, Loss: 0.0764
Epoch 28, Loss: 0.0728
Epoch 29, Loss: 0.0727
Epoch 30, Loss: 0.0675
Epoch 31, Loss: 0.0631
Epoch 32, Loss: 0.0625
Epoch 33, Loss: 0.0591
Epoch 34, Loss: 0.0560
Epoch 35, Loss: 0.0576
Epoch 36, Loss: 0.0572
Epoch 37, Loss: 0.0532
Epoch 38, Loss: 0.0516
Epoch 39, Loss: 0.0494
Epoch 40, Loss: 0.0473
Epoch 41, Loss: 0.0458
Epoch 42, Loss: 0.0437
Epoch 43, Loss: 0.0443
Epoch 44, Loss: 0.04

In [19]:
# Switch the model to evaluation mode. eval() returns the module itself, so the
# cell output below is the model's layer-by-layer repr.
model.eval()

newNN(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): ReLU()
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=3136, out_features=128, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.4, inplace=False)
    (7): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [20]:
# Accuracy on the held-out split.
# Set evaluation mode here rather than relying on an earlier cell, so Dropout
# is disabled and BatchNorm uses its running statistics.
model.eval()

correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for batch_features, batch_labels in test_loader:
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device)

        outputs = model(batch_features)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)

        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 92.48%


In [21]:
# Accuracy on the training split, for comparison with the test accuracy.
# Set evaluation mode here rather than relying on an earlier cell, so Dropout
# is disabled and BatchNorm uses its running statistics.
model.eval()

total = 0
correct = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for batch_features, batch_labels in train_loader:
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device)

        outputs = model(batch_features)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)

        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()
print(f"Train Accuracy: {100 * correct / total:.2f}%")

Train Accuracy: 99.88%
