In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/fashionmnist/t10k-labels-idx1-ubyte
/kaggle/input/fashionmnist/t10k-images-idx3-ubyte
/kaggle/input/fashionmnist/fashion-mnist_test.csv
/kaggle/input/fashionmnist/fashion-mnist_train.csv
/kaggle/input/fashionmnist/train-labels-idx1-ubyte
/kaggle/input/fashionmnist/train-images-idx3-ubyte


In [2]:
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [3]:
# Seed PyTorch's random number generator with a fixed value so that
# re-running the notebook starts from the same RNG state.
torch.manual_seed(42)

<torch._C.Generator at 0x7858aaebb470>

In [4]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [5]:
# Load the Fashion-MNIST training CSV into a DataFrame and preview the
# first rows (a `label` column followed by `pixel1` ... `pixel784`).
df = pd.read_csv('/kaggle/input/fashionmnist/fashion-mnist_train.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# (rows, columns) of the loaded frame: one label column plus the pixel columns.
df.shape

(60000, 785)

In [7]:
# Column 0 is the class label; every remaining column is a pixel intensity.
y = df.iloc[:, 0].values
X = df.iloc[:, 1:].values

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scale pixel values by 1/255 (maps 8-bit intensities into [0, 1]).
X_train, X_test = X_train / 255.0, X_test / 255.0

In [8]:
class CustomDataset(Dataset):
    """In-memory image dataset built from flat feature rows and integer labels.

    Args:
        features: Array-like holding one flattened image per sample; it is
            converted to ``float32`` and reshaped to ``(-1, *image_shape)``.
        labels: Array-like of integer class ids, one per sample; converted
            to ``torch.long``.
        image_shape: Per-sample ``(channels, height, width)`` used for the
            reshape. Defaults to ``(1, 28, 28)``, the original hard-coded shape.

    Raises:
        ValueError: If the number of reshaped samples differs from the number
            of labels.
    """

    def __init__(self, features, labels, image_shape=(1, 28, 28)):
        # Stored layout is (num_samples, channels, height, width), i.e. the
        # NCHW order that nn.Conv2d consumes.
        self.features = torch.tensor(features, dtype=torch.float32).reshape(-1, *image_shape)
        self.labels = torch.tensor(labels, dtype=torch.long)

        # Fail fast instead of surfacing a confusing index error (or silently
        # ignoring surplus labels) later during iteration.
        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features and labels disagree on sample count: "
                f"{len(self.features)} vs {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, index):
        """Return the ``(image, label)`` tensor pair at ``index``."""
        return self.features[index], self.labels[index]

In [9]:
# Wrap each scaled split in a dataset; keyword arguments make the
# feature/label pairing explicit.
train_dataset = CustomDataset(features=X_train, labels=y_train)
test_dataset = CustomDataset(features=X_test, labels=y_test)

In [10]:
# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
# Both loaders request pinned host memory for the batches they return.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    pin_memory=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    pin_memory=True,
)

#### Architecture
image (1, 28, 28) {grayscale image} ---> Conv Layer (32 filters - 3x3, Zero 'same' Padding) + ReLU + BatchNorm + MaxPooling (2x2, stride = 2) ---> Conv Layer (64 filters - 3x3, Zero 'same' Padding) + ReLU + BatchNorm + MaxPooling (2x2, stride = 2) ---> Flatten (64 x 7 x 7 = 3136 features) ---> Fully Connected Layers ---> 128 N (ReLU, Dropout 0.4) ---> 64 N (ReLU, Dropout 0.4) ---> 10 N (output layer)

In [11]:
class CustomNN(nn.Module):
    """Two-block convolutional classifier for 28x28 images.

    Each convolutional block is Conv2d(3x3, 'same' padding) -> ReLU ->
    BatchNorm2d -> MaxPool2d(2x2, stride 2). The classifier head is
    Flatten -> Linear(128) -> ReLU -> Dropout -> Linear(64) -> ReLU ->
    Dropout -> Linear(num_classes) and returns raw, unnormalised scores.

    The first Linear layer is sized for ``64 * 7 * 7`` inputs, so the spatial
    size must be 28x28 (halved twice by the pooling layers: 28 -> 14 -> 7).

    Args:
        input_features: Number of channels in the input images.
        num_classes: Size of the output layer. Defaults to 10, the original
            hard-coded value.
    """

    def __init__(self, input_features, num_classes=10):
        super().__init__()

        self.features = nn.Sequential(
            nn.Conv2d(input_features, 32, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.BatchNorm2d(32),  # one normalised channel per conv filter
            nn.MaxPool2d(kernel_size=2, stride=2),  # 28x28 -> 14x14

            nn.Conv2d(32, 64, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 14x14 -> 7x7
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),

            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(p=0.4),

            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for a batch of images."""
        x = self.features(x)
        x = self.classifier(x)
        return x

In [12]:
# Optimisation hyperparameters
epochs = 100          # number of full passes over the training loader
learning_rate = 1e-2  # step size handed to the optimiser

In [13]:
# Build the single-channel network and move its parameters to the chosen device
# (Module.to returns the same module, so the call can be chained).
model = CustomNN(1).to(device)

# Plain SGD with a small weight decay, scored with cross-entropy on the raw outputs.
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)
criterion = nn.CrossEntropyLoss()

In [14]:
# training loop
# Put dropout and batch-norm layers into training mode explicitly. A freshly
# built module is already in that mode, but re-running this cell after the
# evaluation cell (which calls model.eval()) would otherwise train with
# dropout disabled and frozen batch-norm statistics.
model.train()

for epoch in range(epochs):
    total_epoch_loss = 0.0
    for batch_features, batch_labels in train_loader:
        # move data to the same device as the model
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        # forward pass
        outputs = model(batch_features)

        # loss calc.
        loss = criterion(outputs, batch_labels)

        # backward prop
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()

        # update parameters
        optimizer.step()

        # .item() detaches the scalar so the autograd graph is not kept alive
        total_epoch_loss += loss.item()

    # mean of the per-batch losses for this epoch
    avg_loss = total_epoch_loss / len(train_loader)
    print(f"Epoch: {epoch+1}, Loss: {avg_loss}")

Epoch: 1, Loss: 0.645764212201039
Epoch: 2, Loss: 0.3855956253260374
Epoch: 3, Loss: 0.3269298223083218
Epoch: 4, Loss: 0.2906766528512041
Epoch: 5, Loss: 0.2684087316567699
Epoch: 6, Loss: 0.24543450677394868
Epoch: 7, Loss: 0.2317977262586355
Epoch: 8, Loss: 0.21200263266886274
Epoch: 9, Loss: 0.19924064063529173
Epoch: 10, Loss: 0.18785916786423573
Epoch: 11, Loss: 0.1741490151528269
Epoch: 12, Loss: 0.16479040394971767
Epoch: 13, Loss: 0.15527757383479426
Epoch: 14, Loss: 0.14707749420497568
Epoch: 15, Loss: 0.14178179269377142
Epoch: 16, Loss: 0.13196539153469106
Epoch: 17, Loss: 0.12392696688013773
Epoch: 18, Loss: 0.12154236876095335
Epoch: 19, Loss: 0.11181760843936354
Epoch: 20, Loss: 0.10845250668609514
Epoch: 21, Loss: 0.10341922196062903
Epoch: 22, Loss: 0.09728056827140973
Epoch: 23, Loss: 0.09184706817334518
Epoch: 24, Loss: 0.08592614394115905
Epoch: 25, Loss: 0.07976328049468187
Epoch: 26, Loss: 0.08095818672992755
Epoch: 27, Loss: 0.07770882428359861
Epoch: 28, Loss: 0

In [15]:
# Switch dropout / batch-norm layers to inference behaviour before scoring.
model.eval()

# running tallies over the held-out split
correct = 0
total = 0

with torch.no_grad():
    for batch_features, batch_labels in test_loader:
        # move data to the same device as the model
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device)

        outputs = model(batch_features)
        # the column index of the largest score in each row is the predicted class
        predicted = outputs.argmax(dim=1)

        # accumulate hit count and sample count
        correct += (predicted == batch_labels).sum().item()
        total += batch_labels.shape[0]

    print("Testing Accuracy: ", correct/total)

Testing Accuracy:  0.9255
