In [1]:
import torch

In [2]:
# Fix the seed of PyTorch's global random number generator so repeated runs
# start from the same random state.
torch.manual_seed(42)

<torch._C.Generator at 0x7e4c9030dc10>

In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

print(f"using device: {device}")

using device: cuda


In [4]:
import pandas as pd

In [5]:
# Read the training CSV into a DataFrame and preview its first rows.
# NOTE(review): the absolute '/content/...' path looks Colab-specific; it must
# exist on the machine running the notebook.
df = pd.read_csv('/content/fashion-mnist_train.csv')

df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Display (rows, columns) of the loaded frame as a quick sanity check.
df.shape

(60000, 785)

In [7]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [8]:
# Column 0 holds the class label; every remaining column is a pixel value.
y = df.iloc[:, 0].to_numpy()
X = df.iloc[:, 1:].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 42,
)

In [9]:
# Scale the pixel values by 1/255 (presumably mapping 0-255 intensities to [0, 1]).
# NOTE: this rebinds X_train / X_test, so running the cell a second time
# without re-running the split divides by 255 again.
X_train, X_test = X_train / 255., X_test / 255.

In [10]:
class CustomDataset(Dataset):
  """In-memory dataset of single-channel 28x28 images with integer class labels.

  Args:
    features: array-like of pixel values. It is converted to float32 and
      reshaped to (N, 1, 28, 28), so its element count must be a multiple
      of 784.
    labels: array-like of N class indices, stored as int64 (torch.long).

  Raises:
    ValueError: if the number of images differs from the number of labels.
  """

  def __init__(self, features, labels):

    self.features = torch.tensor(features, dtype = torch.float32).reshape(-1, 1, 28, 28)

    self.labels = torch.tensor(labels, dtype = torch.long)

    # Fail fast on misaligned inputs; otherwise the mismatch only shows up
    # later as an IndexError (or silently unused samples) while iterating.
    if len(self.features) != len(self.labels):
      raise ValueError(
          f"features and labels must have the same length, "
          f"got {len(self.features)} and {len(self.labels)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.features)

  def __getitem__(self, index):
    """Return the (image, label) pair stored at `index`."""
    return self.features[index], self.labels[index]

In [11]:
# Wrap each split in a CustomDataset (training split first, then test split).
train_dataset, test_dataset = (
    CustomDataset(X_train, y_train),
    CustomDataset(X_test, y_test),
)

In [12]:
# Pinned (page-locked) host memory only helps when batches are copied to a
# CUDA device. The notebook also supports a CPU fallback, where requesting it
# is useless and makes PyTorch emit a warning, so enable it only with CUDA.

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    shuffle = True,
    batch_size = 32,
    pin_memory = torch.cuda.is_available()
)

# Evaluation keeps the dataset order fixed.
test_loader = DataLoader(
    test_dataset,
    shuffle = False,
    batch_size = 32,
    pin_memory = torch.cuda.is_available()
)

In [13]:
import torch.nn as nn
import torch.optim as optim

In [14]:
class MyNN(nn.Module):
  """Small CNN classifier: two conv/pool stages followed by a three-layer MLP head.

  Args:
    input_features: number of channels of the input images.

  The first linear layer expects 64 * 7 * 7 flattened features, which is what
  two 2x2 poolings produce from 28x28 inputs. The network returns 10 raw
  logits per sample (no softmax is applied).
  """

  def __init__(self, input_features):
    super().__init__()

    # Stage 1: conv -> ReLU -> batch norm -> 2x2 max pool.
    first_stage = [
        nn.Conv2d(input_features, 32, kernel_size = 3, padding = 'same'),
        nn.ReLU(),
        nn.BatchNorm2d(32),
        nn.MaxPool2d(kernel_size = 2, stride = 2),
    ]

    # Stage 2: same pattern, widening from 32 to 64 channels.
    second_stage = [
        nn.Conv2d(32, 64, kernel_size = 3, padding = 'same'),
        nn.ReLU(),
        nn.BatchNorm2d(64),
        nn.MaxPool2d(kernel_size = 2, stride = 2),
    ]

    self.features = nn.Sequential(*first_stage, *second_stage)

    # Fully connected head with dropout after each hidden layer.
    head = [
        nn.Flatten(),
        nn.Linear(64 * 7 * 7, 128),
        nn.ReLU(),
        nn.Dropout(p = 0.4),

        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Dropout(p = 0.4),

        nn.Linear(64, 10),
    ]

    self.classifier = nn.Sequential(*head)

  def forward(self, x):
    """Run the convolutional stages, then the classifier head; return the logits."""
    return self.classifier(self.features(x))

In [15]:
# Optimisation hyperparameters.
epochs = 100
learning_rate = 0.01

# Build the network for single-channel input and move it to the selected
# device; Module.to() returns the module itself, so the call can be chained.
model = MyNN(input_features = 1).to(device)

# Cross-entropy on the raw logits, optimised with SGD plus weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr = learning_rate,
    weight_decay = 1e-4,
)

In [16]:
for epoch in range(epochs):
  # Put the model (back) into training mode. A later cell calls model.eval(),
  # so re-running this loop without this call would train with Dropout
  # disabled and BatchNorm statistics frozen.
  model.train()

  total_epoch_loss = 0

  for batch_features, batch_labels in train_loader:
    # Move the batch to the same device as the model.
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    # Forward pass and loss on the raw logits.
    outputs = model(batch_features)

    loss = criterion(outputs, batch_labels)

    # Clear stale gradients, backpropagate, then update the weights.
    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    total_epoch_loss += loss.item()

  # Mean of the per-batch losses over the epoch.
  avg_loss = total_epoch_loss / len(train_loader)

  print(f"epoch: {epoch+1}, loss: {avg_loss}")

epoch: 1, loss: 0.6448179016311963
epoch: 2, loss: 0.3850010028580825
epoch: 3, loss: 0.3257300717929999
epoch: 4, loss: 0.2895661453406016
epoch: 5, loss: 0.2634353046864271
epoch: 6, loss: 0.24371515599638224
epoch: 7, loss: 0.23019618014122048
epoch: 8, loss: 0.2124596534334123
epoch: 9, loss: 0.19993377064789336
epoch: 10, loss: 0.1875840346046413
epoch: 11, loss: 0.1751425965136538
epoch: 12, loss: 0.16589952625893056
epoch: 13, loss: 0.15517213882769768
epoch: 14, loss: 0.14742719308768087
epoch: 15, loss: 0.14151767203304916
epoch: 16, loss: 0.13481267099517086
epoch: 17, loss: 0.12461822751335179
epoch: 18, loss: 0.12226823945359017
epoch: 19, loss: 0.11108025208658849
epoch: 20, loss: 0.10826815406442619
epoch: 21, loss: 0.10296780346430993
epoch: 22, loss: 0.09587389697274193
epoch: 23, loss: 0.09137028745639449
epoch: 24, loss: 0.08434105165419169
epoch: 25, loss: 0.08356114776773998
epoch: 26, loss: 0.08119640407167995
epoch: 27, loss: 0.07896475848119007
epoch: 28, loss: 0

In [17]:
# Switch the model to evaluation mode (this changes how its Dropout and
# BatchNorm layers behave) before measuring accuracy.
model.eval()

MyNN(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): ReLU()
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=3136, out_features=128, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.4, inplace=False)
    (7): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [19]:
# evaluation on test data

# Set evaluation mode here as well, so the result does not depend on another
# cell having been run first (Dropout must be off, BatchNorm must use its
# running statistics).
model.eval()

total = 0
correct = 0

# Gradients are not needed for inference.
with torch.no_grad():
  for batch_features, batch_labels in test_loader:
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    outputs = model(batch_features)

    # Predicted class = index of the largest logit in each row.
    predictions = outputs.argmax(dim = 1)

    total += batch_labels.shape[0]
    correct += (predictions == batch_labels).sum().item()

print(f"accuracy: {correct/total}")

accuracy: 0.9253333333333333
