In [1]:
import sys
import torch
from torch.utils.data import DataLoader
from torch import nn
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torchvision.datasets as datasets
from torchvision.transforms import ToTensor

![CNN architecture diagram](./cnn.png "CNN architecture")

In [2]:

# Fetch both Fashion-MNIST splits into ./data (downloading when missing) and
# convert every image to a tensor. Despite the `mnist_` prefix, these variables
# hold Fashion-MNIST, not the digit dataset.
mnist_train = datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=ToTensor(),
)
mnist_test = datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=ToTensor(),
)

In [3]:

# Mini-batch iterators over the two splits (32 samples per batch).
# Training batches are reshuffled every epoch so the optimizer sees them in a
# different order each time.
train_dataloader = DataLoader(mnist_train, batch_size=32, shuffle=True)
# Evaluation only aggregates per-sample correctness, so order is irrelevant;
# keeping it fixed makes test passes deterministic and skips the shuffle.
test_dataloader = DataLoader(mnist_test, batch_size=32, shuffle=False)


In [4]:
# Baseline CNN: one 3x3 convolution (1 -> 3 channels, reflect padding keeps the
# 28x28 spatial size), followed by a two-layer fully connected classifier that
# emits 10 class scores.
model = nn.Sequential(
    nn.Conv2d(
        in_channels=1,
        out_channels=3,
        kernel_size=3,
        padding=1,
        padding_mode="reflect",
    ),
    nn.ReLU(),
    nn.Flatten(),
    # 3 channels * 28 * 28 pixels = 2352 flattened features
    nn.Linear(in_features=3 * 28 * 28, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=10),
)

- nn.Conv2d(1, 3, kernel_size=(3, 3), padding=1, padding_mode="reflect")

Input: 1 channel (grayscale).

Output: 3 channels (3 filters).

Kernel: 3×3 filter, slides over the image to extract features.

Padding of 1 (in reflect mode) keeps the spatial size at 28×28 after the 3×3 convolution.

Now the image is shaped (3, 28, 28).

- nn.ReLU()

Adds non-linearity (so network can learn complex patterns).

- nn.Flatten()

Turns the (3, 28, 28) feature map into a flat vector.

3×28×28 = 2352. That’s why the next layer has input size 2352.

- nn.Linear(2352, 100)

Fully connected layer: maps 2352 features → 100 features.

- nn.ReLU()

Another non-linearity.

- nn.Linear(100, 10)

Final classifier layer: 10 outputs (because Fashion-MNIST has 10 classes).

In [5]:

# Smoke test: push the first training image through the untrained model as a
# batch of one and confirm the output has one score per class.
first_sample = mnist_train[0]
image = first_sample[0].reshape((1, 1, 28, 28))  # (batch, channel, height, width)
output = model(image)
print(output.shape)

torch.Size([1, 10])


In [6]:
# Multi-class classification objective applied to the model's raw outputs.
loss_fn = torch.nn.CrossEntropyLoss()
# Adam over every trainable parameter of the current model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [7]:
# Train the baseline model for 10 epochs on the CPU and print the summed
# per-batch loss after each epoch.
for epoch in range(10):
    model.train()

    loss_sum = 0
    for X, y in train_dataloader:
        optimizer.zero_grad()
        outputs = model(X)
        # CrossEntropyLoss takes the integer class labels directly, so no
        # one-hot/float conversion is needed (same approach as the GPU
        # training loop later in this notebook).
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
    # Sum (not mean) of the batch losses for this epoch.
    print(loss_sum)

918.5741228684783
625.3834326155484
554.5283737443388
500.3174466062337
464.851099755615
428.3626783154905
395.7108837836422
368.4077425841242
340.5217226119712
314.1243452131748


In [8]:

# Measure classification accuracy on `test_dataloader` (the train=False split;
# the printed label calls it "validation").
model.eval()
with torch.no_grad():  # gradients are not needed for evaluation
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        # argmax of the raw logits: softmax is monotonic, so applying it first
        # cannot change which class wins.
        predictions = model(X).argmax(dim=1)
        total += y.size(0)
        accurate += (predictions == y).sum().item()
    print("Accuracy on validation data:", accurate / total)

Accuracy on validation data: 0.8965


## MaxPooling 

In [9]:
# Same baseline as before, but with a 2x2 max-pool after the convolution that
# halves the feature map to 14x14 before the classifier.
model = nn.Sequential(
    nn.Conv2d(
        in_channels=1,
        out_channels=3,
        kernel_size=3,
        padding=1,
        padding_mode="reflect",
    ),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.Flatten(),
    # 3 channels * 14 * 14 pixels = 588 flattened features
    nn.Linear(in_features=3 * 14 * 14, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=10),
)

In [10]:

# Fresh loss and optimizer bound to the newly created max-pool model; the
# previous optimizer still references the old model's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [11]:
# Train the max-pool model for 10 epochs on the CPU and print the summed
# per-batch loss after each epoch.
for epoch in range(10):
    model.train()

    loss_sum = 0
    for X, y in train_dataloader:
        optimizer.zero_grad()
        outputs = model(X)
        # Integer class labels go straight into CrossEntropyLoss; the one-hot
        # float conversion was unnecessary.
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
    # Sum (not mean) of the batch losses for this epoch.
    print(loss_sum)

1088.2179971262813
773.8737029209733
666.1563119292259
604.8004098087549


KeyboardInterrupt: 

In [None]:
# Accuracy of the CPU max-pool model on `test_dataloader` (the train=False
# split; the printed label calls it "validation").
model.eval()
with torch.no_grad():  # gradients are not needed for evaluation
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        # Softmax is monotonic, so the arg-max of the raw logits is already
        # the predicted class.
        predictions = model(X).argmax(dim=1)
        total += y.size(0)
        accurate += (predictions == y).sum().item()
    print("Accuracy on validation data:", accurate / total)

## GPU 

In [15]:
# Report whether CUDA is usable and, if so, which GPU sits at index 0.
# The name lookup is guarded because it raises on a machine without CUDA,
# which would stop a Restart & Run All on a CPU-only setup.
print(torch.cuda.is_available())   # True
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # Name of your GPU

True
NVIDIA GeForce MX450


### Device setup

In [16]:
# Choose the compute device once: the GPU when CUDA is available, otherwise
# the CPU. Later cells move the model and every batch onto it.
cuda_ok = torch.cuda.is_available()
device = torch.device("cuda" if cuda_ok else "cpu")
print(f"Training on: {device}")
if cuda_ok:
    print(torch.cuda.get_device_name(0))

Training on: cuda
NVIDIA GeForce MX450


In [17]:
# Max-pool CNN again, this time placed on the selected device.
model = nn.Sequential(
    nn.Conv2d(
        in_channels=1,
        out_channels=3,
        kernel_size=3,
        padding=1,
        padding_mode="reflect",
    ),
    nn.MaxPool2d(2),
    nn.ReLU(),
    nn.Flatten(),
    # 3 channels * 14 * 14 pixels = 588 flattened features
    nn.Linear(in_features=3 * 14 * 14, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=10),
)
model = model.to(device)


In [18]:
# Loss and optimizer for the device-resident max-pool model.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [19]:
# Ten training epochs (numbered 1..10) on `device`; after each epoch the summed
# per-batch loss is printed.
for epoch in range(1, 11):
    model.train()
    running_loss = 0

    for batch_inputs, batch_labels in train_dataloader:
        # Each batch must live on the same device as the model.
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = loss_fn(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    print(f"[MaxPool] Epoch {epoch} - Loss: {running_loss:.4f}")


[MaxPool] Epoch 1 - Loss: 1053.0380
[MaxPool] Epoch 2 - Loss: 733.0779
[MaxPool] Epoch 3 - Loss: 650.6327
[MaxPool] Epoch 4 - Loss: 599.4823
[MaxPool] Epoch 5 - Loss: 558.6002
[MaxPool] Epoch 6 - Loss: 522.0804
[MaxPool] Epoch 7 - Loss: 496.1502
[MaxPool] Epoch 8 - Loss: 471.9763
[MaxPool] Epoch 9 - Loss: 451.2240
[MaxPool] Epoch 10 - Loss: 431.9807


In [20]:
# Accuracy of the device-resident max-pool model on `test_dataloader` (the
# train=False split; the printed label calls it "validation").
model.eval()
with torch.no_grad():  # gradients are not needed for evaluation
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        # The predicted class is the arg-max of the raw logits; a softmax in
        # between is redundant because it preserves ordering.
        predictions = model(X).argmax(dim=1)
        total += y.size(0)
        accurate += (predictions == y).sum().item()
    print("[MaxPool] Accuracy on validation data:", accurate / total)

[MaxPool] Accuracy on validation data: 0.8897


## More complex model

In [21]:
# Two conv -> max-pool -> ReLU stages (1 -> 3 -> 6 channels, 28 -> 14 -> 7
# pixels per side), then a two-layer classifier; the layout is printed at the end.
model = nn.Sequential(
    # Stage 1: 1 -> 3 channels, 28x28 -> 14x14
    nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, padding=1, padding_mode="reflect"),
        nn.MaxPool2d(2),
        nn.ReLU(),
    ),
    # Stage 2: 3 -> 6 channels, 14x14 -> 7x7
    nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, padding=1, padding_mode="reflect"),
        nn.MaxPool2d(2),
        nn.ReLU(),
    ),
    nn.Flatten(),
    # Classifier head: 6 * 7 * 7 = 294 features -> 100 -> 10 class scores
    nn.Sequential(
        nn.Linear(in_features=6 * 7 * 7, out_features=100),
        nn.ReLU(),
        nn.Linear(in_features=100, out_features=10),
    ),
)
model = model.to(device)
print(model)

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=reflect)
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=reflect)
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
  )
  (2): Flatten(start_dim=1, end_dim=-1)
  (3): Sequential(
    (0): Linear(in_features=294, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)


In [22]:
# Loss and optimizer for the two-stage CNN.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [23]:
# Train the two-stage CNN for 10 epochs on `device` and print the summed
# per-batch loss after each epoch.
for epoch in range(10):
    model.train()

    loss_sum = 0
    for X, y in train_dataloader:
        # Move the raw integer labels instead of a one-hot float matrix:
        # CrossEntropyLoss accepts class indices, and this transfers less data.
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
    # Sum (not mean) of the batch losses for this epoch.
    print(loss_sum)

1113.313080072403
756.3749868422747
662.9428348839283
609.2082643024623
574.2079039029777
548.4008865840733
524.3564683329314
502.0268983859569
486.3244345188141
467.8021500688046


In [24]:
# Accuracy of the two-stage CNN on `test_dataloader` (the train=False split;
# the printed label calls it "validation").
model.eval()
with torch.no_grad():  # gradients are not needed for evaluation
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        # argmax of the raw logits; softmax would not change the winner.
        predictions = model(X).argmax(dim=1)
        total += y.size(0)
        accurate += (predictions == y).sum().item()
    print("Accuracy on validation data:", accurate / total)

Accuracy on validation data: 0.8881


## Larger CNN

In [25]:
# Wider variant of the two-stage CNN: 32 and 64 convolution filters and a
# three-layer classifier (3136 -> 1000 -> 100 -> 10).
model = nn.Sequential(
    # Stage 1: 1 -> 32 channels, 28x28 -> 14x14
    nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1, padding_mode="reflect"),
        nn.MaxPool2d(2),
        nn.ReLU(),
    ),
    # Stage 2: 32 -> 64 channels, 14x14 -> 7x7
    nn.Sequential(
        nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1, padding_mode="reflect"),
        nn.MaxPool2d(2),
        nn.ReLU(),
    ),
    nn.Flatten(),
    # Classifier head on the 64 * 7 * 7 flattened features
    nn.Sequential(
        nn.Linear(in_features=64 * 7 * 7, out_features=1000),
        nn.ReLU(),
        nn.Linear(in_features=1000, out_features=100),
        nn.ReLU(),
        nn.Linear(in_features=100, out_features=10),
    ),
)
model = model.to(device)

In [26]:
# Loss and optimizer for the larger CNN.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [27]:
# Train the larger CNN for 10 epochs on `device` and print the summed
# per-batch loss after each epoch.
for epoch in range(10):
    model.train()

    loss_sum = 0
    for X, y in train_dataloader:
        # Integer labels are passed to CrossEntropyLoss as-is; building a
        # one-hot float matrix on the CPU and copying it over was unnecessary.
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
    # Sum (not mean) of the batch losses for this epoch.
    print(loss_sum)

800.1645726002753
506.5415538381785
413.3224321035668
347.1238796496764
287.43498641671613
236.00117878778838
187.5386844757013
154.37384290696355
124.61289830028545
104.9892877957027


In [28]:
# Accuracy of the larger CNN on `test_dataloader` (the train=False split; the
# printed label calls it "validation").
model.eval()
with torch.no_grad():  # gradients are not needed for evaluation
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        # argmax of the raw logits; softmax would not change the winner.
        predictions = model(X).argmax(dim=1)
        total += y.size(0)
        accurate += (predictions == y).sum().item()
    print("Accuracy on validation data:", accurate / total)

Accuracy on validation data: 0.9201


## Applying dropout

In [29]:
# Larger CNN with dropout for regularisation: p=0.1 after each convolution
# stage, p=0.3 and p=0.5 inside the classifier. The layout is printed at the end.
model = nn.Sequential(
    # Stage 1: 1 -> 32 channels, 28x28 -> 14x14
    nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1, padding_mode="reflect"),
        nn.MaxPool2d(2),
        nn.ReLU(),
        nn.Dropout(p=0.1),
    ),
    # Stage 2: 32 -> 64 channels, 14x14 -> 7x7
    nn.Sequential(
        nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1, padding_mode="reflect"),
        nn.MaxPool2d(2),
        nn.ReLU(),
        nn.Dropout(p=0.1),
    ),
    nn.Flatten(),
    # Classifier head on the 64 * 7 * 7 flattened features
    nn.Sequential(
        nn.Linear(in_features=64 * 7 * 7, out_features=1000),
        nn.ReLU(),
        nn.Dropout(p=0.3),
        nn.Linear(in_features=1000, out_features=100),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=100, out_features=10),
    ),
)
model = model.to(device)
print(model)

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=reflect)
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
  )
  (1): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=reflect)
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
  )
  (2): Flatten(start_dim=1, end_dim=-1)
  (3): Sequential(
    (0): Linear(in_features=3136, out_features=1000, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=1000, out_features=100, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=100, out_features=10, bias=True)
  )
)


In [30]:

# Loss and optimizer for the dropout-regularised CNN.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [31]:
# Train the dropout CNN for 10 epochs on `device` and print the summed
# per-batch loss after each epoch.
for epoch in range(10):
    # Training mode keeps the model's Dropout layers active.
    model.train()

    loss_sum = 0
    for X, y in train_dataloader:
        # Integer labels are passed to CrossEntropyLoss as-is; building a
        # one-hot float matrix on the CPU and copying it over was unnecessary.
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
    # Sum (not mean) of the batch losses for this epoch.
    print(loss_sum)

1001.566509835422
613.5317837782204
519.0851547382772
460.40099885314703
419.17323758266866
383.80130382440984
344.9323789924383
329.2082580961287
302.02937741577625
289.1371460594237


In [32]:
# Accuracy of the dropout CNN on `test_dataloader` (the train=False split; the
# printed label calls it "validation").
model.eval()  # evaluation mode disables the Dropout layers
with torch.no_grad():  # gradients are not needed for evaluation
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        # argmax of the raw logits; softmax would not change the winner.
        predictions = model(X).argmax(dim=1)
        total += y.size(0)
        accurate += (predictions == y).sum().item()
    print("Accuracy on validation data:", accurate / total)

Accuracy on validation data: 0.9252
