In [1]:
import torch
import torchvision
train = torchvision.datasets.CIFAR10(root='./data',train=True,download=False,transform=torchvision.transforms.ToTensor())
test = torchvision.datasets.CIFAR10(root='./data',train=False,download=False,transform=torchvision.transforms.ToTensor())

In [2]:
trainloader = torch.utils.data.DataLoader(train,batch_size=512,shuffle=True)
testloader = torch.utils.data.DataLoader(test,batch_size=512,shuffle=False)

In [36]:
import torch.nn as nn
import torch.nn.functional as F
import torch

class ImprovedCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(ImprovedCNN, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)  
        self.bn2 = nn.BatchNorm2d(64)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        self.conv4 = nn.Conv2d(128, 256, kernel_size=5, stride=3, padding=1)  
        self.bn4 = nn.BatchNorm2d(256)

        self.fc1 = nn.Linear(256*4, 256)
        self.bn_fc1 = nn.BatchNorm1d(256)  
        self.fc2 = nn.Linear(256, num_classes)

        self.dropout = nn.Dropout(0.3)  

    def forward(self, x):
        x = F.silu(self.bn1(self.conv1(x)))
        x = F.silu(self.bn2(self.conv2(x)))

        x = F.silu(self.bn3(self.conv3(x)))
        x = F.silu(self.bn4(self.conv4(x)))

        x = x.flatten(1)
        x = self.dropout(F.silu(self.bn_fc1(self.fc1(x))))
        return self.fc2(x)
        return x

model = ImprovedCNN(num_classes=10)

In [37]:
with torch.no_grad():
  a = next(iter(trainloader))
  print(a[0].shape)
  a = model(a[0])
  print(a.shape)

torch.Size([512, 3, 32, 32])
torch.Size([512, 10])


In [38]:
device = "cuda"
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model = model.to(device)
from tqdm import tqdm
from tqdm import tqdm

from tqdm import tqdm

for i in range(100):
    correct, total, running_loss = 0, 0, 0
    train_bar = tqdm(trainloader, desc=f'Train Epoch {i}')

    for image, label in train_bar:
        image, label = image.to(device), label.to(device)

        out = model(image)
        loss_value = loss(out, label)

        optimizer.zero_grad()
        loss_value.backward()
        optimizer.step()

        predictions = out.argmax(dim=1)
        correct += (predictions == label).sum().item()
        total += label.size(0)
        running_loss += loss_value.item()

        train_bar.set_postfix(loss=running_loss / total, acc=100 * correct / total)

    correct, total, test_loss = 0, 0, 0
    test_bar = tqdm(testloader, desc=f'Test Epoch {i}')

    with torch.no_grad():
        for test, test_label in test_bar:
            test, test_label = test.to(device), test_label.to(device)

            test_out = model(test)
            loss_value = loss(test_out, test_label)
            test_loss += loss_value.item()

            predictions = test_out.argmax(dim=1)
            correct += (predictions == test_label).sum().item()
            total += test_label.size(0)

            test_bar.set_postfix(loss=test_loss / total, acc=100 * correct / total)

Train Epoch 0: 100%|██████████| 98/98 [00:04<00:00, 24.15it/s, acc=51.8, loss=0.00263]
Test Epoch 0: 100%|██████████| 20/20 [00:00<00:00, 39.47it/s, acc=60.9, loss=0.00219]
Train Epoch 1: 100%|██████████| 98/98 [00:03<00:00, 25.08it/s, acc=66.2, loss=0.00187]
Test Epoch 1: 100%|██████████| 20/20 [00:00<00:00, 40.41it/s, acc=66.6, loss=0.00189]
Train Epoch 2: 100%|██████████| 98/98 [00:03<00:00, 24.95it/s, acc=75.1, loss=0.00141]
Test Epoch 2: 100%|██████████| 20/20 [00:00<00:00, 39.81it/s, acc=68.5, loss=0.00179]
Train Epoch 3: 100%|██████████| 98/98 [00:03<00:00, 24.99it/s, acc=80.7, loss=0.00109]
Test Epoch 3: 100%|██████████| 20/20 [00:00<00:00, 40.10it/s, acc=70, loss=0.0018]   
Train Epoch 4:  52%|█████▏    | 51/98 [00:02<00:01, 24.30it/s, acc=87, loss=0.000748]  


KeyboardInterrupt: 