In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Select the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# CIFAR-100 preprocessing: convert each image to a tensor, then normalize all
# three channels with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the CIFAR-100 training split (downloaded into ./data when requested by
# download=True) and serve it in shuffled mini-batches of 64.
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

# Load the CIFAR-100 test split with the same preprocessing; shuffle=False keeps
# the evaluation order fixed.
testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

class CNNWithoutAttention(nn.Module):
    """Plain CNN baseline (no attention) for 3-channel 32x32 images.

    Architecture: conv(3->64) -> ReLU -> 2x2 max-pool -> conv(64->128) -> ReLU
    -> 2x2 max-pool -> flatten -> fc(8192->512) -> ReLU -> fc(512->num_classes).

    Args:
        num_classes: Size of the output layer. Defaults to 100, which is the
            value the original hard-coded head used.
    """

    def __init__(self, num_classes=100):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # The 3x3 convolutions use padding=1 and keep the spatial size; the two
        # 2x2 poolings in forward() halve it twice, so 32x32 becomes 8x8.
        self.fc1 = nn.Linear(128 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return unnormalized class scores (logits) for a batch of images.

        Args:
            x: Image tensor whose last three dimensions are (3, 32, 32).

        Returns:
            Tensor with one row of ``num_classes`` logits per input image.

        Raises:
            RuntimeError: If the pooled feature map is not 128x8x8, i.e. the
                input images are not 32x32.
        """
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        # view(-1, 8192) would silently fold extra spatial positions into the
        # batch dimension for larger inputs (e.g. 64x64), producing more output
        # rows than input images. Fail loudly instead.
        expected = (self.conv2.out_channels, 8, 8)
        if tuple(x.shape[-3:]) != expected:
            raise RuntimeError(
                'CNNWithoutAttention expects 32x32 inputs; pooled feature map has shape %s, expected %s'
                % (tuple(x.shape[-3:]), expected)
            )
        x = x.view(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the model and move its parameters to the selected device
# (the GPU when available, otherwise the CPU).
model = CNNWithoutAttention().to(device)

# Train the model
def train_model(model, criterion, optimizer, num_epochs=5, loader=None, log_every=200):
    """Train ``model`` in place and print the running average loss.

    Args:
        model: Network to optimize; it is switched to training mode.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that already holds ``model``'s parameters.
        num_epochs: Number of full passes over ``loader``.
        loader: Iterable of batches where ``batch[0]`` is the inputs and
            ``batch[1]`` the labels. Defaults to the module-level
            ``trainloader`` so existing calls keep working.
        log_every: Print the mean loss of the last ``log_every`` mini-batches
            each time that many batches have been processed.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError('log_every must be a positive integer, got %r' % (log_every,))
    if loader is None:
        loader = trainloader

    # Send batches to wherever the model lives rather than relying on a global;
    # fall back to the module-level device only for parameterless models.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    # Make sure train-time behaviour (dropout, batch-norm updates) is active.
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(loader):
            inputs, labels = data[0].to(target_device), data[1].to(target_device)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % log_every == 0:  # Print every `log_every` mini-batches
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / log_every))
                running_loss = 0.0

# Define the loss function (cross-entropy on the model's raw class scores) and
# an Adam optimizer over all model parameters with learning rate 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model for 10 epochs (overriding train_model's default of 5).
train_model(model, criterion, optimizer, num_epochs=10)

# Define a function to test the model
def test_model(model, testloader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %.2f %%' % (100 * correct / total))

# Evaluate the trained model on the held-out test split and print its accuracy.
test_model(model, testloader)

Device: cuda:0
Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:03<00:00, 43308172.99it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified
[1,   200] loss: 4.014
[1,   400] loss: 3.386
[1,   600] loss: 3.092
[2,   200] loss: 2.670
[2,   400] loss: 2.621
[2,   600] loss: 2.540
[3,   200] loss: 2.206
[3,   400] loss: 2.216
[3,   600] loss: 2.199
[4,   200] loss: 1.871
[4,   400] loss: 1.881
[4,   600] loss: 1.905
[5,   200] loss: 1.516
[5,   400] loss: 1.585
[5,   600] loss: 1.624
[6,   200] loss: 1.220
[6,   400] loss: 1.283
[6,   600] loss: 1.357
[7,   200] loss: 0.942
[7,   400] loss: 1.028
[7,   600] loss: 1.082
[8,   200] loss: 0.692
[8,   400] loss: 0.782
[8,   600] loss: 0.867
[9,   200] loss: 0.504
[9,   400] loss: 0.565
[9,   600] loss: 0.671
[10,   200] loss: 0.368
[10,   400] loss: 0.422
[10,   600] loss: 0.496
Accuracy of the network on the 10000 test images: 37.20 %
