In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision import transforms
import time



## AlexNet

In [2]:
transform = transforms.Compose(
    [transforms.Resize(224), transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 16

trainset = CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [11]:
# Load the pre-trained AlexNet model
alexnet = models.alexnet(pretrained=True)

# Modify the classifier for CIFAR-10
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, 10)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(alexnet.parameters(), lr=0.001)

# Transfer the model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
alexnet.to(device)
print(device)

cuda:0


In [15]:
epochs = 10  # Suggested number of epochs for fine-tuning
start_time = time.time()

for epoch in range(epochs):  
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 20 == 19:    # Print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 20:.3f}')
            running_loss = 0.0

print('Finished Training')
total_time = time.time() - start_time
print(f'Total training time: {total_time // 60}m {total_time % 60}s')


[1,    20] loss: 0.237
[1,    40] loss: 0.230
[1,    60] loss: 0.280
[1,    80] loss: 0.321
[1,   100] loss: 0.341
[1,   120] loss: 0.337
[1,   140] loss: 0.326
[1,   160] loss: 0.314
[1,   180] loss: 0.316
[1,   200] loss: 0.319
[1,   220] loss: 0.353
[1,   240] loss: 0.331
[1,   260] loss: 0.276
[1,   280] loss: 0.300
[1,   300] loss: 0.309
[1,   320] loss: 0.305
[1,   340] loss: 0.304
[1,   360] loss: 0.332
[1,   380] loss: 0.293
[2,    20] loss: 0.269
[2,    40] loss: 0.247
[2,    60] loss: 0.269
[2,    80] loss: 0.255
[2,   100] loss: 0.279
[2,   120] loss: 0.261
[2,   140] loss: 0.268
[2,   160] loss: 0.312
[2,   180] loss: 0.303
[2,   200] loss: 0.376
[2,   220] loss: 0.336
[2,   240] loss: 0.281
[2,   260] loss: 0.243
[2,   280] loss: 0.307
[2,   300] loss: 0.287
[2,   320] loss: 0.306
[2,   340] loss: 0.293
[2,   360] loss: 0.292
[2,   380] loss: 0.322
[3,    20] loss: 0.228
[3,    40] loss: 0.237
[3,    60] loss: 0.211
[3,    80] loss: 0.269
[3,   100] loss: 0.254
[3,   120] 

In [16]:
# Evaluate the model on the test set
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = alexnet(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')

Accuracy of the network on the 10000 test images: 81.32%


## VGG16

In [3]:
# Load the pre-trained VGG16 model.
vgg16 = models.vgg16(pretrained=True)

# Freeze training for all layers
for param in vgg16.features.parameters():
    param.require_grad = False

# Modify the classifier for CIFAR-10.
# The number of inputs to the first linear layer might vary depending on the VGG variant used.
# VGG16 typically ends with 4096 units in its last fully connected layer.
vgg16.classifier[6] = nn.Linear(4096, 10)

# Define loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg16.classifier.parameters(), lr=0.001)  # Only train the classifier layers.

# Transfer the model to GPU if available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
vgg16.to(device)

print(device)



cuda:0


In [4]:
# Training
epochs = 10
start_time = time.time()

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = vgg16(inputs)  # Use vgg16 here instead of alexnet
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 20 == 19:  # Print every 20 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 20:.3f}')
            running_loss = 0.0

print('Finished Training')
total_time = time.time() - start_time
print(f'Total training time: {total_time // 60:.0f}m {total_time % 60:.0f}s')

[1,    20] loss: 2.353
[1,    40] loss: 1.575
[1,    60] loss: 1.497
[1,    80] loss: 1.525
[1,   100] loss: 1.174
[1,   120] loss: 1.168
[1,   140] loss: 1.205
[1,   160] loss: 1.128
[1,   180] loss: 1.234
[1,   200] loss: 1.220
[1,   220] loss: 1.254
[1,   240] loss: 1.222
[1,   260] loss: 1.186
[1,   280] loss: 1.324
[1,   300] loss: 1.384
[1,   320] loss: 1.379
[1,   340] loss: 1.445
[1,   360] loss: 1.281
[1,   380] loss: 1.180
[1,   400] loss: 1.033
[1,   420] loss: 1.476
[1,   440] loss: 1.206
[1,   460] loss: 1.445
[1,   480] loss: 1.229
[1,   500] loss: 1.317
[1,   520] loss: 1.133
[1,   540] loss: 1.114
[1,   560] loss: 1.217
[1,   580] loss: 1.267
[1,   600] loss: 1.087
[1,   620] loss: 1.173
[1,   640] loss: 1.163
[1,   660] loss: 1.140
[1,   680] loss: 0.995
[1,   700] loss: 1.128
[1,   720] loss: 1.089
[1,   740] loss: 1.344
[1,   760] loss: 1.229
[1,   780] loss: 1.066
[1,   800] loss: 1.256
[1,   820] loss: 1.161
[1,   840] loss: 1.169
[1,   860] loss: 1.027
[1,   880] 

In [5]:
# Evaluate the model on the test set
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = vgg16(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')

Accuracy of the network on the 10000 test images: 78.6%


## Inception V3

In [7]:
# Define transformations, Inception v3 expects 299x299 images
transform = transforms.Compose([
    transforms.Resize(299),  # Inception v3 expects images of size 299x299
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

batch_size = 16

trainset = CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Load the pre-trained Inception v3 model
inception = models.inception_v3(pretrained=True)

# Freeze training for all layers
for param in inception.parameters():
    param.requires_grad = False

# Handle the auxilary net
num_ftrs = inception.AuxLogits.fc.in_features
inception.AuxLogits.fc = nn.Linear(num_ftrs, 10)

# Handle the primary net
num_ftrs = inception.fc.in_features
inception.fc = nn.Linear(num_ftrs, 10)

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to C:\Users\mohdk/.cache\torch\hub\checkpoints\inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:01<00:00, 65.5MB/s] 


In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(inception.fc.parameters(), lr=0.001)  # Train the classifier

# Transfer the model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
inception.to(device)

print(device)


cuda:0


In [9]:
# Training
epochs = 10
start_time = time.time()

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        # Inception v3 has an auxiliary output, which is also used during training
        outputs, aux_outputs = inception(inputs)
        loss1 = criterion(outputs, labels)
        loss2 = criterion(aux_outputs, labels)
        loss = loss1 + 0.4 * loss2  # Combine losses
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 20 == 19:  # print every 20 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 20:.3f}')
            running_loss = 0.0

print('Finished Training')
total_time = time.time() - start_time
print(f'Total training time: {total_time // 60:.0f}m {total_time % 60:.0f}s')

[1,    20] loss: 3.260
[1,    40] loss: 2.970
[1,    60] loss: 2.832
[1,    80] loss: 2.639
[1,   100] loss: 2.616
[1,   120] loss: 2.531
[1,   140] loss: 2.412
[1,   160] loss: 2.423
[1,   180] loss: 2.279
[1,   200] loss: 2.190
[1,   220] loss: 2.333
[1,   240] loss: 2.144
[1,   260] loss: 2.260
[1,   280] loss: 2.229
[1,   300] loss: 2.224
[1,   320] loss: 2.127
[1,   340] loss: 2.138
[1,   360] loss: 2.048
[1,   380] loss: 2.144
[1,   400] loss: 2.074
[1,   420] loss: 2.188
[1,   440] loss: 2.124
[1,   460] loss: 2.079
[1,   480] loss: 2.047
[1,   500] loss: 2.061
[1,   520] loss: 2.104
[1,   540] loss: 2.021
[1,   560] loss: 1.928
[1,   580] loss: 2.116
[1,   600] loss: 1.980
[1,   620] loss: 2.032
[1,   640] loss: 2.092
[1,   660] loss: 1.958
[1,   680] loss: 2.018
[1,   700] loss: 2.003
[1,   720] loss: 1.995
[1,   740] loss: 2.008
[1,   760] loss: 2.057
[1,   780] loss: 2.028
[1,   800] loss: 2.096
[1,   820] loss: 1.993
[1,   840] loss: 2.055
[1,   860] loss: 1.993
[1,   880] 

In [11]:
# Evaluate the model on the test set
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = inception(images)
        _, predicted = torch.max(outputs.logits.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')

Accuracy of the network on the 10000 test images: 65.74%


## Squeezenet

In [12]:
transform = transforms.Compose([
    transforms.Resize(224),  # Resize the images to 224x224 for SqueezeNet
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

batch_size = 16

trainset = CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [14]:
squeezenet = models.squeezenet1_1(pretrained=True)

# Freeze training for all layers
for param in squeezenet.parameters():
    param.requires_grad = False

# Modify the classifier
squeezenet.classifier[1] = nn.Conv2d(512, 10, kernel_size=(1,1), stride=(1,1))
squeezenet.num_classes = 10

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(squeezenet.classifier.parameters(), lr=0.001)  # Train only the classifier

# Transfer the model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
squeezenet.to(device)


Downloading: "https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth" to C:\Users\mohdk/.cache\torch\hub\checkpoints\squeezenet1_1-b8a52dc0.pth
100%|██████████| 4.73M/4.73M [00:00<00:00, 36.0MB/s]


SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): MaxPool2d

In [17]:
epochs = 10
start_time = time.time()

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = squeezenet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 20 == 19:  # print every 20 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 20:.3f}')
            running_loss = 0.0

print('Finished Training')
total_time = time.time() - start_time
print(f'Total training time: {total_time // 60:.0f}m {total_time % 60:.0f}s')

# Evaluation code remains the same


[1,    20] loss: 2.410
[1,    40] loss: 1.945
[1,    60] loss: 1.630
[1,    80] loss: 1.406
[1,   100] loss: 1.266
[1,   120] loss: 1.052
[1,   140] loss: 1.200
[1,   160] loss: 0.963
[1,   180] loss: 1.146
[1,   200] loss: 1.091
[1,   220] loss: 0.964
[1,   240] loss: 0.990
[1,   260] loss: 0.971
[1,   280] loss: 0.935
[1,   300] loss: 0.913
[1,   320] loss: 0.911
[1,   340] loss: 1.032
[1,   360] loss: 0.783
[1,   380] loss: 0.894
[1,   400] loss: 0.831
[1,   420] loss: 0.962
[1,   440] loss: 0.818
[1,   460] loss: 0.891
[1,   480] loss: 0.866
[1,   500] loss: 0.784
[1,   520] loss: 0.865
[1,   540] loss: 0.779
[1,   560] loss: 0.850
[1,   580] loss: 0.847
[1,   600] loss: 0.830
[1,   620] loss: 0.732
[1,   640] loss: 0.806
[1,   660] loss: 0.741
[1,   680] loss: 0.706
[1,   700] loss: 0.664
[1,   720] loss: 0.670
[1,   740] loss: 0.750
[1,   760] loss: 0.735
[1,   780] loss: 0.774
[1,   800] loss: 0.711
[1,   820] loss: 0.753
[1,   840] loss: 0.679
[1,   860] loss: 0.774
[1,   880] 

In [20]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = squeezenet(images)
        
        # Instead of using outputs.logits, use outputs directly
        _, predicted = torch.max(outputs, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')


Accuracy of the network on the 10000 test images: 79.54%


## ResNet

In [21]:
# Adjustments for ResNet: Update normalization in the transform
transform = transforms.Compose([
    transforms.Resize(224),  # ResNet expects 224x224 input
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # Adjust normalization
])

batch_size = 16

# Load CIFAR-10 dataset
trainset = CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [22]:
# Load a pre-trained ResNet model
resnet = models.resnet18(pretrained=True)

# Freeze training for all "feature extraction" layers
for param in resnet.parameters():
    param.requires_grad = False

# Modify the final layer for 10 CIFAR-10 classes
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 10)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet.fc.parameters(), lr=0.001)  # Optimize only the final layer

# Transfer the model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
resnet.to(device)




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [23]:
# Training loop (remains similar)
epochs = 10
start_time = time.time()

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = resnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 20 == 19:  # print every 20 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 20:.3f}')
            running_loss = 0.0

print('Finished Training')
total_time = time.time() - start_time
print(f'Total training time: {total_time // 60:.0f}m {total_time % 60:.0f}s')

[1,    20] loss: 2.248
[1,    40] loss: 1.924
[1,    60] loss: 1.729
[1,    80] loss: 1.612
[1,   100] loss: 1.434
[1,   120] loss: 1.357
[1,   140] loss: 1.311
[1,   160] loss: 1.316
[1,   180] loss: 1.200
[1,   200] loss: 1.133
[1,   220] loss: 1.097
[1,   240] loss: 1.074
[1,   260] loss: 1.108
[1,   280] loss: 1.035
[1,   300] loss: 1.042
[1,   320] loss: 0.961
[1,   340] loss: 0.903
[1,   360] loss: 0.939
[1,   380] loss: 0.997
[1,   400] loss: 0.993
[1,   420] loss: 0.931
[1,   440] loss: 0.870
[1,   460] loss: 0.900
[1,   480] loss: 0.871
[1,   500] loss: 0.843
[1,   520] loss: 0.890
[1,   540] loss: 0.866
[1,   560] loss: 0.761
[1,   580] loss: 0.991
[1,   600] loss: 0.940
[1,   620] loss: 0.851
[1,   640] loss: 0.838
[1,   660] loss: 0.817
[1,   680] loss: 0.797
[1,   700] loss: 0.816
[1,   720] loss: 0.885
[1,   740] loss: 0.846
[1,   760] loss: 0.810
[1,   780] loss: 0.902
[1,   800] loss: 0.878
[1,   820] loss: 0.760
[1,   840] loss: 0.762
[1,   860] loss: 0.815
[1,   880] 

In [25]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = resnet(images)
        
        # Instead of using outputs.logits, use outputs directly
        _, predicted = torch.max(outputs, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')

Accuracy of the network on the 10000 test images: 75.14%


## DenseNet

In [26]:
# Transformations with normalization suited for DenseNet
transform = transforms.Compose([
    transforms.Resize(224),  # Resize images to 224x224
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # Normalization for DenseNet
])

batch_size = 16

# Load CIFAR-10 dataset
trainset = CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [27]:
# Load a pre-trained DenseNet model
densenet = models.densenet121(pretrained=True)

# Freeze all model parameters
for param in densenet.parameters():
    param.requires_grad = False

# Replace the classifier of DenseNet
num_ftrs = densenet.classifier.in_features
densenet.classifier = nn.Linear(num_ftrs, 10)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(densenet.classifier.parameters(), lr=0.001)  # Optimize only the classifier

# Transfer the model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
densenet.to(device)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to C:\Users\mohdk/.cache\torch\hub\checkpoints\densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 45.1MB/s]


DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [28]:
# Training loop
epochs = 10
start_time = time.time()

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = densenet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 20 == 19:  # print every 20 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 20:.3f}')
            running_loss = 0.0

print('Finished Training')
total_time = time.time() - start_time
print(f'Total training time: {total_time // 60:.0f}m {total_time % 60:.0f}s')

[1,    20] loss: 2.157
[1,    40] loss: 1.794
[1,    60] loss: 1.632
[1,    80] loss: 1.448
[1,   100] loss: 1.329
[1,   120] loss: 1.272
[1,   140] loss: 1.231
[1,   160] loss: 1.099
[1,   180] loss: 1.118
[1,   200] loss: 1.069
[1,   220] loss: 1.018
[1,   240] loss: 1.051
[1,   260] loss: 0.983
[1,   280] loss: 0.905
[1,   300] loss: 0.956
[1,   320] loss: 0.941
[1,   340] loss: 0.862
[1,   360] loss: 0.913
[1,   380] loss: 0.895
[1,   400] loss: 0.840
[1,   420] loss: 0.830
[1,   440] loss: 0.814
[1,   460] loss: 0.838
[1,   480] loss: 0.844
[1,   500] loss: 0.872
[1,   520] loss: 0.793
[1,   540] loss: 0.737
[1,   560] loss: 0.792
[1,   580] loss: 0.846
[1,   600] loss: 0.795
[1,   620] loss: 0.768
[1,   640] loss: 0.911
[1,   660] loss: 0.735
[1,   680] loss: 0.843
[1,   700] loss: 0.751
[1,   720] loss: 0.766
[1,   740] loss: 0.694
[1,   760] loss: 0.757
[1,   780] loss: 0.692
[1,   800] loss: 0.719
[1,   820] loss: 0.739
[1,   840] loss: 0.840
[1,   860] loss: 0.821
[1,   880] 

In [29]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = densenet(images)
        
        # Instead of using outputs.logits, use outputs directly
        _, predicted = torch.max(outputs, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')

Accuracy of the network on the 10000 test images: 78.77%
