In [13]:
import torch
from torchvision import transforms
import torch.nn as nn
from sklearn.model_selection import StratifiedShuffleSplit
from torchvision.datasets import CIFAR10
import torchvision.models as models
import torch.optim as optim

In [14]:
# define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
# define transforms for images
transform = transforms.Compose(
              [transforms.ToTensor(),
              transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [16]:
cifar10_train = CIFAR10(root='./data', train=True, download=True, transform=transform)
cifar10_test = CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(cifar10_train, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(cifar10_test, batch_size=32, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [17]:
# Load the EfficientNetB0 model
effnet = models.efficientnet_b0(pretrained=True)


# Remove the last layer (classifier)
effnet.classifier = torch.nn.Identity()


In [18]:
# Add global average pooling, dropout, and dense layers
class Cifar10ClassfierModel(nn.Module):
    def __init__(self, effnet, num_classes):
        super(Cifar10ClassfierModel, self).__init__()
        self.effnet = effnet
        self.relu = nn.ReLU()
        self.batch_norm = nn.BatchNorm2d(1280)
        self.dropout = nn.Dropout(p=0.5)
        self.dense1 = nn.Linear(1280, 640)
        self.dense2 = nn.Linear(640, 320)
        self.dense3 = nn.Linear(320, 160)
        self.dense4 = nn.Linear(160, 10)
        self.dense = nn.Linear(1280, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.effnet(x)
        x = self.dense1(x)
        x = self.relu(x)
        x = self.dense2(x)
        x = self.relu(x)
        x = self.dense3(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.dense4(x)
        # x = self.sigmoid(x)
        return x

In [19]:
# Create an instance of the classifier model

num_classes = 10  # Replace with the desired number of output classes
model = Cifar10ClassfierModel(effnet, num_classes)


In [20]:
# Set the model to evaluation mode
model.train()

# Print the modified model architecture
print(model)

model.to(device)

Cifar10ClassfierModel(
  (effnet): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
             

Cifar10ClassfierModel(
  (effnet): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
             

In [21]:
from torchsummary import summary

summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 16, 16]             864
       BatchNorm2d-2           [-1, 32, 16, 16]              64
              SiLU-3           [-1, 32, 16, 16]               0
            Conv2d-4           [-1, 32, 16, 16]             288
       BatchNorm2d-5           [-1, 32, 16, 16]              64
              SiLU-6           [-1, 32, 16, 16]               0
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 8, 1, 1]             264
              SiLU-9              [-1, 8, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]             288
          Sigmoid-11             [-1, 32, 1, 1]               0
SqueezeExcitation-12           [-1, 32, 16, 16]               0
           Conv2d-13           [-1, 16, 16, 16]             512
      BatchNorm2d-14           [-1, 16,

In [22]:
# Define the optimizer and the initial learning rate
optimizer = optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Define the learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=70, gamma=0.1)

# Define the loss function
criterion = nn.CrossEntropyLoss()

In [23]:
# Train the model
num_epochs = 150
for epoch in range(num_epochs):
    # Adjust the learning rate
    model.train()

    scheduler.step()
    total = 0
    correct = 0
    # Train the model for one epoch
    for images, labels in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    accuracy = correct / total
    # Print the loss after each epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()},Accuracy:{100 * accuracy:.2f}%")

    # Change learning rate after 25 epochs
    if epoch == 70:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.00001



Epoch [1/150], Loss: 0.9812472462654114,Accuracy:48.43%
Epoch [2/150], Loss: 1.2390272617340088,Accuracy:66.95%
Epoch [3/150], Loss: 1.191545009613037,Accuracy:72.49%
Epoch [4/150], Loss: 1.0468144416809082,Accuracy:76.46%
Epoch [5/150], Loss: 0.8646208643913269,Accuracy:79.33%
Epoch [6/150], Loss: 0.4564710557460785,Accuracy:81.24%
Epoch [7/150], Loss: 0.46780911087989807,Accuracy:82.97%
Epoch [8/150], Loss: 0.3951375186443329,Accuracy:84.65%
Epoch [9/150], Loss: 0.8171417713165283,Accuracy:86.08%
Epoch [10/150], Loss: 0.7907727956771851,Accuracy:87.29%
Epoch [11/150], Loss: 0.6710140705108643,Accuracy:88.51%
Epoch [12/150], Loss: 0.14784230291843414,Accuracy:89.42%
Epoch [13/150], Loss: 0.342241495847702,Accuracy:90.21%
Epoch [14/150], Loss: 0.6821338534355164,Accuracy:91.17%
Epoch [15/150], Loss: 0.2643417418003082,Accuracy:91.83%
Epoch [16/150], Loss: 0.09649477899074554,Accuracy:92.32%
Epoch [17/150], Loss: 0.19204480946063995,Accuracy:92.98%
Epoch [18/150], Loss: 0.23336072266101

KeyboardInterrupt: ignored

In [24]:
model.eval()  # Set the model to evaluation mode

total = 0
correct = 0

with torch.no_grad():
    for images, labels in test_loader:
        # Forward pass
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)

        # Get the predicted labels
        _, predicted = torch.max(outputs.data, 1)

        # Update total and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate the accuracy
accuracy = correct / total

# Print the accuracy on the test set
print(f"Accuracy on the test set: {100 * accuracy:.2f}%")


Accuracy on the test set: 85.14%


In [25]:
from sklearn.metrics import f1_score, confusion_matrix
# Lists to store predicted labels and ground truth labels
pred_labels = []
true_labels = []

# Iterate over the test dataset and make predictions
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        predicted = torch.argmax(outputs, dim=1)
        pred_labels.extend(predicted.tolist())
        true_labels.extend(labels.tolist())

# Compute F1 score
f1 = f1_score(true_labels, pred_labels, average='macro')
print(f"F1 Score: {f1}")

# Compute confusion matrix
cm = confusion_matrix(true_labels, pred_labels)
print("Confusion Matrix:")
print(cm)





F1 Score: 0.8508247501686974
Confusion Matrix:
[[897   9  16   8   9   4   3   4  38  12]
 [ 14 920   3   2   0   0   3   0  18  40]
 [ 37   1 832  29  35  20  28   9   5   4]
 [ 26   8  51 685  33 115  49  17  11   5]
 [  9   1  52  38 817  18  25  30   9   1]
 [  9   1  38 136  20 749  15  29   1   2]
 [  5   1  27  26  10  10 912   3   5   1]
 [  8   1  16  23  26  40   6 876   0   4]
 [ 31   6   4   8   2   0   1   2 937   9]
 [ 22  48   1   8   0   3   3   3  23 889]]


In [26]:
from google.colab import files
torch.save(model, 'cifar10_classifier_model.pth')

# download checkpoint file
files.download('cifar10_classifier_model.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>