In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import precision_score, recall_score, f1_score
import time
import numpy as np


In [3]:

# Define the Depth-wise Separable Convolution
class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(DepthwiseSeparableConv, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, groups=in_channels)
        self.pointwise = nn.Conv2d(in_channels, out_channels, 1)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x

# Define a Dense Block
class DenseBlock(nn.Module):
    def __init__(self, in_channels, growth_rate, num_layers):
        super(DenseBlock, self).__init__()
        self.layers = nn.ModuleList()
        for i in range(num_layers):
            self.layers.append(nn.Sequential(
                DepthwiseSeparableConv(in_channels + i * growth_rate, growth_rate, kernel_size=3, padding=1),
                nn.BatchNorm2d(growth_rate),
                nn.ReLU(inplace=True)
            ))

    def forward(self, x):
        for layer in self.layers:
            out = layer(x)
            x = torch.cat([x, out], dim=1)
        return x

# Define the CNN Architecture
class DenseNet(nn.Module):
    def __init__(self, num_blocks, growth_rate, num_classes=10):
        super(DenseNet, self).__init__()
        self.initial_conv = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        self.dense_blocks = nn.ModuleList()
        in_channels = 64
        for _ in range(num_blocks):
            self.dense_blocks.append(DenseBlock(in_channels, growth_rate, num_layers=4))
            in_channels += 4 * growth_rate
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(in_channels, num_classes)

    def forward(self, x):
        x = self.initial_conv(x)
        for block in self.dense_blocks:
            x = block(x)
        x = self.global_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [4]:

# Load CIFAR-10 Dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Initialize Model, Loss Function, and Optimizer
device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
model = DenseNet(num_blocks=4, growth_rate=32).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [5]:
# Training Function
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    return running_loss / len(train_loader)

# Evaluation Function
def evaluate(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Corrected Accuracy Calculation
    accuracy = (np.array(all_preds) == np.array(all_labels)).mean()

    # Compute Metrics
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    return running_loss / len(test_loader), accuracy, precision, recall, f1

In [7]:

# Training Loop
num_epochs = 100
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss = train(model, train_loader, criterion, optimizer, device)
    test_loss, accuracy, precision, recall, f1 = evaluate(model, test_loader, criterion, device)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, "
          f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}, "
          f"Time: {time.time() - start_time:.2f}s")

# Compare Computational Efficiency
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Number of parameters in the model: {count_parameters(model)}")

KeyboardInterrupt: 

In [8]:
class StandardCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(StandardCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc = nn.Linear(256 * 8 * 8, num_classes)

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


In [9]:
model_cnn = StandardCNN().to(device)
optimizer_cnn = optim.Adam(model_cnn.parameters(), lr=0.001)

In [10]:
num_epochs = 100
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss = train(model_cnn, train_loader, criterion, optimizer_cnn, device)
    test_loss, accuracy, precision, recall, f1 = evaluate(model_cnn, test_loader, criterion, device)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, "
          f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}, "
          f"Time: {time.time() - start_time:.2f}s")
# Compare Computational Efficiency
print(f"Number of parameters of DENSE net model: {count_parameters(model)}")
print(f"Number of parameters of Traditional CNN model: {count_parameters(model_cnn)}")


Epoch 1/100, Train Loss: 1.1349, Test Loss: 0.8693, Accuracy: 0.7053, Precision: 0.7080, Recall: 0.7053, F1: 0.7047, Time: 53.92s
Epoch 2/100, Train Loss: 0.7380, Test Loss: 0.7547, Accuracy: 0.7412, Precision: 0.7437, Recall: 0.7412, F1: 0.7401, Time: 52.59s
Epoch 3/100, Train Loss: 0.5783, Test Loss: 0.7536, Accuracy: 0.7509, Precision: 0.7611, Recall: 0.7509, F1: 0.7497, Time: 51.70s
Epoch 4/100, Train Loss: 0.4475, Test Loss: 0.7801, Accuracy: 0.7541, Precision: 0.7551, Recall: 0.7541, F1: 0.7537, Time: 52.96s
Epoch 5/100, Train Loss: 0.3515, Test Loss: 0.8548, Accuracy: 0.7449, Precision: 0.7537, Recall: 0.7449, F1: 0.7479, Time: 53.63s
Epoch 6/100, Train Loss: 0.2642, Test Loss: 1.0168, Accuracy: 0.7406, Precision: 0.7470, Recall: 0.7406, F1: 0.7416, Time: 52.99s
Epoch 7/100, Train Loss: 0.2073, Test Loss: 1.0978, Accuracy: 0.7401, Precision: 0.7413, Recall: 0.7401, F1: 0.7402, Time: 53.44s
Epoch 8/100, Train Loss: 0.1748, Test Loss: 1.2751, Accuracy: 0.7271, Precision: 0.7289, R

KeyboardInterrupt: 