In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

# Define the AlexNet architecture
class AlexNet(nn.Module):
    """AlexNet-style image classifier.

    The network has three parts:

    * ``features``   - five convolutions interleaved with ReLU activations
      and three max-pooling stages;
    * ``avgpool``    - adaptive average pooling that always emits a 6x6 map,
      so the classifier input size does not depend on the image size;
    * ``classifier`` - three fully connected layers with dropout in front of
      the first two.

    The spatial sizes quoted in the comments below hold for a 227x227 input.

    Args:
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        conv_layers = [
            # Block 1: 3 -> 96 channels, 11x11 kernel, stride 4 (227 -> 55),
            # then 3x3/2 max pool (55 -> 27).
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Block 2: 96 -> 256 channels, 5x5 kernel with padding 2 (27 -> 27),
            # then 3x3/2 max pool (27 -> 13).
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Blocks 3-5: three size-preserving 3x3 convolutions
            # (256 -> 384 -> 384 -> 256 channels, 13x13 throughout).
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Final 3x3/2 max pool (13 -> 6).
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        # Pin the feature map to 6x6 regardless of what `features` produced.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(6, 6))

        flattened_size = 256 * 6 * 6  # channels * height * width after avgpool
        hidden_size = 4096
        head_layers = [
            nn.Dropout(p=0.5),
            nn.Linear(flattened_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(inplace=True),
            # Raw class scores (logits); no softmax is applied here.
            nn.Linear(hidden_size, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch ``x``."""
        feature_map = self.avgpool(self.features(x))
        # Keep the batch dimension and collapse everything else.
        flat = torch.flatten(feature_map, 1)
        return self.classifier(flat)

# CIFAR-10 input pipeline.
# Every image is resized to 227x227, converted to a tensor, and then each of
# the three channels is normalized with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Training split first, then the test split; both use the same root folder
# and transform, and download the archive when it is not already on disk.
train_dataset, test_dataset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of 64. Only the training loader shuffles its samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Device, network, loss and optimizer.
# Run on CUDA when PyTorch reports it as available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the 10-class network and place its parameters on the chosen device.
model = AlexNet(num_classes=10)
model = model.to(device)

# Cross-entropy over the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter with a learning rate of 0.001.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Training: 10 full passes over train_loader.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # training mode, so the dropout layers are active
    running_loss = 0.0  # sum of per-batch losses for this epoch

    for i, batch in enumerate(train_loader):
        # Each batch is an (images, targets) pair; move both to the device.
        batch_inputs, batch_targets = (tensor.to(device) for tensor in batch)

        # Standard step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(batch_inputs), batch_targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Progress line only on every 100th step.
        if (i + 1) % 100 != 0:
            continue
        print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}")

    # Mean of the per-batch losses seen during this epoch.
    print(f"Epoch [{epoch + 1}/{num_epochs}], Average Loss: {running_loss / len(train_loader):.4f}")

# Evaluate on the test set.
model.eval()  # evaluation mode, so the dropout layers are disabled
correct = 0  # number of predictions that match the label
total = 0  # number of samples evaluated
with torch.no_grad():  # inference only: no gradients are needed
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # The predicted class is the index of the largest logit. argmax
        # replaces torch.max(outputs.data, 1): the legacy `.data` accessor is
        # unnecessary under no_grad and the max values were never used.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report accuracy as a percentage of the evaluated samples.
print(f"Accuracy on the test set: {100 * correct / total:.2f}%")

# Persist only the learned parameters (state_dict), not the whole module.
torch.save(model.state_dict(), "alexnet_cifar10.pth")

# Run the model on 10 test images and plot them with true/predicted labels.
def imshow(img, ax=None):
    """Display an image tensor that was normalized with mean 0.5 / std 0.5.

    Args:
        img: Channel-first image tensor (channels, height, width). It may
            live on any device; it is detached and copied to the CPU before
            being converted to NumPy.
        ax: Optional Matplotlib axes to draw on. When omitted, the image is
            drawn on the current axes and displayed immediately with
            ``plt.show()``. When given, the image is only drawn on ``ax`` and
            the caller decides when to show the figure, which is what a
            multi-panel figure needs.
    """
    img = img / 2 + 0.5  # undo the normalization: x * std + mean
    npimg = img.detach().cpu().numpy()
    # Matplotlib expects (height, width, channels).
    hwc = np.transpose(npimg, (1, 2, 0))
    if ax is None:
        plt.imshow(hwc)
        plt.show()
    else:
        ax.imshow(hwc)


# Take the first 10 images of the first test batch. test_loader does not
# shuffle, so these are the same images on every run (not a random sample).
dataiter = iter(test_loader)
images, labels = next(dataiter)
images, labels = images[:10].to(device), labels[:10].to(device)

# Make predictions. eval() keeps dropout off and no_grad() skips building an
# autograd graph that would never be used.
model.eval()
with torch.no_grad():
    outputs = model(images)
predicted = outputs.argmax(dim=1)

# Move images and labels back to the CPU for plotting.
images = images.cpu()
labels = labels.cpu()
predicted = predicted.cpu()

# Plot the images with their predicted and true labels in a single 2x5 grid.
# The grid is created up front and the figure is shown once at the end;
# showing inside the loop would flush the figure after the first panel.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
fig, axes = plt.subplots(2, 5, figsize=(10, 4))
for i, ax in enumerate(axes.flat):
    imshow(images[i], ax=ax)
    ax.set_title(f"True: {class_names[labels[i]]}\nPred: {class_names[predicted[i]]}")
    ax.axis('off')
fig.tight_layout()
plt.show()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:26<00:00, 6.50MB/s] 


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified
