In [None]:
from torchvision import transforms, datasets
from torch.utils.data.dataset import random_split
import torch.utils.data
import torchvision
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Preprocessing for the small CNNs: PIL image -> tensor, then normalise every
# RGB channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

batch_size = 128

full_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# Hold out 20% of the official training split for validation.
num_train = len(full_dataset)
num_val = int(0.2 * num_train)
train_dataset, val_dataset = random_split(full_dataset, [num_train - num_val, num_val])

test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Options shared by all three loaders; only the shuffle flag differs.
_loader_options = dict(batch_size=batch_size, num_workers=4)
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=True, **_loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, **_loader_options)

# CIFAR-10 class names, indexed by integer label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

### CNNs

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class BaseCNN(nn.Module):
    """Baseline CNN: two conv/ReLU/max-pool stages and a three-layer classifier.

    Both convolutions use 5x5 kernels with padding 2 (spatial size preserved)
    and each stage halves the resolution with a 2x2 max-pool. ``fc1`` expects
    ``32 * 8 * 8`` features, i.e. 3-channel 32x32 inputs. Dropout (p=0.5) is
    applied after each of the first two fully connected layers.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=2)

        self.dropout = nn.Dropout(0.5)  # other rates tried: 0.3, 0.2

        self.fc1 = nn.Linear(in_features=32 * 8 * 8, out_features=120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # Output layer with 10 classes

    def forward(self, x):
        """Return raw class scores of shape ``(batch, 10)`` for input ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        x = torch.flatten(x, 1)  # Flatten all dimensions except the batch dimension

        # nn.Dropout is not in-place: its result must be assigned back,
        # otherwise the layer has no effect on the forward pass.
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)

        return self.fc3(x)
    
class threeLayerCNN(nn.Module):
    """CNN with three conv -> batch-norm -> ReLU -> max-pool stages.

    Every convolution uses a 5x5 kernel with padding 2 (spatial size
    preserved) and every stage halves the resolution with a 2x2 max-pool.
    ``fc1`` expects ``64 * 4 * 4`` features, i.e. 3-channel 32x32 inputs.
    Dropout (p=0.5) follows each of the first two fully connected layers.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(num_features=16)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm2d(num_features=32)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.bn3 = nn.BatchNorm2d(num_features=64)

        self.dropout = nn.Dropout(0.5)  # other rates tried: 0.3, 0.2

        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, 10)`` for input ``x``."""
        # The batch-norm layers were previously created but never called, so
        # they were dead parameters; each one now normalises its conv output.
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))

        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)

        return self.fc3(x)
    
class fourLayerCNN(nn.Module):
    """CNN with four conv/ReLU/max-pool stages and a three-layer classifier.

    Channel widths grow 3 -> 16 -> 32 -> 64 -> 128. All convolutions use 5x5
    kernels with padding 2, and a shared 2x2 max-pool halves the resolution
    after each one. ``fc1`` expects ``128 * 2 * 2`` features, i.e. 32x32
    inputs. No dropout is used in this variant.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=5, padding=2)

        self.fc1 = nn.Linear(128 * 2 * 2, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, 10)`` for input ``x``."""
        # Feature extractor: the same ReLU + pooling follows every convolution.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.pool(F.relu(conv(x)))

        # Classifier head on the flattened feature maps.
        features = x.flatten(start_dim=1)
        hidden = F.relu(self.fc1(features))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
        
class fiveLayerCNN(nn.Module):
    """CNN with five conv/ReLU/max-pool stages and a three-layer classifier.

    Channel widths grow 3 -> 16 -> 32 -> 64 -> 128 -> 256. All convolutions
    use 5x5 kernels with padding 2, and a shared 2x2 max-pool halves the
    resolution after each one. ``fc1`` expects ``256 * 1 * 1`` features,
    i.e. 32x32 inputs reduced to a single spatial position.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=5, padding=2)
        self.conv5 = nn.Conv2d(128, 256, kernel_size=5, padding=2)

        self.fc1 = nn.Linear(256 * 1 * 1, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, 10)`` for input ``x``."""
        # Feature extractor: the same ReLU + pooling follows every convolution.
        conv_stack = (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5)
        for conv in conv_stack:
            x = self.pool(F.relu(conv(x)))

        # Classifier head on the flattened feature maps.
        features = x.flatten(start_dim=1)
        hidden = F.relu(self.fc1(features))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
    
def train_model(model, criterion, optimizer, train_loader, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: network to optimise; switched to training mode.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(avg_train_loss, train_accuracy)``: the mean of the per-batch
        losses and the accuracy in percent over every sample seen in the
        epoch. Both are ``0.0`` when the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # Accumulate over the whole epoch; assigning here instead of adding
        # would report the accuracy of the final batch only.
        _, predicted = torch.max(outputs.detach(), 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0 or total_predictions == 0:
        return 0.0, 0.0

    avg_train_loss = running_loss / num_batches
    train_accuracy = 100 * correct_predictions / total_predictions
    return avg_train_loss, train_accuracy

def validate_model(model, criterion, val_loader, device):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    The model is put in evaluation mode and no gradients are recorded.
    ``val_loader`` must yield ``(inputs, labels)`` batches and support
    ``len()``; each batch is moved to ``device`` before the forward pass.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for batch_inputs, batch_labels in val_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(val_loader)

def evaluate_model(model, test_loader, device, classes):
    """Return the top-1 accuracy (in percent) of ``model`` on ``test_loader``.

    The model is put in evaluation mode and no gradients are recorded.
    ``test_loader`` must yield ``(inputs, labels)`` batches. ``classes`` is
    accepted for interface compatibility but is not used here.
    """
    model.eval()
    num_correct, num_seen = 0, 0
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            # Predicted class = index of the largest score in each row.
            predicted = model(batch_inputs).max(dim=1).indices
            num_seen += batch_labels.size(0)
            num_correct += (predicted == batch_labels).sum().item()

    return 100 * num_correct / num_seen

# Model under test. Candidates: BaseCNN(), threeLayerCNN(), fourLayerCNN(), fiveLayerCNN()
# {'BaseCNN': BaseCNN(), 'threeLayerCNN': threeLayerCNN()}
cnn = threeLayerCNN()  # change this to different models to test them

learning_rate = 0.001  # 0.005 0.003 0.001 0.0008 0.0005 0.0003 0.0001

criterion = nn.CrossEntropyLoss()
# Three candidate optimizers over the same parameters; only the one passed to
# train_model() in the loop below actually updates the network.
SGD_optimizer = optim.SGD(cnn.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
Adam_optimizer = optim.Adam(cnn.parameters(), lr=learning_rate, weight_decay=5e-4)
Rms_prop_optimizer = optim.RMSprop(cnn.parameters(), lr=learning_rate, alpha=0.9, eps=1e-08, weight_decay=5e-4, momentum=0.9, centered=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
cnn.to(device)

# Per-epoch history, consumed by the plotting code further down.
train_losses = []
train_accuracies = []
val_losses = []
test_accuracies = []

# Early-stopping state: stop after `n_epochs_stops` consecutive epochs
# without a new best validation loss.
best_val_loss = float('inf')
epochs_no_improve = 0
n_epochs_stops = 5
early_stop = False

for epoch in range(100):
    train_loss, train_accuracy = train_model(cnn, criterion, SGD_optimizer, train_loader, device)
    val_loss = validate_model(cnn, criterion, val_loader, device)
    # Test accuracy is recorded for the plots only; checkpoint selection
    # below depends solely on the validation loss.
    testing_accuracy = evaluate_model(cnn, test_loader, device, classes)

    print(f'Epoch {epoch+1}, Training Loss: {train_loss:.3f}, Train Accuracy: {train_accuracy:.2f}% Validation Loss: {val_loss:.3f}, Testing Accuracy: {testing_accuracy:.2f}%')

    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    val_losses.append(val_loss)
    test_accuracies.append(testing_accuracy)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        torch.save(cnn.state_dict(), 'best_model.pth')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= n_epochs_stops:
            print(f'Early stopping at epoch {epoch+1}')
            early_stop = True
            break

if not early_stop:
    print('Finished Training')

# Restore the best-validation weights whether or not early stopping fired:
# the final epoch of a full-length run is not necessarily the best one.
# Skipped when no checkpoint was written in this run (e.g. the validation
# loss was never finite), so a stale file is not picked up.
if best_val_loss < float('inf'):
    cnn.load_state_dict(torch.load('best_model.pth', map_location=device))

# Evaluate AFTER restoring, so the reported number matches the weights that
# are saved below; previously this result was computed first and discarded.
final_test_accuracy = evaluate_model(cnn, test_loader, device, classes)
print(f'Test accuracy of the saved model: {final_test_accuracy:.2f}%')

PATH = './cifar_net.pth'
torch.save(cnn.state_dict(), PATH)

# Learning curves for the CNN run: losses on the left, accuracies on the right.
plt.figure(figsize=(12, 5))

# One entry per panel: (y-axis label, title, ((series, legend label), ...)).
_cnn_panels = (
    ('Loss', 'Training and Validation Loss',
     ((train_losses, 'Training Loss'), (val_losses, 'Validation Loss'))),
    ('Accuracy (%)', 'Training and Testing Accuracy',
     ((train_accuracies, 'Training Accuracy'), (test_accuracies, 'Testing Accuracy'))),
)

for _panel_no, (_y_label, _panel_title, _curves) in enumerate(_cnn_panels, start=1):
    plt.subplot(1, 2, _panel_no)
    for _series, _legend_label in _curves:
        plt.plot(_series, label=_legend_label)
    plt.xlabel('Epoch')
    plt.ylabel(_y_label)
    plt.title(_panel_title)
    plt.legend()

plt.tight_layout()
plt.show()

### Accuracy for each class

In [None]:
# Get a batch of test images
dataiter = iter(test_loader)
images, labels = next(dataiter)
images, labels = images.to(device), labels.to(device)

# Only the first few samples are previewed, so the image grid and the two
# printed label rows describe exactly the same samples.
num_preview = min(4, labels.size(0))

# Display the images
# NOTE(review): `imshow` is not defined in the visible part of this file;
# confirm a helper with that name exists before running this cell.
imshow(torchvision.utils.make_grid(images[:num_preview].cpu()))  # Move images to CPU for plotting
print('GroundTruth:', ' '.join(f'{classes[labels[j]]:5s}' for j in range(num_preview)))

# Get predictions from the model (evaluation mode, no gradient tracking)
cnn.eval()
with torch.no_grad():
    outputs = cnn(images)
_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}' for j in range(num_preview)))

# Evaluate the model on the entire test set in a single pass, collecting
# per-class counts; the overall figures are the sums of those counts.
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

with torch.no_grad():
    for data in test_loader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = cnn(images)
        _, predictions = torch.max(outputs, 1)
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            true_class = classes[label]
            if label == prediction:
                correct_pred[true_class] += 1
            total_pred[true_class] += 1

correct = sum(correct_pred.values())
total = sum(total_pred.values())

# True division: `//` floored the percentage to an integer before the
# two-decimal formatting was applied.
overall_accuracy = 100 * correct / total if total else 0.0
print(f'Accuracy of the network on the 10000 test images: {overall_accuracy:.2f} %')

# Print accuracy for each class
for classname, correct_count in correct_pred.items():
    class_total = total_pred[classname]
    # A class with no test samples reports 0.0 instead of dividing by zero.
    accuracy = 100 * float(correct_count) / class_total if class_total else 0.0
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

### ResNet

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from torchvision.models import resnet18, resnet34, resnet50, ResNet18_Weights, ResNet34_Weights, ResNet50_Weights

# Setup: choose the device and build a ResNet-50 initialised from torchvision's
# default pretrained weights, with the final layer replaced by a 10-way head.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Alternative backbones:
# model = resnet18(weights=ResNet18_Weights.DEFAULT)
# model = resnet34(weights=ResNet34_Weights.DEFAULT)
model = resnet50(weights=ResNet50_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, 10)  # Modify for CIFAR-10 classes
model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Data preprocessing: upscale to 224x224, convert to tensor, then normalise
# each channel with the mean/std constants below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Adjusting to the expected input size of ResNet
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# 80/20 train/validation split of the CIFAR-10 training set.
full_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
num_train = len(full_dataset)
num_val = int(0.2 * num_train)
train_dataset, val_dataset = random_split(full_dataset, [num_train - num_val, num_val])

batch_size = 64
# Options shared by all three loaders; only the shuffle flag differs.
_resnet_loader_options = dict(batch_size=batch_size, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **_resnet_loader_options)
val_loader = DataLoader(val_dataset, shuffle=True, **_resnet_loader_options)
test_loader = DataLoader(
    datasets.CIFAR10(root='./data', train=False, download=True, transform=transform),
    shuffle=False,
    **_resnet_loader_options,
)

# Training and evaluation function
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns ``(mean per-batch loss, accuracy in percent)``; both are ``0.0``
    when the loader yields nothing.
    """
    training = optimizer is not None
    model.train(training)
    running_loss, correct, total, num_batches = 0.0, 0, 0, 0

    # Gradients are recorded only while training.
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

    avg_loss = running_loss / num_batches if num_batches else 0.0
    accuracy = 100 * correct / total if total else 0.0
    return avg_loss, accuracy


def train_and_evaluate_with_early_stopping(model, train_loader, val_loader, criterion, optimizer,
                                           num_epochs=10, patience=3, device=None,
                                           checkpoint_path='best_model.pth'):
    """Train ``model`` with validation-loss early stopping.

    Args:
        model: network to train.
        train_loader: iterable of ``(images, labels)`` training batches.
        val_loader: iterable of ``(images, labels)`` validation batches.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        num_epochs: maximum number of epochs.
        patience: consecutive epochs without a new best validation loss
            after which training stops.
        device: device batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has none), so the
            function no longer relies on a module-level ``device`` global.
        checkpoint_path: file that receives the ``state_dict`` each time the
            validation loss improves. Pass a distinct path per experiment to
            avoid overwriting another run's checkpoint.

    Returns:
        ``(train_losses, train_accuracies, val_losses, val_accuracies)``,
        one entry per completed epoch. The model is left with the weights
        of the last epoch that ran; the best weights are in
        ``checkpoint_path``.
    """
    if device is None:
        first_param = next(iter(model.parameters()), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses, val_losses, train_accuracies, val_accuracies = [], [], [], []
    best_val_loss = float('inf')
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

        # Early stopping logic
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f'Early stopping triggered after {epoch + 1} epochs!')
                break

    return train_losses, train_accuracies, val_losses, val_accuracies

# Fine-tune the ResNet for up to 50 epochs, stopping after 5 epochs without
# a new best validation loss.
_resnet_history = train_and_evaluate_with_early_stopping(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=50, patience=5
)
train_losses, train_accuracies, val_losses, val_accuracies = _resnet_history

# Learning curves: losses on the left, accuracies on the right.
plt.figure(figsize=(12, 5))

# One entry per panel: (title, y-axis label, ((series, legend label), ...)).
_resnet_panels = (
    ('Loss over epochs', 'Loss',
     ((train_losses, 'Train Loss'), (val_losses, 'Validation Loss'))),
    ('Accuracy over epochs', 'Accuracy (%)',
     ((train_accuracies, 'Train Accuracy'), (val_accuracies, 'Validation Accuracy'))),
)

for _panel_no, (_panel_title, _y_label, _curves) in enumerate(_resnet_panels, start=1):
    plt.subplot(1, 2, _panel_no)
    for _series, _legend_label in _curves:
        plt.plot(_series, label=_legend_label)
    plt.title(_panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(_y_label)
    plt.legend()

plt.show()
