In this assignment, we will continue working on image classification using PyTorch and develop another model for the Intel image dataset.

In [None]:
import torch
import torchvision
import torchvision.datasets as dset
import torchvision.transforms as vtransforms

# Prefer GPU 0, but fall back to the CPU when CUDA is not available so that
# anything later moved to `gpu` does not fail on a CPU-only runtime.
gpu = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Report the library versions and whether a CUDA device can be used.
print(f'PyTorch version= {torch.__version__}')
print(f'torchvision version= {torchvision.__version__}')
print(f'CUDA available= {torch.cuda.is_available()}')

PyTorch version= 2.2.1+cu121
torchvision version= 0.17.1+cu121
CUDA available= False


In [None]:
# Print CUDA / cuDNN diagnostics. Nothing is printed when CUDA is unavailable.
if torch.cuda.is_available():
    # CUDA toolkit version, obtained through the IPython shell escape (`!`),
    # which is notebook-only syntax and not valid in a plain Python script.
    print('CUDA Version')
    !nvcc --version
    print()

    # cuDNN version followed by the device inventory.
    # NOTE: the device count and the current device index are each printed twice below.
    print(f'CUDNN Version: {torch.backends.cudnn.version()}')
    print(f'Number of CUDA Devices: {torch.cuda.device_count()}')
    print(f'Active CUDA Device: {torch.cuda.current_device()}')
    print(f'Available devices: {torch.cuda.device_count()}, Name: {torch.cuda.get_device_name(0)}')
    print(f'Current CUDA device: {torch.cuda.current_device()}')

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import zipfile
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Unzip the dataset
zip_path = '/content/drive/My Drive/archive.zip'
extract_folder = '/content/dataset'
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

# Setup for loading images
IMGSIZE = (128, 128)
CNAMES = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']
# X_ts / y_ts are created here but are not populated in this cell.
X_tr, y_tr, X_ts, y_ts = [], [], [], []

# Load training images: one sub-folder per class, label = position in CNAMES.
path = extract_folder + '/seg_train/seg_train'
for class_idx, label in enumerate(CNAMES):
    label_path = os.path.join(path, label)
    # Sorted so the image order is the same on every run.
    jpg_names = sorted(name for name in os.listdir(label_path) if name.lower().endswith('.jpg'))
    for f in jpg_names:
        img_path = os.path.join(label_path, f)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; passing None to cv2.resize would crash.
            print(f'Skipping unreadable image: {img_path}')
            continue
        img_resized = cv2.resize(img, IMGSIZE)
        X_tr.append(img_resized)
        y_tr.append(class_idx)


In [None]:
# Stack the per-image lists into ndarrays so they can be indexed and inspected as a whole.
X_tr = np.array(X_tr)
y_tr = np.array(y_tr)

# Preview the first nine training images in a 3x3 grid with their class names.
plt.figure(figsize=(10, 10))
for idx in range(9):
    ax = plt.subplot(3, 3, idx + 1)
    # OpenCV loads images as BGR; matplotlib expects RGB.
    rgb_image = cv2.cvtColor(X_tr[idx], cv2.COLOR_BGR2RGB)
    ax.imshow(rgb_image)
    ax.set_title(CNAMES[y_tr[idx]])
    ax.axis("off")
plt.show()

print("Number of color channels:", X_tr.shape[-1])

In [None]:
# Scale the pixel values to the range [0, 1].
# The conversion is guarded by dtype so that re-running this cell does not
# divide by 255 a second time: once scaled, X_tr is float32 and is left alone.
X_tr = np.asarray(X_tr)
if X_tr.dtype != np.float32:
    X_tr = X_tr.astype(np.float32) / 255.0

print("Array shape:", X_tr.shape)
print("Pixel range:", X_tr.min(), X_tr.max())

1. [60 pts] Create a convolutional neural network (CNN), train it on the training portion of the dataset, and report its reclassification performance. 95% reclassification and 75% testing performance should be achievable without any hyperparameter tuning. (Hint: My model, which is similar to the model in the module notebook, took around 10 minutes to train for 10 epochs without a GPU.)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import numpy as np

# Helper function to calculate output shape of each convolution layer
def findConv2dOutShape(H, W, conv, pool=2):
    """Return the (height, width) of a feature map after `conv` and optional pooling.

    Args:
        H, W: Input height and width.
        conv: Object exposing `kernel_size`, `stride`, `padding` and `dilation`,
            each indexable as [0] (height) and [1] (width).
        pool: Pooling factor the convolved size is divided by; a falsy value
            (0 / None) skips the pooling step.

    Returns:
        Tuple of two ints: the output height and width (truncated by `int`).
    """
    def _conv_extent(size, axis):
        # Convolution output-size formula along a single axis.
        pad = conv.padding[axis]
        dil = conv.dilation[axis]
        ksize = conv.kernel_size[axis]
        step = conv.stride[axis]
        return np.floor((size + 2 * pad - dil * (ksize - 1) - 1) / step + 1)

    out_h = _conv_extent(H, 0)
    out_w = _conv_extent(W, 1)
    if pool:
        out_h = out_h / pool
        out_w = out_w / pool
    return int(out_h), int(out_w)

# Setup dataset loaders
def get_dataloader(img_size, batch_size, path, shuffle=True, num_workers=4):
    """Build a DataLoader over an image-folder dataset rooted at `path`.

    Every image is resized to `img_size`, converted to a tensor and normalized
    per channel with mean 0.5 and std 0.5.

    Args:
        img_size: Target size passed to `transforms.Resize`.
        batch_size: Number of samples per batch.
        path: Root directory handed to `datasets.ImageFolder`.
        shuffle: Whether the loader reshuffles the data; defaults to True,
            which is the previous hard-coded behavior.
        num_workers: Number of loader worker processes; defaults to 4,
            which is the previous hard-coded value.

    Returns:
        A `DataLoader` yielding (image batch, label batch) pairs.
    """
    transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    dataset = datasets.ImageFolder(root=path, transform=transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

# Root folders of the training and test splits.
train_path = '/content/dataset/seg_train/seg_train'
test_path = '/content/dataset/seg_test/seg_test'

# Both splits use the same image size and batch size.
train_loader, test_loader = (
    get_dataloader(img_size=(128, 128), batch_size=64, path=split_path)
    for split_path in (train_path, test_path)
)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import numpy as np


# Simple CNN Model Definition
class SimpleCNN(nn.Module):
    """Two conv/pool stages followed by two fully connected layers.

    The first linear layer expects 32 * 32 * 32 features per sample, which is
    what the two 2x2 poolings produce for 3-channel 128x128 inputs. The
    output has 6 values per sample (one per class).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2)  # 3 input channels -> 16 feature maps
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
        self.fc1 = nn.Linear(32 * 32 * 32, 512)  # Matches the flattened conv2/pool output
        self.fc2 = nn.Linear(512, 6)  # 6 classes in the Intel dataset

    def forward(self, x):
        """Return the 6 class scores for each sample in the batch `x`."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample, keeping the batch dimension fixed. The previous
        # `view(-1, 32 * 32 * 32)` could silently fold several samples into one
        # row when the input size was wrong; now fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model and training components
# Use CUDA when it is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Model, loss and optimizer for the baseline network.
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
%%time
# Train the baseline model for 10 passes over the training loader.
for epoch in range(10):
    model.train()
    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()
    # The value printed is the loss of the last mini-batch of the epoch.
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


In [None]:
# Measure classification accuracy on the test loader.
model.eval()
correct = total = 0
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        targets = targets.to(device)
        # Predicted class = index of the largest score in each row.
        predicted = torch.max(model(images), dim=1).indices
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the network on the test images: {accuracy}%')

In [None]:
# Reclassification check: score the model on the data it was trained on.
model.eval()

correct_train, total_train = 0, 0

with torch.no_grad():
    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)
        scores = model(images)
        predicted = torch.max(scores, dim=1).indices
        total_train += targets.size(0)
        correct_train += (predicted == targets).sum().item()

# Percentage of training samples classified correctly.
reclassification_accuracy = 100 * correct_train / total_train
print(f'Reclassification Accuracy on Training Data: {reclassification_accuracy:.2f}%')

2. [20 pts] Add regularization and/or drop-out features to your CNN. Report your model's best
performance. As the performance standard deviation decreases the model is deemed to be
more robust. Why?

**Ans.**

In [None]:
class CNNwithRegDrop(nn.Module):
    """Two conv/pool stages plus two linear layers, with dropout after each stage.

    Dropout (p=0.25) follows both pooling steps and dropout (p=0.5) precedes
    the final linear layer. The first linear layer expects 32 * 32 * 32
    features per sample (3-channel 128x128 inputs); the output has 6 values
    per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.25)  # Dropout after pooling
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
        self.dropout2 = nn.Dropout(0.25)  # Dropout after second pooling
        self.fc1 = nn.Linear(32 * 32 * 32, 512)
        self.dropout3 = nn.Dropout(0.5)  # Dropout before final FC layer
        self.fc2 = nn.Linear(512, 6)

    def forward(self, x):
        """Return the 6 class scores for each sample in the batch `x`."""
        x = self.dropout1(self.pool(torch.relu(self.conv1(x))))
        x = self.dropout2(self.pool(torch.relu(self.conv2(x))))
        # Flatten per sample, keeping the batch dimension fixed. The previous
        # `view(-1, 32 * 32 * 32)` could silently fold several samples into one
        # row when the input size was wrong; now fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = self.dropout3(torch.relu(self.fc1(x)))
        x = self.fc2(x)
        return x


# Use CUDA when it is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = CNNwithRegDrop().to(device)
criterion = nn.CrossEntropyLoss()
# weight_decay adds the L2 penalty through the optimizer.
optimizer = optim.Adam(params=model.parameters(), lr=0.001, weight_decay=0.01)

In [None]:
%%time

# Test accuracy measured after each epoch.
epoch_accuracies = []

for epoch in range(30):
    # Optimization pass over the training data.
    model.train()
    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        optimizer.step()

    # Accuracy on the test loader with dropout disabled (eval mode).
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, targets in test_loader:
            images = images.to(device)
            targets = targets.to(device)
            predicted = torch.max(model(images), dim=1).indices
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    epoch_accuracy = 100 * correct / total
    epoch_accuracies.append(epoch_accuracy)
    # The loss shown is that of the last training mini-batch of the epoch.
    print(f'Epoch {epoch+1}, Loss: {loss.item()}, Accuracy: {epoch_accuracy}%')

# Spread of the per-epoch test accuracies.
std_deviation = np.std(epoch_accuracies)
print(f'Standard Deviation of Accuracies over Training: {std_deviation:.2f}%')

The implementation of regularization techniques like dropout and weight decay in a model contributes to enhanced robustness by preventing overfitting and promoting a more distributed representation of learning. This leads to a decrease in performance variability, indicating a more consistent and stable model across different datasets. The decreased standard deviation in performance metrics not only shows that the model is effectively handling diverse data inputs and learning genuine patterns but also reflects its stable learning process and robustness, increasing confidence in its deployment in real-world applications.



3. [20 pts] Add batch normalization and early stopping features to the pipeline and demonstrate their effectiveness.

**Ans.**
- Batch Normalization:

In [None]:
%%time

class BatchCNN(nn.Module):
    """CNN with batch normalization and dropout after each stage.

    Each of the two conv stages applies conv -> ReLU -> BatchNorm2d -> pool ->
    dropout; the hidden linear layer applies linear -> ReLU -> BatchNorm1d ->
    dropout. The first linear layer expects 32 * 32 * 32 features per sample
    (3-channel 128x128 inputs); the output has 6 values per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.25)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.dropout2 = nn.Dropout(0.25)

        self.fc1 = nn.Linear(32 * 32 * 32, 512)
        self.bn3 = nn.BatchNorm1d(512)
        self.dropout3 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(512, 6)

    def forward(self, x):
        """Return the 6 class scores for each sample in the batch `x`."""
        x = self.dropout1(self.pool(self.bn1(torch.relu(self.conv1(x)))))
        x = self.dropout2(self.pool(self.bn2(torch.relu(self.conv2(x)))))
        # Flatten per sample, keeping the batch dimension fixed. The previous
        # `view(-1, 32 * 32 * 32)` could silently fold several samples into one
        # row when the input size was wrong; now fc1 raises a shape error instead.
        x = torch.flatten(x, 1)
        x = self.dropout3(self.bn3(torch.relu(self.fc1(x))))
        x = self.fc2(x)
        return x

# Use CUDA when it is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = BatchCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001, weight_decay=0.01)

# Train for 30 passes over the training loader.
for epoch in range(30):
    model.train()
    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        optimizer.step()

    # The value printed is the loss of the last mini-batch of the epoch.
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Final accuracy on the test loader (eval mode: batch-norm uses running stats, dropout is off).
model.eval()
correct = total = 0
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        targets = targets.to(device)
        predicted = torch.max(model(images), dim=1).indices
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the network on the test images: {accuracy}%')

Batch normalization typically shows its effectiveness through faster convergence, by allowing higher learning rates to be used reliably, and by making the network less sensitive to weight initialization.

In [None]:
# Early stopping criteria
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    Call the instance once per epoch with the current validation loss. After
    `patience` consecutive calls without improvement, `early_stop` becomes True.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        verbose: If True, print the counter on every non-improving call.
        delta: Margin added to the best score; a call counts as an improvement
            only if its score (negative loss) is at least `best_score + delta`.

    Attributes:
        best_score: Best (highest) negative validation loss seen so far.
        epochs_no_improve: Current count of consecutive non-improving calls.
        early_stop: True once the patience has been exhausted.
        best_state: Copy of the model's `state_dict` taken at the best score,
            or None if no model was supplied.
    """

    def __init__(self, patience=5, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.best_score = None
        self.epochs_no_improve = 0
        self.early_stop = False
        self.best_state = None

    def __call__(self, val_loss, model):
        """Record `val_loss` for this epoch and update the stopping state.

        Args:
            val_loss: Validation loss of the current epoch (lower is better).
            model: Model whose weights are snapshotted on improvement; may be
                None, in which case no snapshot is taken.
        """
        score = -val_loss  # negate so that "higher is better"

        improved = self.best_score is None or score >= self.best_score + self.delta
        if improved:
            self.best_score = score
            self.epochs_no_improve = 0
            self._snapshot(model)
            return

        self.epochs_no_improve += 1
        if self.verbose:
            print(f'EarlyStopping counter: {self.epochs_no_improve} out of {self.patience}')
        if self.epochs_no_improve >= self.patience:
            self.early_stop = True

    def _snapshot(self, model):
        # Keep an independent copy of the best weights; previously the model
        # argument was ignored, so the best checkpoint could not be recovered.
        if model is None or not hasattr(model, 'state_dict'):
            return
        self.best_state = {
            key: value.detach().clone() for key, value in model.state_dict().items()
        }


# `val_loader` was never defined, so the loop below raised NameError.
# Hold out 20% of the training images for validation and train on the rest.
# The split is seeded so the same images are held out on every run.
full_train_set = train_loader.dataset
n_val = int(0.2 * len(full_train_set))
n_fit = len(full_train_set) - n_val
fit_subset, val_subset = torch.utils.data.random_split(
    full_train_set, [n_fit, n_val], generator=torch.Generator().manual_seed(0)
)
fit_loader = DataLoader(
    fit_subset,
    batch_size=train_loader.batch_size,
    shuffle=True,
    num_workers=train_loader.num_workers,
)
val_loader = DataLoader(
    val_subset,
    batch_size=train_loader.batch_size,
    shuffle=False,
    num_workers=train_loader.num_workers,
)

# Start from a fresh model and optimizer: the model trained earlier has
# already seen every training image, including those now held out, which
# would make the validation loss meaningless.
model = BatchCNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

early_stopping = EarlyStopping(patience=5, verbose=True)

for epoch in range(50):  # Upper bound; early stopping is expected to end training sooner
    model.train()
    for inputs, labels in fit_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation phase: mean per-batch loss on the held-out split
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()

    val_loss /= len(val_loader)
    print(f'Epoch {epoch+1}, Validation Loss: {val_loss}')

    # Call early stopping
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

Early stopping is shown to be effective if it halts training once the model begins to overfit, i.e., when validation performance starts to degrade or stops improving.