In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import CIFAR10
from tqdm import tqdm

### SUBQUESTION 1: TRAIN CNN AND COMPARE WITH FCNN

In [3]:
# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = self.relu3(self.fc1(x))
        x = self.relu4(self.fc2(x))
        x = self.fc3(x)
        return x

# Define training parameters
lr = 0.001
batch_size = 64
epochs = 10

In [4]:
# Load CIFAR-10 dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:11<00:00, 14230557.91it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [5]:
# Initialize the CNN model
cnn_model = CNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=lr)

# Train the CNN model
for epoch in range(epochs):
    cnn_model.train()
    train_acc = 0.0

    for images, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}'):
        optimizer.zero_grad()
        outputs = cnn_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        train_acc += (predicted == labels).sum().item()

    train_accuracy = train_acc / len(train_loader.dataset)

    # Evaluate on the test set
    cnn_model.eval()
    test_acc = 0.0

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = cnn_model(images)
            _, predicted = torch.max(outputs.data, 1)
            test_acc += (predicted == labels).sum().item()

    test_accuracy = test_acc / len(test_loader.dataset)

    print(f'Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}')

Epoch 1/10: 100%|██████████| 782/782 [00:23<00:00, 33.70it/s]


Train Accuracy: 0.3982, Test Accuracy: 0.4925


Epoch 2/10: 100%|██████████| 782/782 [00:20<00:00, 37.53it/s]


Train Accuracy: 0.5184, Test Accuracy: 0.5457


Epoch 3/10: 100%|██████████| 782/782 [00:23<00:00, 33.13it/s]


Train Accuracy: 0.5652, Test Accuracy: 0.5707


Epoch 4/10: 100%|██████████| 782/782 [00:20<00:00, 37.29it/s]


Train Accuracy: 0.5950, Test Accuracy: 0.5887


Epoch 5/10: 100%|██████████| 782/782 [00:21<00:00, 36.77it/s]


Train Accuracy: 0.6187, Test Accuracy: 0.6036


Epoch 6/10: 100%|██████████| 782/782 [00:20<00:00, 38.24it/s]


Train Accuracy: 0.6401, Test Accuracy: 0.6132


Epoch 7/10: 100%|██████████| 782/782 [00:20<00:00, 37.84it/s]


Train Accuracy: 0.6586, Test Accuracy: 0.6337


Epoch 8/10: 100%|██████████| 782/782 [00:27<00:00, 28.22it/s]


Train Accuracy: 0.6720, Test Accuracy: 0.6277


Epoch 9/10: 100%|██████████| 782/782 [00:22<00:00, 35.49it/s]


Train Accuracy: 0.6872, Test Accuracy: 0.6395


Epoch 10/10: 100%|██████████| 782/782 [00:21<00:00, 36.53it/s]


Train Accuracy: 0.7016, Test Accuracy: 0.6416


In the previous homework we obtained the following results:
-   Overall Test Accuracy: 0.6790
-   Test Accuracy for cat: 0.6220
-   Test Accuracy for dog: 0.5123
-   Test Accuracy for ship: 0.8879
-   The classifier performs best on the class "ship"

And here, with CNN, we obtain an overall test accuracy of 0.6416, which is very comparable to FCNN. This might suggest an upperbound on the performance since we are getting the same performance with two different architectures.

### SUBQUESTION 2: SHUFFLE THE PIXELS AND TRAINING CNN AGAIN

In [6]:
import numpy as np

def shuffle_pixels(dataset):
    shuffled_data = []
    for data, label in dataset:
        data = np.array(data)
        flattened_data = data.flatten()
        np.random.shuffle(flattened_data)
        shuffled_data.append((torch.Tensor(flattened_data.reshape(data.shape)), label))
    return shuffled_data

# Shuffle pixels for training and test sets
shuffled_train_data = shuffle_pixels(train_dataset)
shuffled_test_data = shuffle_pixels(test_dataset)

# Create DataLoader for shuffled datasets
shuffled_train_loader = DataLoader(dataset=shuffled_train_data, batch_size=batch_size, shuffle=True)
shuffled_test_loader = DataLoader(dataset=shuffled_test_data, batch_size=batch_size, shuffle=False)


In [7]:
# Initialize the CNN model
cnn_model = CNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=lr)

# Train the CNN model
for epoch in range(epochs):
    cnn_model.train()
    train_acc = 0.0

    for images, labels in tqdm(shuffled_train_loader, desc=f'Epoch {epoch + 1}/{epochs}'):
        optimizer.zero_grad()
        outputs = cnn_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        train_acc += (predicted == labels).sum().item()

    train_accuracy = train_acc / len(train_loader.dataset)

    # Evaluate on the test set
    cnn_model.eval()
    test_acc = 0.0

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = cnn_model(images)
            _, predicted = torch.max(outputs.data, 1)
            test_acc += (predicted == labels).sum().item()

    test_accuracy = test_acc / len(shuffled_test_loader.dataset)

    print(f'Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}')

Epoch 1/10: 100%|██████████| 782/782 [00:08<00:00, 91.99it/s] 


Train Accuracy: 0.1883, Test Accuracy: 0.1414


Epoch 2/10: 100%|██████████| 782/782 [00:09<00:00, 78.33it/s] 


Train Accuracy: 0.2034, Test Accuracy: 0.1330


Epoch 3/10: 100%|██████████| 782/782 [00:10<00:00, 77.18it/s] 


Train Accuracy: 0.2053, Test Accuracy: 0.1377


Epoch 4/10: 100%|██████████| 782/782 [00:08<00:00, 90.38it/s] 


Train Accuracy: 0.2079, Test Accuracy: 0.1379


Epoch 5/10: 100%|██████████| 782/782 [00:09<00:00, 86.15it/s] 


Train Accuracy: 0.2069, Test Accuracy: 0.1372


Epoch 6/10: 100%|██████████| 782/782 [00:08<00:00, 94.73it/s] 


Train Accuracy: 0.2086, Test Accuracy: 0.1331


Epoch 7/10: 100%|██████████| 782/782 [00:08<00:00, 89.98it/s] 


Train Accuracy: 0.2102, Test Accuracy: 0.1477


Epoch 8/10: 100%|██████████| 782/782 [00:08<00:00, 91.13it/s] 


Train Accuracy: 0.2113, Test Accuracy: 0.1281


Epoch 9/10: 100%|██████████| 782/782 [00:08<00:00, 97.23it/s] 


Train Accuracy: 0.2145, Test Accuracy: 0.1448


Epoch 10/10: 100%|██████████| 782/782 [00:08<00:00, 90.03it/s] 


Train Accuracy: 0.2164, Test Accuracy: 0.1387


As we can see, shuffling operation reduced the accuracy drastically. The explanation for that phennomena is because shuffling the pixels disrupts spatial information in the images, making it challenging for models to learn meaningful patterns. For CNNs, which inherently leverage spatial hierarchies, shuffling can significantly degrade performance.

### SUBQUESTION 3: SHUFFLE THE PIXELS AND TRAINING FCNN AGAIN

When we shuffle the pixels on the previous homework's code, we see that it is still in the order of 0.65 does not affect the accuracy since all of the pixels are connected and FCNN does not make any use of spatial information, it is basically the same thing for FCNN.