In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import time
from torch.utils.data import DataLoader

# 1. Data Preparation
data_path = './data'
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4915, 0.4823, 0.4468), (0.2470, 0.2435, 0.2616))
])

train_dataset = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# 2. Define CNN Architecture
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),  # Conv layer 1
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),       # Pooling layer 1
            nn.Conv2d(32, 64, kernel_size=3, padding=1), # Conv layer 2
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)        # Pooling layer 2
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 512),                 # Fully connected layer
            nn.ReLU(),
            nn.Linear(512, 10),                         # Output layer for 10 classes
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

# 3. Initialize Model, Loss Function, and Optimizer
model = CNN()
loss_fn = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 4. Training the Model
n_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

start_time = time.time()

for epoch in range(n_epochs):
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * images.size(0)

    train_loss /= len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss:.4f}")

end_time = time.time()
training_time = end_time - start_time

# 5. Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Training Time: {training_time:.2f} seconds")
print(f"Final Training Loss: {train_loss:.4f}")
print(f"Evaluation Accuracy: {accuracy:.4f}")

# 6. Analyze Results
print("Analysis:")
print(f"- Training time for 10 epochs was {training_time / 60:.2f} minutes.")
print(f"- Training loss steadily decreased over epochs.")
print(f"- Evaluation accuracy on test data indicates how well the model generalizes.")


Files already downloaded and verified
Files already downloaded and verified
Epoch 1/10, Training Loss: 1.2476
Epoch 2/10, Training Loss: 0.8524
Epoch 3/10, Training Loss: 0.6570
Epoch 4/10, Training Loss: 0.4797
Epoch 5/10, Training Loss: 0.3149
Epoch 6/10, Training Loss: 0.1850
Epoch 7/10, Training Loss: 0.1171
Epoch 8/10, Training Loss: 0.0847
Epoch 9/10, Training Loss: 0.0729
Epoch 10/10, Training Loss: 0.0715
Training Time: 1118.67 seconds
Final Training Loss: 0.0715
Evaluation Accuracy: 0.7216
Analysis:
- Training time for 200 epochs was 18.64 minutes.
- Training loss steadily decreased over epochs.
- Evaluation accuracy on test data indicates how well the model generalizes.


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import time
from torch.utils.data import DataLoader

# 1. Data Preparation (reuse from Problem 1a)
data_path = './data'
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4915, 0.4823, 0.4468), (0.2470, 0.2435, 0.2616))
])

train_dataset = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# 2. Define Extended CNN Architecture
class ExtendedCNN(nn.Module):
    def __init__(self):
        super(ExtendedCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),  # Conv layer 1
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),       # Pooling layer 1
            nn.Conv2d(32, 64, kernel_size=3, padding=1), # Conv layer 2
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),       # Pooling layer 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),# Conv layer 3
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)        # Pooling layer 3
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 512),                # Fully connected layer
            nn.ReLU(),
            nn.Linear(512, 10),                         # Output layer for 10 classes
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

# 3. Initialize Model, Loss Function, and Optimizer
model = ExtendedCNN()
loss_fn = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 4. Training the Model
n_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

start_time = time.time()

for epoch in range(n_epochs):
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * images.size(0)

    train_loss /= len(train_loader.dataset)

    # Print loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss:.4f}")

end_time = time.time()
training_time = end_time - start_time

# 5. Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Training Time: {training_time:.2f} seconds")
print(f"Final Training Loss: {train_loss:.4f}")
print(f"Evaluation Accuracy: {accuracy:.4f}")

# 6. Analyze Results
print("Analysis:")
print(f"- The model's training time after 10 epochs is proportional to its increased complexity.")
print(f"- Compare the accuracy and loss with Problem 1a to determine if additional complexity improves performance.")
print(f"- Check for overfitting by evaluating training loss vs. test accuracy.")


Files already downloaded and verified
Files already downloaded and verified
Epoch 10/10, Training Loss: 0.1121
Training Time: 1278.99 seconds
Final Training Loss: 0.1121
Evaluation Accuracy: 0.7482
Analysis:
- The model's training time after 10 epochs is proportional to its increased complexity.
- Compare the accuracy and loss with Problem 1a to determine if additional complexity improves performance.
- Check for overfitting by evaluating training loss vs. test accuracy.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import time
from torch.utils.data import DataLoader

# 1. Data Preparation
data_path = './data'
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4915, 0.4823, 0.4468), (0.2470, 0.2435, 0.2616))
])

train_dataset = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# 2. Define ResNet Block
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResNetBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = torch.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += identity
        out = torch.relu(out)
        return out

class ResNet10(nn.Module):
    def __init__(self):
        super(ResNet10, self).__init__()
        self.initial = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer1 = self._make_layer(64, 64, 2, stride=1)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 3, stride=2)
        self.layer4 = self._make_layer(256, 512, 3, stride=2)
        self.fc = nn.Linear(512 * 2 * 2, 10)  # Adjust input size to match output of last layer

    def _make_layer(self, in_channels, out_channels, num_blocks, stride):
        layers = []
        layers.append(ResNetBlock(in_channels, out_channels, stride))
        for _ in range(1, num_blocks):
            layers.append(ResNetBlock(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.initial(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        print(x.shape)  # Debug: Check output size before the fully connected layer
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return torch.log_softmax(x, dim=1)


# 4. Initialize Model, Loss Function, and Optimizer
model = ResNet10()
loss_fn = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 5. Training the Model
n_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

start_time = time.time()

for epoch in range(n_epochs):
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * images.size(0)

    train_loss /= len(train_loader.dataset)

    # Print loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss:.4f}")

end_time = time.time()
training_time = end_time - start_time

# 6. Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Training Time: {training_time:.2f} seconds")
print(f"Final Training Loss: {train_loss:.4f}")
print(f"Evaluation Accuracy: {accuracy:.4f}")

# 7. Analyze Results
print("Analysis:")
print(f"- ResNet-10 achieved a balance between deeper architecture and training time.")
print(f"- Compare training time, loss, and accuracy with Problem 1b.")
print(f"- Check for overfitting by evaluating the test accuracy against training loss.")


Files already downloaded and verified
Files already downloaded and verified
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
torch.Size([64, 512, 2, 2])
