<a href="https://colab.research.google.com/github/Nscaglio/4105/blob/main/hw7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# initialize libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

1.A: Baseline CNN (two convolutional layers, two fully connected layers) trained on CIFAR-10


In [6]:
# Preprocessing pipeline: convert each image to a tensor, then normalise every RGB
# channel with mean 0.5 and std 0.5 (targets the [-1, 1] range)
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 training split first, then the test split; both live under ./data
trainset, testset = (
    torchvision.datasets.CIFAR10(root='./data', train=use_train_split, download=True, transform=transform)
    for use_train_split in (True, False)
)

# Batches of 128 images: training data is reshuffled, test data keeps its order
trainloader = DataLoader(dataset=trainset, batch_size=128, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=128, shuffle=False)
class CNNModel(nn.Module):
    """Baseline CIFAR-10 classifier: two conv -> ReLU -> max-pool stages and two linear layers.

    A batch shaped (N, 3, 32, 32) is mapped to (N, 10) raw class scores; no softmax
    is applied inside the model.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 32 channels; padding=1 keeps the 32x32 spatial size
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        # Shared 2x2 / stride-2 max-pool, applied after each convolution (halves H and W)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 channels on the pooled 16x16 maps
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        # Classifier head: two poolings leave 64 maps of 8x8, i.e. 4096 features per image
        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        """Return the unnormalised class scores (logits) for a batch of images."""
        features = x
        # Both stages share the same shape: convolution, ReLU, then pooling
        for conv in (self.conv1, self.conv2):
            features = self.pool(F.relu(conv(features)))

        # Collapse (C, H, W) into one feature vector per image
        flat = features.flatten(start_dim=1)

        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


# Instantiate the baseline CNN defined above; it is trained in the next cell
model = CNNModel()

Files already downloaded and verified
Files already downloaded and verified


In [7]:
# Objective and optimiser: cross-entropy on the class scores, Adam at lr=0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Run settings: epoch budget and compute device (GPU when available, else CPU)
epochs = 200
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# One value is appended to each history list per epoch
train_loss_history, train_accuracy_history, test_accuracy_history = [], [], []

# Wall-clock reference; the measured span also covers the per-epoch evaluation
start_time = time.time()

for epoch in range(epochs):
    # --- optimisation pass over the training set ---
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss and the count of correct top-1 predictions
        running_loss += loss.item()
        predicted = torch.max(outputs, 1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

    # Mean of the per-batch losses, and training accuracy in percent
    epoch_loss = running_loss / len(trainloader)
    epoch_accuracy = 100 * correct / total
    train_loss_history.append(epoch_loss)
    train_accuracy_history.append(epoch_accuracy)

    # --- evaluation pass over the test set (gradients disabled) ---
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            predicted = torch.max(outputs, 1).indices
            total += labels.shape[0]
            correct += predicted.eq(labels).sum().item()

    test_accuracy = 100 * correct / total
    test_accuracy_history.append(test_accuracy)

    # Progress report on every 20th epoch
    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

# Elapsed wall-clock time for the whole loop
end_time = time.time()
training_time = end_time - start_time
print(f"\nTraining Time: {training_time:.2f} seconds")

# Test accuracy measured after the last epoch
print(f"\nFinal Test Accuracy: {test_accuracy_history[-1]:.2f}%")

Epoch [20/200], Loss: 0.0343, Train Accuracy: 98.84%, Test Accuracy: 72.77%
Epoch [40/200], Loss: 0.0206, Train Accuracy: 99.35%, Test Accuracy: 72.33%
Epoch [60/200], Loss: 0.0248, Train Accuracy: 99.27%, Test Accuracy: 72.22%
Epoch [80/200], Loss: 0.0127, Train Accuracy: 99.62%, Test Accuracy: 72.68%
Epoch [100/200], Loss: 0.0115, Train Accuracy: 99.66%, Test Accuracy: 72.85%
Epoch [120/200], Loss: 0.0140, Train Accuracy: 99.67%, Test Accuracy: 72.15%
Epoch [140/200], Loss: 0.0088, Train Accuracy: 99.78%, Test Accuracy: 72.22%
Epoch [160/200], Loss: 0.0127, Train Accuracy: 99.73%, Test Accuracy: 72.03%
Epoch [180/200], Loss: 0.0172, Train Accuracy: 99.68%, Test Accuracy: 71.63%
Epoch [200/200], Loss: 0.0116, Train Accuracy: 99.79%, Test Accuracy: 71.59%

Training Time: 3124.93 seconds

Final Test Accuracy: 71.59%


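1.B: Extended CNN (three convolutional layers with batch normalization, three fully connected layers) trained on CIFAR-10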

In [9]:
# Define the model
class ExtendedCNN(nn.Module):
    """Three conv -> batch-norm -> ReLU -> max-pool stages followed by a three-layer classifier.

    Channel widths grow 3 -> 16 -> 32 -> 64 while each 2x2 pooling halves height and
    width, so a (N, 3, 32, 32) batch reaches the classifier as 64 * 4 * 4 = 1024
    features per image. The output is (N, 10) unnormalised class scores.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: 3x3 convolutions (padding=1 keeps H and W), each paired with BatchNorm
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=16)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=32)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=64)
        # A single pooling module is reused after every stage
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Classifier head: 1024 -> 1024 -> 512 -> 10
        self.fc1 = nn.Linear(in_features=64 * 4 * 4, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        """Return the unnormalised class scores (logits) for a batch of images."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        features = x
        for conv, norm in stages:
            features = self.pool(F.relu(norm(conv(features))))

        # One flat feature vector per image: (batch_size, 64 * 4 * 4)
        flat = features.view(features.size(0), -1)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Instantiate the extended CNN that this cell trains
model = ExtendedCNN()

# Objective and optimiser: cross-entropy on the class scores, Adam at lr=0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Compute device: GPU when available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Epoch budget
epochs = 200

# One value is appended to each history list per epoch
train_loss_history, train_accuracy_history, test_accuracy_history = [], [], []

# Wall-clock reference; the measured span also covers the per-epoch evaluation
start_time = time.time()

for epoch in range(epochs):
    # --- optimisation pass over the training set ---
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss and the count of correct top-1 predictions
        running_loss += loss.item()
        predicted = torch.max(outputs, 1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

    # Mean of the per-batch losses, and training accuracy in percent
    epoch_loss = running_loss / len(trainloader)
    epoch_accuracy = 100 * correct / total
    train_loss_history.append(epoch_loss)
    train_accuracy_history.append(epoch_accuracy)

    # --- evaluation pass over the test set (gradients disabled) ---
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            predicted = torch.max(outputs, 1).indices
            total += labels.shape[0]
            correct += predicted.eq(labels).sum().item()

    test_accuracy = 100 * correct / total
    test_accuracy_history.append(test_accuracy)

    # Progress report on every 20th epoch
    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

# Elapsed wall-clock time for the whole loop
end_time = time.time()
training_time = end_time - start_time
print(f"\nTraining Time: {training_time:.2f} seconds")

# Test accuracy measured after the last epoch
print(f"\nFinal Test Accuracy: {test_accuracy_history[-1]:.2f}%")

Epoch [20/200], Loss: 0.0651, Train Accuracy: 97.73%, Test Accuracy: 76.35%
Epoch [40/200], Loss: 0.0344, Train Accuracy: 98.91%, Test Accuracy: 76.24%
Epoch [60/200], Loss: 0.0167, Train Accuracy: 99.46%, Test Accuracy: 76.19%
Epoch [80/200], Loss: 0.0189, Train Accuracy: 99.39%, Test Accuracy: 76.42%
Epoch [100/200], Loss: 0.0166, Train Accuracy: 99.47%, Test Accuracy: 75.01%
Epoch [120/200], Loss: 0.0214, Train Accuracy: 99.39%, Test Accuracy: 75.91%
Epoch [140/200], Loss: 0.0114, Train Accuracy: 99.65%, Test Accuracy: 75.83%
Epoch [160/200], Loss: 0.0128, Train Accuracy: 99.63%, Test Accuracy: 76.24%
Epoch [180/200], Loss: 0.0093, Train Accuracy: 99.73%, Test Accuracy: 75.81%
Epoch [200/200], Loss: 0.0196, Train Accuracy: 99.49%, Test Accuracy: 76.19%

Training Time: 3144.85 seconds

Final Test Accuracy: 76.19%


2: ResNet-style network (five residual blocks) trained on CIFAR-10

In [15]:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers whose output is added to a skip path before the final ReLU.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by the block.
        stride: stride of the first convolution (and of the projection shortcut).

    When the block changes the channel count or the resolution, the skip path is a
    1x1 convolution with the same stride so both branches have matching shapes;
    otherwise it is an empty ``nn.Sequential`` that returns its input unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Main branch: only the first convolution may downsample
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip branch: project only when the shapes of the two branches would differ
        needs_projection = stride != 1 or in_channels != out_channels
        self.shortcut = (
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
            if needs_projection
            else nn.Sequential()
        )

    def forward(self, x):
        """Return ReLU(main_branch(x) + shortcut(x))."""
        skip = self.shortcut(x)
        main = F.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        main += skip
        return F.relu(main)

# Define ResNet10 Architecture
class ResNet10(nn.Module):
    """ResNet-style classifier: a 3x3 stem convolution, five residual blocks and one linear layer.

    The stem keeps the input resolution and produces 16 channels. The residual
    blocks widen the features 16 -> 16 -> 32 -> 64 -> 128 -> 256, with the last four
    blocks using stride 2. The flattened feature map feeds a single linear layer.

    Args:
        num_classes: number of output scores per image (default 10).

    Attributes:
        conv_output_size: number of features per image entering ``fc``, measured
            once at construction time for a 3x32x32 input.
    """

    def __init__(self, num_classes=10):
        super(ResNet10, self).__init__()

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)

        # 5 residual blocks; every block after the first downsamples with stride 2
        self.res_blocks = nn.Sequential(
            ResidualBlock(16, 16, stride=1),
            ResidualBlock(16, 32, stride=2),
            ResidualBlock(32, 64, stride=2),
            ResidualBlock(64, 128, stride=2),
            ResidualBlock(128, 256, stride=2)
        )

        # Measure the flattened feature size so the linear layer can be sized to match
        self._calculate_conv_output_size()

        # Fully connected output layer
        self.fc = nn.Linear(self.conv_output_size, num_classes)

    def _calculate_conv_output_size(self):
        """Set ``self.conv_output_size`` by probing the feature extractor with a dummy image.

        The probe runs in eval mode with gradients disabled. A training-mode forward
        pass would fold the all-zero dummy image into every BatchNorm layer's
        running statistics and bump ``num_batches_tracked`` before any real data is
        seen. The previous train/eval mode is restored afterwards.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                # A single 3x32x32 image, so numel() is the per-image feature count
                dummy_output = self._forward_conv(torch.zeros(1, 3, 32, 32))
        finally:
            self.train(was_training)
        self.conv_output_size = dummy_output.numel()

    def _forward_conv(self, x):
        """Apply the stem (conv -> batch-norm -> ReLU) and the residual blocks."""
        x = F.relu(self.bn1(self.conv1(x)))
        return self.res_blocks(x)

    def forward(self, x):
        """Return ``(batch_size, num_classes)`` unnormalised class scores."""
        x = self._forward_conv(x)
        x = x.view(x.size(0), -1)  # Flatten to (batch_size, channels * height * width)
        return self.fc(x)

# Instantiate the residual network that this cell trains
model = ResNet10()

# Compute device: GPU when available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Objective and optimiser: cross-entropy on the class scores, Adam at lr=0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Epoch budget
epochs = 200

# One value is appended to each history list per epoch
train_loss_history, train_accuracy_history, test_accuracy_history = [], [], []

# trainloader and testloader are the CIFAR-10 loaders created earlier in the notebook.
# Wall-clock reference; the measured span also covers the per-epoch evaluation
start_time = time.time()

for epoch in range(epochs):
    # --- optimisation pass over the training set ---
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss and the count of correct top-1 predictions
        running_loss += loss.item()
        predicted = torch.max(outputs, 1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

    # Mean of the per-batch losses, and training accuracy in percent
    epoch_loss = running_loss / len(trainloader)
    epoch_accuracy = 100 * correct / total
    train_loss_history.append(epoch_loss)
    train_accuracy_history.append(epoch_accuracy)

    # --- evaluation pass over the test set (gradients disabled) ---
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            predicted = torch.max(outputs, 1).indices
            total += labels.shape[0]
            correct += predicted.eq(labels).sum().item()

    test_accuracy = 100 * correct / total
    test_accuracy_history.append(test_accuracy)

    # Progress report on every 20th epoch
    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

# Elapsed wall-clock time for the whole loop
end_time = time.time()
training_time = end_time - start_time
print(f"\nTraining Time: {training_time:.2f} seconds")

# Test accuracy measured after the last epoch
print(f"\nFinal Test Accuracy: {test_accuracy_history[-1]:.2f}%")

Epoch [20/200], Loss: 0.0423, Train Accuracy: 98.55%, Test Accuracy: 77.22%
Epoch [40/200], Loss: 0.0204, Train Accuracy: 99.33%, Test Accuracy: 77.94%
Epoch [60/200], Loss: 0.0144, Train Accuracy: 99.49%, Test Accuracy: 78.03%
Epoch [80/200], Loss: 0.0130, Train Accuracy: 99.58%, Test Accuracy: 77.73%
Epoch [100/200], Loss: 0.0156, Train Accuracy: 99.47%, Test Accuracy: 77.35%
Epoch [120/200], Loss: 0.0059, Train Accuracy: 99.82%, Test Accuracy: 78.14%
Epoch [140/200], Loss: 0.0140, Train Accuracy: 99.54%, Test Accuracy: 77.87%
Epoch [160/200], Loss: 0.0101, Train Accuracy: 99.69%, Test Accuracy: 78.38%
Epoch [180/200], Loss: 0.0056, Train Accuracy: 99.78%, Test Accuracy: 78.27%
Epoch [200/200], Loss: 0.0083, Train Accuracy: 99.73%, Test Accuracy: 78.70%

Training Time: 3532.98 seconds

Final Test Accuracy: 78.70%
