In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 3, 3)  # input channels: 1, output channels: 3, kernel size: 3
        self.pool = nn.MaxPool2d(2, 2)   # kernel size: 2, stride: 2
        self.conv2 = nn.Conv2d(3, 5, 3)  # input channels: 3, output channels: 5, kernel size: 3
        self.fc1 = nn.Linear(5 * 5 * 5, 30)  # 5x5 image size after 3 max pooling layers
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 5 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create instances of MLP and CNN
mlp_model = MLP()
cnn_model = CNN()

# Print the models
print("MLP model architecture:")
print(mlp_model)

print("\nCNN model architecture:")
print(cnn_model)


MLP model architecture:
MLP(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=10, bias=True)
)

CNN model architecture:
CNN(
  (conv1): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=125, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=10, bias=True)
)


In [7]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Define transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load Fashion MNIST dataset
train_set = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Define data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Split training set into training and validation
num_train = len(train_set)
indices = list(range(num_train))
split = int(np.floor(0.1 * num_train))  # 10% of training set for validation
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = DataLoader(train_set, batch_size=64, sampler=train_sampler)
valid_loader = DataLoader(train_set, batch_size=64, sampler=valid_sampler)

# Define MLP model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Define CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 3, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3, 5, 3)
        self.fc1 = nn.Linear(5 * 5 * 5, 30)
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 5 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define training function
def train(model, train_loader, valid_loader, criterion, optimizer, num_epochs=5):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        # Validation
        model.eval()
        validation_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in valid_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                validation_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Training Loss: {running_loss/len(train_loader.dataset):.4f}, "
              f"Validation Loss: {validation_loss/len(valid_loader.dataset):.4f}, "
              f"Validation Accuracy: {(100 * correct / total):.2f}%")

# Define evaluation function
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Test Accuracy: {(100 * correct / total):.2f}%")

# Create instances of MLP and CNN
mlp_model = MLP()
cnn_model = CNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
mlp_optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Train MLP
print("Training MLP...")
train(mlp_model, train_loader, valid_loader, criterion, mlp_optimizer)

# Test MLP
print("Testing MLP...")
test(mlp_model, test_loader)

# Train CNN
print("\nTraining CNN...")
train(cnn_model, train_loader, valid_loader, criterion, cnn_optimizer)

# Test CNN
print("Testing CNN...")
test(cnn_model, test_loader)



Training MLP...
Epoch 1/5, Training Loss: 0.4847, Validation Loss: 0.0415, Validation Accuracy: 84.83%
Epoch 2/5, Training Loss: 0.3513, Validation Loss: 0.0396, Validation Accuracy: 85.72%
Epoch 3/5, Training Loss: 0.3168, Validation Loss: 0.0344, Validation Accuracy: 87.42%
Epoch 4/5, Training Loss: 0.2943, Validation Loss: 0.0335, Validation Accuracy: 87.92%
Epoch 5/5, Training Loss: 0.2786, Validation Loss: 0.0327, Validation Accuracy: 88.35%
Testing MLP...
Test Accuracy: 87.16%

Training CNN...


RuntimeError: Given groups=1, weight of size [5, 3, 3, 3], expected input[64, 5, 5, 5] to have 3 channels, but got 5 channels instead

In [8]:
import torch.optim as optim

# Define a range of learning rates and batch sizes to explore
learning_rates = [0.001, 0.0005, 0.0001]
batch_sizes = [32, 64, 128]

best_accuracy = 0
best_lr = None
best_batch_size = None

for lr in learning_rates:
    for batch_size in batch_sizes:
        # Create instances of MLP and CNN
        mlp_model = MLP()
        cnn_model = CNN()

        # Define loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        mlp_optimizer = optim.Adam(mlp_model.parameters(), lr=lr)
        cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=lr)

        # Train MLP
        print(f"Training MLP with lr={lr} and batch_size={batch_size}...")
        train(mlp_model, train_loader, valid_loader, criterion, mlp_optimizer)

        # Test MLP
        print("Testing MLP...")
        accuracy = test(mlp_model, test_loader)
        
        # Update best hyperparameters if necessary
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_lr = lr
            best_batch_size = batch_size

        # Train CNN
        print(f"\nTraining CNN with lr={lr} and batch_size={batch_size}...")
        train(cnn_model, train_loader, valid_loader, criterion, cnn_optimizer)

        # Test CNN
        print("Testing CNN...")
        accuracy = test(cnn_model, test_loader)
        
        # Update best hyperparameters if necessary
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_lr = lr
            best_batch_size = batch_size

print(f"Best accuracy: {best_accuracy:.2f}%")
print(f"Best learning rate: {best_lr}")
print(f"Best batch size: {best_batch_size}")


Training MLP with lr=0.001 and batch_size=32...
Epoch 1/5, Training Loss: 0.4852, Validation Loss: 0.0428, Validation Accuracy: 84.27%
Epoch 2/5, Training Loss: 0.3524, Validation Loss: 0.0376, Validation Accuracy: 85.75%
Epoch 3/5, Training Loss: 0.3197, Validation Loss: 0.0363, Validation Accuracy: 86.37%
Epoch 4/5, Training Loss: 0.2952, Validation Loss: 0.0364, Validation Accuracy: 86.30%
Epoch 5/5, Training Loss: 0.2767, Validation Loss: 0.0349, Validation Accuracy: 87.12%
Testing MLP...
Test Accuracy: 86.49%


TypeError: '>' not supported between instances of 'NoneType' and 'int'

In [10]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Define transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load Fashion MNIST dataset
train_set = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Define data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Split training set into training and validation
num_train = len(train_set)
indices = list(range(num_train))
split = int(np.floor(0.1 * num_train))  # 10% of training set for validation
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = DataLoader(train_set, batch_size=64, sampler=train_sampler)
valid_loader = DataLoader(train_set, batch_size=64, sampler=valid_sampler)

# Define MLP model with ReLU activation
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Define CNN model with ReLU activation
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 3, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3, 5, 3)
        self.fc1 = nn.Linear(5 * 5 * 5, 30)
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 5 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define training function
def train(model, train_loader, valid_loader, criterion, optimizer, num_epochs=5):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        # Validation
        model.eval()
        validation_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in valid_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                validation_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Training Loss: {running_loss/len(train_loader.dataset):.4f}, "
              f"Validation Loss: {validation_loss/len(valid_loader.dataset):.4f}, "
              f"Validation Accuracy: {(100 * correct / total):.2f}%")

# Define evaluation function
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Create instances of MLP and CNN
mlp_model = MLP()
cnn_model = CNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
mlp_optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Train MLP
print("Training MLP...")
train(mlp_model, train_loader, valid_loader, criterion, mlp_optimizer)

# Test MLP
print("Testing MLP...")
mlp_accuracy = test(mlp_model, test_loader)

# Train CNN
print("\nTraining CNN...")
train(cnn_model, train_loader, valid_loader, criterion, cnn_optimizer)

# Test CNN
print("Testing CNN...")
cnn_accuracy = test(cnn_model, test_loader)

print(f"\nMLP Test Accuracy: {mlp_accuracy:.2f}%")
print(f"CNN Test Accuracy: {cnn_accuracy:.2f}%")


Training MLP...
Epoch 1/5, Training Loss: 0.4847, Validation Loss: 0.0415, Validation Accuracy: 84.83%
Epoch 2/5, Training Loss: 0.3513, Validation Loss: 0.0396, Validation Accuracy: 85.72%
Epoch 3/5, Training Loss: 0.3168, Validation Loss: 0.0344, Validation Accuracy: 87.42%
Epoch 4/5, Training Loss: 0.2943, Validation Loss: 0.0335, Validation Accuracy: 87.92%
Epoch 5/5, Training Loss: 0.2786, Validation Loss: 0.0327, Validation Accuracy: 88.35%
Testing MLP...
Test Accuracy: 87.16%

Training CNN...


RuntimeError: Given groups=1, weight of size [5, 3, 3, 3], expected input[64, 5, 5, 5] to have 3 channels, but got 5 channels instead

In [11]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Define transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load Fashion MNIST dataset
train_set = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Define data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Split training set into training and validation
num_train = len(train_set)
indices = list(range(num_train))
split = int(np.floor(0.1 * num_train))  # 10% of training set for validation
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = DataLoader(train_set, batch_size=64, sampler=train_sampler)
valid_loader = DataLoader(train_set, batch_size=64, sampler=valid_sampler)

# Define MLP model with Sigmoid activation
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x

# Define CNN model with Sigmoid activation
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 3, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3, 5, 3)
        self.fc1 = nn.Linear(5 * 5 * 5, 30)
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):
        x = self.pool(torch.sigmoid(self.conv1(x)))
        x = self.pool(torch.sigmoid(self.conv2(x)))
        x = self.pool(torch.sigmoid(self.conv2(x)))
        x = x.view(-1, 5 * 5 * 5)
        x = torch.sigmoid(self.fc1(x))
        x = self.fc2(x)
        return x

# Define training function
def train(model, train_loader, valid_loader, criterion, optimizer, num_epochs=5):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        # Validation
        model.eval()
        validation_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in valid_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                validation_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Training Loss: {running_loss/len(train_loader.dataset):.4f}, "
              f"Validation Loss: {validation_loss/len(valid_loader.dataset):.4f}, "
              f"Validation Accuracy: {(100 * correct / total):.2f}%")

# Define evaluation function
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Create instances of MLP and CNN
mlp_model = MLP()
cnn_model = CNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
mlp_optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Train MLP
print("Training MLP...")
train(mlp_model, train_loader, valid_loader, criterion, mlp_optimizer)

# Test MLP
print("Testing MLP...")
mlp_accuracy = test(mlp_model, test_loader)

# Train CNN
print("\nTraining CNN...")
train(cnn_model, train_loader, valid_loader, criterion, cnn_optimizer)

# Test CNN
print("Testing CNN...")
cnn_accuracy = test(cnn_model, test_loader)

print(f"\nMLP Test Accuracy: {mlp_accuracy:.2f}%")
print(f"CNN Test Accuracy: {cnn_accuracy:.2f}%")


Training MLP...
Epoch 1/5, Training Loss: 0.7967, Validation Loss: 0.0516, Validation Accuracy: 82.18%
Epoch 2/5, Training Loss: 0.4056, Validation Loss: 0.0430, Validation Accuracy: 84.97%
Epoch 3/5, Training Loss: 0.3503, Validation Loss: 0.0378, Validation Accuracy: 86.42%
Epoch 4/5, Training Loss: 0.3200, Validation Loss: 0.0361, Validation Accuracy: 86.60%
Epoch 5/5, Training Loss: 0.3002, Validation Loss: 0.0337, Validation Accuracy: 88.07%
Testing MLP...
Test Accuracy: 86.70%

Training CNN...


RuntimeError: Given groups=1, weight of size [5, 3, 3, 3], expected input[64, 5, 5, 5] to have 3 channels, but got 5 channels instead

In [12]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Define transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load Fashion MNIST dataset
train_set = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Define data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Split training set into training and validation
num_train = len(train_set)
indices = list(range(num_train))
split = int(np.floor(0.1 * num_train))  # 10% of training set for validation
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = DataLoader(train_set, batch_size=64, sampler=train_sampler)
valid_loader = DataLoader(train_set, batch_size=64, sampler=valid_sampler)

# Define MLP model with Dropout
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 100)
        self.dropout1 = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(100, 50)
        self.dropout2 = nn.Dropout(p=0.2)
        self.fc3 = nn.Linear(50, 10)
        self.dropout3 = nn.Dropout(p=0.2)

    def forward(self, x):
        x = x.view(-1, 784)
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = torch.relu(self.fc3(x))
        x = self.dropout3(x)
        return x

# Define CNN model with Dropout
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 3, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3, 5, 3)
        self.fc1 = nn.Linear(5 * 5 * 5, 30)
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 5 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Define training function
def train(model, train_loader, valid_loader, criterion, optimizer, num_epochs=5):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        # Validation
        model.eval()
        validation_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in valid_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                validation_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Training Loss: {running_loss/len(train_loader.dataset):.4f}, "
              f"Validation Loss: {validation_loss/len(valid_loader.dataset):.4f}, "
              f"Validation Accuracy: {(100 * correct / total):.2f}%")

# Define evaluation function
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Create instances of MLP and CNN
mlp_model = MLP()
cnn_model = CNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
mlp_optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Train MLP
print("Training MLP...")
train(mlp_model, train_loader, valid_loader, criterion, mlp_optimizer)

# Test MLP
print("Testing MLP...")
mlp_accuracy = test(mlp_model, test_loader)

# Train CNN
print("\nTraining CNN...")
train(cnn_model, train_loader, valid_loader, criterion, cnn_optimizer)

# Test CNN
print("Testing CNN...")
cnn_accuracy = test(cnn_model, test_loader)

print(f"\nMLP Test Accuracy: {mlp_accuracy:.2f}%")
print(f"CNN Test Accuracy: {cnn_accuracy:.2f}%")


Training MLP...
Epoch 1/5, Training Loss: 0.9823, Validation Loss: 0.0594, Validation Accuracy: 82.95%
Epoch 2/5, Training Loss: 0.8412, Validation Loss: 0.0519, Validation Accuracy: 85.67%
Epoch 3/5, Training Loss: 0.8151, Validation Loss: 0.0512, Validation Accuracy: 86.03%
Epoch 4/5, Training Loss: 0.7920, Validation Loss: 0.0468, Validation Accuracy: 86.05%
Epoch 5/5, Training Loss: 0.7762, Validation Loss: 0.0463, Validation Accuracy: 87.37%
Testing MLP...
Test Accuracy: 86.26%

Training CNN...


RuntimeError: Given groups=1, weight of size [5, 3, 3, 3], expected input[64, 5, 5, 5] to have 3 channels, but got 5 channels instead

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Define transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load Fashion MNIST dataset
train_set = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Define data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Split training set into training and validation
num_train = len(train_set)
indices = list(range(num_train))
split = int(np.floor(0.1 * num_train))  # 10% of training set for validation
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = DataLoader(train_set, batch_size=64, sampler=train_sampler)
valid_loader = DataLoader(train_set, batch_size=64, sampler=valid_sampler)

# Define MLP model with Dropout
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 100)
        self.dropout1 = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(100, 50)
        self.dropout2 = nn.Dropout(p=0.2)
        self.fc3 = nn.Linear(50, 10)
        self.dropout3 = nn.Dropout(p=0.2)

    def forward(self, x):
        x = x.view(-1, 784)
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = torch.relu(self.fc3(x))
        x = self.dropout3(x)
        return x

# Define CNN model with Dropout
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 3, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(3, 5, 3)
        self.fc1 = nn.Linear(5 * 5 * 5, 30)
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 5 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Define training function
def train(model, train_loader, valid_loader, criterion, optimizer, num_epochs=5):
    train_loss_history = []
    valid_accuracy_history = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        # Validation
        model.eval()
        validation_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in valid_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                validation_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        epoch_train_loss = running_loss / len(train_loader.dataset)
        epoch_valid_loss = validation_loss / len(valid_loader.dataset)
        epoch_valid_accuracy = correct / total * 100

        train_loss_history.append(epoch_train_loss)
        valid_accuracy_history.append(epoch_valid_accuracy)

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Training Loss: {epoch_train_loss:.4f}, "
              f"Validation Loss: {epoch_valid_loss:.4f}, "
              f"Validation Accuracy: {epoch_valid_accuracy:.2f}%")

    return train_loss_history, valid_accuracy_history

# Define evaluation function
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Create instances of MLP and CNN
mlp_model = MLP()
cnn_model = CNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
mlp_optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Train MLP
print("Training MLP...")
mlp_train_loss, mlp_valid_accuracy = train(mlp_model, train_loader, valid_loader, criterion, mlp_optimizer, num_epochs=50)

# Test MLP
print("Testing MLP...")
mlp_test_accuracy = test(mlp_model, test_loader)

# Train CNN
print("\nTraining CNN...")
cnn_train_loss, cnn_valid_accuracy = train(cnn_model, train_loader, valid_loader, criterion, cnn_optimizer, num_epochs=50)

# Test CNN
print("Testing CNN...")
cnn_test_accuracy = test(cnn_model, test_loader)

print(f"\nMLP Test Accuracy: {mlp_test_accuracy:.2f}%")
print(f"CNN Test Accuracy: {cnn_test_accuracy:.2f}%")

# Plot and save curves
epochs = range(1, 51)

plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs, mlp_train_loss, label='MLP Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Training Loss')
plt.title('MLP Training Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, mlp_valid_accuracy, label='MLP Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Validation Accuracy (%)')
plt.title('MLP Validation Accuracy')
plt.legend()

plt.tight_layout()
plt.savefig('mlp_training_curve.png')

plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs, cnn_train_loss, label='CNN Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Training Loss')
plt.title('CNN Training Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, cnn_valid_accuracy, label='CNN Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Validation Accuracy (%)')
plt.title('CNN Validation Accuracy')
plt.legend()

plt.tight_layout()
plt.savefig('cnn_training_curve.png')

plt.show()


Training MLP...
Epoch 1/50, Training Loss: 0.9823, Validation Loss: 0.0594, Validation Accuracy: 82.95%
Epoch 2/50, Training Loss: 0.8412, Validation Loss: 0.0519, Validation Accuracy: 85.67%
Epoch 3/50, Training Loss: 0.8151, Validation Loss: 0.0512, Validation Accuracy: 86.03%
Epoch 4/50, Training Loss: 0.7920, Validation Loss: 0.0468, Validation Accuracy: 86.05%
Epoch 5/50, Training Loss: 0.7762, Validation Loss: 0.0463, Validation Accuracy: 87.37%
Epoch 6/50, Training Loss: 0.7673, Validation Loss: 0.0438, Validation Accuracy: 87.40%
Epoch 7/50, Training Loss: 0.7485, Validation Loss: 0.0451, Validation Accuracy: 87.45%
Epoch 8/50, Training Loss: 0.7403, Validation Loss: 0.0456, Validation Accuracy: 87.42%
Epoch 9/50, Training Loss: 0.7434, Validation Loss: 0.0438, Validation Accuracy: 87.92%
Epoch 10/50, Training Loss: 0.7343, Validation Loss: 0.0423, Validation Accuracy: 88.30%
Epoch 11/50, Training Loss: 0.7198, Validation Loss: 0.0434, Validation Accuracy: 88.12%
Epoch 12/50, T