In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Fixed seed for reproducibility
torch.manual_seed(0)

# define hyperparameters
batch_size = 64
epochs = 10
learning_rate = 0.001
weight_decay = 1e-4  # L2 regularization

# dataset transformation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


train_dataset = datasets.CIFAR10(root='../data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='../data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
class FCNN(nn.Module):
    def __init__(self):
        super(FCNN, self).__init__()
        self.fc1 = nn.Linear(3 * 32 * 32, 512)  # First layer, 3*32*32 size
        self.fc2 = nn.Linear(512, 256)  # Second layer
        self.fc3 = nn.Linear(256, 128)  # Third layer
        self.fc4 = nn.Linear(128, 10)  # Fourth layer, 10 classes

    def forward(self, x):
        x = x.view(-1, 3 * 32 * 32)  # Flatten the input
        x = F.relu(self.fc1(x))  # ReLU activation
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)  # Output layer
        return x


model = FCNN()

In [4]:
criterion = nn.CrossEntropyLoss() # Cross-entropy loss

In [9]:
criterion = nn.MSELoss()    # MSE loss

In [17]:
criterion = nn.L1Loss()    # L1 loss

In [5]:
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)  # Adam optimizer with L2 regularization

In [18]:
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)  # SGD optimizer with momentum

In [6]:
# Training function
def train(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for data, target in train_loader:
        optimizer.zero_grad()  # clear gradients
        output = model(data)  # forward pass
        loss = criterion(output, target)  # compute loss
        loss.backward()  # backward pass
        optimizer.step()  # update weights
        
        total_loss += loss.item()
        _, predicted = torch.max(output, 1) # fetch the predicted class
        total += target.size(0)
        correct += (predicted == target).sum().item()
    
    accuracy = 100 * correct / total
    avg_loss = total_loss / len(train_loader)
    return avg_loss, accuracy

# Testing function
def test(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            loss = criterion(output, target)
            total_loss += loss.item()
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    
    accuracy = 100 * correct / total
    avg_loss = total_loss / len(test_loader)
    return avg_loss, accuracy


In [19]:
# Training loop
def train(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for data, target in train_loader:
        optimizer.zero_grad()  # clear
        output = model(data)  # forward pass
        
        # one-hot encoding for MSE
        target_one_hot = torch.zeros(data.size(0), 10).to(data.device)
        target_one_hot.scatter_(1, target.view(-1, 1), 1)
        
        loss = criterion(output, target_one_hot)  # calculate loss
        loss.backward()  # backward loss
        optimizer.step()  # update w
        
        total_loss += loss.item()
        _, predicted = torch.max(output, 1) 
        total += target.size(0)
        correct += (predicted == target).sum().item()
    
    accuracy = 100 * correct / total
    avg_loss = total_loss / len(train_loader)
    return avg_loss, accuracy

# Testing Function
def test(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)

            target_one_hot = torch.zeros(data.size(0), 10).to(data.device)
            target_one_hot.scatter_(1, target.view(-1, 1), 1)
            
            loss = criterion(output, target_one_hot)
            total_loss += loss.item()
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    
    accuracy = 100 * correct / total
    avg_loss = total_loss / len(test_loader)
    return avg_loss, accuracy


In [20]:
# train and eval
for epoch in range(epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer)
    test_loss, test_acc = test(model, test_loader, criterion)
    
    print(f"Epoch [{epoch+1}/{epochs}]")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%")

# save model
torch.save(model.state_dict(), "fcnn_cifar10.pth")

Epoch [1/10]
Train Loss: 0.0962, Train Accuracy: 26.48%
Test Loss: 0.0978, Test Accuracy: 22.90%
Epoch [2/10]
Train Loss: 0.0959, Train Accuracy: 26.81%
Test Loss: 0.0977, Test Accuracy: 23.32%
Epoch [3/10]
Train Loss: 0.0957, Train Accuracy: 26.80%
Test Loss: 0.0975, Test Accuracy: 24.70%
Epoch [4/10]
Train Loss: 0.0955, Train Accuracy: 27.04%
Test Loss: 0.0974, Test Accuracy: 26.97%
Epoch [5/10]
Train Loss: 0.0953, Train Accuracy: 27.15%
Test Loss: 0.0974, Test Accuracy: 22.56%
Epoch [6/10]
Train Loss: 0.0951, Train Accuracy: 27.15%
Test Loss: 0.0972, Test Accuracy: 26.46%
Epoch [7/10]
Train Loss: 0.0950, Train Accuracy: 27.07%
Test Loss: 0.0972, Test Accuracy: 25.64%
Epoch [8/10]
Train Loss: 0.0949, Train Accuracy: 26.61%
Test Loss: 0.0971, Test Accuracy: 20.19%
Epoch [9/10]
Train Loss: 0.0948, Train Accuracy: 27.06%
Test Loss: 0.0971, Test Accuracy: 19.50%
Epoch [10/10]
Train Loss: 0.0947, Train Accuracy: 26.78%
Test Loss: 0.0970, Test Accuracy: 24.56%


## CrossEntropyLoss + Adam + L2 regularization

```bash
Epoch [1/10]
Train Loss: 1.6622, Train Accuracy: 40.77%
Test Loss: 1.5286, Test Accuracy: 46.19%
Epoch [2/10]
Train Loss: 1.4520, Train Accuracy: 48.63%
Test Loss: 1.4298, Test Accuracy: 49.72%
Epoch [3/10]
Train Loss: 1.3521, Train Accuracy: 52.26%
Test Loss: 1.3949, Test Accuracy: 50.66%
Epoch [4/10]
Train Loss: 1.2810, Train Accuracy: 54.37%
Test Loss: 1.3669, Test Accuracy: 52.03%
Epoch [5/10]
Train Loss: 1.2144, Train Accuracy: 56.81%
Test Loss: 1.3516, Test Accuracy: 53.43%
Epoch [6/10]
Train Loss: 1.1593, Train Accuracy: 58.65%
Test Loss: 1.3260, Test Accuracy: 53.78%
Epoch [7/10]
Train Loss: 1.1110, Train Accuracy: 60.33%
Test Loss: 1.3400, Test Accuracy: 53.27%
Epoch [8/10]
Train Loss: 1.0657, Train Accuracy: 61.90%
Test Loss: 1.3611, Test Accuracy: 53.36%
Epoch [9/10]
...
Test Loss: 1.3677, Test Accuracy: 53.21%
Epoch [10/10]
Train Loss: 0.9843, Train Accuracy: 64.45%
Test Loss: 1.4113, Test Accuracy: 52.86%
```

## MSE + Adam + L2 regularization

    
```bash
Epoch [1/10]
Train Loss: 0.0759, Train Accuracy: 38.59%
Test Loss: 0.0711, Test Accuracy: 43.33%
Epoch [2/10]
Train Loss: 0.0717, Train Accuracy: 42.59%
Test Loss: 0.0700, Test Accuracy: 45.26%
Epoch [3/10]
Train Loss: 0.0692, Train Accuracy: 45.00%
Test Loss: 0.0687, Test Accuracy: 45.98%
Epoch [4/10]
Train Loss: 0.0681, Train Accuracy: 46.38%
Test Loss: 0.0687, Test Accuracy: 46.00%
Epoch [5/10]
Train Loss: 0.0670, Train Accuracy: 47.24%
Test Loss: 0.0681, Test Accuracy: 46.58%
Epoch [6/10]
Train Loss: 0.0664, Train Accuracy: 48.05%
Test Loss: 0.0673, Test Accuracy: 47.33%
Epoch [7/10]
Train Loss: 0.0657, Train Accuracy: 48.64%
Test Loss: 0.0671, Test Accuracy: 47.29%
Epoch [8/10]
Train Loss: 0.0655, Train Accuracy: 48.78%
Test Loss: 0.0657, Test Accuracy: 48.33%
Epoch [9/10]
...
Test Loss: 0.0657, Test Accuracy: 49.06%
Epoch [10/10]
Train Loss: 0.0647, Train Accuracy: 49.52%
Test Loss: 0.0655, Test Accuracy: 48.77%
```

## L1 loss + SDG + L2 regularization

```bash
Epoch [1/10]
Train Loss: 0.0962, Train Accuracy: 26.48%
Test Loss: 0.0978, Test Accuracy: 22.90%
Epoch [2/10]
Train Loss: 0.0959, Train Accuracy: 26.81%
Test Loss: 0.0977, Test Accuracy: 23.32%
Epoch [3/10]
Train Loss: 0.0957, Train Accuracy: 26.80%
Test Loss: 0.0975, Test Accuracy: 24.70%
Epoch [4/10]
Train Loss: 0.0955, Train Accuracy: 27.04%
Test Loss: 0.0974, Test Accuracy: 26.97%
Epoch [5/10]
Train Loss: 0.0953, Train Accuracy: 27.15%
Test Loss: 0.0974, Test Accuracy: 22.56%
Epoch [6/10]
Train Loss: 0.0951, Train Accuracy: 27.15%
Test Loss: 0.0972, Test Accuracy: 26.46%
Epoch [7/10]
Train Loss: 0.0950, Train Accuracy: 27.07%
Test Loss: 0.0972, Test Accuracy: 25.64%
Epoch [8/10]
Train Loss: 0.0949, Train Accuracy: 26.61%
Test Loss: 0.0971, Test Accuracy: 20.19%
Epoch [9/10]
...
Test Loss: 0.0971, Test Accuracy: 19.50%
Epoch [10/10]
Train Loss: 0.0947, Train Accuracy: 26.78%
Test Loss: 0.0970, Test Accuracy: 24.56%
```

# Conclusion

# Give me a detailed conclusion of the experiments

这次实验使用了三种不同的损失函数，分别是 CrossEntropyLoss，MSE 和 L1 loss。
用了两种不同的优化器，分别是 Adam 和 SDG。还使用了 L2 正则化。
实验结果表明， CrossEntropyLoss + Adam + L2 正则化的准确率最高，达到了52.86%。MSE + Adam + L2 正则化的准确率为49.06%，L1 loss + SDG + L2 正则化的准确率为24.56%。

- 原因分析：

1. CrossEntropyLoss + Adam + L2 正则化的准确率最高，可能是因为 CrossEntropyLoss 是分类问题的常用损失函数，适合多分类问题。Adam 是一种自适应学习率的优化器，能够更快地收敛。L2 正则化可以防止过拟合，提高模型的泛化能力。
2. MSE + Adam + L2 正则化的准确率次高，可能是因为 MSE 是回归问题的常用损失函数，不太适合多分类问题。但是由于使用了 L2 正则化，可以防止过拟合，提高模型的泛化能力。
3. L1 loss + SDG + L2 正则化的准确率最低，可能是因为 L1 loss 对异常值更敏感，容易受到异常值的影响。SDG 是一种随机梯度下降优化器，收敛速度较慢。L2 正则化可以防止过拟合，提高模型的泛化能力。



