## PyTorch Datasets and DataLoaders

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.

In [2]:
!pip install torch torchvision
#!pip install transform
import torch
import torchvision
import torchvision.transforms as transforms

# Define data transformations for training data
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # 随机水平翻转图像
    transforms.RandomCrop(32, padding=4),  # 随机裁剪图像，并在周围填充0
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize
])

# Define data transformations for test data
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize
])

# Load the CIFAR10 dataset
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train
)

test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test
)

# Create DataLoaders for train and test datasets
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=2
)

test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=64, shuffle=False, num_workers=2
)

print("Num training examples: {}".format(len(train_dataset)))
print("Num test examples: {}".format(len(test_dataset)))


# List of class labels
classes = [
    'plane', 'car', 'bird', 'cat',
    'deer', 'dog', 'frog', 'horse', 'ship', 'truck'
]

Files already downloaded and verified
Files already downloaded and verified
Num training examples: 50000
Num test examples: 10000


In [2]:
# Fetch one batch of data
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Inspect the shapes and value ranges of images (X) and labels (Y)
images_shape = images.shape
images_dtype = images.dtype
images_min = images.min().item()
images_max = images.max().item()

labels_shape = labels.shape
labels_dtype = labels.dtype

images_shape, images_dtype, images_min, images_max, labels_shape, labels_dtype

(torch.Size([64, 3, 32, 32]),
 torch.float32,
 -1.0,
 1.0,
 torch.Size([64]),
 torch.int64)

## Visualizing Examples from the CIFAR10 Dataset

In [3]:
%pip install matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Function to unnormalize and display an image
def imshow(img):
    img = img / 2 + 0.5  # Unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Get a batch of training data
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Display the images in a grid along with their labels
imshow(torchvision.utils.make_grid(images[:16]))
print(" -- ".join(f"{classes[labels[j]]}" for j in range(8)))
print(" -- ".join(f"{classes[labels[j]]}" for j in range(8,16)))

Note: you may need to restart the kernel to use updated packages.


: 

## Training on the GPU and Evaluating Performance
Calculate train and test accuracy

In [3]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Set the random seed for reproducibility
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

# Define the neural network
class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.dropout = nn.Dropout(p=0.5)  # Add dropout layer with 50% dropout rate
        self.bn1 = nn.BatchNorm1d(H)  # Add batch normalization layer
        self.linear2 = torch.nn.Linear(H, D_out)
        self.device = 'cpu'
        self.use_dropout = False  # Flag to control dropout
        self.use_bn = False  # Flag to control batch normalization

        # Initialize weights of self.linear with a small random number from a normal distribution
        #nn.init.normal_(self.linear1.weight, mean=0.0, std=0.01)
        #nn.init.normal_(self.linear2.weight, mean=0.0, std=0.01)
        nn.init.kaiming_uniform_(self.linear1.weight, mode='fan_in', nonlinearity='relu')
        nn.init.kaiming_uniform_(self.linear2.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        #x = x.view(-1, 3*32*32)  # 將x展平以匹配全連接層的期望輸入形狀
        #h_relu = self.linear1(x).clamp(min=0)
        #y_pred = self.linear2(h_relu)
        #return y_pred
        x = x.view(-1, 3*32*32)  # Flatten input to match the input shape expected by the fully connected layer
        h = self.linear1(x)
        if self.use_dropout:
            h = self.dropout(h)  # Apply dropout if flag is set
        if self.use_bn:
            h = self.bn1(h)  # Apply batch normalization if flag is set
        h_relu = h.clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred
    
    def to(self, device):
        self.device = device  # Set the device attribute
        super().to(device)    # Call the parent class' to method    
    
    def evaluate_model(self, test_loader, criterion):
        # let's evaluate its performance on the test dataset.
        self.eval()
        # Test the neural network
        correct = 0
        total = 0
        test_loss = 0.0

        # Disable gradient calculation
        with torch.no_grad():
            for inputs, labels in test_loader:
                #print(inputs.size(), labels.size())

                # Move the inputs and labels to the GPU if available
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)

                # Forward pass
                outputs = self(inputs)

                # Calculate test loss
                loss = criterion(outputs, labels)
                test_loss += loss.item() * inputs.size(0)  # 将损失累加，考虑到每个批次的大小
                
                # Get the predicted class
                _, predicted = torch.max(outputs.data, 1)

                # Update the total number of samples and correct predictions
                total += labels.size(0) 
                correct += (predicted == labels).sum().item()

        # 计算平均测试损失
        avg_test_loss = test_loss / len(test_loader.dataset)
        # Calculate the accuracy
        test_accuracy = 100 * correct / total
        return avg_test_loss, test_accuracy

In [4]:
def reset_and_initialize():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)
    net = TwoLayerNet(3*32*32, 1000, 10)
    net.to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.001)#weight_decay=L2λ 添加一个与权重大小相关的L2项来抑制过拟合，影响梯度更新。
    return net, optimizer, device

In [5]:
def custom_loss(outputs, targets, model, lambda1):
    criterion = nn.CrossEntropyLoss()
    data_loss = criterion(outputs, targets)
    reg_loss = 0
    p = model.linear1.out_features  # Number of hidden nodes
    m = model.linear1.in_features  # Number of input nodes
    for param in model.parameters():
        reg_loss += torch.sum(param**2)
    total_loss = data_loss + (lambda1 / (p + 1 + p * (m + 1))) * reg_loss
    return total_loss

### tuning_EB + regularizing_EB

In [11]:
# Example usage
net, optimizer, device = reset_and_initialize()
criterion = nn.CrossEntropyLoss() #分類任務的損失函數

trn_loss_eb = []
trn_acc_eb = []
val_loss_eb = []
val_acc_eb = []

trn_loss_eb_r = []
trn_acc_eb_r = []
val_loss_eb_r = []
val_acc_eb_r = [] 

# Train the neural network
max_epoch = 5
epoch_eb_r = 0
max_epoch_r = 5
epsilon_r = 0.5

for epoch_eb in range(max_epoch):
    # Set the model to training mode
    net.train()  
    running_loss = 0.0
    train_correct = 0
    train_total = 0

    for i, (inputs, labels) in enumerate(train_loader, 0): #每個epoch内，模型處理782個mini-batch=64張圖，共50000張圖。
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()# Zero the gradients
        outputs = net(inputs)# Forward pass
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Update the running loss
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss = running_loss / (i + 1)
    train_accuracy = 100 * train_correct / train_total
    trn_loss_eb.append(avg_train_loss)
    trn_acc_eb.append(train_accuracy)

    # Evaluate on test data after each epoch
    avg_test_loss, test_accuracy = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb + 1}, TrainLoss: {avg_train_loss:.4f}, TrainAccuracy: {train_accuracy:.2f}%, TestLoss: {avg_test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%")
    val_loss_eb.append(avg_test_loss)
    val_acc_eb.append(test_accuracy)

print('Training phase completes.')
# Save the acceptable state
torch.save(net.state_dict(), 'acceptable_2lnn.pth')

# Load weights if needed and begin the regularization phase
net.load_state_dict(torch.load('acceptable_2lnn.pth'))
print("Regularization phase starts.")

# Run a loop with custom loss that includes regularization
while epoch_eb_r < max_epoch_r:
    net.train()
    running_loss_r = 0.0
    train_correct_r = 0
    train_total_r = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = custom_loss(outputs, labels, net, 0.001)  # Use the custom loss function
        loss.backward(retain_graph=True)
        optimizer.step()
        
        running_loss_r += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # Update the running loss
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        train_total_r += labels.size(0)
        train_correct_r += (predicted == labels).sum().item()

    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss_r = running_loss_r / (i + 1)
    train_accuracy_r = 100 * train_correct_r / train_total_r
    trn_loss_eb_r.append(avg_train_loss_r)
    trn_acc_eb_r.append(train_accuracy_r)

    # Evaluate on test data after each epoch
    avg_test_loss_r, test_accuracy_r = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb_r + 1}, TrainLoss: {avg_train_loss_r:.4f}, TrainAccuracy: {train_accuracy_r:.2f}%, TestLoss: {avg_test_loss_r:.4f}, TestAccuracy: {test_accuracy_r:.2f}%")
    val_loss_eb_r.append(avg_test_loss_r)
    val_acc_eb_r.append(test_accuracy_r)
    epoch_eb_r += 1

if avg_train_loss_r < epsilon_r:
    print(f"Stopping regularizing in the epoch {epoch_eb_r+1} as the average train loss {avg_train_loss_r:.4f} is less than epsilon {epsilon_r}, acceptable 2LNN with new weights.")
elif avg_train_loss_r >= epsilon_r:
    print(f"Stopping regularizing in the epoch {epoch_eb_r+1} as new training did not converge, the average train loss {avg_train_loss_r:.4f} is larger than epsilon {epsilon_r}, acceptable 2LNN but using old weights.")
    net.load_state_dict(torch.load('acceptable_2lnn.pth'))


Using device: cpu
Epoch: 1, TrainLoss: 1.9404, TrainAccuracy: 34.05%, TestLoss: 2.0453, TestAccuracy: 37.34%
Epoch: 2, TrainLoss: 1.7097, TrainAccuracy: 39.23%, TestLoss: 1.7969, TestAccuracy: 39.69%
Epoch: 3, TrainLoss: 1.6668, TrainAccuracy: 40.49%, TestLoss: 1.9671, TestAccuracy: 38.84%
Epoch: 4, TrainLoss: 1.6542, TrainAccuracy: 40.85%, TestLoss: 2.2143, TestAccuracy: 36.60%
Epoch: 5, TrainLoss: 1.6533, TrainAccuracy: 41.17%, TestLoss: 2.0067, TestAccuracy: 38.29%
Training phase completes.
Regularization phase starts.
Epoch: 1, TrainLoss: 1.6367, TrainAccuracy: 41.50%, TestLoss: 1.7896, TestAccuracy: 40.54%
Epoch: 2, TrainLoss: 1.6314, TrainAccuracy: 41.90%, TestLoss: 1.8771, TestAccuracy: 39.79%
Epoch: 3, TrainLoss: 1.6322, TrainAccuracy: 41.86%, TestLoss: 1.8855, TestAccuracy: 40.14%
Epoch: 4, TrainLoss: 1.6219, TrainAccuracy: 42.18%, TestLoss: 1.7788, TestAccuracy: 41.61%
Epoch: 5, TrainLoss: 1.6213, TrainAccuracy: 42.12%, TestLoss: 1.8081, TestAccuracy: 42.56%
Stopping regulari

### tuning_EB + regularizing_BN

In [15]:
# Example usage
net, optimizer, device = reset_and_initialize()
criterion = nn.CrossEntropyLoss() #分類任務的損失函數

trn_loss_eb = []
trn_acc_eb = []
val_loss_eb = []
val_acc_eb = []

trn_loss_eb_r_bn = []
trn_acc_eb_r_bn = []
val_loss_eb_r_bn = []
val_acc_eb_r_bn = [] 

# Train the neural network
max_epoch = 2
epoch_eb_r_bn = 0
max_epoch_r_bn = 2
epsilon_r_bn = 0.5

for epoch_eb in range(max_epoch):
    # Set the model to training mode
    net.train()  
    running_loss = 0.0
    train_correct = 0
    train_total = 0

    for i, (inputs, labels) in enumerate(train_loader, 0): #每個epoch内，模型處理782個mini-batch=64張圖，共50000張圖。
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()# Zero the gradients
        outputs = net(inputs)# Forward pass
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Update the running loss
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss = running_loss / (i + 1)
    train_accuracy = 100 * train_correct / train_total
    trn_loss_eb.append(avg_train_loss)
    trn_acc_eb.append(train_accuracy)

    # Evaluate on test data after each epoch
    avg_test_loss, test_accuracy = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb + 1}, TrainLoss: {avg_train_loss:.4f}, TrainAccuracy: {train_accuracy:.2f}%, TestLoss: {avg_test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%")
    val_loss_eb.append(avg_test_loss)
    val_acc_eb.append(test_accuracy)

print('Training phase completes.')
# Save the acceptable state
torch.save(net.state_dict(), 'acceptable_2lnn.pth')

# Load weights if needed and begin the regularization phase
net.load_state_dict(torch.load('acceptable_2lnn.pth'))
net.use_bn = True  # Enable bn for regularization phase
print("Regularization phase starts.")

# Run a loop with custom loss that includes regularization
while epoch_eb_r_bn < max_epoch_r_bn:
    net.train()
    running_loss_r_bn = 0.0
    train_correct_r_bn = 0
    train_total_r_bn = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss_r_bn = criterion(outputs, labels)  # Use the custom loss function
        loss_r_bn.backward(retain_graph=True)
        optimizer.step()
        
        # Update the running loss
        running_loss_r_bn += loss_r_bn.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total_r_bn += labels.size(0)
        train_correct_r_bn += (predicted == labels).sum().item()

    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss_r_bn = running_loss_r_bn / (i + 1)
    train_accuracy_r_bn = 100 * train_correct_r_bn / train_total_r_bn
    trn_loss_eb_r_bn.append(avg_train_loss_r_bn)
    trn_acc_eb_r_bn.append(train_accuracy_r_bn)

    # Evaluate on test data after each epoch
    avg_test_loss_r_bn, test_accuracy_r_bn = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb_r_bn + 1}, TrainLoss: {avg_train_loss_r_bn:.4f}, TrainAccuracy: {train_accuracy_r_bn:.2f}%, TestLoss: {avg_test_loss_r_bn:.4f}, TestAccuracy: {test_accuracy_r_bn:.2f}%")
    val_loss_eb_r_bn.append(avg_test_loss_r_bn)
    val_acc_eb_r_bn.append(test_accuracy_r_bn)
    epoch_eb_r_bn += 1

if avg_train_loss_r_bn < epsilon_r_bn:
    print(f"Stopping regularizing in the epoch {epoch_eb_r_bn+1} as the average train loss {avg_train_loss_r_bn:.4f} is less than epsilon {epsilon_r_bn}, acceptable 2LNN with new weights.")
elif avg_train_loss_r_bn >= epsilon_r_bn:
    net.load_state_dict(torch.load('acceptable_2lnn.pth'))
    print(f"Stopping regularizing in the epoch {epoch_eb_r_bn+1} as new training did not converge, the average train loss {avg_train_loss_r_bn:.4f} is larger than epsilon {epsilon_r_bn}, acceptable 2LNN but using old weights.")
    


Using device: cpu
Epoch: 1, TrainLoss: 1.9511, TrainAccuracy: 33.80%, TestLoss: 2.4108, TestAccuracy: 35.26%
Epoch: 2, TrainLoss: 1.7178, TrainAccuracy: 38.98%, TestLoss: 1.8659, TestAccuracy: 39.09%
Training phase completes.
Regularization phase starts.
Epoch: 1, TrainLoss: 1.7504, TrainAccuracy: 37.39%, TestLoss: 1.6336, TestAccuracy: 41.54%
Epoch: 2, TrainLoss: 1.6907, TrainAccuracy: 39.61%, TestLoss: 1.5730, TestAccuracy: 44.39%
Stopping regularizing in the epoch 3 as new training did not converge, the average train loss 1.6907 is larger than epsilon 0.5, acceptable 2LNN but using old weights.


### tuning_EB + regularizing_DO

In [7]:
# Example usage
net, optimizer, device = reset_and_initialize()
criterion = nn.CrossEntropyLoss() #分類任務的損失函數

trn_loss_eb = []
trn_acc_eb = []
val_loss_eb = []
val_acc_eb = []

trn_loss_eb_r_dr = []
trn_acc_eb_r_dr = []
val_loss_eb_r_dr = []
val_acc_eb_r_dr = [] 

# Train the neural network
max_epoch = 2
epoch_eb_r_dr = 0
max_epoch_r_dr = 2
epsilon_r_dr = 0.5

for epoch_eb in range(max_epoch):
    # Set the model to training mode
    net.train()  
    running_loss = 0.0
    train_correct = 0
    train_total = 0

    for i, (inputs, labels) in enumerate(train_loader, 0): #每個epoch内，模型處理782個mini-batch=64張圖，共50000張圖。
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()# Zero the gradients
        outputs = net(inputs)# Forward pass
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Update the running loss
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss = running_loss / (i + 1)
    train_accuracy = 100 * train_correct / train_total
    trn_loss_eb.append(avg_train_loss)
    trn_acc_eb.append(train_accuracy)

    # Evaluate on test data after each epoch
    avg_test_loss, test_accuracy = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb + 1}, TrainLoss: {avg_train_loss:.4f}, TrainAccuracy: {train_accuracy:.2f}%, TestLoss: {avg_test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%")
    val_loss_eb.append(avg_test_loss)
    val_acc_eb.append(test_accuracy)

print('Training phase completes.')
# Save the acceptable state
torch.save(net.state_dict(), 'acceptable_2lnn.pth')

# Load weights if needed and begin the regularization phase
net.load_state_dict(torch.load('acceptable_2lnn.pth'))
net.use_dropout = True  # Enable dropout for regularization phase
print("Regularization phase starts.")

# Run a loop with custom loss that includes regularization
while epoch_eb_r_dr < max_epoch_r_dr:
    net.train()
    running_loss_r_dr = 0.0
    train_correct_r_dr = 0
    train_total_r_dr = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss_r_dr = criterion(outputs, labels)  # Use the custom loss function
        loss_r_dr.backward(retain_graph=True)
        optimizer.step()
        
        running_loss_r_dr += loss_r_dr.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total_r_dr += labels.size(0)
        train_correct_r_dr += (predicted == labels).sum().item()


    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss_r_dr = running_loss_r_dr / (i + 1)
    train_accuracy_r_dr = 100 * train_correct_r_dr / train_total_r_dr
    trn_loss_eb_r_dr.append(avg_train_loss_r_dr)
    trn_acc_eb_r_dr.append(train_accuracy_r_dr)

    # Evaluate on test data after each epoch
    avg_test_loss_r_dr, test_accuracy_r_dr = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb_r_dr + 1}, TrainLoss: {avg_train_loss_r_dr:.4f}, TrainAccuracy: {train_accuracy_r_dr:.2f}%, TestLoss: {avg_test_loss_r_dr:.4f}, TestAccuracy: {test_accuracy_r_dr:.2f}%")
    val_loss_eb_r_dr.append(avg_test_loss_r_dr)
    val_acc_eb_r_dr.append(test_accuracy_r_dr)
    epoch_eb_r_dr += 1

if avg_train_loss_r_dr < epsilon_r_dr:
    print(f"Stopping regularizing in the epoch {epoch_eb_r_dr+1} as the average train loss {avg_train_loss_r_dr:.4f} is less than epsilon {epsilon_r_dr}, acceptable 2LNN with new weights.")
elif avg_train_loss_r_dr >= epsilon_r_bn:
    net.load_state_dict(torch.load('acceptable_2lnn.pth'))
    print(f"Stopping regularizing in the epoch {epoch_eb_r_dr+1} as new training did not converge, the average train loss {avg_train_loss_r_dr:.4f} is larger than epsilon {epsilon_r_dr}, acceptable 2LNN but using old weights.")
    


Using device: cpu


### tuning_LG

In [15]:
# Example usage
net, optimizer, device = reset_and_initialize()
criterion = nn.CrossEntropyLoss() #分類任務的損失函數

trn_loss_lg = []
trn_acc_lg = []
val_loss_lg = []
val_acc_lg = []

epsilon = 1.6
epoch_lg = 0

while True:
    # Set the model to training mode
    net.train()  
    running_loss = 0.0
    train_correct = 0
    train_total = 0

    for i, (inputs, labels) in enumerate(train_loader, 0): #每個epoch内，模型處理782個mini-batch=64張圖，共50000張圖。
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Update the running loss
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss = running_loss / (i + 1)
    train_accuracy = 100 * train_correct / train_total
    trn_loss_lg.append(avg_train_loss)
    trn_acc_lg.append(train_accuracy)

    # Evaluate on test data after each epoch
    avg_test_loss, test_accuracy = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_lg+1}, TrainLoss: {avg_train_loss:.4f}, TrainAccuracy: {train_accuracy:.2f}%, TestLoss: {avg_test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%")
    val_loss_lg.append(avg_test_loss)
    val_acc_lg.append(test_accuracy)
    
    if avg_train_loss < epsilon:
        print(f"Stopping training in the epoch {epoch_lg+1} as the average train loss {avg_train_loss:.4f} is less than epsilon {epsilon}, acceptable 2LNN.")
        break
    epoch_lg += 1

Using device: cpu
Epoch: 1, TrainLoss: 1.9448, TrainAccuracy: 33.51%, TestLoss: 2.4100, TestAccuracy: 35.24%
Epoch: 2, TrainLoss: 1.7156, TrainAccuracy: 39.30%, TestLoss: 1.8786, TestAccuracy: 38.38%
Epoch: 3, TrainLoss: 1.6748, TrainAccuracy: 39.98%, TestLoss: 1.8491, TestAccuracy: 40.63%
Epoch: 4, TrainLoss: 1.6597, TrainAccuracy: 40.90%, TestLoss: 1.7286, TestAccuracy: 41.25%
Epoch: 5, TrainLoss: 1.6513, TrainAccuracy: 41.32%, TestLoss: 1.9521, TestAccuracy: 37.82%
Epoch: 6, TrainLoss: 1.6431, TrainAccuracy: 41.82%, TestLoss: 1.7475, TestAccuracy: 40.61%
Epoch: 7, TrainLoss: 1.6301, TrainAccuracy: 41.66%, TestLoss: 1.9061, TestAccuracy: 38.77%
Epoch: 8, TrainLoss: 1.6250, TrainAccuracy: 42.25%, TestLoss: 1.8637, TestAccuracy: 40.69%
Epoch: 9, TrainLoss: 1.6263, TrainAccuracy: 41.88%, TestLoss: 1.8138, TestAccuracy: 41.20%
Epoch: 10, TrainLoss: 1.6235, TrainAccuracy: 42.31%, TestLoss: 1.9024, TestAccuracy: 39.39%
Epoch: 11, TrainLoss: 1.6195, TrainAccuracy: 42.33%, TestLoss: 1.8763, 

### tuning_EB_LG

In [16]:
# Example usage
net, optimizer, device = reset_and_initialize()
criterion = nn.CrossEntropyLoss() #分類任務的損失函數

trn_loss_eb_lg = []
trn_acc_eb_lg = []
val_loss_eb_lg = []
val_acc_eb_lg = []  

# Train the neural network
epsilon = 1.6
max_epochs = 10
epoch_eb_lg = 0

#scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0)
while True:
    # Set the model to training mode
    net.train()  
    running_loss = 0.0
    train_correct = 0
    train_total = 0

    for i, (inputs, labels) in enumerate(train_loader, 0): #每個epoch内，模型處理782個mini-batch=64張圖，共50000張圖。
        # Move the inputs and labels to the GPU if available
        inputs = inputs.to(device)
        labels = labels.to(device)


        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Update the running loss
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss = running_loss / (i + 1)
    train_accuracy = 100 * train_correct / train_total
    trn_loss_eb_lg.append(avg_train_loss)
    trn_acc_eb_lg.append(train_accuracy)

    # Evaluate on test data after each epoch
    avg_test_loss, test_accuracy = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb_lg+1}, TrainLoss: {avg_train_loss:.4f}, TrainAccuracy: {train_accuracy:.2f}%, TestLoss: {avg_test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%")
    val_loss_eb_lg.append(avg_test_loss)
    val_acc_eb_lg.append(test_accuracy)

    if avg_train_loss < epsilon:
        print(f"Stopping training in the epoch {epoch_eb_lg+1} as the average train loss {avg_train_loss:.4f} is less than epsilon {epsilon}, acceptable 2LNN yes~")
        break
    elif epoch_eb_lg >= max_epochs:
        print(f"Stopping training as the epoch {epoch_eb_lg+1} is larger than max epochs {max_epochs} , unacceptable 2LNN.")
        break

    epoch_eb_lg += 1

Using device: cpu
Epoch: 1, TrainLoss: 1.9384, TrainAccuracy: 33.87%, TestLoss: 2.4364, TestAccuracy: 35.99%
Epoch: 2, TrainLoss: 1.7188, TrainAccuracy: 38.95%, TestLoss: 2.1307, TestAccuracy: 36.13%
Epoch: 3, TrainLoss: 1.6733, TrainAccuracy: 39.99%, TestLoss: 1.8197, TestAccuracy: 40.57%
Epoch: 4, TrainLoss: 1.6526, TrainAccuracy: 41.25%, TestLoss: 1.6833, TestAccuracy: 43.32%
Epoch: 5, TrainLoss: 1.6448, TrainAccuracy: 41.47%, TestLoss: 1.7939, TestAccuracy: 40.14%
Epoch: 6, TrainLoss: 1.6366, TrainAccuracy: 41.87%, TestLoss: 1.8496, TestAccuracy: 41.89%
Epoch: 7, TrainLoss: 1.6332, TrainAccuracy: 41.77%, TestLoss: 1.9389, TestAccuracy: 39.18%
Epoch: 8, TrainLoss: 1.6240, TrainAccuracy: 42.17%, TestLoss: 2.0265, TestAccuracy: 37.37%
Epoch: 9, TrainLoss: 1.6205, TrainAccuracy: 42.29%, TestLoss: 2.4311, TestAccuracy: 37.35%
Epoch: 10, TrainLoss: 1.6192, TrainAccuracy: 42.18%, TestLoss: 1.9803, TestAccuracy: 40.55%
Epoch: 11, TrainLoss: 1.6117, TrainAccuracy: 42.51%, TestLoss: 1.6993, 

### tuning_LG_UA + regularizing_LG_UA

In [16]:
# Example usage
net, optimizer, device = reset_and_initialize()
criterion = nn.CrossEntropyLoss() #分類任務的損失函數

trn_loss_lg_ua = []
trn_acc_lg_ua = []
val_loss_lg_ua = []
val_acc_lg_ua = []

trn_loss_lg_ua_r = []
trn_acc_lg_ua_r = []
val_loss_lg_ua_r = []
val_acc_lg_ua_r = [] 

# Train the neural network
eta = 0.001
epsilon = 1.8
epsilon_eta = 0.00001
last_loss = float('inf')
epoch_lg_ua = 0

eta_r = 0.001
epsilon_r = 1.72
epsilon_eta_r = 0.0008
last_loss_r = float('inf')
epoch_lg_ua_r = 0

while True:
    net.train()
    running_loss = 0.0
    train_correct = 0
    train_total = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
                
    ## 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss = running_loss / len(train_loader) #len(train_loader)
    train_accuracy = 100 * train_correct / train_total
    #trn_loss_lg_ua.append(avg_train_loss)
    #trn_acc_lg_ua.append(train_accuracy)

    ## 測試損失
    avg_test_loss, test_accuracy = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_lg_ua+1}, lr={eta}, TrainLoss: {avg_train_loss:.4f}, TrainAccuracy: {train_accuracy:.2f}%, TestLoss: {avg_test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%")
    #val_loss_lg_ua.append(avg_test_loss)
    #val_acc_lg_ua.append(test_accuracy)

    if avg_train_loss < epsilon:
        print(f"Stopping training in the epoch {epoch_lg_ua+1} as the average train loss {avg_train_loss:.4f} is less than epsilon {epsilon}, acceptable 2LNN.")
        break
    elif avg_train_loss < last_loss:
        eta *= 1.2
    else:
        eta *= 0.7
        if eta < epsilon_eta:
            print(f"Stopping training in the epoch {epoch_lg_ua+1} as the eata {eta} is smaller than epsilon_eta {epsilon_eta}, unacceptable 2LNN.")
            break

    # Update the optimizer's learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = eta

    last_loss = avg_train_loss
    epoch_lg_ua += 1

print('Training phase completes.')
# Save the acceptable state
torch.save(net.state_dict(), 'acceptable_2lnn.pth')

# Load weights if needed and begin the regularization phase
net.load_state_dict(torch.load('acceptable_2lnn.pth'))
print("Regularization phase starts.")

# 保存初始正則化階段的最佳權重
best_weights_r = net.state_dict().copy()

# Run a loop with custom loss that includes regularization
while True:
    net.train()
    running_loss_r = 0.0
    train_correct_r = 0
    train_total_r = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = custom_loss(outputs, labels, net, 0.001)
        loss.backward()
        optimizer.step()

        running_loss_r += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total_r += labels.size(0)
        train_correct_r += (predicted == labels).sum().item()
                
    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss_r = running_loss_r/ len(train_loader) #len(train_loader)
    train_accuracy_r = 100 * train_correct_r / train_total_r
    trn_loss_lg_ua_r.append(avg_train_loss_r)
    trn_acc_lg_ua_r.append(train_accuracy_r)

    # 測試損失
    avg_test_loss_r, test_accuracy_r = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_lg_ua_r+1}, lr={eta_r}, TrainLoss: {avg_train_loss_r:.4f}, TrainAccuracy: {train_accuracy_r:.2f}%, TestLoss: {avg_test_loss_r:.4f}, TestAccuracy: {test_accuracy_r:.2f}%")
    val_loss_lg_ua_r.append(avg_test_loss_r)
    val_acc_lg_ua_r.append(test_accuracy_r)
    
    if avg_train_loss_r <= last_loss_r:
        if avg_train_loss_r < epsilon_r:#@@
            eta_r *= 1.2
            best_weights_r = net.state_dict().copy()
        else:
            # 恢復上次的最佳權重
            net.load_state_dict(best_weights_r)
            print(f"Stopping regularizing in the epoch {epoch_lg_ua_r+1} as the average train loss {avg_train_loss_r:.4f} is larger than epsilon {epsilon_r}, acceptable 2LNN.") #大部分情況下是新的網路權重
            break
        # 保存當前最佳權重

    else:
        eta_r *= 0.7
        if eta_r < epsilon_eta_r:
            net.load_state_dict(best_weights_r)
            print(f"Stopping regularizing in the epoch {epoch_lg_ua_r+1} as the eata {eta_r} is smaller than epsilon_eta {epsilon_eta_r}, acceptable 2LNN.")
            break

    # Update the optimizer's learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = eta_r

    last_loss_r = avg_train_loss_r
    epoch_lg_ua_r += 1

Using device: cpu
Epoch: 1, lr=0.001, TrainLoss: 1.9383, TrainAccuracy: 33.72%, TestLoss: 2.4334, TestAccuracy: 37.46%
Epoch: 2, lr=0.0012, TrainLoss: 1.7462, TrainAccuracy: 38.12%, TestLoss: 1.9765, TestAccuracy: 38.60%
Stopping training in the epoch 2 as the average train loss 1.7462 is less than epsilon 1.8, acceptable 2LNN.
Training phase completes.
Regularization phase starts.
Epoch: 1, lr=0.001, TrainLoss: 1.6973, TrainAccuracy: 39.15%, TestLoss: 1.8590, TestAccuracy: 39.38%
Epoch: 2, lr=0.0012, TrainLoss: 1.6859, TrainAccuracy: 39.91%, TestLoss: 1.8314, TestAccuracy: 38.33%
Epoch: 3, lr=0.0014399999999999999, TrainLoss: 1.7290, TrainAccuracy: 39.13%, TestLoss: 2.3120, TestAccuracy: 36.62%
Epoch: 4, lr=0.0010079999999999998, TrainLoss: 1.6363, TrainAccuracy: 41.44%, TestLoss: 1.8594, TestAccuracy: 39.97%
Epoch: 5, lr=0.0012095999999999997, TrainLoss: 1.6606, TrainAccuracy: 40.84%, TestLoss: 1.9479, TestAccuracy: 39.82%
Epoch: 6, lr=0.0008467199999999997, TrainLoss: 1.5986, TrainA

### tuning_EB_LG_UA + regularizing_EB_LG_UA

In [13]:
# Example usage
net, optimizer, device = reset_and_initialize()
criterion = nn.CrossEntropyLoss() #分類任務的損失函數

trn_loss_eb_lg_ua = []
trn_acc_eb_lg_ua = []
val_loss_eb_lg_ua = []
val_acc_eb_lg_ua = []

trn_loss_eb_lg_ua_r = []
trn_acc_eb_lg_ua_r = []
val_loss_eb_lg_ua_r = []
val_acc_eb_lg_ua_r = []  

# Train the neural network
eta = 0.001
max_epochs = 10
epsilon = 1.6
epsilon_eta = 0.00001
last_loss = float('inf')
epoch_eb_lg_ua = 0

eta_r = 0.001
max_epochs_r = 10
epsilon_r = 1.55
epsilon_eta_r = 0.00001
last_loss_r = float('inf')
epoch_eb_lg_ua_r = 0

while True:
    net.train()
    running_loss = 0.0
    train_correct = 0
    train_total = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
    
    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss = running_loss / len(train_loader) #len(train_loader)
    train_accuracy = 100 * train_correct / train_total
    trn_loss_eb_lg_ua.append(avg_train_loss)
    trn_acc_eb_lg_ua.append(train_accuracy)

    # 測試損失
    avg_test_loss, test_accuracy = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb_lg_ua+1}, lr={eta}, TrainLoss: {avg_train_loss:.4f}, TrainAccuracy: {train_accuracy:.2f}%, TestLoss: {avg_test_loss:.4f}, TestAccuracy: {test_accuracy:.2f}%")
    val_loss_eb_lg_ua.append(avg_test_loss)
    val_acc_eb_lg_ua.append(test_accuracy)
    
    if avg_train_loss <= epsilon:
        print(f"Stopping training in the epoch {epoch_eb_lg_ua+1} as the average train loss {avg_train_loss:.4f} is less than epsilon {epsilon}, acceptable 2LNN.")
        break
    elif epoch_eb_lg_ua >= max_epochs:
        print(f"Stopping training as the epoch {epoch_eb_lg_ua+1} is larger than  max epochs {max_epochs} , unacceptable 2LNN.")
        break
    elif avg_train_loss < last_loss:
        eta *= 1.2
    else:
        eta *= 0.7
        if eta < epsilon_eta:
            print(f"Stopping training  in the epoch {epoch_eb_lg_ua+1} as the eta {eta} is smaller than epsilon_eta {epsilon_eta}, unacceptable 2LNN.")
            break
    
    # Update the optimizer's learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = eta

    last_loss = avg_train_loss
    epoch_eb_lg_ua += 1
    
print('Training phase completes.')
# Save the acceptable state
torch.save(net.state_dict(), 'acceptable_2lnn.pth')

# Load weights if needed and begin the regularization phase
net.load_state_dict(torch.load('acceptable_2lnn.pth'))
print("Regularization phase starts.")

# 保存初始正則化階段的最佳權重
best_weights_r = net.state_dict().copy()

# Run a loop with custom loss that includes regularization
while True:
    net.train()
    running_loss_r = 0.0
    train_correct_r = 0
    train_total_r = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss_r += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        train_total_r += labels.size(0)
        train_correct_r += (predicted == labels).sum().item()
    
    # 訓練損失: 782個mini-batch loss的平均值
    avg_train_loss_r = running_loss_r / len(train_loader) #len(train_loader)
    train_accuracy_r = 100 * train_correct_r / train_total_r
    trn_loss_eb_lg_ua_r.append(avg_train_loss_r)
    trn_acc_eb_lg_ua_r.append(train_accuracy_r)

    # 測試損失
    avg_test_loss_r, test_accuracy_r = TwoLayerNet.evaluate_model(net, test_loader, criterion)
    print(f"Epoch: {epoch_eb_lg_ua_r+1}, lr={eta_r}, TrainLoss: {avg_train_loss_r:.4f}, TrainAccuracy: {train_accuracy_r:.2f}%, TestLoss: {avg_test_loss_r:.4f}, TestAccuracy: {test_accuracy_r:.2f}%")
    val_loss_eb_lg_ua_r.append(avg_test_loss_r)
    val_acc_eb_lg_ua_r.append(test_accuracy_r)
    
    if avg_train_loss_r < epsilon_r:
        print(f"Stopping regularizing in the epoch {epoch_eb_lg_ua_r+1} as the average train loss {avg_train_loss_r:.4f} is less than epsilon {epsilon_r}, acceptable 2LNN.")
        break
    else:
        if epoch_eb_lg_ua_r >= max_epochs_r:
            net.load_state_dict(best_weights_r)
            print(f"Stopping regularizing as the epoch {epoch_eb_lg_ua_r+1} is larger than  max epochs {max_epochs_r} , acceptable 2LNN.")
            break
        elif avg_train_loss_r < last_loss_r:
            eta_r *= 1.2
            best_weights_r = net.state_dict().copy()
        else:
            eta_r *= 0.7
            if eta_r < epsilon_eta_r:
                net.load_state_dict(best_weights_r)
                print(f"Stopping regularizing in the epoch {epoch_eb_lg_ua_r+1} as the eta {eta_r} is smaller than epsilon_eta {epsilon_eta_r}, acceptable 2LNN.")
                break
    
    # Update the optimizer's learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = eta_r

    last_loss_r = avg_train_loss_r
    epoch_eb_lg_ua_r += 1

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x0000016E4F3EC160>
Traceback (most recent call last):
  File "g:\我的雲端硬碟\Python\.conda\lib\site-packages\torch\utils\data\dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "g:\我的雲端硬碟\Python\.conda\lib\site-packages\torch\utils\data\dataloader.py", line 1437, in _shutdown_workers
    if self._persistent_workers or self._workers_status[worker_id]:
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'


Using device: cpu
Epoch: 1, lr=0.001, TrainLoss: 1.9401, TrainAccuracy: 33.76%, TestLoss: 2.0604, TestAccuracy: 37.25%
Epoch: 2, lr=0.0012, TrainLoss: 1.7429, TrainAccuracy: 38.19%, TestLoss: 1.8562, TestAccuracy: 39.42%
Epoch: 3, lr=0.0014399999999999999, TrainLoss: 1.7337, TrainAccuracy: 38.42%, TestLoss: 2.1688, TestAccuracy: 36.86%
Epoch: 4, lr=0.0017279999999999997, TrainLoss: 1.7737, TrainAccuracy: 38.10%, TestLoss: 2.3518, TestAccuracy: 35.89%
Epoch: 5, lr=0.0012095999999999997, TrainLoss: 1.6747, TrainAccuracy: 40.19%, TestLoss: 1.8453, TestAccuracy: 40.47%
Epoch: 6, lr=0.0014515199999999995, TrainLoss: 1.7158, TrainAccuracy: 39.36%, TestLoss: 2.1137, TestAccuracy: 38.71%
Epoch: 7, lr=0.0010160639999999997, TrainLoss: 1.6329, TrainAccuracy: 41.73%, TestLoss: 1.8459, TestAccuracy: 39.44%
Epoch: 8, lr=0.0012192767999999997, TrainLoss: 1.6666, TrainAccuracy: 40.81%, TestLoss: 2.7768, TestAccuracy: 33.52%
Epoch: 9, lr=0.0008534937599999998, TrainLoss: 1.6036, TrainAccuracy: 42.64%,