In [None]:
# 导入标准库、第三方库和PyTorch模块
import os
import torch
import torchvision.transforms as transforms
import numpy as np
import logging
import json
from datetime import datetime

from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

# Data pipeline: load MNIST, resize, convert to tensor, normalize, shuffle and batch.
def create_dataset(data_dir, training=True, batch_size=32, resize=(32, 32), rescale=1/(255*0.3081), shift=-0.1307/0.3081, buffer_size=64):
    """Build a DataLoader over the MNIST train or test split.

    Args:
        data_dir: Root directory for the MNIST files (downloaded if missing).
        training: Selects the train split when True, the test split otherwise.
            Also controls shuffling and whether the last partial batch is dropped.
        batch_size: Number of samples per batch.
        resize: Target (height, width) passed to ``transforms.Resize``.
        rescale: Multiplicative coefficient of the affine normalization
            ``out = pixel * rescale + shift`` expressed on raw 0-255 pixels.
        shift: Additive coefficient of the same affine normalization.
        buffer_size: Unused; kept so existing callers keep working.

    Returns:
        A ``DataLoader`` yielding ``(images, labels)`` batches.

    Raises:
        ValueError: If ``rescale`` is zero (the normalization would be undefined).
    """
    if rescale == 0:
        raise ValueError("rescale must be non-zero")

    # ToTensor already maps pixels to [0, 1] (pixel / 255), and Normalize computes
    # (x - mean) / std. Rewrite pixel * rescale + shift in that form:
    #   255 * x * rescale + shift == (x - mean) / std
    # With the defaults this gives the usual MNIST mean=0.1307, std=0.3081.
    std = 1.0 / (255.0 * rescale)
    mean = -shift * std

    transform = transforms.Compose([
        transforms.Resize(resize),
        transforms.ToTensor(),
        transforms.Normalize(mean=[mean], std=[std])
    ])

    ds = MNIST(root=data_dir, train=training, transform=transform, download=True)
    loader = DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=training,
        num_workers=4,
        # Pinned memory only helps host-to-GPU copies; skip it on CPU-only machines.
        pin_memory=torch.cuda.is_available(),
        # Keep every sample when evaluating; only training drops the ragged last batch.
        drop_last=training,
    )

    return loader

In [None]:
import torch.nn as nn

class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier.

    Two 5x5 convolutions (each followed by the activation and 2x2 max pooling)
    feed three fully connected layers; the output is passed through softmax, so
    ``forward`` returns class probabilities, not logits. ``fc1`` expects 400
    features (16 channels x 5 x 5), which corresponds to 1x32x32 inputs.

    Args:
        activation: ``'relu'`` or ``'sigmoid'``; used after every conv layer and
            after the first two fully connected layers.
        dropout_rate: Dropout probability applied to the pooled conv features.
            ``None`` (or 0) disables dropout.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    def __init__(self, activation='relu', dropout_rate=None):
        super(LeNet5, self).__init__()
        if activation not in ('relu', 'sigmoid'):
            raise ValueError("Invalid activation. Choose either 'relu' or 'sigmoid'")
        self.conv1 = nn.Conv2d(1, 6, 5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, 5, stride=1, padding=0)
        self.activation = activation
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout(p=dropout_rate) if dropout_rate else None
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return a (batch, 10) tensor of class probabilities for input ``x``."""
        act = self.relu if self.activation == 'relu' else self.sigmoid
        x = self.pool(act(self.conv1(x)))
        x = self.pool(act(self.conv2(x)))
        if self.dropout is not None:
            x = self.dropout(x)
        x = x.view(x.size(0), -1)  # Flatten to (batch, features)
        # Non-linearities between the dense layers: without them the three
        # Linear layers collapse into a single affine map.
        x = act(self.fc1(x))
        x = act(self.fc2(x))
        x = self.fc3(x)

        return self.softmax(x)


In [None]:
# Use the first CUDA device when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
import torch.optim as optim
from torch.autograd import Variable
import matplotlib.pyplot as plt

def train(data_dir, loss_type='ce', activation='relu', dropout_rate=None, lr=0.01, momentum=0.9, num_epochs=10):
    """Train a LeNet5 on MNIST with SGD and evaluate it after every epoch.

    Args:
        data_dir: Directory passed to ``create_dataset`` for both splits.
        loss_type: ``'mse'`` (mean squared error against one-hot targets) or
            ``'ce'`` (cross entropy).
        activation: Activation name forwarded to ``LeNet5``.
        dropout_rate: Dropout probability forwarded to ``LeNet5``.
        lr: SGD learning rate.
        momentum: SGD momentum.
        num_epochs: Number of passes over the training loader.

    Returns:
        ``(train_losses, val_accuracies)``: per-epoch lists. Each loss entry is
        the sum of the batch losses of that epoch; each accuracy is a percentage.

    Raises:
        ValueError: If ``loss_type`` is neither ``'mse'`` nor ``'ce'``.
    """
    # Validate first so a bad argument fails before any dataset work happens.
    if loss_type == 'mse':
        loss = nn.MSELoss()
    elif loss_type == 'ce':
        # LeNet5.forward already ends in softmax, while nn.CrossEntropyLoss
        # expects raw logits and applies log-softmax itself. Feeding it
        # probabilities would squash loss and gradients, so compute cross
        # entropy as NLL over the log of the predicted probabilities instead.
        loss = nn.NLLLoss()
    else:
        raise ValueError("Invalid loss_type. Choose either 'mse' or 'ce'")

    ds_train = create_dataset(data_dir, training=True)
    ds_eval = create_dataset(data_dir, training=False)

    net = LeNet5(activation=activation, dropout_rate=dropout_rate)
    net.to(device)  # Move the model to the selected device

    opt = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    train_losses = []
    val_accuracies = []

    for epoch in range(num_epochs):
        # Evaluation below switches to eval mode; re-enable dropout for training.
        net.train()
        running_loss = 0.0
        for inputs, labels in ds_train:
            inputs, labels = inputs.to(device), labels.to(device)
            opt.zero_grad()

            outputs = net(inputs)
            if loss_type == 'mse':
                targets = nn.functional.one_hot(labels.long(), num_classes=outputs.size(1)).to(outputs.dtype)
                loss_output = loss(outputs, targets)
            else:
                # Clamp before the log so an exact-zero probability cannot produce -inf.
                loss_output = loss(torch.log(outputs.clamp_min(1e-12)), labels)

            loss_output.backward()
            opt.step()

            running_loss += loss_output.item()
        train_losses.append(running_loss)

        logging.info(f'Epoch {epoch + 1}, Loss: {running_loss}')

        # Disable dropout so the reported accuracy reflects the full network.
        net.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in ds_eval:
                images, labels = images.to(device), labels.to(device)
                predicted = net(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        # Guard against an empty evaluation loader.
        acc = 100 * correct / total if total else 0.0
        val_accuracies.append(acc)
        logging.info(f'Accuracy: {acc} %')

    return train_losses, val_accuracies


In [None]:
def plot_history(train_losses, val_acc, title):
    """Draw the training-loss and validation-accuracy curves in two figures.

    Both figures share ``title`` and use the epoch number (starting at 1) on
    the x axis; the figures are shown once both have been drawn.
    """
    epoch_axis = range(1, len(train_losses) + 1)

    # (series, line format, legend label, y-axis label) for each figure, in draw order.
    panels = (
        (train_losses, 'r', 'Training loss', 'Loss'),
        (val_acc, 'b', 'Validation accuracy', 'Accuracy'),
    )

    for series, line_format, legend_label, y_label in panels:
        plt.figure()
        plt.plot(epoch_axis, series, line_format, label=legend_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.show()

In [None]:
def setup_logger():
    """Send INFO-level root-logger records to a timestamped file and the console.

    The log file is named ``training_logs_<timestamp>.log`` and is created in
    the current working directory. The function is safe to call repeatedly
    (e.g. when a notebook cell is re-run): handlers installed by a previous
    call are removed and closed first, so messages are not duplicated.
    Handlers installed by anyone else are left untouched.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    log_filename = f"training_logs_{timestamp}.log"

    root = logging.getLogger('')
    root.setLevel(logging.INFO)

    # logging.basicConfig is a no-op once the root logger has handlers, and a
    # plain addHandler stacks a new console handler on every call. Manage our
    # own handlers explicitly and tag them so a later call can replace them.
    marker = '_installed_by_setup_logger'
    for stale in [h for h in root.handlers if getattr(h, marker, False)]:
        root.removeHandler(stale)
        stale.close()

    file_handler = logging.FileHandler(log_filename, mode='w')
    file_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] - %(message)s',
                                                datefmt='%Y-%m-%d %H:%M:%S'))

    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] - %(message)s'))

    for handler in (file_handler, console):
        setattr(handler, marker, True)
        root.addHandler(handler)


In [None]:
def main():
    """Run the loss, activation and dropout comparison experiments.

    Configures logging, writes the shared hyperparameters to
    ``hyperparameters.json``, then trains one model per experiment and plots
    its loss/accuracy history.
    """
    setup_logger()

    data_dir = 'MNIST_Data'
    num_epochs = 50
    lr = 0.003
    momentum = 0.9

    # Save hyperparameters
    with open('hyperparameters.json', 'w') as f:
        json.dump(
            {'num_epochs': num_epochs, 'learning_rate': lr, 'momentum': momentum},
            f,
            indent=4,
        )

    # (log message, plot title, train() keyword overrides), executed in order.
    experiments = (
        ("Training with Mean Squared Error Loss:", 'Training with Mean Squared Error Loss', {'loss_type': 'mse'}),
        ("Training with Cross Entropy Loss:", 'Training with Cross Entropy Loss', {'loss_type': 'ce'}),
        ("Training with ReLU activation:", 'Training with ReLU Activation', {'activation': 'relu'}),
        ("Training with Sigmoid activation:", 'Training with Sigmoid Activation', {'activation': 'sigmoid'}),
        ("Training without Dropout:", 'Training without Dropout', {'dropout_rate': None}),
        ("Training with Dropout:", 'Training with Dropout', {'dropout_rate': 0.5}),
    )

    for log_message, plot_title, overrides in experiments:
        logging.info(log_message)
        losses, accuracies = train(data_dir, num_epochs=num_epochs, lr=lr, momentum=momentum, **overrides)
        plot_history(losses, accuracies, plot_title)

if __name__ == '__main__':
    main()