加载所需要的包

In [5]:
import os
import time
import os.path as osp
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import torchvision
import matplotlib.pyplot as plt
import random

数据加载器

In [4]:
def load_data(BATCH_SIZE):
    transform_cifar10_train = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_cifar10_test = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    train_set = torchvision.datasets.CIFAR10(root='../data', train=True,
                                                download=True, transform=transform_cifar10_train)
    
    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                                    shuffle=True, num_workers=2)

    test_set = torchvision.datasets.CIFAR10(root='../data', train=False,
                                            download=True, transform=transform_cifar10_test)
    
    
    test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE,
                                                    shuffle=False, num_workers=2)
    
    return train_set, train_dataloader, test_set, test_dataloader  

测试与训练批次

In [3]:
def train_batch(model, image, target):
    # Perform one training batch iteration.
    output = model(image)
    loss_fn = nn.CrossEntropyLoss()
    loss = loss_fn(output, target)
    return output, loss


def test_batch(model, image, target):
    # Perform one testing batch iteration.
    output = model(image)
    loss_fn = nn.CrossEntropyLoss()
    loss = loss_fn(output, target)
    return output, loss

可视化函数

In [2]:
def plt_show(process_data):
    plt.figure(figsize=(12, 6))
    plt.plot(process_data['Epoch'], process_data['Train Loss'], label='Train Loss', marker='o')
    plt.plot(process_data['Epoch'], process_data['Test Loss'], label='Test Loss', marker='o')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training and Testing Loss')
    plt.grid(True)
    plt.savefig('loss_plot.png')
    plt.show()

    plt.figure(figsize=(12, 6))
    plt.plot(process_data['Epoch'], process_data['Train Accuracy'], label='Train Accuracy', marker='o')
    plt.plot(process_data['Epoch'], process_data['Test Accuracy'], label='Test Accuracy', marker='o')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Training and Testing Accuracy')
    plt.grid(True)
    plt.savefig('accuracy_plot.png')
    plt.show()

训练过程并评估

In [6]:
def train_and_evaluate(learning_rate, momentum, gamma, step, EVAL_INTERVAL, model,device):
    BATCH_SIZE = 64
    NUM_EPOCHS = 20
    SAVE_DIR = './log'

    train_set, train_dataloader, test_set, test_dataloader = load_data(BATCH_SIZE)
    # train_set, _ = torch.utils.data.random_split(train_set,[1280,len(train_set)-1280])
    # test_set, _ = torch.utils.data.random_split(train_set,[100,len(train_set)-100])

    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step, gamma=gamma)

    training_loss,training_acc = [],[]
    testing_loss,testing_acc = [],[]


    for epoch in range(NUM_EPOCHS):
        model.train()
        torch.cuda.empty_cache()
        running_cls_loss = 0.0
        running_cls_corrects = 0

        for batch_idx, (image, target) in enumerate(train_dataloader):
            image = image.to(device)
            target = target.to(device)

            outputs, loss = train_batch(model, image, target)
            _, preds = torch.max(outputs, 1)

            loss_data = loss.data.item()
            if np.isnan(loss_data):
                raise ValueError('loss is nan while training')
            running_cls_loss += loss.item()
            running_cls_corrects += torch.sum(preds == target.data)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        epoch_loss = running_cls_loss / len(train_set)
        epoch_acc = running_cls_corrects.double() / len(train_set)
        print(f'Epoch: {epoch + 1}/{NUM_EPOCHS} Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
        training_loss.append(epoch_loss)
        training_acc.append(epoch_acc.cpu().detach().numpy())

        scheduler.step()

        if (epoch + 1) % EVAL_INTERVAL == 0 or (epoch + 1) == NUM_EPOCHS:
            print('Begin test...')
            model.eval()
            val_loss = 0.0
            val_corrects = 0

            for batch_idx, (image, target) in enumerate(test_dataloader):
                image = image.to(device)
                target = target.to(device)

                outputs, loss = test_batch(model, image, target)
                _, preds = torch.max(outputs, 1)

                val_loss += loss.item()
                val_corrects += torch.sum(preds == target.data)

            val_loss = val_loss / len(test_set)
            val_acc = val_corrects.double() / len(test_set)
            print(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f}')
            testing_loss.append(val_loss)
            testing_acc.append(val_acc.cpu().detach().numpy())

            if (epoch + 1) == NUM_EPOCHS:

                state = {
                    'state_dict': model.state_dict(),
                    'acc': epoch_acc,
                    'epoch': (epoch + 1),
                }

                if not os.path.exists(SAVE_DIR):
                    os.makedirs(SAVE_DIR)

                torch.save(state, osp.join(SAVE_DIR, 'checkpoint_%s.pth' % (str(epoch + 1))))       
    process_data = {
        'Epoch': list(range(1, NUM_EPOCHS + 1)),
        'Train Loss': training_loss,
        'Train Accuracy': training_acc,
        'Test Loss': testing_loss,
        'Test Accuracy': testing_acc,
    }
    plt_show(process_data)
    return model

超参数和随机种子定义

In [7]:
learning_rate = 0.1
momentum = 0.5
step = 5
gamma = 0.9
EVAL_INTERVAL = 2
results = []

SEED = 1
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

ResNet

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
NUM_CLASS = 10
model = models.resnet18(weights=None)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, NUM_CLASS)
model.to(device)
print("使用ResNet18训练模型并输出结果:")
result = train_and_evaluate(learning_rate, momentum, gamma, step, EVAL_INTERVAL, model, device)

使用ResNet18训练模型并输出结果:
Files already downloaded and verified
Files already downloaded and verified
Epoch: 1/20 Train Loss: 0.0244 Acc: 0.4319
Epoch: 2/20 Train Loss: 0.0149 Acc: 0.6592
Begin test...
Test Loss: 0.0172 Acc: 0.6366
Epoch: 3/20 Train Loss: 0.0109 Acc: 0.7569
Epoch: 4/20 Train Loss: 0.0087 Acc: 0.8064
Begin test...
Test Loss: 0.0131 Acc: 0.7237
Epoch: 5/20 Train Loss: 0.0072 Acc: 0.8399
Epoch: 6/20 Train Loss: 0.0058 Acc: 0.8692
Begin test...
Test Loss: 0.0096 Acc: 0.8108
Epoch: 7/20 Train Loss: 0.0049 Acc: 0.8905
Epoch: 8/20 Train Loss: 0.0042 Acc: 0.9075
Begin test...
Test Loss: 0.0074 Acc: 0.8458


KeyboardInterrupt: 

AlexNet

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
NUM_CLASS = 10
model = models.alexnet(weights=None)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, NUM_CLASS)
model.to(device)
print("使用AlexNet训练模型并输出结果:")
result = train_and_evaluate(learning_rate, momentum, gamma, step, EVAL_INTERVAL, model, device)

使用AlexNet训练模型并输出结果:
Files already downloaded and verified
Files already downloaded and verified
Epoch: 1/20 Train Loss: 0.0320 Acc: 0.2392
Epoch: 2/20 Train Loss: 0.0233 Acc: 0.4575
Begin test...
Test Loss: 0.0197 Acc: 0.5427
Epoch: 3/20 Train Loss: 0.0186 Acc: 0.5820
Epoch: 4/20 Train Loss: 0.0162 Acc: 0.6406
Begin test...
Test Loss: 0.0218 Acc: 0.5097
Epoch: 5/20 Train Loss: 0.0146 Acc: 0.6753
Epoch: 6/20 Train Loss: 0.0130 Acc: 0.7149
Begin test...
Test Loss: 0.0127 Acc: 0.7265
Epoch: 7/20 Train Loss: 0.0120 Acc: 0.7371
Epoch: 8/20 Train Loss: 0.0116 Acc: 0.7485
Begin test...
Test Loss: 0.0196 Acc: 0.5673
Epoch: 9/20 Train Loss: 0.0113 Acc: 0.7531
Epoch: 10/20 Train Loss: 0.0107 Acc: 0.7680
Begin test...
Test Loss: 0.0128 Acc: 0.7322


KeyboardInterrupt: 

VGG

In [10]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
NUM_CLASS = 10
model = models.vgg11(weights=None)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, NUM_CLASS)
model.to(device)
print("使用VGG11训练模型并输出结果:")
result = train_and_evaluate(learning_rate, momentum, gamma, step, EVAL_INTERVAL,model,device)

使用VGG11训练模型并输出结果:
Files already downloaded and verified
Files already downloaded and verified
Epoch: 1/20 Train Loss: 0.0322 Acc: 0.2355
Epoch: 2/20 Train Loss: 0.0217 Acc: 0.4977
Begin test...
Test Loss: 0.0200 Acc: 0.5618
Epoch: 3/20 Train Loss: 0.0155 Acc: 0.6509
Epoch: 4/20 Train Loss: 0.0123 Acc: 0.7280
Begin test...
Test Loss: 0.0158 Acc: 0.6623


KeyboardInterrupt: 