# 完整的模型训练套路

In [None]:
import torch
import torchvision
import time

from torch import nn
from torch.utils.data import DataLoader

from torch.utils.tensorboard import SummaryWriter

## 一些超参数

## 准备数据集

In [None]:
# Build the CIFAR-10 training split first, then the test split. Both read
# from the ".data" directory, pass download=True, and convert every image
# to a tensor with ToTensor().
train_data, test_data = (
    torchvision.datasets.CIFAR10(
        ".data",
        train=is_train,
        transform=torchvision.transforms.ToTensor(),
        download=True,
    )
    for is_train in (True, False)
)
train_data_size = len(train_data)
test_data_size = len(test_data)
# Notebook cell output: show the size of both splits.
train_data_size, test_data_size

## 准备`DataLoader`

In [None]:
# Mini-batch size shared by the training and evaluation loaders.
batch_size = 64
# Reshuffle the training samples at the start of every epoch so the model
# does not see the batches in the same fixed dataset order each time.
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# The evaluation pass only accumulates totals over the whole split, so the
# test loader keeps the default sequential order.
test_dataloader = DataLoader(test_data, batch_size=batch_size)

## 设定代码的运行设备

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Notebook cell output: show the selected device.
device

## 定义网络并创建网络

In [None]:
# Small CNN classifier: three (5x5 conv, padding 2) + (2x2 max-pool) stages,
# then a flatten and a two-layer linear head producing 10 outputs.
# The first Linear takes 1024 features = 64 channels x 4 x 4, which is what
# a 32x32 input becomes after three 2x poolings.
model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
    nn.MaxPool2d(kernel_size=2),
    nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
    nn.MaxPool2d(kernel_size=2),
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
    nn.MaxPool2d(kernel_size=2),
    nn.Flatten(),
    nn.Linear(in_features=1024, out_features=64),
    nn.Linear(in_features=64, out_features=10),
).to(device)
# Notebook cell output: show the layer summary.
model

## 损失函数

In [None]:
# Cross-entropy loss module, moved to the selected device.
loss_fn = nn.CrossEntropyLoss().to(device)

## 优化器

In [None]:
# Learning rate handed to the optimizer.
lr = 1e-3
# Adam over every parameter of the model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

## 可视化

In [None]:
# TensorBoard writer; its event files are written under ".logs/h".
writer = SummaryWriter(log_dir=".logs/h")

In [None]:
import os

# Counters used as the x-axis of the TensorBoard scalars.
total_train_step = 0  # optimizer steps taken so far, across all epochs
total_test_step = 0   # evaluation passes run so far (one per epoch)
epoches = 3           # number of passes over the training set

# torch.save does not create missing parent directories; make sure the
# checkpoint directory exists so the save at the end of an epoch cannot fail
# after the training work has already been done.
os.makedirs('.param', exist_ok=True)

for epoch in range(epoches):
    print("=========The {} epoch begin=========".format(epoch + 1))

    # ---- training pass ----
    model.train()

    start_time = time.time()
    for images, targets in train_dataloader:
        images = images.to(device)
        targets = targets.to(device)
        outputs = model(images)

        loss = loss_fn(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step += 1
        # Convert the loss to a Python float once and reuse it for both the
        # console log and TensorBoard.
        loss_value = loss.item()
        if total_train_step % 100 == 1:
            # Report the wall-clock time elapsed since the previous report.
            end_time = time.time()
            print("step: {} , loss: {}, time: {}".format(total_train_step, loss_value, end_time - start_time))
            start_time = end_time
        writer.add_scalar('train_loss', loss_value, total_train_step)

    # ---- evaluation pass ----
    model.eval()

    total_test_loss = 0  # sum of the per-batch losses over the test set
    total_accuracy = 0   # number of correctly classified test samples
    total_test_step += 1
    with torch.no_grad():
        for images, targets in test_dataloader:
            images = images.to(device)
            targets = targets.to(device)
            outputs = model(images)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            # .item() keeps the running count a plain Python int instead of a
            # tensor living on the device, so it prints as a number and the
            # division below yields a plain float.
            total_accuracy += (outputs.argmax(1) == targets).sum().item()
    print("\ttest data set total loss: {}".format(total_test_loss))
    print("\ttest data set total accuracy: {}".format(total_accuracy))
    writer.add_scalar('test_accuracy', total_accuracy / test_data_size, total_test_step)

    # Pickle the whole model object after every epoch, one file per epoch.
    torch.save(model, '.param/cifar10_{}.pth'.format(epoch))

writer.close()

# 完整的模型验证套路

In [None]:
from PIL import Image

# Open the sample picture and convert it to 3-channel RGB, matching the
# 3 input channels of the network's first convolution.
image_path = '.data/dog.png'
image = Image.open(image_path).convert('RGB')
print(image)

In [None]:
# Preprocessing pipeline: resize to 32x32, then convert to a tensor.
transform = torchvision.transforms.Compose(
    transforms=[
        torchvision.transforms.Resize(size=(32, 32)),
        torchvision.transforms.ToTensor(),
    ]
)
image = transform(image)
# Notebook cell output: show the resulting tensor shape.
image.shape

In [None]:
# Restore the model object that was pickled after the last training epoch.
# - map_location=device remaps the stored tensors onto the currently selected
#   device, so a checkpoint written on a GPU still loads on a CPU-only
#   machine and the model sits on the same device as the input image.
# - weights_only=False is needed because the checkpoint is a whole pickled
#   nn.Module rather than a state_dict; PyTorch 2.6+ defaults to
#   weights_only=True and refuses to unpickle such a file.
# NOTE: this is a pickle-based load and can execute arbitrary code, so only
# load checkpoint files that you produced yourself or otherwise trust.
model_restore = torch.load(
    '.param/cifar10_{}.pth'.format(epoches - 1),
    map_location=device,
    weights_only=False,
)
# Notebook cell output: show the restored layer summary.
model_restore

In [None]:
# Give the image a leading batch dimension, (1, 3, 32, 32), and move it to
# the selected device.
image = image.reshape(1, 3, 32, 32).to(device)

# Inference only: switch to eval mode and skip gradient tracking.
model_restore.eval()
with torch.no_grad():
    output = model_restore(image)

# Index of the largest of the 10 output scores, i.e. the predicted class.
output = output.argmax()
output