In [3]:
import torch
import torchvision
import torchvision.transforms as transforms

In [4]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:16<00:00, 10243610.09it/s]


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [5]:
device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [6]:
from torch import nn
class Net(nn.Module):
    def __init__(self) -> None:
        super(Net,self).__init__()
        #32,32,3
        self.conv_pool1=nn.Sequential(
            nn.Conv2d(in_channels=3,out_channels=64,kernel_size=5,stride=1,padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )

        #16,16,64
        self.conv_pool2=nn.Sequential(
            nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )

        #8,8,128
        self.conv_pool3=nn.Sequential(
            nn.Conv2d(in_channels=128,out_channels=256,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        
        #4,4,256
        self.fc1=nn.Sequential(
            nn.Linear(4*4*256,512),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        self.fc2=nn.Sequential(
            nn.Linear(512,128),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        self.out=nn.Sequential(
            nn.Linear(128,10)
        )

    def forward(self,x):
        x=self.conv_pool1(x)
        x=self.conv_pool2(x)
        x=self.conv_pool3(x)

        x = x.view(-1, 4*4*256)

        x=self.fc1(x)
        x=self.fc2(x)
        x=self.out(x)

        return x


In [7]:
net=Net().to(device)
print(net)

Net(
  (conv_pool1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_pool2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_pool3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=4096, out_features=512, bias=True)
    (1): ReLU()
   

Optimizer &Loss Function

In [8]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

Train the Model

In [9]:
from tqdm import tqdm

#存储训练过程
history={'Train Loss':[],'Test Loss':[],'Test accuracy':[]}

for epoch in range(1,6):
    #构建tqdm进度条
    processBar=tqdm(trainloader,unit='step')
    #打开网络的训练模式
    net.train(True)
    #开始对训练集的DataLoader进行迭代
    totalTrainLoss=0.0

    for step,(trainImgs,labels) in enumerate(processBar):
        #将图像和标签传输进device中
        trainImgs=trainImgs.to(device)
        labels=labels.to(device)

        #清空模型的梯度
        optimizer.zero_grad()

        #对模型进行前向推理
        outputs=net(trainImgs)

        #计算本轮推理的Loss值
        loss=criterion(outputs,labels)
        #计算本轮推理的准确率
        predictions=torch.argmax(outputs,dim=1)
        accuracy=torch.sum(predictions==labels)/labels.shape[0]

        #进行反向传播求出模型参数的梯度
        loss.backward()
        #使用迭代器更新模型权重
        optimizer.step()

        #将本step结果进行可视化处理
        processBar.set_description("[%d/%d] Loss:%.4f, Acc:%.4f"%
                                   (epoch,5,loss.item(),accuracy.item()))
        
        totalTrainLoss+=loss

        if step==len(processBar)-1:
            correct,totalLoss=0,0
            totalSize=0
            net.train(False)
            for testImgs,labels in testloader:
                testImgs=testImgs.to(device)
                labels=labels.to(device)
                outputs=net(testImgs)
                loss=criterion(outputs,labels)
                predictions=torch.argmax(outputs,dim=1)
                totalSize+=labels.size(0)
                totalLoss+=loss
                correct+=torch.sum(predictions==labels)
            testAccuracy=correct/totalSize
            testLoss=totalLoss/len(testloader)
            trainLoss=totalTrainLoss/len(trainloader)
            history['Train Loss'].append(trainLoss.item())
            history['Test Loss'].append(testLoss.item())
            history['Test Accuracy'].append(testAccuracy.item())
            processBar.set_description("[%d/%d] Loss: %.4f, Acc: %.4f, Test Loss: %.4f, Test Acc: %.4f" % 
                                   (epoch,5,loss.item(),accuracy.item(),testLoss.item(),testAccuracy.item()))
    processBar.close()

[1/5] Loss:1.4189, Acc:0.7500: 100%|█████████▉| 12499/12500 [02:02<00:00, 101.79step/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacty of 4.00 GiB of which 0 bytes is free. Of the allocated memory 10.59 GiB is allocated by PyTorch, and 17.82 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
import matplotlib.pyplot as plt
#对测试Loss进行可视化
plt.plot(history['Train Loss'],label = 'Train Loss')
plt.plot(history['Test Loss'],label = 'Test Loss')
plt.legend(loc='best')
plt.grid(True)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

In [None]:
#对测试准确率进行可视化
plt.plot(history['Test Accuracy'],color = 'red',label = 'Test Accuracy')
plt.legend(loc='best')
plt.grid(True)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.show()

In [None]:
torch.save(net.state_dict(),'./models/cifar.pth')

In [None]:
model = Net()
model.load_state_dict(torch.load('./models/cifar.pth'))
model.eval()

In [None]:
torch.save(model, './models/cifar2.pth')