在 PyTorch 中，处理图像的卷积神经网络（例如使用 nn.Conv2d 的网络）接受的输入张量格式一般是 (N, C, H, W)，其中：

N：批量大小（Batch Size），代表一次输入多少张图像。

C：通道数（Channel），对于灰度图为1，对于彩色图一般为3（红、绿、蓝）。

H：图像高度（Height）。

W：图像宽度（Width）。

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

In [2]:
# Pick the compute device: the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Seed PyTorch's default generator with a fixed value, and the CUDA
# generators too when the GPU is the selected device.
torch.manual_seed(1)
if device == 'cuda':
    torch.cuda.manual_seed_all(1)

In [3]:
# Training hyperparameters.
learning_rate = 0.003  # step size handed to the optimizer
batch_size = 100       # samples per mini-batch
epochs = 10            # number of passes over the training set

In [4]:
# MNIST training split, stored under Data/ (downloaded when requested via
# download=True); every sample goes through ToTensor().
mnist_train = dsets.MNIST(
    root='Data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# MNIST test split, same storage root and preprocessing.
mnist_test = dsets.MNIST(
    root='Data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

In [5]:
# Mini-batch iterator over the training split: reshuffled on each pass, and
# the final incomplete batch (if any) is discarded.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [6]:
class CNN(torch.nn.Module):
    """Three conv/ReLU/max-pool stages followed by a two-layer classifier.

    The classifier is sized for single-channel 28x28 inputs and produces 10
    raw class scores (logits) per sample. ``N`` in the shape comments below
    is the batch dimension.

    Args:
        drop_prob: Probability of zeroing an activation in the dropout layer
            that follows the first fully connected layer. Defaults to 0.5.
    """

    def __init__(self, drop_prob: float = 0.5) -> None:
        super().__init__()
        self.drop_prob = drop_prob

        # Stage 1: (N, 1, 28, 28) -> conv -> (N, 32, 28, 28)
        #                         -> pool -> (N, 32, 14, 14)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Stage 2: (N, 32, 14, 14) -> conv -> (N, 64, 14, 14)
        #                          -> pool -> (N, 64, 7, 7)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Stage 3: (N, 64, 7, 7) -> conv -> (N, 128, 7, 7)
        #                        -> pool -> (N, 128, 4, 4)
        # The pool pads by 1 so the odd 7x7 map reduces to 4x4.
        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
        )

        # Classifier head: 4*4*128 flattened features -> 625 -> 10 logits.
        self.fc1 = torch.nn.Linear(4 * 4 * 128, 625, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        self.layer4 = torch.nn.Sequential(
            self.fc1,
            torch.nn.ReLU(),
            # Dropout's ``p`` is the probability of DROPPING a unit, so the
            # drop probability is passed through directly (not 1 - p).
            torch.nn.Dropout(p=self.drop_prob),
        )

        self.fc2 = torch.nn.Linear(625, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (N, 10) for a batch of images.

        Args:
            x: Image batch; the layers are sized for shape (N, 1, 28, 28).
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        # Flatten everything but the batch dimension; keeping N explicit
        # means a wrong input size fails in fc1 instead of silently
        # regrouping samples.
        out = out.view(out.size(0), -1)
        out = self.layer4(out)
        out = self.fc2(out)
        return out

In [7]:
# Instantiate the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [8]:
# CrossEntropyLoss takes raw logits; the softmax is applied inside the loss,
# so the model does not need a final softmax layer.
criterion = torch.nn.CrossEntropyLoss()

# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

当你使用诸如 torchvision.transforms.ToTensor() 这样的工具时，
图像数据会自动从常见的 (高, 宽, 通道) 格式
转换为 PyTorch 所要求的 (通道, 高, 宽) 格式，同时像素值会从 [0, 255] 缩放到 [0.0, 1.0]。

之后在遍历 DataLoader 时，它会把多张图片组合成一个批量，形成 (batch_size, 通道, 高, 宽) 的张量。

In [None]:
# Train for `epochs` passes over the training set, printing the mean
# per-batch loss after each pass.
total_batch = len(data_loader)
model.train()  # training mode, so the dropout layer is active
for epoch in range(epochs):  # exactly `epochs` passes (was epochs + 1)

    avg_cost = 0.0
    for X, Y in data_loader:

        X = X.to(device)
        Y = Y.to(device)

        hypothesis = model(X)
        loss = criterion(hypothesis, Y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float: adding the loss tensor itself
        # would keep every batch's autograd history alive for the epoch.
        avg_cost += loss.item() / total_batch

    print("[Epoch:{:4d}],[Cost:{:.9f}]".format(epoch, avg_cost))
    

[Epoch:   0],[Cost:0.164162651]
[Epoch:   1],[Cost:0.054527536]
[Epoch:   2],[Cost:0.042593956]
[Epoch:   3],[Cost:0.036716606]


In [None]:
# Evaluate classification accuracy on the whole MNIST test split in one
# forward pass, with gradients disabled.
with torch.no_grad():
    model.eval()  # inference mode, so the dropout layer is disabled

    # `mnist_test.data` holds the raw uint8 pixels (0-255) and bypasses the
    # ToTensor() transform, so rescale to [0, 1] here to match the inputs
    # the model was trained on.
    X = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().div(255).to(device)
    Y = mnist_test.targets.to(device)

    prediction = model(X)
    # The predicted class is the index of the largest logit per sample.
    correct_num = torch.argmax(prediction, 1) == Y
    accuracy = correct_num.float().mean()
    # NOTE(review): "{:4f}" is a minimum width of 4 with default precision,
    # possibly intended as "{:.4f}"; left unchanged to keep the output format.
    print("Accuracy:{:4f}%".format(accuracy.item()*100))