In [1]:
import time 
import math

import torch 
import torch.nn as nn
import torch.nn.functional as F

import torchvision.datasets as dsets
import torchvision.transforms as trans

In [2]:
# Pick the best backend that is actually available so the notebook also runs
# on machines without Apple-silicon MPS support: MPS first, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Mini-batch size shared by the training and the test loader.
batch_size = 100
# Single location of the MNIST files so both splits always point at the same folder.
data_root = '../data/mnist/'

# download=True fetches the dataset on first use and reuses the files already
# under `data_root` afterwards, so the notebook runs from a fresh checkout
# instead of failing with "Dataset not found".
train_set = dsets.MNIST(root=data_root,
                        train=True,
                        transform=trans.ToTensor(),
                        download=True)
test_set = dsets.MNIST(root=data_root,
                       train=False,
                       transform=trans.ToTensor(),
                       download=True)

# Only the training loader shuffles; the test loader keeps the dataset order.
train_dl = torch.utils.data.DataLoader(dataset=train_set,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=6)
test_dl = torch.utils.data.DataLoader(dataset=test_set,
                                      batch_size=batch_size,
                                      num_workers=6)

In [4]:
class FNN(nn.Module):
    """Fully connected classifier: 784 -> 500 -> 300 -> 10 with ReLU in between.

    ``forward`` reshapes its input to rows of 28*28 values and returns the raw
    output of the last linear layer (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 500)
        self.fc2 = nn.Linear(500, 300)
        self.fc3 = nn.Linear(300, 10)

    def forward(self, x):
        # Collapse every sample to a 784-long row before the first linear layer.
        flat = x.view(-1, 28 * 28)
        hidden1 = F.relu(self.fc1(flat))
        hidden2 = F.relu(self.fc2(hidden1))
        return self.fc3(hidden2)
    

In [5]:
# Smoke test: a random batch of three 1x28x28 inputs should yield a (3, 10) output.
net = FNN()
x = torch.randn(3, 1, 28, 28)
y = net(x)
print(y.shape)

torch.Size([3, 10])


In [6]:
# Show the module tree (layer names and their in/out feature sizes).
print(net)

FNN(
  (fc1): Linear(in_features=784, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=300, bias=True)
  (fc3): Linear(in_features=300, out_features=10, bias=True)
)


通过nn.Sequential搭建网络

In [10]:
# Same layer stack as FNN, expressed with nn.Sequential. Unlike FNN.forward it
# does not reshape its input, so it expects batches already flattened to (N, 784).
#
# The stack ends at the last Linear layer and therefore returns raw logits,
# exactly like FNN: nn.CrossEntropyLoss (the criterion used for training in this
# notebook) applies log-softmax internally, so a trailing nn.Softmax would
# normalise twice and flatten the gradients. When probabilities are needed,
# apply F.softmax(net2(x), dim=1) explicitly.
net2 = nn.Sequential(
    nn.Linear(28 * 28, 500),
    nn.ReLU(),
    nn.Linear(500, 300),
    nn.ReLU(),
    nn.Linear(300, 10),
)

In [11]:
# Smoke test for the Sequential model: three already-flattened samples in,
# one row of 10 values per sample out.
x = torch.rand(3, 784)
y = net2(x)
print(y.shape)

torch.Size([3, 10])


In [12]:
def eval(model, criterion, dataloader, device=None):
    """Compute the average loss and the accuracy of ``model`` over ``dataloader``.

    The model is switched to evaluation mode and run without gradient tracking;
    its previous train/eval mode is restored before returning.

    NOTE: the name shadows Python's builtin ``eval``; it is kept so existing
    calls keep working.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to per-class scores of
            shape (batch, num_classes).
        criterion: callable ``criterion(scores, targets)`` returning a scalar
            tensor.
        dataloader: iterable of ``(inputs, targets)`` batches.
        device: device the batches are moved to before the forward pass.
            Defaults to the device of the model's first parameter (CPU when
            the model has no parameters).

    Returns:
        ``(loss, accuracy)`` where ``loss`` is a Python float, the mean of the
        per-batch criterion values, and ``accuracy`` is a 0-dim float tensor
        with the fraction of correctly classified samples over all batches.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    if device is None:
        first_param = next(iter(model.parameters()), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()

    loss = 0.0
    correct = 0      # becomes a 0-dim tensor on `device` after the first batch
    n_samples = 0
    n_batches = 0
    try:
        # Evaluation never calls backward(), so skip building autograd graphs.
        with torch.no_grad():
            for bx, by in dataloader:
                bx, by = bx.to(device), by.to(device)
                logit = model(bx)
                loss += criterion(logit, by).item()

                _, pred_y = logit.max(dim=1)
                # Count hits per sample so a smaller final batch is weighted
                # by its real size instead of counting as a full batch.
                correct = correct + (pred_y == by).sum()
                n_samples += by.size(0)
                n_batches += 1
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if n_batches == 0:
        raise ValueError('eval() received a dataloader that yields no batches')

    loss /= n_batches
    accuracy = correct.float() / n_samples
    return loss, accuracy

In [13]:
# Fresh model on the selected device, cross-entropy loss on the raw class
# scores, and Adam over all of the model's parameters.
net = FNN()
net = net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-3)

In [15]:
# Number of passes over the training set; shared by the loop bound and the
# progress line so the two cannot drift apart.
num_epochs = 20

for epoch in range(num_epochs):
    since = time.time()
    # Make sure the model is in training mode for the optimisation pass,
    # regardless of what ran before this epoch.
    net.train()
    for batch_x, batch_y in train_dl:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()

        logit = net(batch_x)
        error = criterion(logit, batch_y)
        error.backward()

        optimizer.step()

    # The reported time covers the optimisation pass only; the two full
    # evaluation passes below are not included.
    now = time.time()
    train_loss, train_acc = eval(net, criterion, train_dl)
    test_loss, test_acc = eval(net, criterion, test_dl)
    print('[%d/%d, %.0f seconds] Train Loss: %.3f, Accuracy: %.3f | Test Loss: %.3f, Accuracy: %.3f' % (epoch+1, num_epochs, now-since, train_loss, train_acc, test_loss, test_acc))
    

[1/20, 7 seconds] Train Loss: 0.113, Accuracy: 0.966 | Test Loss: 0.118, Accuracy: 0.962
[2/20, 6 seconds] Train Loss: 0.062, Accuracy: 0.981 | Test Loss: 0.084, Accuracy: 0.972
[3/20, 5 seconds] Train Loss: 0.044, Accuracy: 0.986 | Test Loss: 0.078, Accuracy: 0.976
[4/20, 5 seconds] Train Loss: 0.028, Accuracy: 0.991 | Test Loss: 0.067, Accuracy: 0.980
[5/20, 5 seconds] Train Loss: 0.020, Accuracy: 0.994 | Test Loss: 0.060, Accuracy: 0.983
[6/20, 5 seconds] Train Loss: 0.020, Accuracy: 0.993 | Test Loss: 0.075, Accuracy: 0.979
[7/20, 5 seconds] Train Loss: 0.022, Accuracy: 0.993 | Test Loss: 0.082, Accuracy: 0.979
[8/20, 5 seconds] Train Loss: 0.020, Accuracy: 0.993 | Test Loss: 0.087, Accuracy: 0.978
[9/20, 5 seconds] Train Loss: 0.012, Accuracy: 0.996 | Test Loss: 0.077, Accuracy: 0.983
[10/20, 5 seconds] Train Loss: 0.012, Accuracy: 0.996 | Test Loss: 0.080, Accuracy: 0.982
[11/20, 5 seconds] Train Loss: 0.009, Accuracy: 0.997 | Test Loss: 0.079, Accuracy: 0.982
[12/20, 5 seconds] 

初始化

In [19]:
# Manual He (Kaiming) initialisation: weights ~ N(0, sqrt(2 / fan_in)), biases = 0.
# The loop variable is `para`; the body previously referred to an undefined
# `param`, which fails on a fresh kernel (or silently touches a stale tensor
# left over from an earlier cell).
# torch.no_grad() lets the parameters be overwritten in place without the
# update being recorded by autograd.
with torch.no_grad():
    for name, para in net.named_parameters():
        if 'weight' in name:
            # Weight matrices are (out_features, in_features).
            n_o, n_i = para.size()
            para.normal_(0, math.sqrt(2 / n_i))
        if 'bias' in name:
            para.zero_()

In [20]:
# Re-initialise the network with the torch.nn.init helpers: Kaiming-normal
# for every weight tensor, zeros for every bias.
for name, para in net.named_parameters():
    is_weight = 'weight' in name
    is_bias = 'bias' in name
    if is_weight:
        nn.init.kaiming_normal_(para)
    if is_bias:
        nn.init.zeros_(para)

In [21]:
# net.modules() yields the network itself first and then each sub-module,
# so the whole FNN is printed before the three Linear layers.
for m in net.modules():
   print(m)

FNN(
  (fc1): Linear(in_features=784, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=300, bias=True)
  (fc3): Linear(in_features=300, out_features=10, bias=True)
)
Linear(in_features=784, out_features=500, bias=True)
Linear(in_features=500, out_features=300, bias=True)
Linear(in_features=300, out_features=10, bias=True)


In [22]:
# Materialise the module iterator so layers can be picked by position.
modules = list(net.modules())
# Index 0 is the FNN container itself; index 1 is its first Linear layer.
modules[1]

Linear(in_features=784, out_features=500, bias=True)

In [23]:
# Inspect the weight Parameter of the first Linear layer (784 -> 500).
modules[1].weight

Parameter containing:
tensor([[-0.0202,  0.0355, -0.0444,  ..., -0.0597, -0.0940, -0.0544],
        [ 0.0164, -0.0343,  0.0212,  ...,  0.0313,  0.0141,  0.0468],
        [ 0.0315, -0.0028,  0.0563,  ..., -0.0886,  0.0196,  0.0329],
        ...,
        [ 0.0901,  0.0793, -0.0329,  ...,  0.0721, -0.0240, -0.0314],
        [ 0.0261, -0.0667, -0.0552,  ..., -0.0642,  0.0618,  0.0185],
        [-0.0226,  0.0246, -0.0435,  ...,  0.0253,  0.1846, -0.0354]],
       device='mps:0', requires_grad=True)