In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """LeNet-style CNN mapping 3-channel 32x32 images to 10 class scores.

    Shapes for an (N, 3, 32, 32) input:
    conv1 -> (N, 6, 28, 28), pool -> (N, 6, 14, 14),
    conv2 -> (N, 16, 10, 10), pool -> (N, 16, 5, 5), flattened to (N, 400).
    """

    def __init__(self):
        super().__init__()
        # Two 5x5 convolutions: 3 -> 6 -> 16 channels.
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Three fully connected layers: 16*5*5 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw (un-normalized) class scores of shape (N, 10).

        Args:
            x: image batch of shape (N, 3, H, W); H and W must reduce to 5x5
               after the two conv/pool stages (e.g. 32x32).

        Raises:
            RuntimeError: if the per-sample feature count is not 16*5*5,
                because fc1 cannot consume it.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten per sample and keep the batch dimension fixed. view(-1, 400)
        # would silently regroup a wrong-sized batch into a different number
        # of "samples" whenever the total element count is divisible by 400.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
        
        

In [2]:
# Instantiate the network and print its layer structure
net=Net()
print(net)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:
# Collect the network's learnable parameter tensors and print how many there are
params=list(net.parameters())
print(len(params))

10


In [4]:
# Print the shape of one layer's parameters (index 0 is the first parameter tensor)
print(params[0].size())

torch.Size([6, 3, 5, 5])


In [5]:
# Feed a random batch of one 3x32x32 tensor through the network and print the forward-pass output
# NOTE: `input` shadows the Python builtin of the same name; later cells reuse this variable
input = torch.randn(1,3,32,32)
out = net(input)
print(out)

tensor([[-0.1035, -0.1554,  0.0187, -0.1082, -0.0107, -0.1463, -0.0072,  0.0040,
         -0.0151, -0.0173]], grad_fn=<AddmmBackward>)


In [6]:
# Clear the gradient buffers of all network parameters
net.zero_grad()

In [7]:
# Backpropagate from `out`, using a random (1, 10) tensor as the upstream gradient
out.backward(torch.randn(1,10))

In [8]:
# Loss function: mean squared error between the network output and a random target
target=torch.randn(1,10)
criterion=nn.MSELoss()
output=net(input)
loss=criterion(output,target)
print(loss)

tensor(1.3808, grad_fn=<MseLossBackward>)


In [9]:
# Clear the gradients before backpropagating the loss computed in the previous cell,
# then show conv1's bias gradient (the backward pass itself happens in the next cell)
net.zero_grad()
print(net.conv1.bias.grad)

tensor([0., 0., 0., 0., 0., 0.])


In [10]:
# Backpropagate the MSE loss computed above and show conv1's bias gradient afterwards
loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad after backward
tensor([ 0.0122, -0.0081, -0.0046,  0.0195,  0.0332,  0.0025])


In [11]:
# Weight update: define an SGD optimizer over the network's parameters with learning rate 0.01
import torch.optim as optim
optimizer=optim.SGD(net.parameters(),lr=0.01)


In [13]:
# Run one optimization step: clear gradients, forward pass, compute loss, backpropagate, update
optimizer.zero_grad()
output=net(input)
loss=criterion(output,target)
loss.backward()
optimizer.step()# update the weights

In [14]:
# Build a transform that converts 3-channel data in the (0, 1) range to (-1, 1),
# then create the datasets/loaders that the training and evaluation cells use.
# NOTE(review): CIFAR-10 is inferred from the network's 3x32x32 input, its 10
# outputs and the "10000 test images" message below — confirm the dataset.
import torchvision
import torchvision.transforms as transforms

DATA_ROOT = './data'
BATCH_SIZE = 4  # later cells display the first 4 images/labels of a batch

transform = transforms.Compose([
    transforms.ToTensor(),                                    # PIL image -> float tensor in [0, 1]
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),   # (x - 0.5) / 0.5 -> [-1, 1]
])

trainset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True)

testset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False)

# Human-readable names, indexed by CIFAR-10 label id.
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')


In [15]:
# Fresh network for classification, with cross-entropy loss and SGD (lr 0.001, momentum 0.9)
net2 = Net()
criterion2=nn.CrossEntropyLoss()
optimizer2=optim.SGD(net2.parameters(),lr=0.001,momentum=0.9)

In [None]:
# Train the network
for epoch in range(2):
    running_loss=0.0
    for i,data in enumerate(trainloader,0):
        # Unpack the (X, y) pair
        inputs,labels=data
        # Clear gradients left over from the previous step
        optimizer2.zero_grad()
        # Forward pass
        outputs=net2(inputs)
        # Compute the loss
        loss=criterion2(outputs,labels)
        # Compute gradients
        loss.backward()
        # Update the weights
        optimizer2.step()
        # Print the average loss every 2000 mini-batches
        running_loss+=loss.item()
        if i%2000==1999:
            print('[%d, %5d]loss :%.3f'%(epoch+1,i+1,running_loss/2000))
            # Reset the accumulator so each report averages only the last
            # 2000 mini-batches (the original misspelled the name here, so
            # the sum was never reset).
            running_loss=0.0


In [None]:
# Take one batch from the test set to try the model on
dataiter=iter(testloader)
# Use the builtin next(); iterators have no .next() method in Python 3
images,labels=next(dataiter)
# NOTE(review): imshow is not defined in the visible cells — confirm where it comes from
imshow(torchvision.utils.make_grid(images))
# Closing parenthesis of print() was missing in the original (SyntaxError)
print("GroundTruth:","".join('%5s' % classes[labels[j]] for j in range(4)))

In [None]:
# Predict with the model: the class with the highest score wins
outputs = net2(images)
_, predicted = torch.max(outputs, 1)
# Format the names of the first four predictions, then print them on one line
predicted_names = ['%5s' % classes[predicted[j]] for j in range(4)]
print('Predicted: ', ' '.join(predicted_names))

In [None]:
# Score the trained model on the test set
correct=0
total=0
# No gradients are needed for evaluation
with torch.no_grad():
    for data in testloader:
        images,labels=data
        # Evaluate net2, the network trained in the loop above (the original
        # scored `net`, which that loop never touched)
        outputs=net2(images)
        _,predicted=torch.max(outputs,1)
        # `labels` is the batch's target tensor; the original read the
        # undefined name `label` here
        total+=labels.size(0)
        correct+=(predicted==labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

In [None]:
# Save the trained model's parameters (its state_dict).
# net2 is the network optimized in the training loop; the original saved `net`.
PATH="./data.pth"
torch.save(net2.state_dict(),PATH)

In [None]:
# Read the saved state_dict back from disk
pretrained_net = torch.load(PATH)

In [None]:
# Load the model: build a new network and copy the saved parameters into it
net3 = Net()
net3.load_state_dict(pretrained_net)

In [19]:
import torch 
from torch import nn
class MLP(nn.Module):
    """Multilayer perceptron: 784 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

    def __init__(self, **kwargs):
        # Forward any keyword arguments to nn.Module's constructor.
        super().__init__(**kwargs)
        self.hidden = nn.Linear(in_features=784, out_features=256)
        self.act = nn.ReLU()
        self.output = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Apply the hidden layer, the activation and the output layer in sequence."""
        hidden_activation = self.hidden(x)
        hidden_activation = self.act(hidden_activation)
        return self.output(hidden_activation)

In [21]:
# Run a random batch of two 784-dimensional inputs through the MLP;
# the cell's last expression displays the forward-pass output
x=torch.rand(2,784)
net=MLP()
print(net)
net(x)

MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[ 0.1310, -0.0021,  0.1003, -0.1797, -0.0330,  0.1736, -0.0189,  0.1641,
         -0.0578, -0.3024],
        [ 0.1065, -0.1049,  0.0303, -0.2346, -0.0266,  0.0580,  0.1156,  0.1001,
         -0.1538, -0.2057]], grad_fn=<AddmmBackward>)

In [24]:
# ModuleList takes a list of submodules as input
net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10)) # works like list.append
print(net[-1]) # index access works like a list
print(net)

Linear(in_features=256, out_features=10, bias=True)
ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


In [25]:
# ModuleDict takes a dict of submodules as input and supports dict-style insertion and lookup
net=nn.ModuleDict({
    "linear":nn.Linear(784,256),
    "act":nn.ReLU()
})
net["output"]=nn.Linear(256,10)
print(net['output'])

Linear(in_features=256, out_features=10, bias=True)


In [27]:
# Parameter access: iterate over the named parameters and print each name, shape and type
# (a parameter's values are reached through .data and its gradient through .grad)
for name,param in net.named_parameters():
    print(name,param.size(),type(param))

linear.weight torch.Size([256, 784]) <class 'torch.nn.parameter.Parameter'>
linear.bias torch.Size([256]) <class 'torch.nn.parameter.Parameter'>
output.weight torch.Size([10, 256]) <class 'torch.nn.parameter.Parameter'>
output.bias torch.Size([10]) <class 'torch.nn.parameter.Parameter'>


In [28]:
# Custom layer: Parameter is a subclass of Tensor; ParameterList and ParameterDict
# hold a list and a dict of parameters respectively
class MyDense(nn.Module):
    """Chain of matrix products with three 4x4 parameters followed by one 4x1 parameter."""

    def __init__(self):
        super().__init__()
        # Three square weights first, then the final 4x1 projection.
        square_weights = [nn.Parameter(torch.randn(4, 4)) for _ in range(3)]
        self.params = nn.ParameterList(square_weights)
        self.params.append(nn.Parameter(torch.randn(4, 1)))

    def forward(self, x):
        """Right-multiply x by every parameter in order and return the result."""
        for weight in self.params:
            x = torch.mm(x, weight)
        return x
# Instantiate the custom layer and print its registered parameters
net=MyDense()
print(net)


MyDense(
  (params): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 4x4]
      (1): Parameter containing: [torch.FloatTensor of size 4x4]
      (2): Parameter containing: [torch.FloatTensor of size 4x4]
      (3): Parameter containing: [torch.FloatTensor of size 4x1]
  )
)
