# PyTorch 基础

## PyTorch 实现反向传播

- 执行一次正向传播时，计算图会保存各个中间变量；反向传播依据链式法则，利用这些中间变量求出每一步的局部导数并依次相乘，所得即是损失函数对权重参数的偏导数。

In [2]:
import torch
# Two scalar leaf tensors; requires_grad=True makes autograd track operations on them.
x = torch.tensor(1.0, dtype=torch.float32, requires_grad=True)
y = torch.tensor(2.0, dtype=torch.float32, requires_grad=True)

# Forward pass: z = x^2, sigma = sigmoid(z).
z = x.pow(2)
sigma = z.sigmoid()

# Cross-entropy-style loss built from y and sigma.
loss = -(y * sigma.log() + (1 - y) * (1 - sigma).log())

# d(loss)/dx computed by autograd; the result is a one-element tuple.
torch.autograd.grad(loss, x)


(tensor(-2.5379),)

* 定义一个神经网络架构：三分类任务，500 个样本、20 个特征；第 1 层 13 个神经元，第 2 层 8 个神经元，输出层 3 个神经元（共三层）；激活函数依次为 ReLU 和 Sigmoid。

In [11]:
import torch
import torch.nn as nn
from torch.nn import functional as F

In [4]:
# 确定数据

In [9]:
# Fix the global seed so the synthetic data set is reproducible.
torch.manual_seed(420)

# Feature matrix: 500 samples x 20 features.
X = torch.rand(500, 20, dtype=torch.float32)
# Class labels drawn from {0, 1, 2}, stored as float32 (cast with .long() before the loss).
y = torch.randint(0, 3, (500,), dtype=torch.float32)

# Network dimensions derived from the data: feature count and number of distinct labels.
input_ = X.size(1)
output_ = y.unique().numel()

In [8]:
y.unique()

tensor([0., 1., 2.])

In [6]:
X


tensor([[0.8054, 0.1990, 0.9759,  ..., 0.0117, 0.2572, 0.2272],
        [0.6076, 0.9066, 0.5540,  ..., 0.8121, 0.0603, 0.7086],
        [0.0708, 0.5807, 0.8304,  ..., 0.8998, 0.0322, 0.4390],
        ...,
        [0.7986, 0.6708, 0.7298,  ..., 0.1268, 0.1310, 0.8556],
        [0.6634, 0.8943, 0.9527,  ..., 0.2029, 0.3998, 0.2302],
        [0.7081, 0.1069, 0.1263,  ..., 0.0153, 0.4722, 0.0718]])

In [12]:
class Model(nn.Module):
    """Three-layer fully connected network.

    Layout: in_features -> 13 (ReLU) -> 8 (sigmoid) -> out_features.
    The two hidden layers have no bias; the output layer does.
    ``forward`` returns the raw output-layer values (no softmax applied).
    """

    def __init__(self, in_features=40, out_features=2):
        super().__init__()
        # Keep this construction order: parameter initialisation consumes the
        # global RNG, so reordering would change the weights for a given seed.
        self.linear1 = nn.Linear(in_features, 13, bias=False)
        self.linear2 = nn.Linear(13, 8, bias=False)
        self.output = nn.Linear(8, out_features, bias=True)

    def forward(self, X):
        """Run the forward pass on X and return the output-layer values."""
        hidden1 = self.linear1(X).relu()
        hidden2 = self.linear2(hidden1).sigmoid()
        return self.output(hidden2)

In [31]:
# Re-seed before building the network so the initial weights are reproducible.
torch.manual_seed(420)
# Instantiate the model with the feature count and class count derived from the data.
net = Model(input_,output_)

In [32]:
# Forward pass. Call the module itself rather than net.forward(X): nn.Module.__call__
# dispatches to forward() and also runs any registered hooks.
z_hat = net(X)

In [33]:
# Define the loss function
criterion=nn.CrossEntropyLoss()
# y is stored as float32, so it is cast to long (class indices) before being passed to the loss.
loss= criterion(z_hat,y.long())

In [35]:
# Backward pass: fills .grad on the network parameters.
# retain_graph=True keeps the computation graph so backward can be called on this loss again
# (e.g. when this cell is re-run).
loss.backward(retain_graph=True)

In [36]:
net.linear1.weight.grad.shape

torch.Size([13, 20])

In [44]:
# Learning rate (step size) for the manual gradient-descent update.
lr=0.1
w=net.linear1.weight.data # weights of the first layer (raw tensor sharing storage with the parameter)
dw=net.linear1.weight.grad # gradient of the loss w.r.t. those weights

In [49]:
# Plain gradient-descent step, performed in place: w = w - lr * dw.
w-=lr*dw

In [50]:
w

tensor([[ 1.3654e-01, -1.3460e-01,  2.1279e-01, -1.7763e-01, -6.8224e-02,
         -1.5410e-01,  1.7244e-01,  8.3873e-02, -1.1153e-01, -1.7294e-01,
         -1.2947e-01, -4.3144e-02, -1.1414e-01,  1.6293e-01, -9.4087e-02,
         -1.4629e-01, -6.8988e-02, -2.1836e-01, -1.0860e-01, -1.2199e-01],
        [ 4.8409e-02,  1.8209e-01,  2.4153e-02, -1.3000e-01,  9.2217e-02,
         -9.5247e-02, -1.0573e-01, -4.2681e-02, -1.1672e-01,  2.4788e-02,
          1.8157e-01,  3.0701e-02,  1.3505e-01, -1.9408e-01, -1.7601e-01,
         -2.9699e-02,  2.1301e-04,  1.3971e-01, -1.9644e-01,  9.3520e-02],
        [-1.9139e-01,  3.6475e-02,  1.4867e-01,  3.1411e-02,  7.2105e-02,
          1.4316e-01,  2.2200e-01, -1.3974e-01,  7.4415e-02,  1.8477e-01,
          1.2811e-01, -2.0221e-01, -1.5431e-01, -2.1827e-01,  1.0008e-01,
          2.2166e-01, -2.1585e-01,  1.7985e-01, -2.0857e-01, -2.6561e-02],
        [ 1.8146e-01, -3.5133e-02,  2.4806e-02,  1.6307e-01, -1.8753e-01,
          5.6652e-02, -1.0906e-01, 

## Momentum

In [None]:
# v(t)=gamma * v(t-1) - lr*dw
# w(t+1)=w(t)+v(t)

In [None]:
# Learning rate (step size).
lr=0.1
# Momentum coefficient: weight given to the previous velocity v(t-1).
gamma=0.9


In [None]:
# Gradient of the loss w.r.t. the first layer's weights.
dw=net.linear1.weight.grad
# The first layer's weights themselves (updated in place below).
w=net.linear1.weight.data

In [53]:
# Initial velocity v(0): all zeros, with the same shape, dtype and device as the gradient.
v = torch.zeros_like(dw)

In [72]:
# Momentum update: v(t) = gamma * v(t-1) - lr * dw
v=gamma*v-lr*dw
# Move the weights along the velocity, in place: w(t+1) = w(t) + v(t)
w+= v

In [73]:
w

tensor([[ 0.1364, -0.1346,  0.2126, -0.1776, -0.0683, -0.1541,  0.1723,  0.0838,
         -0.1116, -0.1730, -0.1295, -0.0432, -0.1142,  0.1628, -0.0941, -0.1463,
         -0.0690, -0.2184, -0.1086, -0.1220],
        [ 0.0511,  0.1843,  0.0242, -0.1269,  0.0938, -0.0957, -0.1047, -0.0410,
         -0.1170,  0.0268,  0.1821,  0.0326,  0.1351, -0.1924, -0.1770, -0.0292,
          0.0003,  0.1411, -0.1943,  0.0957],
        [-0.1863,  0.0404,  0.1573,  0.0368,  0.0789,  0.1527,  0.2296, -0.1342,
          0.0855,  0.1913,  0.1372, -0.1992, -0.1469, -0.2113,  0.1106,  0.2257,
         -0.2087,  0.1899, -0.2024, -0.0197],
        [ 0.1816, -0.0348,  0.0249,  0.1638, -0.1873,  0.0573, -0.1086,  0.2061,
         -0.1933,  0.0175,  0.1401, -0.1311, -0.1317,  0.0768, -0.1756,  0.1318,
          0.0275, -0.2194,  0.0778, -0.1865],
        [ 0.0334,  0.1427, -0.0358,  0.0890, -0.0108, -0.1639, -0.2104,  0.1926,
          0.0868,  0.0685, -0.2003, -0.0220,  0.0492, -0.1948, -0.1765,  0.1195,
      

## torch.optim 

In [None]:
# 导入库
# 确定数据，超参数
# 定义神经网络架构
# 实例化神经网络的类-正向传播
# 定义损失函数
# 定义优化算法

In [79]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F

# Fix the global seed so the synthetic data set is reproducible.
torch.manual_seed(420)

# Feature matrix (500 samples x 20 features) and labels drawn from {0, 1, 2}.
X = torch.rand(500, 20, dtype=torch.float32)
y = torch.randint(0, 3, (500,), dtype=torch.float32)

# Hyperparameters: learning rate and momentum coefficient.
lr, gamma = 0.1, 0.9


class Model(nn.Module):
    """Neural network for the 3-class, 500-sample, 20-feature example.

    Three layers in total: layer 1 has 13 units (ReLU), layer 2 has 8 units
    (sigmoid), and the output layer has ``out_features`` units. Only the
    output layer carries a bias term.
    """

    def __init__(self, in_features=40, out_features=2):
        super().__init__()
        # Keep this construction order: parameter initialisation consumes the
        # global RNG, so reordering would change the weights for a given seed.
        self.linear1 = nn.Linear(in_features, 13, bias=False)
        self.linear2 = nn.Linear(13, 8, bias=False)
        self.output = nn.Linear(8, out_features, bias=True)

    def forward(self, X):
        """Return the output-layer values for input X (no softmax applied)."""
        hidden1 = self.linear1(X).relu()
        hidden2 = self.linear2(hidden1).sigmoid()
        return self.output(hidden2)

In [80]:
# Number of input features = number of columns of X.
input_=X.shape[1]
# Number of classes = number of distinct label values in y.
output_=len(y.unique())

In [82]:
# Re-seed before building the network so the initial weights are reproducible.
torch.manual_seed(420)
# Instantiate the model with the dimensions derived from the data.
net=Model(in_features=input_,out_features=output_)

In [83]:
# Loss function for multi-class classification; it is applied to the network's raw outputs.
criterion=nn.CrossEntropyLoss()


In [91]:
# Print every learnable parameter tensor of the network, one after another.
for a in net.parameters():
    print(a)


Parameter containing:
tensor([[ 1.3656e-01, -1.3459e-01,  2.1281e-01, -1.7763e-01, -6.8218e-02,
         -1.5410e-01,  1.7245e-01,  8.3885e-02, -1.1153e-01, -1.7294e-01,
         -1.2947e-01, -4.3138e-02, -1.1413e-01,  1.6295e-01, -9.4082e-02,
         -1.4629e-01, -6.8982e-02, -2.1836e-01, -1.0859e-01, -1.2199e-01],
        [ 4.8127e-02,  1.8186e-01,  2.4149e-02, -1.3032e-01,  9.2056e-02,
         -9.5202e-02, -1.0584e-01, -4.2852e-02, -1.1669e-01,  2.4581e-02,
          1.8152e-01,  3.0500e-02,  1.3506e-01, -1.9425e-01, -1.7591e-01,
         -2.9751e-02,  2.0485e-04,  1.3957e-01, -1.9666e-01,  9.3293e-02],
        [-1.9192e-01,  3.6070e-02,  1.4778e-01,  3.0845e-02,  7.1393e-02,
          1.4217e-01,  2.2122e-01, -1.4032e-01,  7.3255e-02,  1.8409e-01,
          1.2716e-01, -2.0253e-01, -1.5509e-01, -2.1899e-01,  9.8980e-02,
          2.2123e-01, -2.1659e-01,  1.7880e-01, -2.0922e-01, -2.7275e-02],
        [ 1.8144e-01, -3.5166e-02,  2.4801e-02,  1.6299e-01, -1.8755e-01,
          5.6

In [93]:
# SGD with momentum over all network parameters: lr is the learning rate, gamma the momentum coefficient.
opt=optim.SGD(net.parameters(),lr=lr,momentum=gamma)

## 梯度下降流程

In [None]:
# 正向传播
# 计算损失函数值
# 反向传播得到梯度
# 更新权重和动量
# 清空梯度——清除上一步的梯度（PyTorch 默认会累加梯度，不清空会影响下一次迭代），同时节省内存

In [97]:
y

tensor([1., 1., 2., 0., 2., 0., 1., 0., 1., 2., 1., 0., 0., 2., 0., 2., 1., 1.,
        1., 1., 2., 0., 2., 2., 1., 0., 1., 2., 2., 2., 0., 2., 2., 0., 1., 2.,
        2., 2., 1., 1., 2., 2., 0., 0., 1., 1., 2., 1., 0., 0., 0., 1., 2., 1.,
        1., 1., 2., 1., 0., 1., 1., 2., 2., 2., 1., 0., 2., 1., 1., 1., 2., 1.,
        1., 1., 1., 1., 0., 0., 0., 1., 0., 2., 2., 2., 2., 2., 0., 1., 1., 0.,
        0., 1., 2., 1., 2., 0., 0., 0., 2., 2., 0., 0., 2., 1., 1., 1., 2., 2.,
        0., 1., 1., 0., 2., 0., 2., 1., 1., 2., 0., 0., 1., 0., 0., 2., 1., 2.,
        2., 2., 1., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 2., 1., 0., 0., 1.,
        1., 0., 0., 1., 1., 1., 2., 2., 0., 1., 0., 2., 1., 0., 0., 0., 0., 0.,
        0., 1., 1., 1., 2., 1., 2., 2., 0., 2., 0., 2., 0., 1., 1., 2., 0., 1.,
        2., 1., 0., 1., 1., 0., 2., 1., 1., 2., 1., 0., 1., 0., 1., 1., 1., 2.,
        0., 2., 1., 2., 0., 2., 2., 1., 1., 2., 1., 0., 2., 1., 2., 0., 1., 1.,
        0., 1., 2., 1., 2., 2., 0., 1., 

In [128]:
# One full optimisation step over the whole data set.
# Forward pass: call the module itself (not net.forward) so nn.Module.__call__ runs any hooks.
z_hat = net(X)
# Flatten the labels without hard-coding the sample count, then cast to long (class indices).
loss = criterion(z_hat, y.reshape(-1).long())
# Backward pass: compute gradients of the loss w.r.t. all parameters.
loss.backward()
# Take one step: update the weights w and the momentum buffer v.
opt.step()
# Clear the gradients so they do not accumulate into the next iteration.
opt.zero_grad()
# Show the loss and the first 10 weights of the first neuron of linear1 to watch them change.
print(loss)
print(net.linear1.weight.data[0][:10])


tensor(1.0992, grad_fn=<NllLossBackward0>)
tensor([ 0.1360, -0.1347,  0.2122, -0.1777, -0.0684, -0.1542,  0.1719,  0.0834,
        -0.1117, -0.1730])


## TensorDataset & DataLoader