# Pytorch 基础

## Pytorch 实现反向传播

- 执行一次正向传播后会保存所有中间变量，反向传播中链式法则就是通过这些中间变量相乘，所得即是损失函数对权重参数的偏导数。

In [149]:
import torch
x= torch.tensor(1,requires_grad=True, dtype=torch.float32)
z = x**2
y= torch.tensor(2,requires_grad=True, dtype=torch.float32)
sigma = torch.sigmoid(z)
loss =-(y*torch.log(sigma) + (1-y) * torch.log(1-sigma))
torch.autograd.grad(loss,x)


(tensor(-2.5379),)

* 定义一个神经网络架构，三分类，500样本20特征，1-13,2-8,out-3(共三层) 激活函数relu和sigmoid

In [150]:
import torch
import torch.nn as nn
from torch.nn import functional as F

In [151]:
# 确定数据

In [152]:
torch.manual_seed(420)
X = torch.rand((500,20),dtype=torch.float32)
y=torch.randint(low=0,high=3,size=(500,),dtype=torch.float32)

input_=X.shape[1]
output_=len(y.unique())

In [153]:
y.unique()

tensor([0., 1., 2.])

In [154]:
X


tensor([[0.8054, 0.1990, 0.9759,  ..., 0.0117, 0.2572, 0.2272],
        [0.6076, 0.9066, 0.5540,  ..., 0.8121, 0.0603, 0.7086],
        [0.0708, 0.5807, 0.8304,  ..., 0.8998, 0.0322, 0.4390],
        ...,
        [0.7986, 0.6708, 0.7298,  ..., 0.1268, 0.1310, 0.8556],
        [0.6634, 0.8943, 0.9527,  ..., 0.2029, 0.3998, 0.2302],
        [0.7081, 0.1069, 0.1263,  ..., 0.0153, 0.4722, 0.0718]])

In [155]:
class Model(nn.Module):
    def __init__(self,in_features=40,out_features=2):
        super().__init__()
        self.linear1=nn.Linear(in_features,13,bias=False)
        self.linear2=nn.Linear(13,8,False)
        self.output=nn.Linear(8,out_features,True)

    def forward(self,X):
        sigma1=torch.relu(self.linear1(X))
        sigma2=torch.sigmoid(self.linear2(sigma1))
        z_hat=self.output(sigma2)
        return z_hat

In [156]:
torch.manual_seed(420)
net = Model(input_,output_)

In [157]:
z_hat=net.forward(X)

In [158]:
#定义损失函数
criterion=nn.CrossEntropyLoss()
loss= criterion(z_hat,y.long())

In [159]:
loss.backward(retain_graph=True)

In [160]:
net.linear1.weight.grad.shape

torch.Size([13, 20])

In [161]:
lr=0.1
w=net.linear1.weight.data #权重
dw=net.linear1.weight.grad #梯度

In [162]:
w-=lr*dw

In [163]:
w

tensor([[ 1.3656e-01, -1.3459e-01,  2.1281e-01, -1.7763e-01, -6.8219e-02,
         -1.5410e-01,  1.7245e-01,  8.3883e-02, -1.1153e-01, -1.7294e-01,
         -1.2947e-01, -4.3139e-02, -1.1413e-01,  1.6294e-01, -9.4083e-02,
         -1.4629e-01, -6.8983e-02, -2.1836e-01, -1.0859e-01, -1.2199e-01],
        [ 4.8174e-02,  1.8190e-01,  2.4149e-02, -1.3026e-01,  9.2083e-02,
         -9.5210e-02, -1.0582e-01, -4.2824e-02, -1.1669e-01,  2.4615e-02,
          1.8153e-01,  3.0533e-02,  1.3506e-01, -1.9422e-01, -1.7593e-01,
         -2.9742e-02,  2.0621e-04,  1.3959e-01, -1.9662e-01,  9.3331e-02],
        [-1.9184e-01,  3.6138e-02,  1.4793e-01,  3.0939e-02,  7.1511e-02,
          1.4233e-01,  2.2135e-01, -1.4023e-01,  7.3449e-02,  1.8421e-01,
          1.2732e-01, -2.0247e-01, -1.5496e-01, -2.1887e-01,  9.9163e-02,
          2.2131e-01, -2.1647e-01,  1.7898e-01, -2.0911e-01, -2.7156e-02],
        [ 1.8145e-01, -3.5160e-02,  2.4802e-02,  1.6301e-01, -1.8755e-01,
          5.6598e-02, -1.0910e-01, 

## Momentum

In [164]:
# v(t)=gamma * v(t-1) - lr*dw
# w(t+1)=w(t)+v(t)

In [165]:
lr=0.1
gamma=0.9


In [166]:
dw=net.linear1.weight.grad
w=net.linear1.weight.data

In [167]:
v=torch.zeros(dw.shape[0],dw.shape[1])

In [168]:
v=gamma*v-lr*dw
w+= v

In [169]:
w

tensor([[ 1.3655e-01, -1.3459e-01,  2.1280e-01, -1.7763e-01, -6.8220e-02,
         -1.5410e-01,  1.7245e-01,  8.3881e-02, -1.1153e-01, -1.7294e-01,
         -1.2947e-01, -4.3140e-02, -1.1413e-01,  1.6294e-01, -9.4084e-02,
         -1.4629e-01, -6.8984e-02, -2.1836e-01, -1.0859e-01, -1.2199e-01],
        [ 4.8221e-02,  1.8193e-01,  2.4150e-02, -1.3021e-01,  9.2109e-02,
         -9.5217e-02, -1.0580e-01, -4.2795e-02, -1.1670e-01,  2.4650e-02,
          1.8154e-01,  3.0567e-02,  1.3505e-01, -1.9419e-01, -1.7594e-01,
         -2.9734e-02,  2.0757e-04,  1.3962e-01, -1.9658e-01,  9.3368e-02],
        [-1.9175e-01,  3.6205e-02,  1.4808e-01,  3.1034e-02,  7.1630e-02,
          1.4250e-01,  2.2148e-01, -1.4013e-01,  7.3642e-02,  1.8432e-01,
          1.2747e-01, -2.0242e-01, -1.5483e-01, -2.1875e-01,  9.9346e-02,
          2.2138e-01, -2.1635e-01,  1.7915e-01, -2.0900e-01, -2.7037e-02],
        [ 1.8145e-01, -3.5155e-02,  2.4803e-02,  1.6302e-01, -1.8755e-01,
          5.6608e-02, -1.0909e-01, 

## torch.optim 

In [170]:
# 导入库
# 确定数据，超参数
# 定义神经网络架构
# 实例化神经网络的类-正向传播
# 定义损失函数
# 定义优化算法

In [171]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F

torch.manual_seed(420)
X = torch.rand((500,20),dtype=torch.float32)
y=torch.randint(low=0,high=3,size=(500,),dtype=torch.float32)

lr=0.1
gamma=0.9


class Model(nn.Module):
    """定义一个神经网络架构,三分类,500样本20特征,1-13,2-8,out-3(共三层) 激活函数relu和sigmoid"""
    def __init__(self,in_features=40,out_features=2):
        super().__init__()
        self.linear1=nn.Linear(in_features,13,bias=False)
        self.linear2=nn.Linear(13,8,False)
        self.output=nn.Linear(8,out_features,True)

    def forward(self,X):
        sigma1=torch.relu(self.linear1(X))
        sigma2=torch.sigmoid(self.linear2(sigma1))
        z_hat=self.output(sigma2)
        return z_hat

In [172]:
input_=X.shape[1]
output_=len(y.unique())

In [173]:
torch.manual_seed(420)
net=Model(in_features=input_,out_features=output_)

In [174]:
criterion=nn.CrossEntropyLoss()


In [175]:
for a in net.parameters():
    print(a)


Parameter containing:
tensor([[ 1.3656e-01, -1.3459e-01,  2.1281e-01, -1.7763e-01, -6.8218e-02,
         -1.5410e-01,  1.7245e-01,  8.3885e-02, -1.1153e-01, -1.7294e-01,
         -1.2947e-01, -4.3138e-02, -1.1413e-01,  1.6295e-01, -9.4082e-02,
         -1.4629e-01, -6.8982e-02, -2.1836e-01, -1.0859e-01, -1.2199e-01],
        [ 4.8127e-02,  1.8186e-01,  2.4149e-02, -1.3032e-01,  9.2056e-02,
         -9.5202e-02, -1.0584e-01, -4.2852e-02, -1.1669e-01,  2.4581e-02,
          1.8152e-01,  3.0500e-02,  1.3506e-01, -1.9425e-01, -1.7591e-01,
         -2.9751e-02,  2.0485e-04,  1.3957e-01, -1.9666e-01,  9.3293e-02],
        [-1.9192e-01,  3.6070e-02,  1.4778e-01,  3.0845e-02,  7.1393e-02,
          1.4217e-01,  2.2122e-01, -1.4032e-01,  7.3255e-02,  1.8409e-01,
          1.2716e-01, -2.0253e-01, -1.5509e-01, -2.1899e-01,  9.8980e-02,
          2.2123e-01, -2.1659e-01,  1.7880e-01, -2.0922e-01, -2.7275e-02],
        [ 1.8144e-01, -3.5166e-02,  2.4801e-02,  1.6299e-01, -1.8755e-01,
          5.6

In [176]:
opt=optim.SGD(net.parameters(),lr=lr,momentum=gamma)

## 梯度下降流程

In [177]:
# 向前传播
# 损失函数值
# 反向传播得到梯度
# 更新权重和动量
# 清空梯度-清除上一个坐标的梯度节省内存

In [178]:
y

tensor([1., 1., 2., 0., 2., 0., 1., 0., 1., 2., 1., 0., 0., 2., 0., 2., 1., 1.,
        1., 1., 2., 0., 2., 2., 1., 0., 1., 2., 2., 2., 0., 2., 2., 0., 1., 2.,
        2., 2., 1., 1., 2., 2., 0., 0., 1., 1., 2., 1., 0., 0., 0., 1., 2., 1.,
        1., 1., 2., 1., 0., 1., 1., 2., 2., 2., 1., 0., 2., 1., 1., 1., 2., 1.,
        1., 1., 1., 1., 0., 0., 0., 1., 0., 2., 2., 2., 2., 2., 0., 1., 1., 0.,
        0., 1., 2., 1., 2., 0., 0., 0., 2., 2., 0., 0., 2., 1., 1., 1., 2., 2.,
        0., 1., 1., 0., 2., 0., 2., 1., 1., 2., 0., 0., 1., 0., 0., 2., 1., 2.,
        2., 2., 1., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 2., 1., 0., 0., 1.,
        1., 0., 0., 1., 1., 1., 2., 2., 0., 1., 0., 2., 1., 0., 0., 0., 0., 0.,
        0., 1., 1., 1., 2., 1., 2., 2., 0., 2., 0., 2., 0., 1., 1., 2., 0., 1.,
        2., 1., 0., 1., 1., 0., 2., 1., 1., 2., 1., 0., 1., 0., 1., 1., 1., 2.,
        0., 2., 1., 2., 0., 2., 2., 1., 1., 2., 1., 0., 2., 1., 2., 0., 1., 1.,
        0., 1., 2., 1., 2., 2., 0., 1., 

In [179]:
z_hat= net.forward(X)
loss=criterion(z_hat,y.reshape(500).long())
loss.backward()
opt.step() #步子，走一步更新权重w,更新动量v
opt.zero_grad() #清空梯度
print(loss)
print(net.linear1.weight.data[0][:10])


tensor(1.1535, grad_fn=<NllLossBackward0>)
tensor([ 0.1366, -0.1346,  0.2128, -0.1776, -0.0682, -0.1541,  0.1724,  0.0839,
        -0.1115, -0.1729])


## TensorDataset & DataLoader

In [213]:
import torch
from torch.utils.data import TensorDataset

In [181]:
a=torch.randn(500,2,3) # 二维表
b=torch.randn(500,3,4,5) # 图像数据
c=torch.randn(500,1)  # 标签

In [182]:
# 合并，要求被合并张量第一维度（样本数）相等
TensorDataset(a,b,c) # generator

<torch.utils.data.dataset.TensorDataset at 0x131160490>

In [183]:
for x in TensorDataset(a,b,c):
    print(x)
    break

(tensor([[ 0.0555,  0.0347, -0.0640],
        [-0.6151,  0.5850, -1.3424]]), tensor([[[ 1.4229,  0.3269, -0.7064,  0.4886, -0.4457],
         [-0.1819,  1.3381, -0.0515,  0.9612,  0.6173],
         [ 2.1468,  0.0329, -1.3354, -0.2216, -1.2585],
         [-0.0606, -0.7752,  1.5580,  0.8701,  2.0751]],

        [[-0.4195,  0.3641,  1.1461,  1.3315,  0.6182],
         [ 0.4945,  0.4110,  0.4114, -1.9308, -0.2237],
         [ 0.4374,  0.4338,  0.5920,  0.7556, -0.4258],
         [ 1.5789, -0.1794, -0.5889,  1.8905, -0.7718]],

        [[-0.7557, -1.2767,  1.0856,  0.7704,  2.3633],
         [ 0.0490, -0.9121, -0.0489, -1.2371, -1.2507],
         [-2.2677, -0.1536, -0.2799, -0.9272,  1.4546],
         [-0.8360, -0.3864, -0.9757, -0.5694,  0.2240]]]), tensor([-0.1178]))


In [184]:
# DataLoader -用来切割小批量的类

In [212]:
from torch.utils.data import DataLoader

In [186]:
data=TensorDataset(b,c)
DataLoader(data) #generator

<torch.utils.data.dataloader.DataLoader at 0x131160eb0>

In [187]:
for x in DataLoader(data):
    print(x)
    break

[tensor([[[[ 1.4229,  0.3269, -0.7064,  0.4886, -0.4457],
          [-0.1819,  1.3381, -0.0515,  0.9612,  0.6173],
          [ 2.1468,  0.0329, -1.3354, -0.2216, -1.2585],
          [-0.0606, -0.7752,  1.5580,  0.8701,  2.0751]],

         [[-0.4195,  0.3641,  1.1461,  1.3315,  0.6182],
          [ 0.4945,  0.4110,  0.4114, -1.9308, -0.2237],
          [ 0.4374,  0.4338,  0.5920,  0.7556, -0.4258],
          [ 1.5789, -0.1794, -0.5889,  1.8905, -0.7718]],

         [[-0.7557, -1.2767,  1.0856,  0.7704,  2.3633],
          [ 0.0490, -0.9121, -0.0489, -1.2371, -1.2507],
          [-2.2677, -0.1536, -0.2799, -0.9272,  1.4546],
          [-0.8360, -0.3864, -0.9757, -0.5694,  0.2240]]]]), tensor([[-0.1178]])]


In [188]:
bs=120

In [189]:
dataset=DataLoader(data
           ,batch_size=bs
           ,shuffle=True  # 随机打乱
           ,drop_last=False #舍弃最后一个batch
          )       

In [190]:
data

<torch.utils.data.dataset.TensorDataset at 0x131160c10>

In [191]:
dataset

<torch.utils.data.dataloader.DataLoader at 0x131160f10>

In [192]:
for x in dataset:
    print(x[0].shape)
    

torch.Size([120, 3, 4, 5])
torch.Size([120, 3, 4, 5])
torch.Size([120, 3, 4, 5])
torch.Size([120, 3, 4, 5])
torch.Size([20, 3, 4, 5])


In [193]:
len(dataset) #多少个batch

5

In [194]:
dataset.dataset #展示数据

<torch.utils.data.dataset.TensorDataset at 0x131160c10>

In [195]:
len(dataset.dataset) # 多少样本

500

In [196]:
dataset.dataset[499] #单个样本
dataset.dataset[499][0] #单个样本的特征张量
dataset.dataset[499][0].shape #单个样本的特征张量


torch.Size([3, 4, 5])

In [197]:
dataset.dataset[0][0].shape
dataset.batch_size

120

### 切分数据集

In [198]:
from torch.utils.data import random_split

In [199]:
t =torch.arange(12).reshape(4,3)
t

tensor([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]])

In [200]:
random_split(t,[2,2])

[<torch.utils.data.dataset.Subset at 0x1310aa850>,
 <torch.utils.data.dataset.Subset at 0x1310aa400>]

In [None]:
train,test=random_split(t,[2,2])

In [210]:
for tr,te in random_split(t,[2,2]):
    print(tr,te)
    break

tensor([ 9, 10, 11]) tensor([6, 7, 8])


### TensorDataset 封装

In [None]:
torch.manual_seed(420)
features,labels=tensorGenCla(num_class=2)
labels=labels.float()
data=TensorDataset(features,labels)
batch_data=DataLoader(data,batch_size=batch_size,shuffle=True)

### 定义并实例化Dataset

In [None]:
class LBCDataset(Dataset):
    def __init__(self,data):
        self.features=features
        self.labels=labels
        self.lens=len(data.data)
    def __getitem__(self,index):
        return self.features[index,:],self.labels[index]
    def __len__(self):
        return self.lens
    
data=LBC()
LBC_data=LBCDataset(data)


### randm_split 进行切分

In [None]:
# randm_split 进行切分
num_train=int(LBC_data.len*0.7)
num_test=LBC_data.len-num_train
LBC_train,LBC_test=random_split(LBC_data,[num_train,num_test])

In [None]:
# .dataset 可以用于切分后数据集的还原
LBC_train.dataset==LBC_data

### DataLoader 装载

In [None]:
train_loader=DataLoader(LBC_train,batch_size=10,shuffer=True)
test_loader=DataLoader(LBC_test,batch_size=10,shuffer=False)

## batch_normalization处理 & BN层添加

In [226]:
# 创建一个两层神经网络，BN层可选，激活函数可选
class net_class1(nn.modules):
    def __init__(self,act_fun=torch.relu,in_features=2,n_hidden=4,out_features=1,bias=True,momentum=0.1,BN_model=None):
        super(net_class1,self).__init__()
        self.linear1=nn.Linear(in_features,n_hidden,bias=bias)
        self.normalize1=nn.BatchNorm1d(n_hidden,momentum=momentum)
        self.linear2=nn.Linear(n_hidden,out_features,bias=bias)
        self.BN_model=BN_model
        self.act_fun=act_fun

    def forward(self,x):
        if self.BN_model == None:
            z1=self.linear1(x)
            p1=self.act_fun(z1)
            out=self.linear2(p1)
        elif self.BN_model=='pre':
            z1=self.normalize1(self.linear1(x))
            p1=self.act_fun(z1)
            out=self.linear2(p1)
        elif self.BN_model=='post':
            z1=self.linear1(x)
            p1=self.act_fun(z1)
            out=self.normalize1(self.linear2(p1))
        
        return out

        

TypeError: module() takes at most 2 arguments (3 given)