# PyTorch学习-基础笔记（LS1-LS14）

In [7]:
import numpy as np
import torch
import time

## LS1.初见

### PyTorch VS TensorFlow

- PyTorch ：FaceBook使用的框架，易上手，动态图
- TensorFlow ：Google使用的框架，适合工程师搭建，静态图（即需要在Session中Run才能输出，无法方便Debug调参）

### PyTorch包

- 自然语言处理 ：NLP，AllenNLP
- 视觉 ：TorchVision
- 图网络/图卷积 ：geometric，Fast.ai
- 模型部署 ：ONNX

### PyTorch能做什么

#### 1. GPU加速

In [2]:
print(torch.__version__)
print(torch.cuda.is_available())
# 检查版本是否匹配

1.12.1+cu116
True


In [3]:
a = torch.randn(10000, 1000)
b = torch.randn(1000, 2000)
# 创建矩阵

t0 = time.time()
c = torch.matmul(a, b)
t1 = time.time()
print(a.device, t1 - t0, c.norm(2))
# 使用CPU运算

device = torch.device('cuda')
a = a.to(device)
b = b.to(device)
# 将矩阵搬入cuda

t0 = time.time()
c = torch.matmul(a, b)
t2 = time.time()
print(a.device, t2 - t0, c.norm(2))
# 先进行初始化再在GPU上进行运算

t0 = time.time()
c = torch.matmul(a, b)
t2 = time.time()
print(a.device, t2 - t0, c.norm(2))
# 初始化上一步已经完成，直接进行运算

cpu 0.3280599117279053 tensor(141364.0156)
cuda:0 2.3828623294830322 tensor(141507.7656, device='cuda:0')
cuda:0 0.0009987354278564453 tensor(141507.7656, device='cuda:0')


#### 2. 梯度求导

$$
y=a^2x+bx+c\qquad x=1\qquad a,b,c=2
$$

In [1]:
from torch import autograd

In [5]:
x = torch.tensor(1.)
a = torch.tensor(2., requires_grad=True)
b = torch.tensor(2., requires_grad=True)
c = torch.tensor(2., requires_grad=True)
# requires_grad=True 告诉pytorch需要求导

y = a**2 * x + b * x + c

print('before:', a.grad, b.grad, c.grad) # 运算前没有梯度信息
grads = autograd.grad(y, [a, b, c])
print('after :', grads[0], grads[1], grads[2])

before: None None None
after : tensor(4.) tensor(1.) tensor(1.)


#### 3. 常用网络层

- nn.Linear
- nn.Conv2d
- nn.LSTM


- nn.ReLU
- nn.Sigmoid


- nn.Softmax
- nn.CrossEntropyLoss
- nn.MSE

## LS2.有关数据类型的基本操作

In [3]:
a=torch.randn(2,3)

print(a.type())
print(type(a)) # python自带的方法并不能判断数据是float还是int
print(isinstance(a,torch.FloatTensor))
print(isinstance(a,torch.cuda.FloatTensor)) # 在cpu上和在cuda上是不一样的
a=a.cuda()
print(isinstance(a,torch.cuda.FloatTensor))

torch.FloatTensor
<class 'torch.Tensor'>
True
False
True


In [4]:
a=torch.tensor(2.2) # 0维度

print(a.shape)
print(len(a.shape))
print(a.size())

torch.Size([])
0
torch.Size([])


In [11]:
a=torch.randn(2,3) # 2维度
print(a.numel()) #numel指的是tensor占用内存的数量，2*3
print(a.dim()) #equal to len(a.shape)

6
2


## LS3.几个方法创建Tensor

In [2]:
a=np.array([2,3.3]) # import from numpy
print(torch.from_numpy(a))

tensor([2.0000, 3.3000], dtype=torch.float64)


In [10]:
print(torch.empty(1))
#未初始化的Tensor一定要跟写入数据的后续步骤

print(torch.Tensor([2,3]))
print(torch.Tensor(2,3))
print(torch.IntTensor(2,3))
print(torch.FloatTensor(2,3))
#.tensor接受的是数据的内容
#.Tensor.IntTensor.FloatTensor接受的是数据的shape,除非输入list

tensor([2.3694e-38])
tensor([2., 3.])
tensor([[3.4007e-06, 2.0823e+23, 1.6839e+22],
        [3.3056e+21, 1.6800e-07, 2.1513e+23]])
tensor([[ 912537648, 1714446640, 1684288869],
        [1664299620,  875848241, 1714829365]], dtype=torch.int32)
tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [13]:
torch.set_default_tensor_type(torch.DoubleTensor) # set default type
print(torch.tensor([1.2,3]).type())
torch.set_default_tensor_type(torch.FloatTensor) # generally, use default type (torch.FloatTensor)
print(torch.tensor([1.2,3]).type())

torch.DoubleTensor
torch.FloatTensor


In [24]:
print(torch.rand(3,3)) #均匀分布rand会随机产生0-1之间的数值，不包括1
print(torch.randn(3,3)) #正态分布
print(torch.randint(1,10,[3,3])) #均匀采样0-10的tensor
print(torch.full([2,3],7)) #生成对应大小由特定元素填满
print(torch.arange(1,0,-0.1)) #1-0，间隔0.1
print(torch.linspace(0,10,steps=4)) #0-10，等分数列
print(torch.logspace(0,10,steps=4)) #返回10的x次方


a=torch.rand(3,3)
print(torch.rand_like(a)) #仿照a的格式产生

tensor([[0.4139, 0.1328, 0.8629],
        [0.0312, 0.2243, 0.7997],
        [0.5681, 0.9832, 0.4253]])
tensor([[-0.5769, -0.1792,  1.9824],
        [ 0.9507,  0.6540, -0.0424],
        [-0.5346, -0.4958, -1.3556]])
tensor([[3, 9, 9],
        [3, 7, 2],
        [9, 6, 2]])
tensor([[7, 7, 7],
        [7, 7, 7]])
tensor([1.0000, 0.9000, 0.8000, 0.7000, 0.6000, 0.5000, 0.4000, 0.3000, 0.2000,
        0.1000])
tensor([ 0.0000,  3.3333,  6.6667, 10.0000])
tensor([1.0000e+00, 2.1544e+03, 4.6416e+06, 1.0000e+10])
tensor([[0.4119, 0.3886, 0.6826],
        [0.0795, 0.7317, 0.8148],
        [0.0499, 0.3007, 0.1318]])


In [19]:
print(torch.ones(3,3))
print(torch.zeros(3,3))
print(torch.eye(3,4)) #单位矩阵

a=torch.zeros(3,3)
print(torch.ones_like(a))

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])


In [28]:
print(torch.randperm(10))  #随机打散

a=torch.rand(3,2)
idx=torch.randperm(3)
print(idx)
print(a)
print(a[idx])

tensor([6, 9, 1, 5, 4, 2, 3, 8, 7, 0])
tensor([0, 2, 1])
tensor([[0.8521, 0.0413],
        [0.8517, 0.5678],
        [0.7366, 0.4911]])
tensor([[0.8521, 0.0413],
        [0.7366, 0.4911],
        [0.8517, 0.5678]])


## LS8.进行索引及切片

In [3]:
a=torch.rand(5,5,5,5)
print(a[0].shape)
print(a[0,0].shape)
print(a[0,0,0,0])

torch.Size([5, 5, 5])
torch.Size([5, 5])
tensor(0.0799)


In [16]:
a=torch.rand(3,3)
print(a[:2])
print(a[:2,:-1])
print(a[:2,-1:])

tensor([[0.6470, 0.0561, 0.9499],
        [0.4071, 0.8629, 0.3111],
        [0.9711, 0.5190, 0.8429]])
tensor([[0.6470, 0.0561, 0.9499],
        [0.4071, 0.8629, 0.3111]])
tensor([[0.6470, 0.0561],
        [0.4071, 0.8629]])
tensor([[0.9499],
        [0.3111]])


In [17]:
a=torch.rand(5)
print(a)
print(a[...])
print(a[0:5:2])
print(a[::2])

tensor([0.2740, 0.2456, 0.5549, 0.8365, 0.4994])
tensor([0.2740, 0.2456, 0.5549, 0.8365, 0.4994])
tensor([0.2740, 0.5549, 0.4994])
tensor([0.2740, 0.5549, 0.4994])


In [15]:
a=torch.rand(3,3)
print(a)
print(a.index_select(0,torch.tensor([0,2])))
print(a.index_select(1,torch.tensor([0,2])))

tensor([[0.8389, 0.7743, 0.3600],
        [0.5588, 0.4112, 0.4842],
        [0.4019, 0.1733, 0.3622]])
tensor([[0.8389, 0.7743, 0.3600],
        [0.4019, 0.1733, 0.3622]])
tensor([[0.8389, 0.3600],
        [0.5588, 0.4842],
        [0.4019, 0.3622]])


In [18]:
x=torch.randn(2,2)
print(x)
mask=x.ge(0.5) #ByteTensor ,随机的
print(mask)
print(torch.masked_select(x,mask)) #取出所有大于0.5的元素

tensor([[0.6071, 1.4642],
        [1.1086, 0.0352]])
tensor([[ True,  True],
        [ True, False]])
tensor([0.6071, 1.4642, 1.1086])


In [19]:
src=torch.tensor([[4,3,5]
                 ,[6,7,8]])

print(torch.take(src,torch.tensor([0,2,5]))) # 打平后挑选

tensor([4, 5, 8])


## LS9.改变数据维度

In [48]:
a=torch.rand(3,3,5,5)
print(a.shape)
print(a.view(3,3*5*5).shape)
print(a.view(3*3*5,5).shape)
print(a.view(-1).shape)

b=a.view(3,3*5*5)
print(b.view(3,3,5,5).shape)

torch.Size([3, 3, 5, 5])
torch.Size([3, 75])
torch.Size([45, 5])
torch.Size([225])
torch.Size([3, 3, 5, 5])


In [14]:
print(a.shape)
print(a.unsqueeze(0).shape)
print(a.unsqueeze(-1).shape)

torch.Size([3, 3, 5, 5])
torch.Size([1, 3, 3, 5, 5])
torch.Size([3, 3, 5, 5, 1])


In [17]:
b=torch.rand(3).unsqueeze(0).unsqueeze(0).unsqueeze(-1)
print(b.shape)
print(b.squeeze().shape)
print(b.squeeze(0).shape)
print(b.squeeze(-1).shape)

torch.Size([1, 1, 3, 1])
torch.Size([3])
torch.Size([1, 3, 1])


In [30]:
print(b)
print(b.expand(1,2,3,1)) # -1 represent no change
print(b.repeat(1,2,1,1)) # 相当于乘

tensor([[[[0.8181],
          [0.3624],
          [0.7913]]]])
tensor([[[[0.8181],
          [0.3624],
          [0.7913]],

         [[0.8181],
          [0.3624],
          [0.7913]]]])
tensor([[[[0.8181],
          [0.3624],
          [0.7913]],

         [[0.8181],
          [0.3624],
          [0.7913]]]])


In [40]:
c=torch.randn(1,2)
print(c.t().shape)

c=torch.randn(1,2,3)
print(c.transpose(0,2).shape)
print(c.permute(1,2,0).shape) # 多次变换

torch.Size([2, 1])
torch.Size([3, 2, 1])
torch.Size([2, 3, 1])


transpose和permute会打乱内存顺序，要用.contiguous()把内存顺序调整回来。

详细见 [此处《Pytorch中的contiguous理解》](https://blog.csdn.net/gdymind/article/details/82662502?spm=1001.2101.3001.6661.1&utm_medium=distribute.pc_relevant_t0.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1-82662502-blog-107855070.t5_refersearch_landing&depth_1-utm_source=distribute.pc_relevant_t0.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1-82662502-blog-107855070.t5_refersearch_landing&utm_relevant_index=1)

## LS11.进行合并与分割

In [4]:
a=torch.rand(2,3,2,4)
b=torch.rand(2,3,1,4)
print(torch.cat([a,b],dim=2).shape) # 非concat维度必须保持一致

torch.Size([2, 3, 3, 4])


In [6]:
a=torch.rand(2,3,1,4)
b=torch.rand(2,3,1,4)
print(torch.stack([a,b],dim=2).shape) # 必须所有维度全部保持一致

torch.Size([2, 3, 2, 1, 4])


In [19]:
c=torch.rand(2,4)
aa,bb=c.split([1,3],dim=1) # 按照长度拆分
print(aa.shape,bb.shape)
aa,bb=c.split(2,dim=1)
print(aa.shape,bb.shape)
aa,bb=c.chunk(2,dim=1) # 按数量拆分
print(aa.shape,bb.shape)

torch.Size([2, 1]) torch.Size([2, 3])
torch.Size([2, 2]) torch.Size([2, 2])
torch.Size([2, 2]) torch.Size([2, 2])


## LS12.进行数学运算

In [8]:
a=torch.rand(2,3)
b=torch.rand(3)

print(torch.all(torch.eq(a+b,torch.add(a,b)))) #加
print(torch.all(torch.eq(a-b,torch.sub(a,b)))) #减
print(torch.all(torch.eq(a*b,torch.mul(a,b)))) #乘
print(torch.all(torch.eq(a/b,torch.div(a,b)))) #除

tensor(True)
tensor(True)
tensor(True)
tensor(True)


In [9]:
a=torch.tensor([[3.,3.],[3.,3.]])
b=torch.ones(2,2)

print(torch.mm(a,b)) #只能用于二维矩阵的相乘
print(torch.matmul(a,b)) #可以用于多维矩阵的相乘
print(a@b) # @等价于torch.matmul

tensor([[6., 6.],
        [6., 6.]])
tensor([[6., 6.],
        [6., 6.]])
tensor([[6., 6.],
        [6., 6.]])


In [10]:
a=torch.rand(2,3,4,5)

b=torch.rand(2,3,5,6)
print(torch.matmul(a,b).shape)  #最后两维进行矩阵相乘

b=torch.rand(2,1,5,6)
print(torch.matmul(a,b).shape)  #用broadcast机制补全后进行运算

torch.Size([2, 3, 4, 6])
torch.Size([2, 3, 4, 6])


In [12]:
a=torch.full([2,2],3)

print(a.pow(2))
print(a**2)

tensor([[9, 9],
        [9, 9]])
tensor([[9, 9],
        [9, 9]])


In [13]:
aa=a**2

print(aa**0.5)
print(aa.sqrt())
print(aa.rsqrt()) #平方根的倒数

tensor([[3., 3.],
        [3., 3.]])
tensor([[3., 3.],
        [3., 3.]])
tensor([[0.3333, 0.3333],
        [0.3333, 0.3333]])


In [14]:
a=torch.exp(torch.ones(2,2))

print(a)
print(torch.log(a))

tensor([[2.7183, 2.7183],
        [2.7183, 2.7183]])
tensor([[1., 1.],
        [1., 1.]])


In [15]:
a=torch.tensor(3.14)

print(a.floor(),a.ceil(),a.trunc(),a.frac())
#a.floor() 向上取整 a.ceil() 向下取整 a.trunc() 取整数位 a.frac() 取小数位 a.round() 四舍五入

tensor(3.) tensor(4.) tensor(3.) tensor(0.1400)


In [17]:
grad=torch.rand(2,2)*15

print(grad)
print(grad.clamp(10)) #把原矩阵中小于10的都换成10
print(grad.clamp(0,10)) #把原矩阵的数的范围换成0-10

tensor([[ 8.3140,  4.7482],
        [ 3.6645, 11.5761]])
tensor([[10.0000, 10.0000],
        [10.0000, 11.5761]])
tensor([[ 8.3140,  4.7482],
        [ 3.6645, 10.0000]])


## LS13.统计属性运算

In [31]:
a=torch.full([8],1,dtype=torch.float32) #加上数据类型
b=a.view(2,4)
c=a.view(2,2,2)
print(b)
print(c)

print(b.norm(1),c.norm(1)) # 1-范数：绝对值求和
print(b.norm(2),c.norm(2)) # 2-范数：平方和开根号

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]]])
tensor(8.) tensor(8.)
tensor(2.8284) tensor(2.8284)


In [32]:
#求哪个维度上的范数，那个维度就会消失
print(b.norm(1,dim=1),c.norm(1,dim=1)) #第1维度上的1-范数
print(b.norm(1,dim=0),c.norm(1,dim=0)) #第0维度上的1-范数

tensor([4., 4.]) tensor([[2., 2.],
        [2., 2.]])
tensor([2., 2., 2., 2.]) tensor([[2., 2.],
        [2., 2.]])


In [66]:
a=torch.arange(8).view(2,4).float()

print(a)
print(a.min(),a.max(),a.mean(),a.prod(),a.sum()) # 最小值和最大值，均值以及相乘相加

tensor([[0., 1., 2., 3.],
        [4., 5., 6., 7.]])
tensor(0.) tensor(7.) tensor(3.5000) tensor(0.) tensor(28.)


In [67]:
idx=torch.randperm(4)
a=a[:,idx] # 打乱

print(a)
print(a.argmax(),a.argmin()) # 求打平后最小值和最大值的索引

tensor([[2., 3., 1., 0.],
        [6., 7., 5., 4.]])
tensor(5) tensor(3)


In [68]:
a=torch.randn(4,10)

print(a.max(dim=1))
print(a.argmax(dim=1))
print(a.max(dim=1,keepdim=True))

torch.return_types.max(
values=tensor([1.9155, 1.6649, 1.5232, 0.6204]),
indices=tensor([7, 2, 6, 5]))
tensor([7, 2, 6, 5])
torch.return_types.max(
values=tensor([[1.9155],
        [1.6649],
        [1.5232],
        [0.6204]]),
indices=tensor([[7],
        [2],
        [6],
        [5]]))


In [70]:
a=torch.randn(2,5)

#Topk: 从前往后数
#kthvalue:从后往前数
print(a.topk(3,dim=1))  #第1维度排名前3
print(a.topk(3,dim=1,largest=False)) #第1维度排名后3
print(a.kthvalue(3,dim=1)) #第1维度排名倒数第3

torch.return_types.topk(
values=tensor([[ 1.6105, -0.0882, -0.4803],
        [ 0.9384,  0.3840, -0.9316]]),
indices=tensor([[1, 2, 4],
        [0, 3, 2]]))
torch.return_types.topk(
values=tensor([[-0.7087, -0.6440, -0.4803],
        [-2.1850, -0.9395, -0.9316]]),
indices=tensor([[3, 0, 4],
        [1, 4, 2]]))
torch.return_types.kthvalue(
values=tensor([-0.4803, -0.9316]),
indices=tensor([4, 2]))


In [73]:
a=torch.randn(2,2)
print(a)

#>,>=,<,<=,!=,==
print(a>0)
print(torch.gt(a,0)) #等价于a>0,greater

tensor([[0.5077, 1.0516],
        [0.7547, 1.4434]])
tensor([[True, True],
        [True, True]])
tensor([[True, True],
        [True, True]])


In [74]:
a=torch.ones(2,2)
b=torch.randn(2,2)

#torch.eq(a,b):tensor中各元素的对比
#torch.equal(a,b):tensor的对比
print(torch.eq(a,b))
print(torch.eq(a,a))
print(torch.equal(a,a))

tensor([[False, False],
        [False, False]])
tensor([[True, True],
        [True, True]])
True


## LS14.高阶操作

In [76]:
cond=torch.rand(2,2)
print(cond)

a=torch.zeros(2,2).float()
b=torch.ones(2,2).float()

print(torch.where(cond>0.5,a,b)) # where cond>0.5 a,else b

tensor([[0.2644, 0.7934],
        [0.0957, 0.6672]])
tensor([[1., 0.],
        [1., 0.]])


In [90]:
prob=torch.randn(4,10)
idx=prob.topk(dim=1,k=3) #取一维最大3个数
idx=idx[1] # 取最大数的索引

label=torch.arange(10)+100 # 100-109，10个数

print(torch.gather(label.expand(4,10),dim=1,index=idx.long())) # 按照idx的顺序给label构造

tensor([[107, 103, 100],
        [102, 104, 101],
        [106, 107, 108],
        [108, 105, 107]])
