In [1]:
import torch
import numpy as np
torch.manual_seed(1024)     # 为了保证每次结果运行的稳定，设置随机数种子

<torch._C.Generator at 0x2bb0d4bdcd0>

### 在数学上，一个值有以下四种类型

* Scaler:单个数字
* Vector:一维数组
* Matrix:二维数组
* Tensor:高维数组

In [2]:
# 首先定义一个描述值的函数
def describe(x):
    print("Type: {}".format(x.type()))
    print("shape/size: {}".format(x.shape))
    print("Values: \n{}".format(x))

In [3]:
describe(torch.Tensor(2, 3))   # 创建一个Tensor

Type: torch.FloatTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [4]:
describe(torch.randn(2, 3))    # 数据服从标准的高斯分布

Type: torch.FloatTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[-1.4837,  0.2671, -1.8337],
        [-0.1047,  0.6002, -0.5496]])


In [5]:
x = torch.rand(2, 3)    #数据在[0,1]之间，且服从均匀分布
describe(x)

Type: torch.FloatTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[0.8793, 0.1163, 0.0540],
        [0.5480, 0.2743, 0.7038]])


In [6]:
y = torch.zeros(2, 3)
describe(y)

Type: torch.FloatTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [7]:
z = torch.ones(2, 3)
describe(z)

Type: torch.FloatTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[1., 1., 1.],
        [1., 1., 1.]])


In [9]:
a = torch.zeros(2, 3).fill_(5)    # "..._"带有下划线的函数表示原地操作
describe(a)

Type: torch.FloatTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[5., 5., 5.],
        [5., 5., 5.]])


### torch与numpy之间的转换

In [12]:
npy = np.random.rand(2, 3)
print("npy dtype:{}".format(npy.dtype))
trh = torch.from_numpy(npy)   # 将numpy转成torch
describe(trh)

npy dtype:float64
Type: torch.DoubleTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[0.3292, 0.8979, 0.7085],
        [0.3634, 0.6681, 0.8048]], dtype=torch.float64)


### 类型以及tensor的一些操作

In [13]:
a = torch.arange(6).view(2, 3)    # view()相当于numpy的reshape
describe(a)

Type: torch.LongTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [14]:
b = torch.FloatTensor([[1, 2, 3],
                       [4, 5, 6]])   # 创建Float类型的Tensor
describe(b)

Type: torch.FloatTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [15]:
b = b.long()   # 转成Long类型
describe(b)

Type: torch.LongTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[1, 2, 3],
        [4, 5, 6]])


In [16]:
c = torch.add(b, b)
describe(c)

Type: torch.LongTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[ 2,  4,  6],
        [ 8, 10, 12]])


In [17]:
b

tensor([[1, 2, 3],
        [4, 5, 6]])

In [18]:
# 维度计算
b_0 = torch.sum(b, dim=0)
describe(b_0)

Type: torch.LongTensor
shape/size: torch.Size([3])
Values: 
tensor([5, 7, 9])


In [19]:
b_1 = torch.sum(b, dim=1)
describe(b_1)

Type: torch.LongTensor
shape/size: torch.Size([2])
Values: 
tensor([ 6, 15])


In [20]:
b_T = torch.transpose(b, 0, 1)
describe(b_T)
# 返回输入b的转置交换维度0和维度1，输入和输出共享内存

Type: torch.LongTensor
shape/size: torch.Size([3, 2])
Values: 
tensor([[1, 4],
        [2, 5],
        [3, 6]])


In [22]:
# 切片操作
describe(b[:1, :2])

Type: torch.LongTensor
shape/size: torch.Size([1, 2])
Values: 
tensor([[1, 2]])


In [23]:
describe(b[0, 1])

Type: torch.LongTensor
shape/size: torch.Size([])
Values: 
2


In [26]:
indices = torch.LongTensor([0, 2])
c = torch.index_select(b, dim=1, index=indices)
describe(b)
describe(c)
#index_select(obj, dim, index):obj表示要操作的对象，dim=0表示操作第0轴，
#即按行进行索引；dim=1表示按列进行索引；index表示取对应轴上的值

Type: torch.LongTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[1, 2, 3],
        [4, 5, 6]])
Type: torch.LongTensor
shape/size: torch.Size([2, 2])
Values: 
tensor([[1, 3],
        [4, 6]])


In [30]:
indices = torch.LongTensor([0, 1])
d = torch.index_select(b, dim=0, index=indices)
describe(b)
describe(d)

Type: torch.LongTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[1, 2, 3],
        [4, 5, 6]])
Type: torch.LongTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[1, 2, 3],
        [4, 5, 6]])


In [31]:
# torch.Tensor转成np.ndarray
e = torch.Tensor([1, 2])
e_np = e.numpy()   
print("tensor e dtype:{}".format(e.dtype))
print("ndarray e_np dtype:{}".format(e_np.dtype))

tensor e dtype:torch.float32
ndarray e_np dtype:float32


In [36]:
# unsequeeze和sequeeze
# sequeeze作用是进行维度压缩，去掉维数为1的维度
# unsqueeze作用是进行维度扩张，给指定位置加上维数为1的维度
x = torch.arange(12).view(3, 4)
print("x.shape={}".format(x.shape))

x_add_dim = x.unsqueeze(dim=1)  # 在索引为1的位置上，增加维数为1的维度
print("x_add_dim.shape={}".format(x_add_dim.shape))

x_reduce_dim = x_add_dim.squeeze(dim=1)  # 对增维后的x_add_dim操作，去掉维数
                                      #为1的维度(对每一列操作)，因为只有1行
print("x_reduce_dim.shape={}".format(x_reduce_dim.shape))

x.shape=torch.Size([3, 4])
x_add_dim.shape=torch.Size([3, 1, 4])
x_reduce_dim.shape=torch.Size([3, 4])


In [41]:
# cat()与stack()
x = torch.arange(6).view(2, 3)
print("x={}".format(x))
print("x.shape={}".format(x.shape))
x_cat0 = torch.cat([x, x], dim=0)   # 按行进行拼接
print("x_cat0={}".format(x_cat0))
print("x_cat0.shape={}".format(x_cat0.shape))
x_cat1 = torch.cat([x, x], dim=1)   # 按列进行拼接
print("x_cat1={}".format(x_cat1))
print("x_cat1.shape={}".format(x_cat1.shape))
# cat操作不会改变空间维数，如原始tensor为2维，cat后，仍然是2维
x_stack = torch.stack([x, x])  # stack会改变空间的维数
print("x_stack={}".format(x_stack))
print("x_stack.shape={}".format(x_stack.shape))

x=tensor([[0, 1, 2],
        [3, 4, 5]])
x.shape=torch.Size([2, 3])
x_cat0=tensor([[0, 1, 2],
        [3, 4, 5],
        [0, 1, 2],
        [3, 4, 5]])
x_cat0.shape=torch.Size([4, 3])
x_cat1=tensor([[0, 1, 2, 0, 1, 2],
        [3, 4, 5, 3, 4, 5]])
x_cat1.shape=torch.Size([2, 6])
x_stack=tensor([[[0, 1, 2],
         [3, 4, 5]],

        [[0, 1, 2],
         [3, 4, 5]]])
x_stack.shape=torch.Size([2, 2, 3])


### 代数运算

In [42]:
# 矩阵转置操作
x = torch.arange(0, 12).view(3, 4)
print("x={}".format(x))
print("x.transpose(0, 1):\n", x.transpose(0, 1))
print("x.transpose(1, 0):\n", x.transpose(1, 0))

x=tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
x.transpose(0, 1):
 tensor([[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]])
x.transpose(1, 0):
 tensor([[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]])


In [46]:
# 在编程的过程中常常需要对数据的shape进行改变
batch_size = 3
seq_size = 4
feature_size = 5

x  = torch.arange(batch_size * seq_size * feature_size).view(batch_size,
                                                             seq_size,
                                                             feature_size)
print("x.shape:\n", x.shape)
print("x:\n", x)
print("------------------------")

print("x.transpose(1, 0).shape:\n", x.transpose(1, 0).shape)
print("x.transpose(1, 0):\n", x.transpose(1, 0))

x.shape:
 torch.Size([3, 4, 5])
x:
 tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])
------------------------
x.transpose(1, 0).shape:
 torch.Size([4, 3, 5])
x.transpose(1, 0):
 tensor([[[ 0,  1,  2,  3,  4],
         [20, 21, 22, 23, 24],
         [40, 41, 42, 43, 44]],

        [[ 5,  6,  7,  8,  9],
         [25, 26, 27, 28, 29],
         [45, 46, 47, 48, 49]],

        [[10, 11, 12, 13, 14],
         [30, 31, 32, 33, 34],
         [50, 51, 52, 53, 54]],

        [[15, 16, 17, 18, 19],
         [35, 36, 37, 38, 39],
         [55, 56, 57, 58, 59]]])


In [47]:
batch_size = 3
seq_size = 4
feature_size = 5

x = torch.arange(batch_size * seq_size * feature_size).view(batch_size, seq_size, feature_size)

print("x.shape: \n", x.shape)
print("x: \n", x)
print("---------")
#permute用于维度换位
print("x.permute(1, 0, 2).shape: \n", x.permute(1, 0, 2).shape)
print("x.permute(1, 0, 2): \n", x.permute(1, 0, 2))

x.shape: 
 torch.Size([3, 4, 5])
x: 
 tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])
---------
x.permute(1, 0, 2).shape: 
 torch.Size([4, 3, 5])
x.permute(1, 0, 2): 
 tensor([[[ 0,  1,  2,  3,  4],
         [20, 21, 22, 23, 24],
         [40, 41, 42, 43, 44]],

        [[ 5,  6,  7,  8,  9],
         [25, 26, 27, 28, 29],
         [45, 46, 47, 48, 49]],

        [[10, 11, 12, 13, 14],
         [30, 31, 32, 33, 34],
         [50, 51, 52, 53, 54]],

        [[15, 16, 17, 18, 19],
         [35, 36, 37, 38, 39],
         [55, 56, 57, 58, 59]]])


In [48]:
# 矩阵乘法
x1 = torch.arange(6).view(2, 3).float()
describe(x1)

x2 = torch.ones(3, 2)
x2[:, 1] += 1
describe(x2)

describe(torch.mm(x1, x2))

Type: torch.FloatTensor
shape/size: torch.Size([2, 3])
Values: 
tensor([[0., 1., 2.],
        [3., 4., 5.]])
Type: torch.FloatTensor
shape/size: torch.Size([3, 2])
Values: 
tensor([[1., 2.],
        [1., 2.],
        [1., 2.]])
Type: torch.FloatTensor
shape/size: torch.Size([2, 2])
Values: 
tensor([[ 3.,  6.],
        [12., 24.]])


### 计算梯度

In [49]:
x = torch.tensor([[2.0 , 3.0]], requires_grad=True)
z = 3 * x
print(z)

tensor([[6., 9.]], grad_fn=<MulBackward0>)


In [50]:
x = torch.tensor([[2.0, 3.0]], requires_grad=True)
print("x:\n", x)
print("----------")
z = 3 * x
print("z = 3*x:\n", z)
print("----------")
loss = z.sum()
print("loss = z.sum(): \n", loss)
print("----------")
loss.backward()
print("after loss.backward(), x.grad:\n", x.grad)

x:
 tensor([[2., 3.]], requires_grad=True)
----------
z = 3*x:
 tensor([[6., 9.]], grad_fn=<MulBackward0>)
----------
loss = z.sum(): 
 tensor(15., grad_fn=<SumBackward0>)
----------
after loss.backward(), x.grad:
 tensor([[3., 3.]])


### 计算条件函数的梯度

$$ f(x)=\left\{
\begin{array}{ll}
       sin(x) \text{ if } x > 0 \\
       cos(x) \text{ otherwise } \\
\end{array}
\right.$$

In [51]:
def f(x):
    if(x.data > 0).all():
        return torch.sin(x)
    else:
        return torch.cos(x)

In [52]:
# 对于scaler求梯度
x = torch.tensor([1.0], requires_grad=True)
y = f(x)
y.backward()   # 单个的scaler可以直接求梯度
print(x.grad)

tensor([0.5403])


In [54]:
torch.cos(torch.tensor([1.0]))

tensor([0.5403])

In [55]:
# 对于其他类型求梯度
x = torch.tensor([1.0, 0.5], requires_grad=True)
y = f(x)
y.sum().backward()   # 除scaler之外，都需要通过sum()求loss，再进行反向传播
                     # 否则，会出错
print(x.grad)

tensor([0.5403, 0.8776])


In [56]:
def f2(x):
    mask = torch.gt(x, 0).float()
    return mask * torch.sin(x) + (1 - mask) * torch.cos(x)

In [57]:
x = torch.tensor([1.0, -1], requires_grad=True)
y = f2(x)
y.sum().backward()
print(x.grad)

tensor([0.5403, 0.8415])


In [58]:
def describe_grad(x):
    if x.grad is None:
        print("没有梯度")
    else:
        print("gradient: \n{}".format(x.grad))
        print("gradient function:{}".format(x.grad_fn))

In [59]:
x = torch.ones(2, 2, requires_grad=True)
describe(x)
describe_grad(x)

Type: torch.FloatTensor
shape/size: torch.Size([2, 2])
Values: 
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
没有梯度


In [60]:
y = (x + 2) * (x + 5) + 3
describe(y)

Type: torch.FloatTensor
shape/size: torch.Size([2, 2])
Values: 
tensor([[21., 21.],
        [21., 21.]], grad_fn=<AddBackward0>)


In [61]:
z = y.mean()
describe(z)

Type: torch.FloatTensor
shape/size: torch.Size([])
Values: 
21.0


In [62]:
describe_grad(x)

没有梯度


In [63]:
z.backward(create_graph=True, retain_graph=True)
describe_grad(x)

gradient: 
tensor([[2.2500, 2.2500],
        [2.2500, 2.2500]], grad_fn=<CloneBackward>)
gradient function:None


上面的过程，$$ y = x^2+7x+13 $$, dy/dx = 2x+7,所以当x=1时，x.grad=9，
又因为z.mean()，所以求平均，结果为9/4=2.25

### cuda device

In [64]:
print(torch.cuda.is_available())

False


In [66]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [67]:
x = torch.rand(3, 3).to(device)
print(x.device)

cpu


In [68]:
describe(x)

Type: torch.FloatTensor
shape/size: torch.Size([3, 3])
Values: 
tensor([[0.8004, 0.7361, 0.8983],
        [0.3373, 0.3783, 0.7862],
        [0.7811, 0.0282, 0.0970]])
