# NLP Experiment 2 - PyTorch Tutorial
周成杰 11/7

In [29]:
import torch
import torch.nn as nn

## Torch Tensor


In [3]:
# Build a tensor directly from a nested Python list (one inner list per row).
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)
print(x_data)

tensor([[1, 2],
        [3, 4]])


In [4]:
# Different ways to initialise a tensor of a given shape.
shape = (2, 3)

# torch.rand draws from a uniform distribution; torch.randn would draw from a normal one.
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.8005, 0.1208, 0.1743],
        [0.3175, 0.6781, 0.5444]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [28]:
# How a torch Tensor is stored in memory (contiguous / non-contiguous) and the
# ways to change a tensor's shape: reshape / view / permute.
# https://zhuanlan.zhihu.com/p/555700619
print(rand_tensor.reshape(1, 3, 2))
print(rand_tensor.view(1, 1, 2, 3))

# Expanding and collapsing a size-1 axis.
t1 = rand_tensor.unsqueeze(dim=2)  # insert a new axis at position 2
print(t1, t1.shape)
t2 = t1.squeeze(dim=2)  # remove that axis again
print(t2, t2.shape)

tensor([[[0.8005, 0.1208],
         [0.1743, 0.3175],
         [0.6781, 0.5444]]])
tensor([[[[0.8005, 0.1208, 0.1743],
          [0.3175, 0.6781, 0.5444]]]])
tensor([[[0.8005],
         [0.1208],
         [0.1743]],

        [[0.3175],
         [0.6781],
         [0.5444]]]) torch.Size([2, 3, 1])
tensor([[0.8005, 0.1208, 0.1743],
        [0.3175, 0.6781, 0.5444]]) torch.Size([2, 3])


In [9]:
# Concatenation: https://pytorch.org/docs/stable/generated/torch.cat.html#torch.cat
# dim=1 joins the tensors side by side (more columns); dim=0 stacks them (more rows).
t1 = torch.cat((rand_tensor, zeros_tensor, ones_tensor), dim=1)
print(t1)
t2 = torch.cat((rand_tensor, zeros_tensor, ones_tensor), dim=0)
print(t2)

# Matrix product: https://pytorch.org/docs/stable/generated/torch.matmul.html#torch.matmul
# The second operand is transposed so the inner dimensions agree.
t3 = torch.matmul(rand_tensor, ones_tensor.T)
print(t3)

# Element-wise sum: https://pytorch.org/docs/stable/generated/torch.add.html#torch.add
t4 = torch.add(rand_tensor, ones_tensor)
print(t4)

tensor([[0.8005, 0.1208, 0.1743, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 1.0000],
        [0.3175, 0.6781, 0.5444, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 1.0000]])
tensor([[0.8005, 0.1208, 0.1743],
        [0.3175, 0.6781, 0.5444],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000]])
tensor([[1.0956, 1.0956],
        [1.5399, 1.5399]])
tensor([[1.8005, 1.1208, 1.1743],
        [1.3175, 1.6781, 1.5444]])


## Torch Neural Network

In [32]:
# Linear layer
# https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear

# Without a bias the layer owns a single parameter: the weight matrix.
linear = nn.Linear(in_features=5, out_features=2, bias=False)
for param in linear.parameters():
    print(param)

# With a bias the layer owns two parameters: the weight matrix and the bias vector.
linear2 = nn.Linear(in_features=5, out_features=2, bias=True)
for param in linear2.parameters():
    print(param)

Parameter containing:
tensor([[-0.1215,  0.3440, -0.3900,  0.1044, -0.1939],
        [-0.1592,  0.3668,  0.2511, -0.1914, -0.2816]], requires_grad=True)
Parameter containing:
tensor([[-0.3247, -0.3814,  0.2107,  0.3480, -0.4240],
        [-0.2716, -0.1629, -0.0786, -0.1665,  0.1474]], requires_grad=True)
Parameter containing:
tensor([-0.3223, -0.2733], requires_grad=True)


In [36]:
# Input laid out as batch * length * dim; the linear layer acts on the last axis.
x = torch.randn(size=(4, 6, 5))
y = linear(x)
print(y, y.shape)

tensor([[[-0.3628, -0.3967],
         [-0.2073, -0.6649],
         [-0.6159,  0.2434],
         [ 0.6381, -0.0023],
         [-0.4054,  0.0838],
         [-0.3294, -0.0058]],

        [[ 0.7599,  0.2221],
         [-0.2213,  0.4296],
         [ 0.4018, -1.2950],
         [ 0.9586, -0.7426],
         [ 0.4638, -1.2840],
         [ 0.6606,  0.7012]],

        [[ 0.0880, -0.0531],
         [ 0.2026,  0.7092],
         [ 0.0043, -0.9480],
         [-1.3609, -0.6373],
         [-0.8310,  0.0135],
         [ 0.7950,  0.4819]],

        [[-0.5441, -0.4280],
         [-0.2033,  0.0511],
         [-0.7577,  0.0222],
         [ 0.3995, -0.7188],
         [ 0.7437,  0.7451],
         [-0.0713,  0.3792]]], grad_fn=<UnsafeViewBackward0>) torch.Size([4, 6, 2])


In [41]:
# Activation layer: ReLU replaces negative entries with zero and keeps the rest.

activation = nn.ReLU()
z = activation(y)
# Show the input first, then the activated result, for side-by-side comparison.
print(y)
print(z)

tensor([[[-0.3628, -0.3967],
         [-0.2073, -0.6649],
         [-0.6159,  0.2434],
         [ 0.6381, -0.0023],
         [-0.4054,  0.0838],
         [-0.3294, -0.0058]],

        [[ 0.7599,  0.2221],
         [-0.2213,  0.4296],
         [ 0.4018, -1.2950],
         [ 0.9586, -0.7426],
         [ 0.4638, -1.2840],
         [ 0.6606,  0.7012]],

        [[ 0.0880, -0.0531],
         [ 0.2026,  0.7092],
         [ 0.0043, -0.9480],
         [-1.3609, -0.6373],
         [-0.8310,  0.0135],
         [ 0.7950,  0.4819]],

        [[-0.5441, -0.4280],
         [-0.2033,  0.0511],
         [-0.7577,  0.0222],
         [ 0.3995, -0.7188],
         [ 0.7437,  0.7451],
         [-0.0713,  0.3792]]], grad_fn=<UnsafeViewBackward0>)
tensor([[[0.0000, 0.0000],
         [0.0000, 0.0000],
         [0.0000, 0.2434],
         [0.6381, 0.0000],
         [0.0000, 0.0838],
         [0.0000, 0.0000]],

        [[0.7599, 0.2221],
         [0.0000, 0.4296],
         [0.4018, 0.0000],
         [0.9586, 0.

In [42]:
# Softmax layer: normalise along axis 2 so the values along that axis sum to 1.

softmax = nn.Softmax(dim=2)
z1 = softmax(y)
# Show the raw scores first, then the normalised probabilities.
print(y)
print(z1)

tensor([[[-0.3628, -0.3967],
         [-0.2073, -0.6649],
         [-0.6159,  0.2434],
         [ 0.6381, -0.0023],
         [-0.4054,  0.0838],
         [-0.3294, -0.0058]],

        [[ 0.7599,  0.2221],
         [-0.2213,  0.4296],
         [ 0.4018, -1.2950],
         [ 0.9586, -0.7426],
         [ 0.4638, -1.2840],
         [ 0.6606,  0.7012]],

        [[ 0.0880, -0.0531],
         [ 0.2026,  0.7092],
         [ 0.0043, -0.9480],
         [-1.3609, -0.6373],
         [-0.8310,  0.0135],
         [ 0.7950,  0.4819]],

        [[-0.5441, -0.4280],
         [-0.2033,  0.0511],
         [-0.7577,  0.0222],
         [ 0.3995, -0.7188],
         [ 0.7437,  0.7451],
         [-0.0713,  0.3792]]], grad_fn=<UnsafeViewBackward0>)
tensor([[[0.5085, 0.4915],
         [0.6124, 0.3876],
         [0.2975, 0.7025],
         [0.6548, 0.3452],
         [0.3801, 0.6199],
         [0.4198, 0.5802]],

        [[0.6313, 0.3687],
         [0.3428, 0.6572],
         [0.8451, 0.1549],
         [0.8457, 0.

In [40]:
# Dropout layer: a freshly built module is in training mode, so each call zeroes
# a random subset of entries (p=0.5) and rescales the survivors by 1 / (1 - p).

dropout = nn.Dropout(p=0.5)
x1 = torch.rand(2, 3, 4)
print(x1)

# Two passes over the same input draw two independent masks.
y1, y2 = dropout(x1), dropout(x1)
print(y1)
print(y2)

tensor([[[0.8592, 0.0174, 0.8639, 0.3702],
         [0.5233, 0.9780, 0.4521, 0.8146],
         [0.1710, 0.5368, 0.5127, 0.1182]],

        [[0.6980, 0.1634, 0.6144, 0.0277],
         [0.6007, 0.0121, 0.2936, 0.5517],
         [0.5486, 0.8967, 0.7572, 0.6998]]])
tensor([[[1.7183, 0.0348, 0.0000, 0.0000],
         [1.0466, 1.9559, 0.0000, 1.6292],
         [0.0000, 1.0737, 1.0254, 0.2364]],

        [[0.0000, 0.0000, 0.0000, 0.0553],
         [0.0000, 0.0243, 0.0000, 0.0000],
         [0.0000, 1.7934, 1.5143, 0.0000]]])
tensor([[[1.7183, 0.0348, 1.7278, 0.0000],
         [0.0000, 0.0000, 0.9042, 1.6292],
         [0.0000, 0.0000, 0.0000, 0.2364]],

        [[0.0000, 0.0000, 1.2288, 0.0000],
         [0.0000, 0.0243, 0.0000, 0.0000],
         [1.0973, 0.0000, 1.5143, 0.0000]]])


In [46]:
# Embedding layer: a lookup table with 10 rows (one per index) of 5 values each.

emb = nn.Embedding(num_embeddings=10, embedding_dim=5)
for param in emb.parameters():
    print(param, param.shape)

# A 4 x 6 grid of random indices in [0, 10); each index is replaced by its table row.
x = torch.randint(low=0, high=10, size=(4, 6))
print(x)
y = emb(x)
print(y, y.shape)

Parameter containing:
tensor([[-0.7657, -1.9638, -1.3845, -1.1404, -0.2116],
        [ 1.4877,  0.0500,  1.4616,  1.7402, -0.3242],
        [-0.3998, -1.4812, -0.2772,  0.6905,  0.6825],
        [ 0.4230, -0.5876, -0.6416,  0.2801,  0.5740],
        [-0.4172, -0.4693, -0.8067,  1.9842,  1.2168],
        [ 0.6307,  1.7329,  0.7282,  0.0100, -1.6937],
        [-0.2798, -0.5744, -1.7183,  0.0976, -0.4376],
        [ 0.5681, -0.7682, -0.1739, -0.6880,  0.0435],
        [-0.0827, -0.2418, -0.0357, -0.0938, -0.1249],
        [ 0.8882,  0.5511, -0.7181, -0.9120,  0.0813]], requires_grad=True) torch.Size([10, 5])
tensor([[2, 8, 9, 5, 2, 9],
        [4, 0, 3, 7, 7, 0],
        [7, 4, 0, 1, 4, 5],
        [9, 0, 7, 3, 9, 5]])
tensor([[[-0.3998, -1.4812, -0.2772,  0.6905,  0.6825],
         [-0.0827, -0.2418, -0.0357, -0.0938, -0.1249],
         [ 0.8882,  0.5511, -0.7181, -0.9120,  0.0813],
         [ 0.6307,  1.7329,  0.7282,  0.0100, -1.6937],
         [-0.3998, -1.4812, -0.2772,  0.6905,  0.6

In [55]:
# LSTM layer: unidirectional, 5 input features, 8 hidden units.
# batch_first=True means tensors are laid out with the batch axis first.

lstm = nn.LSTM(input_size=5, hidden_size=8, batch_first=True, bidirectional=False)
for name, param in lstm.named_parameters():
    print(name, param.shape)

x = torch.randn(4, 6, 5)  # batch_size * sentence_length * embedding_size
# The module returns (output, state); only the per-step output is kept here.
y, _ = lstm(x)
print(y, y.shape)

weight_ih_l0 torch.Size([32, 5])
weight_hh_l0 torch.Size([32, 8])
bias_ih_l0 torch.Size([32])
bias_hh_l0 torch.Size([32])
tensor([[[-0.0374,  0.1385, -0.1738, -0.1217, -0.0395,  0.1048,  0.1730,
          -0.1069],
         [-0.1703,  0.2635,  0.0329, -0.0674, -0.1061,  0.2347,  0.2165,
           0.1122],
         [-0.0170,  0.2285, -0.0157,  0.0276, -0.2342,  0.2067,  0.2054,
          -0.0760],
         [ 0.1323,  0.1986, -0.0643,  0.0327, -0.2214,  0.1211,  0.2011,
          -0.1882],
         [ 0.1694,  0.4118,  0.0895, -0.0438, -0.0611,  0.1761,  0.2275,
          -0.0618],
         [-0.0437,  0.2553,  0.0097, -0.0311, -0.0771,  0.1383,  0.3086,
          -0.0773]],

        [[-0.0170,  0.0904, -0.1656,  0.0521, -0.1943,  0.1080,  0.1986,
          -0.1571],
         [-0.0679,  0.0442, -0.1730,  0.0587, -0.1524,  0.0752,  0.2647,
          -0.2594],
         [-0.2501,  0.2666,  0.0516, -0.1108,  0.0262,  0.2204,  0.2584,
           0.1463],
         [-0.0926,  0.2042, -0.0345, -0

## Torch Backward & Optimization

In [78]:
# A simple logistic regression model.

class LogisiticRegressionModel(torch.nn.Module):
    """Logistic regression on one scalar feature: a 1 -> 1 linear layer followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        # Single learnable weight and bias.
        self.linear1 = torch.nn.Linear(1, 1)

    def forward(self, x):
        """Return sigmoid(linear1(x)), i.e. values strictly between 0 and 1."""
        return torch.sigmoid(self.linear1(x))

model = LogisiticRegressionModel()


In [79]:
# Show every learnable parameter of the model together with its name.
for name, param in model.named_parameters():
    print(name, param)

linear1.weight Parameter containing:
tensor([[0.7685]], requires_grad=True)
linear1.bias Parameter containing:
tensor([-0.8625], requires_grad=True)


In [86]:
# Build a standard Dataset and DataLoader.
# How PyTorch's DataLoader can be implemented with Python iterators: https://zhuanlan.zhihu.com/p/340465632
# Mini-batching is done through the DataLoader.

# torch.utils.data.TensorDataset could be used here as well.
class TrainDataset(torch.utils.data.Dataset):
    """Toy binary-classification dataset of random scalars.

    Each sample is a random integer-valued float in ``[low, high)``; its label
    is 1.0 when the value is at least ``threshold`` and 0.0 otherwise.

    Args:
        num_samples: number of samples to generate.
        low: inclusive lower bound of the sampled values.
        high: exclusive upper bound of the sampled values.
        threshold: values greater than or equal to this are labelled 1.0.

    The defaults reproduce the original fixed configuration
    (100 samples in [0, 10), threshold 5).
    """

    def __init__(self, num_samples=100, low=0, high=10, threshold=5):
        self.data = torch.randint(low=low, high=high, size=(num_samples,), dtype=torch.float32)
        # Vectorised comparison: avoids a Python loop over 0-d tensors.
        self.label = (self.data >= threshold).to(torch.float32)

    def __getitem__(self, index):
        """Return the ``[value, label]`` pair stored at ``index``."""
        return [self.data[index], self.label[index]]

    def __len__(self):
        """Return the number of samples."""
        return self.data.shape[0]

train_dataset = TrainDataset()

In [87]:
from torch.utils.data import DataLoader

# Walk the dataset in mini-batches of 8 and show each batch.
dataloader = DataLoader(train_dataset, batch_size=8)
for batch in dataloader:
    print(batch)

[tensor([3., 5., 6., 0., 9., 9., 9., 2.]), tensor([0., 1., 1., 0., 1., 1., 1., 0.])]
[tensor([8., 7., 5., 8., 3., 2., 6., 1.]), tensor([1., 1., 1., 1., 0., 0., 1., 0.])]
[tensor([6., 0., 4., 9., 3., 2., 1., 7.]), tensor([1., 0., 0., 1., 0., 0., 0., 1.])]
[tensor([1., 3., 3., 9., 6., 9., 0., 6.]), tensor([0., 0., 0., 1., 1., 1., 0., 1.])]
[tensor([2., 3., 7., 7., 2., 4., 6., 9.]), tensor([0., 0., 1., 1., 0., 0., 1., 1.])]
[tensor([8., 9., 7., 1., 7., 0., 0., 5.]), tensor([1., 1., 1., 0., 1., 0., 0., 1.])]
[tensor([7., 0., 0., 2., 9., 1., 9., 4.]), tensor([1., 0., 0., 0., 1., 0., 1., 0.])]
[tensor([9., 5., 2., 7., 0., 8., 5., 7.]), tensor([1., 1., 0., 1., 0., 1., 1., 1.])]
[tensor([3., 4., 1., 3., 2., 0., 0., 8.]), tensor([0., 0., 0., 0., 0., 0., 0., 1.])]
[tensor([1., 0., 6., 3., 6., 7., 8., 9.]), tensor([0., 0., 1., 0., 1., 1., 1., 1.])]
[tensor([1., 8., 7., 3., 4., 8., 6., 5.]), tensor([0., 1., 1., 0., 0., 1., 1., 1.])]
[tensor([5., 2., 9., 3., 0., 1., 9., 0.]), tensor([1., 0., 1., 0.

In [94]:
critertion = nn.BCELoss()  # binary cross-entropy loss on probabilities
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Adam optimiser

for epoch in range(1000):
    for inputs, targets in dataloader:
        # Give both tensors a trailing axis of size 1 so predictions and
        # targets line up element for element.
        inputs = inputs.view(-1, 1)
        targets = targets.view(-1, 1)

        # Clear stale gradients, then forward -> loss -> backward -> update.
        optimizer.zero_grad()
        loss = critertion(model(inputs), targets)
        loss.backward()
        optimizer.step()


In [95]:
# Show the parameters again after training.
for name, param in model.named_parameters():
    print(name, param)

linear1.weight Parameter containing:
tensor([[4.4175]], requires_grad=True)
linear1.bias Parameter containing:
tensor([-19.6022], requires_grad=True)


In [101]:
# A handful of inputs, one scalar feature per row.
x = torch.tensor([
    [4.0],
    [5.0],
    [9.0],
    [1.0],
    [7.0],
    [6.0],
])

# Gradients are not needed for prediction, so disable autograd tracking.
with torch.no_grad():
    y = model(x)

print(y)

tensor([[1.2649e-01],
        [9.2309e-01],
        [1.0000e+00],
        [2.5430e-07],
        [9.9999e-01],
        [9.9900e-01]])
