# Pytorch Practice

In [3]:
import torch
import torch.nn as nn
import numpy as np

# 0. Tensor基础
- 0: scalar
- 1: vector
- 2: matrix
- 3+: n-dimensional tensor

In [28]:
from torch import tensor

## 0.1 Scalar

通常就是一个数值

In [31]:
# A 0-dimensional tensor wraps a single number.
scalar = tensor(42.0)
# Show the tensor, its rank (0 for a scalar) and the plain Python value.
(scalar, scalar.dim(), scalar.item())

(tensor(42.), 0, 42.0)

## 0.2 Vector
例如：[-5., 2., 0.]，在深度学习中通常指特征，例如词向量特征、某一维特征等。

In [32]:
# A 1-dimensional tensor: three components along a single axis.
vector = tensor([1.5, -0.5, 3.0])
# Show the tensor, its rank and its shape.
(vector, vector.dim(), vector.shape)

(tensor([ 1.5000, -0.5000,  3.0000]), 1, torch.Size([3]))

## 0.3 Matrix
一般计算的都是矩阵，通常都是多维的。

In [33]:
# A 2-dimensional tensor (2 x 2).
matrix = tensor([[1.0, 2.0], [3.0, 4.0]])
(
    matrix @ matrix,              # matrix product
    tensor([1.0, 0.0]) @ matrix,  # vector-matrix product: selects the first row
    matrix * matrix,              # element-wise product
    tensor([1.0, 2.0]) @ matrix,  # vector-matrix product: 1 * row0 + 2 * row1
)

(tensor([[ 7., 10.],
         [15., 22.]]),
 tensor([1., 2.]),
 tensor([[ 1.,  4.],
         [ 9., 16.]]),
 tensor([ 7., 10.]))

# 1. 线性回归模型
- 线性回归模型就是一个不加激活函数的全连接层

In [6]:
class LinearRegressionModel(nn.Module):
    """Linear regression expressed as one fully connected layer with no activation.

    Args:
        input_dim: number of input features, forwarded to ``nn.Linear``.
        output_dim: number of output values, forwarded to ``nn.Linear``.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The attribute name ``linear`` determines the state_dict keys.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the affine map ``linear(x)`` of the input."""
        return self.linear(x)

In [7]:
# One input feature and one output value: the model learns y = w * x + b.
input_dim, output_dim = 1, 1

model = LinearRegressionModel(input_dim=input_dim, output_dim=output_dim)

In [8]:
# Echo the module so the notebook shows its layer structure.
model

LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

指定参数和损失函数

In [9]:
# Training hyper-parameters.
epochs, learning_rate = 1000, 0.01

# Mean-squared-error loss between predictions and targets.
criterion = nn.MSELoss()
# Plain SGD over all of the model's parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

创建输入数据

In [12]:
# Inputs 0..10 as a float32 column vector of shape (11, 1).
x_values = list(range(11))
x_train = np.asarray(x_values, dtype=np.float32).reshape(-1, 1)

# Targets lie exactly on the line y = 2x + 1, shaped like the inputs.
y_values = [2 * x + 1 for x in x_values]
y_train = np.asarray(y_values, dtype=np.float32).reshape(-1, 1)

(x_train.shape, y_train.shape)

((11, 1), (11, 1))

开始训练模型

In [14]:
# Convert the NumPy training data to tensors once: the data does not change
# between epochs, so rebuilding the tensors on every iteration is wasted work.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Count epochs from 1 so the progress log reads 50, 100, ..., epochs without
# mutating the loop variable inside the body.
for epoch in range(1, epochs + 1):
    # Clear the gradients accumulated by the previous step.
    optimizer.zero_grad()

    # Forward pass.
    outputs = model(inputs)

    # Loss between predictions and targets.
    loss = criterion(outputs, labels)

    # Backward pass: gradients of the loss w.r.t. the model parameters.
    loss.backward()

    # Update the weights.
    optimizer.step()

    # Report progress every 50 epochs.
    if epoch % 50 == 0:
        print('epoch{}, loss{}'.format(epoch, loss.item()))

epoch50, loss0.024922925978899002
epoch100, loss0.014215030707418919
epoch150, loss0.008107777684926987
epoch200, loss0.004624378867447376
epoch250, loss0.0026375409215688705
epoch300, loss0.0015043760649859905
epoch350, loss0.0008580290013924241
epoch400, loss0.0004893920267932117
epoch450, loss0.000279134139418602
epoch500, loss0.00015920335135888308
epoch550, loss9.080698509933427e-05
epoch600, loss5.1790630095638335e-05
epoch650, loss2.9537959562730975e-05
epoch700, loss1.6848685845616274e-05
epoch750, loss9.610754204913974e-06
epoch800, loss5.48083289686474e-06
epoch850, loss3.1261442927643657e-06
epoch900, loss1.782401909622422e-06
epoch950, loss1.0167717618969618e-06
epoch1000, loss5.798705160486861e-07


测试模型预测结果

In [18]:
# Inference only: turn off gradient tracking rather than reaching into ``.data``,
# which detaches the result without autograd's safety checks.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
# Drop the trailing ``.numpy()`` above to keep the predictions as a tensor.
predicted

tensor([[ 0.9986],
        [ 2.9988],
        [ 4.9990],
        [ 6.9992],
        [ 8.9994],
        [10.9996],
        [12.9998],
        [15.0000],
        [17.0002],
        [19.0004],
        [21.0006]])

模型的保存与读取

In [19]:
# Persist only the learned parameters (the state_dict), not the whole module.
weights = model.state_dict()
torch.save(weights, 'model.pkl')

# Read the parameters back and load them into the model; the value returned by
# load_state_dict is displayed as the cell output.
restored = torch.load('model.pkl')
model.load_state_dict(restored)

<All keys matched successfully>

### 使用GPU进行训练
- 只需要把数据和模型传入到cuda里面就可以


首先验证GPU信息

In [22]:
# Query the device name only when CUDA is usable: torch.cuda.get_device_name
# raises on a machine without a CUDA device, which would abort the notebook
# even though later cells fall back to the CPU.
cuda_available = torch.cuda.is_available()
cuda_info = (
    cuda_available,
    torch.cuda.device_count(),
    torch.cuda.get_device_name(0) if cuda_available else None,
)
cuda_info

(True, 1, 'GeForce RTX 2060 SUPER')

搬迁原有的训练代码至GPU

In [27]:
class LinearRegressionModel(nn.Module):
    """Single fully connected layer without activation (linear regression).

    Same architecture as the model defined in section 1; it is redefined here
    so the GPU training cell is self-contained.

    Args:
        input_dim: number of input features, forwarded to ``nn.Linear``.
        output_dim: number of output values, forwarded to ``nn.Linear``.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The attribute name ``linear`` determines the state_dict keys.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the affine map ``linear(x)`` of the input."""
        return self.linear(x)
    
input_dim = 1
output_dim = 1

# Use the first CUDA device when one is available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the model and move its parameters to the chosen device.
model = LinearRegressionModel(input_dim, output_dim)
model.to(device)

epochs = 1000
learning_rate = 0.01
# Optimizer: plain SGD over the model's (already moved) parameters.
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)
# Loss function: mean squared error.
criterion = nn.MSELoss()

# Convert the training data to tensors and copy it to the device once.
# Doing this inside the loop would repeat the host-to-device transfer on
# every epoch even though the data never changes.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

# Count epochs from 1 so the progress log reads 50, 100, ..., epochs without
# mutating the loop variable inside the body.
for epoch in range(1, epochs + 1):
    # Clear the gradients accumulated by the previous step.
    optimizer.zero_grad()

    # Forward pass.
    outputs = model(inputs)

    # Loss between predictions and targets.
    loss = criterion(outputs, labels)

    # Backward pass: gradients of the loss w.r.t. the model parameters.
    loss.backward()

    # Update the weights.
    optimizer.step()

    # Report progress every 50 epochs.
    if epoch % 50 == 0:
        print('epoch{}, loss{}'.format(epoch, loss.item()))

epoch50, loss0.005179176572710276
epoch100, loss0.0029540262185037136
epoch150, loss0.0016848703380674124
epoch200, loss0.000960984209086746
epoch250, loss0.0005481111584231257
epoch300, loss0.0003126159717794508
epoch350, loss0.00017831222794484347
epoch400, loss0.0001016985479509458
epoch450, loss5.8006004110211506e-05
epoch500, loss3.308253508294001e-05
epoch550, loss1.8868291590479203e-05
epoch600, loss1.0761198609543499e-05
epoch650, loss6.137979653431103e-06
epoch700, loss3.500336333672749e-06
epoch750, loss1.9965177671110723e-06
epoch800, loss1.138864263339201e-06
epoch850, loss6.497398317151237e-07
epoch900, loss3.703109427988238e-07
epoch950, loss2.1137333305887296e-07
epoch1000, loss1.204232802365368e-07


# 2. RNN模型

In [5]:
class RNN(nn.Module):
    """Embedding -> RNN -> linear head that predicts from the last time step.

    Args:
        input_size: number of distinct input indices (size of the embedding table).
        hidden_size: width of the embedding vectors and of the RNN hidden state.
        output_size: number of values produced by the final linear layer.
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers = 1):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Embedding layer: maps each input index to a hidden_size-dim vector.
        self.embedding = nn.Embedding(input_size, hidden_size)
        # Because the embedding already outputs hidden_size features, the RNN's
        # input width equals hidden_size. batch_first=True makes the first
        # tensor dimension the batch, which is the layout forward() indexes.
        self.rnn = nn.RNN(hidden_size, hidden_size, num_layers, batch_first=True)
        # Fully connected output layer.
        self.fc = nn.Linear(hidden_size, output_size)
        # An optional final LogSoftmax layer is left disabled. Open question
        # from the author: how should a company's growth outlook be expressed,
        # and should a classification-style score be used?
        # self.softmax = nn.LogSoftmax()

    def forward(self, input, hidden=None):
        """Run one batch of index sequences through the network.

        Args:
            input: integer index tensor of shape [batch_size, num_step].
            hidden: initial hidden state of shape
                [num_layers, batch_size, hidden_size]; when None, ``nn.RNN``
                starts from zeros.

        Returns:
            Tuple ``(output, hidden)``: output has shape
            [batch_size, output_size]; hidden is the final RNN state of shape
            [num_layers, batch_size, hidden_size].
        """
        # Embedding lookup (equivalent to one-hot encoding followed by a
        # linear projection). nn.Embedding takes only the index tensor.
        # Result: [batch_size, num_step, hidden_size]
        output = self.embedding(input)

        # Result: [batch_size, num_step, hidden_size]
        output, hidden = self.rnn(output, hidden)

        # The RNN output holds every time step; keep only the last one.
        # Result: [batch_size, hidden_size]
        output = output[:, -1, :]

        # Fully connected layer. Result: [batch_size, output_size]
        output = self.fc(output)

        # Softmax normalisation stays disabled together with self.softmax.
        # output = self.softmax(output)
        return output, hidden

# 3. LSTM模型

输入是形状为 [seq_len, batch_size, input_dim] 的三维 tensor（这是 nn.LSTM 在 batch_first=False 时的默认布局）：

input_dim是输入的维度，比如是128

batch_size是一次往RNN输入句子的数目，比如是5。

seq_len是一个句子的最大长度，比如15