In [1]:
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt

import plotly.offline as pltoff
import plotly.graph_objects as pltobj

```
那么输入此LSTM的 input.size() == (seq_len, batch_size, inp_dim)

在我们的LSTM时间序列预测任务中：
seq_len 时间序列的长度，在这里前9年共 9*12 == 108 个月，则 seq_len == 108
batch_size 同个批次中输入的序列条数
inp_dim 输入数据的维度，在这里输入数据由【客流量，年份，月份】三组数据构成，则 inp_dim == 3

如果是自然语言处理 (NLP) ，那么：
seq_len 将对应句子的长度
batch_size 同个批次中输入的句子数量
inp_dim 句子中用来表示每个单词（中文分词）的矢量维度
```

```
# inp_dim 是LSTM输入张量的维度，我们已经根据我们的数据确定了这个值是3
# mid_dim 是LSTM三个门 (gate) 的网络宽度，也是LSTM输出张量的维度
# num_layers 是使用两个LSTM对数据进行预测，然后将他们的输出堆叠起来。
```

In [25]:
class RegLSTM(torch.nn.Module):
    def __init__(self, inp_dim, out_dim, mid_dim, num_layers):
        super(RegLSTM, self).__init__()

        self.rnn = torch.nn.LSTM(inp_dim, mid_dim, num_layers)  # rnn
        self.reg = torch.nn.Sequential(
            torch.nn.Linear(mid_dim, mid_dim),
            torch.nn.Tanh(),
            torch.nn.Linear(mid_dim, out_dim)
        )  # 回归

    def forward(self, x):
        y = self.rnn(x)[0]

        seq_len, batch_size, hid_dim = y.shape
        y = y.view(-1, hid_dim)
        y = self.reg(y)
        y = y.view(seq_len, batch_size, -1)
        return y

    def output_y_hc(self, x, hc):
        y, hc = self.rnn(x, hc)

        seq_len, batch_size, hid_dim = y.size()
        y = y.view(-1, hid_dim)
        y = self.reg(y)
        y = y.view(seq_len, batch_size, -1)
        return y, hc

In [3]:
class RegGRU(torch.nn.Module):
    def __init__(self, inp_dim, out_dim, mid_dim, num_layers):
        super(RegGRU, self).__init__()

        self.rnn = torch.nn.GRU(inp_dim, mid_dim, num_layers)
        self.reg = torch.nn.Linear(mid_dim, out_dim)

    def forward(self, x):
        x, h = self.rnn(x)

        seq_len, batch_size, hid_dim = x.shape
        x = x.view(-1, hid_dim)
        x = self.reg(x)
        x = x.view(seq_len, batch_size, -1)
        return x

    def output_y_h(self, x, h):
        y, h = self.rnn(x, h)
        seq_len, batch_size, hid_dim = y.size()
        y = y.view(-1, hid_dim)
        y = self.reg(y)
        y = y.view(seq_len, batch_size, -1)
        return y, h

In [4]:
def load_data():
    data = pd.read_csv("./dataset/consumer.csv")
    seq_number = np.array(
        data["UserNum"].tolist(), dtype=np.float32
    )
    seq_number = seq_number[:, np.newaxis]

    seq_year = np.arange(12)
    seq_month = np.arange(12)
    seq_year_month = np.transpose(
        [
            np.repeat(seq_year, len(seq_month)),
            np.tile(seq_month, len(seq_year))
        ],
    )
    seq = np.concatenate(
        (seq_number, seq_year_month),
        axis=1
    )

    print(seq)

    seq = (seq - seq.mean(axis=0)) / seq.std(axis=0)

    return seq


In [5]:
load_data()

[[112.   0.   0.]
 [118.   0.   1.]
 [132.   0.   2.]
 [129.   0.   3.]
 [121.   0.   4.]
 [135.   0.   5.]
 [148.   0.   6.]
 [148.   0.   7.]
 [136.   0.   8.]
 [119.   0.   9.]
 [104.   0.  10.]
 [118.   0.  11.]
 [115.   1.   0.]
 [126.   1.   1.]
 [141.   1.   2.]
 [135.   1.   3.]
 [125.   1.   4.]
 [149.   1.   5.]
 [170.   1.   6.]
 [170.   1.   7.]
 [158.   1.   8.]
 [133.   1.   9.]
 [114.   1.  10.]
 [140.   1.  11.]
 [145.   2.   0.]
 [150.   2.   1.]
 [178.   2.   2.]
 [163.   2.   3.]
 [172.   2.   4.]
 [178.   2.   5.]
 [199.   2.   6.]
 [199.   2.   7.]
 [184.   2.   8.]
 [162.   2.   9.]
 [146.   2.  10.]
 [166.   2.  11.]
 [171.   3.   0.]
 [180.   3.   1.]
 [193.   3.   2.]
 [181.   3.   3.]
 [183.   3.   4.]
 [218.   3.   5.]
 [230.   3.   6.]
 [242.   3.   7.]
 [209.   3.   8.]
 [191.   3.   9.]
 [172.   3.  10.]
 [194.   3.  11.]
 [196.   4.   0.]
 [196.   4.   1.]
 [236.   4.   2.]
 [235.   4.   3.]
 [229.   4.   4.]
 [243.   4.   5.]
 [264.   4.   6.]
 [272.   4

array([[-1.40777884, -1.59325501, -1.59325501],
       [-1.35759023, -1.59325501, -1.30357228],
       [-1.24048348, -1.59325501, -1.01388955],
       [-1.26557778, -1.59325501, -0.72420682],
       [-1.33249593, -1.59325501, -0.43452409],
       [-1.21538918, -1.59325501, -0.14484136],
       [-1.10664719, -1.59325501,  0.14484136],
       [-1.10664719, -1.59325501,  0.43452409],
       [-1.20702441, -1.59325501,  0.72420682],
       [-1.34922546, -1.59325501,  1.01388955],
       [-1.47469699, -1.59325501,  1.30357228],
       [-1.35759023, -1.59325501,  1.59325501],
       [-1.38268454, -1.30357228, -1.59325501],
       [-1.29067209, -1.30357228, -1.30357228],
       [-1.16520057, -1.30357228, -1.01388955],
       [-1.21538918, -1.30357228, -0.72420682],
       [-1.29903686, -1.30357228, -0.43452409],
       [-1.09828242, -1.30357228, -0.14484136],
       [-0.9226223 , -1.30357228,  0.14484136],
       [-0.9226223 , -1.30357228,  0.43452409],
       [-1.02299951, -1.30357228,  0.724

In [6]:
load_data()[:-1,:]

[[112.   0.   0.]
 [118.   0.   1.]
 [132.   0.   2.]
 [129.   0.   3.]
 [121.   0.   4.]
 [135.   0.   5.]
 [148.   0.   6.]
 [148.   0.   7.]
 [136.   0.   8.]
 [119.   0.   9.]
 [104.   0.  10.]
 [118.   0.  11.]
 [115.   1.   0.]
 [126.   1.   1.]
 [141.   1.   2.]
 [135.   1.   3.]
 [125.   1.   4.]
 [149.   1.   5.]
 [170.   1.   6.]
 [170.   1.   7.]
 [158.   1.   8.]
 [133.   1.   9.]
 [114.   1.  10.]
 [140.   1.  11.]
 [145.   2.   0.]
 [150.   2.   1.]
 [178.   2.   2.]
 [163.   2.   3.]
 [172.   2.   4.]
 [178.   2.   5.]
 [199.   2.   6.]
 [199.   2.   7.]
 [184.   2.   8.]
 [162.   2.   9.]
 [146.   2.  10.]
 [166.   2.  11.]
 [171.   3.   0.]
 [180.   3.   1.]
 [193.   3.   2.]
 [181.   3.   3.]
 [183.   3.   4.]
 [218.   3.   5.]
 [230.   3.   6.]
 [242.   3.   7.]
 [209.   3.   8.]
 [191.   3.   9.]
 [172.   3.  10.]
 [194.   3.  11.]
 [196.   4.   0.]
 [196.   4.   1.]
 [236.   4.   2.]
 [235.   4.   3.]
 [229.   4.   4.]
 [243.   4.   5.]
 [264.   4.   6.]
 [272.   4

array([[-1.40777884, -1.59325501, -1.59325501],
       [-1.35759023, -1.59325501, -1.30357228],
       [-1.24048348, -1.59325501, -1.01388955],
       [-1.26557778, -1.59325501, -0.72420682],
       [-1.33249593, -1.59325501, -0.43452409],
       [-1.21538918, -1.59325501, -0.14484136],
       [-1.10664719, -1.59325501,  0.14484136],
       [-1.10664719, -1.59325501,  0.43452409],
       [-1.20702441, -1.59325501,  0.72420682],
       [-1.34922546, -1.59325501,  1.01388955],
       [-1.47469699, -1.59325501,  1.30357228],
       [-1.35759023, -1.59325501,  1.59325501],
       [-1.38268454, -1.30357228, -1.59325501],
       [-1.29067209, -1.30357228, -1.30357228],
       [-1.16520057, -1.30357228, -1.01388955],
       [-1.21538918, -1.30357228, -0.72420682],
       [-1.29903686, -1.30357228, -0.43452409],
       [-1.09828242, -1.30357228, -0.14484136],
       [-0.9226223 , -1.30357228,  0.14484136],
       [-0.9226223 , -1.30357228,  0.43452409],
       [-1.02299951, -1.30357228,  0.724

In [7]:
def run_train_gru():
    inp_dim = 3
    out_dim = 1
    batch_size = 12*4

    '''加载数据'''
    dataset = load_data()
    dataset_x = dataset[:-1, :]
    dataset_y = dataset[+1:, 0]
    assert dataset_x.shape[1] == inp_dim

    train_size = int(len(dataset_x) * 0.75)

    train_x = dataset_x[:train_size]
    train_y = dataset_y[:train_size]

    train_x = train_x.reshape((train_size, inp_dim))
    train_y = train_y.reshape((train_size, out_dim))

    '''创建模型'''
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = RegGRU(inp_dim, out_dim, mid_dim=12, num_layers=2).to(device)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

    '''train'''
    var_x = torch.tensor(train_x, dtype=torch.float32, device=device)
    var_y = torch.tensor(train_y, dtype=torch.float32, device=device)

    batch_var_x = list()
    batch_var_y = list()

    for i in range(batch_size):
        j = train_size - i
        batch_var_x.append(var_x[j:])
        batch_var_y.append(var_y[j:])

    batch_var_x = torch.nn.utils.rnn.pad_sequence(batch_var_x)
    batch_var_y = torch.nn.utils.rnn.pad_sequence(batch_var_y)

    with torch.no_grad():
        weights = np.tanh(np.arange(len(train_y)) * (np.e / len(train_y)))
        weights = torch.tensor(weights, dtype=torch.float32, device=device)

    for e in range(256):
        out = net(batch_var_x)
        loss = (out - batch_var_y) ** 2 * weights
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 100 == 0:
            print('Epoch:{},loss:{:.5f}'.format(e, loss.item()))
            
    '''eval'''
    net_eval = net.eval()
    
    test_x = dataset_x.copy()
    test_x[train_size:,0] = 0
    test_x = test_x[:,np.newaxis,:]
    test_x = torch.tensor(test_x,dtype=torch.float32, device=device)
    
    for i in range(train_size,len(dataset)-2):
        test_y = net_eval(test_x[:i])
        test_x[i+1,0,0] = test_y[-1]
        
    pred_y = test_x[1:,0,0]
    pred_y = pred_y.cpu().data.numpy()
        
    diff_y = pred_y[train_size:] - dataset_y[train_size:-1]
    
    l1_loss = np.mean(np.abs(diff_y))
    l2_loss =  np.mean(diff_y **2)
    
    print("L1:{:.3f}   L2:{:.3f}".format(l1_loss,l2_loss))
    ipyplot = pltoff.iplot
    
    d1 = pltobj.Scatter(y=pred_y)
    d2 = pltobj.Scatter(y=dataset_y)
    ipyplot([d1,d2])

In [8]:
run_train_gru()

[[112.   0.   0.]
 [118.   0.   1.]
 [132.   0.   2.]
 [129.   0.   3.]
 [121.   0.   4.]
 [135.   0.   5.]
 [148.   0.   6.]
 [148.   0.   7.]
 [136.   0.   8.]
 [119.   0.   9.]
 [104.   0.  10.]
 [118.   0.  11.]
 [115.   1.   0.]
 [126.   1.   1.]
 [141.   1.   2.]
 [135.   1.   3.]
 [125.   1.   4.]
 [149.   1.   5.]
 [170.   1.   6.]
 [170.   1.   7.]
 [158.   1.   8.]
 [133.   1.   9.]
 [114.   1.  10.]
 [140.   1.  11.]
 [145.   2.   0.]
 [150.   2.   1.]
 [178.   2.   2.]
 [163.   2.   3.]
 [172.   2.   4.]
 [178.   2.   5.]
 [199.   2.   6.]
 [199.   2.   7.]
 [184.   2.   8.]
 [162.   2.   9.]
 [146.   2.  10.]
 [166.   2.  11.]
 [171.   3.   0.]
 [180.   3.   1.]
 [193.   3.   2.]
 [181.   3.   3.]
 [183.   3.   4.]
 [218.   3.   5.]
 [230.   3.   6.]
 [242.   3.   7.]
 [209.   3.   8.]
 [191.   3.   9.]
 [172.   3.  10.]
 [194.   3.  11.]
 [196.   4.   0.]
 [196.   4.   1.]
 [236.   4.   2.]
 [235.   4.   3.]
 [229.   4.   4.]
 [243.   4.   5.]
 [264.   4.   6.]
 [272.   4

In [16]:
tmp_data = pd.read_csv("./dataset/consumer.csv")
tmp_t = pltobj.Scatter(x=tmp_data["Month"], y=tmp_data["UserNum"],name="流量")
ipyplot = pltoff.iplot
lay = pltobj.Layout(
    title="每个月客流量",
    xaxis=dict(title="月份"),
    yaxis=dict(title="客流量"),
    legend=dict(font=dict())
)
fig = pltobj.Figure(
    layout=lay,
    data=[tmp_t]
)
ipyplot(fig)


In [39]:
def run_train_lstm():
    inp_dim = 3
    out_dim = 1
    mid_dim = 8
    num_layers = 1
    batch_size = 12 * 4

    '''load data'''
    data = load_data()
    dataset_x = data[:-1, :]
    dataset_y = data[+1:, 0]
    assert dataset_x.shape[1] == inp_dim

    train_size = int(len(dataset_x) * 0.75)
    train_x = dataset_x[:train_size]
    train_y = dataset_y[:train_size]

    train_x = train_x.reshape((train_size, inp_dim))
    train_y = train_y.reshape((train_size, out_dim))

    '''创建模型'''
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = RegLSTM(inp_dim=inp_dim, out_dim=out_dim,
                  mid_dim=mid_dim, num_layers=num_layers).to(device)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

    '''训练'''
    var_x = torch.tensor(train_x, dtype=torch.float32, device=device)
    var_y = torch.tensor(train_y, dtype=torch.float32, device=device)

    batch_var_x = list()
    batch_var_y = list()

    for i in range(batch_size):
        j = train_size - i
        batch_var_x.append(var_x[j:])
        batch_var_y.append(var_y[j:])

    batch_var_x = torch.nn.utils.rnn.pad_sequence(batch_var_x)
    batch_var_y = torch.nn.utils.rnn.pad_sequence(batch_var_y)

    with torch.no_grad():
        weights = np.tanh(np.arange(len(train_y)) * (np.e / len(train_y)))
        weights = torch.tensor(weights, dtype=torch.float32, device=device)

    print("开始训练")
    for e in range(384):
        out = net(batch_var_x)

        loss = (out - batch_var_y) ** 2 * weights
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 64 == 0:
            print("Epoch:{:4}, loss:{:.5f}".format(e, loss.item()))

    torch.save(net.state_dict(), "./net.pth")
    print("Save in:./net.pth")

    '''eval'''
    net.load_state_dict(torch.load(("./net.pth"),
                        map_location=lambda storage, loc: storage))
    net_eval = net.eval()
    test_x = dataset_x.copy()
    test_x[train_size:, 0] = 0
    test_x = test_x[:, np.newaxis, :]
    test_x = torch.tensor(test_x, dtype=torch.float32, device=device)

    '''不优雅的方式'''
    # for i in range(train_size, len(data)-2):
    #     test_y = net_eval(test_x[:i])
    #     test_x[i, 0, 0] = test_y[-1]

    '''优雅的方式'''
    eval_size = 1
    zero_ten = torch.zeros((num_layers, eval_size, mid_dim),
                           dtype=torch.float32, device=device)
    test_y, hc = net_eval.output_y_hc(
        test_x[:train_size], (zero_ten, zero_ten))
    test_x[train_size+1, 0, 0] = test_y[-1]

    for i in range(train_size + 1, len(data)-2):
        test_y, hc = net_eval.output_y_hc(test_x[i:i+1], hc)
        test_x[i+1, 0, 0] = test_y[-1]

    pred_y = test_x[1:, 0, 0]
    pred_y = pred_y.cpu().data.numpy()

    diff_y = pred_y[train_size:] - dataset_y[train_size:-1]

    l1_loss = np.mean(np.abs(diff_y))
    l2_loss = np.mean(diff_y ** 2)
    print("L1: {:.3f}  L2: {:.3f}".format(l1_loss, l2_loss))

    ipyplot = pltoff.iplot
    tmp_p = pltobj.Scatter(y =pred_y)
    tmp_d = pltobj.Scatter(y = dataset_y)

    ipyplot([tmp_p, tmp_d])


In [40]:
run_train_lstm()

[[112.   0.   0.]
 [118.   0.   1.]
 [132.   0.   2.]
 [129.   0.   3.]
 [121.   0.   4.]
 [135.   0.   5.]
 [148.   0.   6.]
 [148.   0.   7.]
 [136.   0.   8.]
 [119.   0.   9.]
 [104.   0.  10.]
 [118.   0.  11.]
 [115.   1.   0.]
 [126.   1.   1.]
 [141.   1.   2.]
 [135.   1.   3.]
 [125.   1.   4.]
 [149.   1.   5.]
 [170.   1.   6.]
 [170.   1.   7.]
 [158.   1.   8.]
 [133.   1.   9.]
 [114.   1.  10.]
 [140.   1.  11.]
 [145.   2.   0.]
 [150.   2.   1.]
 [178.   2.   2.]
 [163.   2.   3.]
 [172.   2.   4.]
 [178.   2.   5.]
 [199.   2.   6.]
 [199.   2.   7.]
 [184.   2.   8.]
 [162.   2.   9.]
 [146.   2.  10.]
 [166.   2.  11.]
 [171.   3.   0.]
 [180.   3.   1.]
 [193.   3.   2.]
 [181.   3.   3.]
 [183.   3.   4.]
 [218.   3.   5.]
 [230.   3.   6.]
 [242.   3.   7.]
 [209.   3.   8.]
 [191.   3.   9.]
 [172.   3.  10.]
 [194.   3.  11.]
 [196.   4.   0.]
 [196.   4.   1.]
 [236.   4.   2.]
 [235.   4.   3.]
 [229.   4.   4.]
 [243.   4.   5.]
 [264.   4.   6.]
 [272.   4