In [11]:
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from itertools import chain
from scipy.interpolate import make_interp_spline
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## 加载数据并标准化

In [12]:
def load_data(path):
    df = pd.read_excel(path, header=1)
    columns = df.columns
    MAX = list(np.zeros(5))
    MIN = list(np.zeros(5))
    
    df.fillna(df.mean(), inplace=True)

    for i in range(0, 2):
        print(np.max(df[columns[i]]))
        print(np.min(df[columns[i]]))
        MAX.append(np.max(df[columns[i]]))
        MIN.append(np.min(df[columns[i]]))
        df[columns[i]] = (df[columns[i]] - MIN[-1]) / (MAX[-1] - MIN[-1])
    return df, MAX[0], MIN[0]

In [13]:
path = r'3-1.xlsx'
df_tmp, m_, n_ = load_data(path)

5519
3
1.0
0.0


In [30]:
df_tmp.head()

Unnamed: 0,2,0
0,0.0,0.0
1,0.000181,0.0
2,0.000363,0.0
3,0.000544,0.0
4,0.000725,0.0


In [14]:
class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __getitem__(self, item):
        return self.data[item]

    def __len__(self):
        return len(self.data)

In [15]:
batch_size = 30
num = 240

In [31]:
data, m, n = load_data(path)
load = data[data.columns[1]]
load = load.tolist()
data = data.values.tolist()
len(load)

5519
3
1.0
0.0


5517

In [45]:
seq = []
for i in range(0, len(data) - 5 - num, num):
    train_seq = []
    train_label = []
    for j in range(i, i + 5):
        x = [load[j]]
        for c in range(1, 2):
            x.append(data[j][c])
        train_seq.append(x)
    for j in range(i + 5, i + 5 + num):
        train_label.append(load[j])
    train_seq = torch.FloatTensor(train_seq)
    train_label = torch.FloatTensor(train_label).view(-1)
    seq.append((train_seq, train_label))

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

22

In [None]:

Dtr = seq[0:int(len(seq) * 0.7)]
Dte = seq[int(len(seq) * 0.7):len(seq)]

train_len = int(len(Dtr) / batch_size) * batch_size
test_len = int(len(Dte) / batch_size) * batch_size
Dtr, Dte = Dtr[:train_len], Dte[:test_len]

train = MyDataset(Dtr)
test = MyDataset(Dte)

Dtr = DataLoader(dataset=train, batch_size=batch_size, shuffle=False, num_workers=0)
Dte = DataLoader(dataset=test, batch_size=batch_size, shuffle=False, num_workers=0)

In [20]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, batch_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.num_directions = 1
        self.batch_size = batch_size
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input_seq):
        h_0 = torch.randn(self.num_directions * self.num_layers, self.batch_size, self.hidden_size).to(device)
        c_0 = torch.randn(self.num_directions * self.num_layers, self.batch_size, self.hidden_size).to(device)
        # print(input_seq.size())
        seq_len = input_seq.shape[1]
        # input(batch_size, seq_len, input_size)
        input_seq = input_seq.view(self.batch_size, seq_len, self.input_size)
        # output(batch_size, seq_len, num_directions * hidden_size)
        output, _ = self.lstm(input_seq, (h_0, c_0))
        # print('output.size=', output.size())
        # print(self.batch_size * seq_len, self.hidden_size)
        output = output.contiguous().view(self.batch_size * seq_len, self.hidden_size)  # (5 * 30, 64)
        pred = self.linear(output)  # pred()
        # print('pred=', pred.shape)
        pred = pred.view(self.batch_size, seq_len, -1)
        pred = pred[:, -1, :]
        return pred


In [27]:
input_size, hidden_size, num_layers, output_size = 7, 64, 1, num

model = LSTM(input_size, hidden_size, num_layers, output_size, batch_size=batch_size).to(device)

loss_function = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
    
epochs = 10
for i in range(epochs):
    cnt = 0
    print('当前', i)
    for (seq, label) in Dtr:
        cnt += 1
        seq = seq.to(device)
        label = label.to(device)
        print(seq.size())
        y_pred = model(seq)
        loss = loss_function(y_pred, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if cnt % 100 == 0:
            print('epoch', i, ':', cnt - 100, '~', cnt, loss.item())
# state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict()}
# torch.save(state, LSTM_PATH)

当前 0
当前 1
当前 2
当前 3
当前 4
当前 5
当前 6
当前 7
当前 8
当前 9


In [23]:
MAX = m
MIN = n
    
pred = []
y = []

input_size, hidden_size, num_layers, output_size = 7, 64, 1, num
model = LSTM(input_size, hidden_size, num_layers, output_size, batch_size=batch_size).to(device)

model.eval()

for (seq, target) in Dte:
    target = list(chain.from_iterable(target.data.tolist()))
    y.extend(target)
    seq = seq.to(device)
    # print(seq.size())
    with torch.no_grad():
        y_pred = model(seq)
        print(seq.size())
        y_pred = list(chain.from_iterable(y_pred.data.tolist()))
        # print(len(y_pred))
        pred.extend(y_pred)

y, pred = np.array([y]), np.array([pred])
y = (MAX - MIN) * y + MIN
pred = (MAX - MIN) * pred + MIN
# print('accuracy:', (y, pred))
# print(len(pred))

In [28]:
print(y.shape)
print(pred.shape)

(1, 0)
(1, 0)


In [25]:
days = 100
lim = y.shape[1] - days 
x = [i for i in range(1, days+1)]

x_smooth = np.linspace(np.min(x), np.max(x), 900)
y_smooth = make_interp_spline(x, y.T[-days:])(x_smooth)

plt.plot(x_smooth, y_smooth, c='green', marker='*', ms=1, alpha=0.75, label='true')

y_smooth = make_interp_spline(x, pred.T[-days:])(x_smooth)
plt.plot(x_smooth, y_smooth, c='red', marker='o', ms=1, alpha=0.75, label='pred')
plt.grid(axis='y')

plt.legend()
plt.show()

ValueError: Shapes of x (100,) and y (0, 1) are incompatible

In [26]:
input_seq = []
for j in range(len(data)-24*batch_size, len(data)):
    x = [load[j]]
    for c in range(2, 8):
        x.append(data[j][c])
    input_seq.append(x)
input_seq = torch.FloatTensor(input_seq)

print(input_seq.size())
input_seq = input_seq.reshape(batch_size, 24, 7)

# xxx = torch.rand(30,24,7)
# print(xxx.size())
output = model(input_seq)
output = (MAX - MIN) * output + MIN
output[-1]

IndexError: list index out of range