In [165]:
import  numpy as np
import  torch
import  torch.nn as nn
import  torch.optim as optim
from    matplotlib import pyplot as plt
import pandas as pd
from tqdm.notebook import tqdm as tqdm
import random
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

In [166]:
input_size = 4
hidden_size = 64
output_size = 1
lr=0.001

In [167]:
data = pd.read_csv('data1.csv',encoding='utf-8')
date = data['take_at'].str.split(' ').str[0]
data['year-month-day']=date
data['year-month'] = date.str.split('-').str[0]+'-'+date.str.split('-').str[1]
data_last = data[['status','payment','year-month-day','year-month']]
data_last=data_last[data_last['status']==6]
data_last.head()

Unnamed: 0,status,payment,year-month-day,year-month
0,6,3350.0,2020-01-01,2020-01
1,6,2090.0,2020-01-01,2020-01
2,6,3590.0,2020-01-01,2020-01
3,6,3290.0,2020-01-01,2020-01
4,6,3090.0,2020-01-01,2020-01


In [168]:
data_day = data_last.groupby('year-month-day')
# 每一天的GMV数据(status=6)
day_ = list(data_day)
gmv_day= []
for i in range(len(list(data_day))):
    gmv_1=day_[i][1]['payment'].sum()
    gmv_day.append(gmv_1)
print(f'天数：{len(gmv_day)}天')
    
# 由于一共547天,不是7的整数倍，所以取到前546天的数据作为周的数据来源
gmv_week=[]
m,n=0,0
for i in range(len(gmv_day)):
    m+=gmv_day[i]
    n+=1
    if n%7==0:
        gmv_week.append(m)
        n=0
        m=0
print(f'周数：{len(gmv_week)}周')


天数：547天
周数：78周


In [227]:
print(f'训练集的周gmv最大值是：{max(gmv_week[:52])}')
mid_ = []
x_data = []
for i in range(len(gmv_week)):
    if i>len(gmv_week)-4:
        break
    mid_.append(gmv_week[i])
    mid_.append(gmv_week[i+1])
    mid_.append(gmv_week[i+2])
    mid_.append(gmv_week[i+3])
    x_data.append(mid_)
    mid_ = []
from sklearn import preprocessing
MinMaxScaler = preprocessing.MinMaxScaler()
x_data=MinMaxScaler.fit_transform(x_data)
# x_data = np.log(x_data)
y_data=[x[0] for x in x_data][1:]
x_data = x_data[:-1]
print(len(x_data))
print(len(y_data))
print(np.array(x_data))
print(np.array(y_data))

训练集的周gmv最大值是：235778339.27999997
74
74
[[0.18625629 0.1231477  0.12798655 0.072953  ]
 [0.1231477  0.12798655 0.06717697 0.06838171]
 [0.12798655 0.06717697 0.0625772  0.13334199]
 [0.06717697 0.0625772  0.12794221 0.18272052]
 [0.0625772  0.12794221 0.17762841 0.1228812 ]
 [0.12794221 0.17762841 0.11741624 0.08714091]
 [0.17762841 0.11741624 0.08145327 0.28135274]
 [0.11741624 0.08145327 0.27687516 0.06471294]
 [0.08145327 0.27687516 0.05888557 0.06316523]
 [0.27687516 0.05888557 0.05732822 0.02489737]
 [0.05888557 0.05732822 0.01882192 0.06315001]
 [0.05732822 0.01882192 0.0573129  0.03742457]
 [0.01882192 0.0573129  0.03142717 0.11198612]
 [0.0573129  0.03142717 0.10645328 0.06984555]
 [0.03142717 0.10645328 0.06405015 0.17709275]
 [0.10645328 0.06405015 0.17196557 0.10467575]
 [0.06405015 0.17196557 0.09909736 0.08908417]
 [0.17196557 0.09909736 0.08340865 0.10279297]
 [0.09909736 0.08340865 0.09720286 0.44845354]
 [0.08340865 0.09720286 0.44501709 0.13583873]
 [0.09720286 0.4450170

In [170]:
class Net(nn.Module):
    def __init__(self, ):
        super(Net, self).__init__()

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        for p in self.rnn.parameters():
            nn.init.normal_(p, mean=0.0, std=0.001)
        
        self.linear1 = nn.Linear(hidden_size, 128)
        self.linear2 = nn.Linear(128, output_size)
    def forward(self, x, hidden_prev):
        out, hidden_prev = self.rnn(x, hidden_prev)
       # [b, seq, h]
        out = out.view(-1, hidden_size)
        out = self.linear1(out)
        out = self.linear2(out)
        out = out.unsqueeze(dim=0)
        return out, hidden_prev


In [183]:
# 构造训练数据
x = torch.tensor(np.reshape(x_data, (1, len(x_data), 4)))
y = torch.tensor(np.reshape(y_data, (1, len(x_data), 1)))
x_train = x[:,:50,:]
x_test = x[:,50:,:]
y_train = y[:,:50,:]
y_test = y[:,50:,:]
print(f'训练数据{len(x_data[:50])}条')
print(f'测试数据{len(x_data[50:])}条')

训练数据50条
测试数据24条


In [231]:
set_seed(seed=42)
print(f'开始训练，训练集的数据长度是{x_train.shape[1]}')
# 定义模型
model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)
hidden_prev = torch.ones(1, 1, hidden_size)
for iter in tqdm(range(2000)):
    x_train = torch.as_tensor(x_train, dtype=torch.float32)
    output, hidden_prev = model(x_train, hidden_prev)
    hidden_prev = hidden_prev.detach()
    output = output.double()
    loss = criterion(output, y_train)
    model.zero_grad()
    loss.backward()
    optimizer.step()
    if iter % 100 == 0:
        print("Iteration: {} loss {}".format(iter, loss.item()),end='\n')

开始训练，训练集的数据长度是50


HBox(children=(FloatProgress(value=0.0, max=2000.0), HTML(value='')))

Iteration: 0 loss 0.12318037154797608
Iteration: 100 loss 0.00012717889143988424
Iteration: 200 loss 2.7766480928741285e-06
Iteration: 300 loss 2.3219488153482537e-06
Iteration: 400 loss 1.9208948192679085e-06
Iteration: 500 loss 1.5471000325532172e-06
Iteration: 600 loss 1.2024270051342456e-06
Iteration: 700 loss 9.016620840746619e-07
Iteration: 800 loss 6.525344881693209e-07
Iteration: 900 loss 4.556675277491393e-07
Iteration: 1000 loss 3.0754759232904146e-07
Iteration: 1100 loss 2.0222481443943253e-07
Iteration: 1200 loss 1.3195012087721612e-07
Iteration: 1300 loss 8.805691200404632e-08
Iteration: 1400 loss 6.23117796882928e-08
Iteration: 1500 loss 4.804273595040857e-08
Iteration: 1600 loss 4.0524505168538075e-08
Iteration: 1700 loss 3.67330412798665e-08
Iteration: 1800 loss 3.488290030573934e-08
Iteration: 1900 loss 3.3987282607857e-08



In [234]:
# 测试集上loss
print(f'开始测试，测试集的数据长度是{x_test.shape[1]}')
pre_last=[]
tru_last=[]
with torch.no_grad():
    for _ in range(1):
        x_test = torch.as_tensor(x_test, dtype=torch.float32)
        (pred1, hidden_prev) = model(x_test, hidden_prev)
        pred1 = pred1.double()
        loss1 = criterion(pred1, y_test)
print(f"test_loss {loss1.item()}")

for i ,j in zip(np.array(pred1[0]),np.array(y_test[0])):
    pre_last.append(i[0]*235778339)
    tru_last.append(j[0]*235778339)

开始测试，测试集的数据长度是24
test_loss 3.789853078204497e-08


### 结果记录
1000:test_loss 7.720046910337566e-08 

2000:test_loss 3.789853078204497e-08

In [237]:
from pyecharts.charts import Line
from pyecharts import options as opts

def mape(y_true, y_pred):
    """
    参数:
    y_true -- 测试集目标真实值
    y_pred -- 测试集目标预测值
    返回:
    mape -- MAPE 评价指标
    """
    n = len(y_true)
    mape = sum(np.abs((y_true - y_pred)/y_true))/n*100
    return mape

print(f'------周级别上rmse：{np.sqrt(np.average((np.array(pre_last) - np.array(tru_last)) ** 2))}')
print(f'------周级别上mape：{mape(np.array(tru_last),np.array(pre_last))}%')
print(f'------周级别上mape：{mape(np.array(y_test),np.array(pred1))}%')

print(np.array(pre_last))
print(np.array(tru_last))



year_num1 = tru_last
year_num2 = pre_last
def line_charts() -> Line:
    line = (
        Line()
        .add_xaxis(range(len(year_num1)))
        .add_yaxis(
            "真实值",
            year_num1
        )
        .add_yaxis(
            "预测值",
            year_num2
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="rnn周gmv预测图")
                         ,yaxis_opts=opts.AxisOpts(name="GMV")
                         ,xaxis_opts=opts.AxisOpts(name="时间"),)
        .set_series_opts(
            label_opts=opts.LabelOpts(is_show=False),) #
    )
    return line

line = line_charts()
line.render("rnn模型在测试集上预测结果.html")

------周级别上rmse：45900.25421440089
------周级别上mape：0.2854649770345436%
------周级别上mape：[[0.77238435]
 [0.05883066]
 [0.09629189]
 [0.24180017]
 [0.10355684]
 [0.06417394]
 [0.0666201 ]
 [0.05223649]
 [0.04440473]
 [0.10166025]
 [0.09310065]
 [0.01302756]
 [3.87271613]
 [0.24978808]
 [0.01579057]
 [0.08563712]
 [0.16484641]
 [0.3683238 ]
 [0.01097015]
 [0.04714082]
 [0.03292135]
 [0.09708616]
 [0.10936409]
 [0.08848715]]%
[9.90424477e+06 1.80927330e+07 2.01442255e+07 1.25990794e+07
 5.94399841e+07 2.88104716e+07 3.17884998e+07 2.20059116e+07
 1.00034185e+07 2.49627401e+07 1.03999493e+08 8.09366590e+06
 2.25106777e+06 1.54633051e+07 2.55496979e+07 5.22961800e+07
 1.20152678e+07 1.13747504e+07 1.41559068e+07 1.48996233e+07
 1.42294768e+07 5.81570029e+07 8.97626528e+07 4.67884158e+07]
[9.98133907e+06 1.80820952e+07 2.01248469e+07 1.26296178e+07
 5.93784936e+07 2.87919947e+07 3.18096914e+07 2.19944225e+07
 9.99897843e+06 2.49881431e+07 1.03902759e+08 8.09472044e+06
 2.34175738e+06 1.54247758e+0

'E:\\py_\\out_work\\YJF_jp_work\\rnn模型在测试集上预测结果.html'