In [1]:
import  numpy as np
import  torch
import  torch.nn as nn
import  torch.optim as optim
from    matplotlib import pyplot as plt
import pandas as pd
from tqdm.notebook import tqdm as tqdm
import random
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

In [2]:
input_size = 2
hidden_size = 32
output_size = 1
lr=0.001

In [3]:
data = pd.read_csv('data1.csv',encoding='utf-8')
date = data['take_at'].str.split(' ').str[0]
data['year-month-day']=date
data['year-month'] = date.str.split('-').str[0]+'-'+date.str.split('-').str[1]
data_last = data[['status','payment','year-month-day','year-month']]
data_last=data_last[data_last['status']==6]
data_last.head()

Unnamed: 0,status,payment,year-month-day,year-month
0,6,3350.0,2020-01-01,2020-01
1,6,2090.0,2020-01-01,2020-01
2,6,3590.0,2020-01-01,2020-01
3,6,3290.0,2020-01-01,2020-01
4,6,3090.0,2020-01-01,2020-01


In [4]:
data_day = data_last.groupby('year-month-day')
# 每一天的GMV数据(status=6)
day_ = list(data_day)
gmv_day= []
for i in range(len(list(data_day))):
    gmv_1=day_[i][1]['payment'].sum()
    gmv_day.append(gmv_1)
print(f'天数：{len(gmv_day)}天')
    
# 由于一共547天,不是7的整数倍，所以取到前546天的数据作为周的数据来源
gmv_week=[]
m,n=0,0
for i in range(len(gmv_day)):
    m+=gmv_day[i]
    n+=1
    if n%7==0:
        gmv_week.append(m)
        n=0
        m=0
print(f'周数：{len(gmv_week)}周')

天数：547天
周数：78周


In [5]:
print(f'训练集的周gmv最大值是：{max(gmv_week[:52])}')
mid_ = []
x_data = []
for i in range(len(gmv_week)):
    if i>len(gmv_week)-3:
        break
    mid_.append(gmv_week[i])
    mid_.append(gmv_week[i+1])
    x_data.append(mid_)
    mid_ = []
    
# from sklearn import preprocessing
# RobustScaler = preprocessing.RobustScaler()
# x_data=RobustScaler.fit_transform(x_data)

x_data = np.log(x_data)
x_data = x_data/np.log(max(gmv_week[:52]))
y_data=[x[1] for x in x_data][1:]
x_data = x_data[:-1]
print(len(x_data))
print(len(y_data))
print(np.array(x_data))
print(np.array(y_data))

训练集的周gmv最大值是：235778339.27999997
75
75
[[0.91978803 0.90227695]
 [0.90227695 0.90384996]
 [0.90384996 0.87943916]
 [0.87943916 0.87702593]
 [0.87702593 0.90383576]
 [0.90383576 0.91771535]
 [0.91771535 0.90034988]
 [0.90034988 0.88628523]
 [0.88628523 0.93766217]
 [0.93766217 0.87500451]
 [0.87500451 0.87412754]
 [0.87412754 0.84573072]
 [0.84573072 0.87411884]
 [0.87411884 0.85682973]
 [0.85682973 0.89645163]
 [0.89645163 0.87781099]
 [0.87781099 0.91630852]
 [0.91630852 0.89366148]
 [0.89366148 0.88715631]
 [0.88715631 0.8929179 ]
 [0.8929179  0.96008791]
 [0.96008791 0.90463424]
 [0.90463424 0.95604696]
 [0.95604696 0.93997503]
 [0.93997503 0.87862646]
 [0.87862646 0.84170826]
 [0.84170826 0.86774768]
 [0.86774768 0.85952107]
 [0.85952107 0.86141722]
 [0.86141722 0.85262826]
 [0.85262826 0.87680306]
 [0.87680306 0.87366386]
 [0.87366386 0.89875791]
 [0.89875791 0.85245711]
 [0.85245711 0.84320197]
 [0.84320197 0.89419938]
 [0.89419938 0.86393511]
 [0.86393511 0.86294354]
 [0.86294354

In [6]:
class Net(nn.Module):
    def __init__(self, ):
        super(Net, self).__init__()

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        for p in self.rnn.parameters():
            nn.init.normal_(p, mean=0.0, std=0.001)
        
        self.linear1 = nn.Linear(hidden_size, 128)
        self.linear2 = nn.Linear(128, output_size)
    def forward(self, x, hidden_prev):
        out, hidden_prev = self.rnn(x, hidden_prev)
       # [b, seq, h]
        out = out.view(-1, hidden_size)
        out = self.linear1(out)
        out = self.linear2(out)
        out = out.unsqueeze(dim=0)
        return out, hidden_prev


In [7]:
# 构造训练数据
x = torch.tensor(np.reshape(x_data, (1, len(x_data), 2)))
y = torch.tensor(np.reshape(y_data, (1, len(x_data), 1)))
x_train = x[:,:50,:]
x_test = x[:,50:,:]
y_train = y[:,:50,:]
y_test = y[:,50:,:]
print(f'训练数据{len(x_data[:50])}条')
print(f'测试数据{len(x_data[50:])}条')

训练数据50条
测试数据25条


In [8]:
set_seed(seed=42)
print(f'开始训练，训练集的数据长度是{x_train.shape[1]}')
# 定义模型
model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)
hidden_prev = torch.ones(1, 1, hidden_size)
for iter in tqdm(range(2000)):
    x_train = torch.as_tensor(x_train, dtype=torch.float32)
    output, hidden_prev = model(x_train, hidden_prev)
    hidden_prev = hidden_prev.detach()
    output = output.double()
    loss = criterion(output, y_train)
    model.zero_grad()
    loss.backward()
    optimizer.step()
    if iter % 100 == 0:
        x_test = torch.as_tensor(x_test, dtype=torch.float32)
        (pred1, hidden_prev) = model(x_test, hidden_prev)
        pred1 = pred1.double()
        loss1 = criterion(pred1, y_test)
        print(f"Iteration:{iter}---train_loss:{loss.item()}---test_loss {loss1.item()}")
pre_last=[]
tru_last=[]
for i ,j in zip(np.array(pred1.detach().numpy()[0]),np.array(y_test[0])):
    pre_last.append(np.exp(i[0]*np.log(max(gmv_week[:52]))))
    tru_last.append(np.exp(j[0]*np.log(max(gmv_week[:52]))))

开始训练，训练集的数据长度是50


HBox(children=(FloatProgress(value=0.0, max=2000.0), HTML(value='')))

Iteration:0---train_loss:0.6014301827734913---test_loss 0.5752097800165364
Iteration:100---train_loss:0.001590686511086706---test_loss 0.0010544979272007504
Iteration:200---train_loss:0.0015884770674699509---test_loss 0.0010525194552436473
Iteration:300---train_loss:0.001586855882063477---test_loss 0.0010521039606474869
Iteration:400---train_loss:0.0015849035412563248---test_loss 0.0010512911310400176
Iteration:500---train_loss:0.0015826536630146328---test_loss 0.0010503440735735144
Iteration:600---train_loss:0.0015801305801892956---test_loss 0.0010492715862111277
Iteration:700---train_loss:0.001577353039154866---test_loss 0.0010480785747838776
Iteration:800---train_loss:0.0015743372405875734---test_loss 0.0010467674273580668
Iteration:900---train_loss:0.001571099681480556---test_loss 0.0010453416779024173
Iteration:1000---train_loss:0.0015676552185448123---test_loss 0.001043804914295801
Iteration:1100---train_loss:0.001564023375675985---test_loss 0.0010421634537706823
Iteration:1200--

### 结果记录
1000:test_loss 7.720046910337566e-08 

2000:test_loss 3.789853078204497e-08

In [9]:
from pyecharts.charts import Line
from pyecharts import options as opts

def mape(y_true, y_pred):
    """
    参数:
    y_true -- 测试集目标真实值
    y_pred -- 测试集目标预测值
    返回:
    mape -- MAPE 评价指标
    """
    n = len(y_true)
    mape = sum(np.abs((y_true - y_pred)/y_true))/n*100
    return mape

print(f'------周级别上rmse：{np.sqrt(np.average((np.array(pre_last) - np.array(tru_last)) ** 2))}')
print(f'------周级别上mape：{mape(np.array(tru_last),np.array(pre_last))}%')
print(np.array(pre_last))
print(np.array(tru_last))



year_num1 = tru_last
year_num2 = pre_last
def line_charts() -> Line:
    line = (
        Line()
        .add_xaxis(range(len(year_num1)))
        .add_yaxis(
            "真实值",
            year_num1
        )
        .add_yaxis(
            "预测值",
            year_num2
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="rnn周gmv预测图")
                         ,yaxis_opts=opts.AxisOpts(name="GMV")
                         ,xaxis_opts=opts.AxisOpts(name="时间"),)
        .set_series_opts(
            label_opts=opts.LabelOpts(is_show=False),) #
    )
    return line

line = line_charts()
line.render("rnn模型在测试集上预测结果.html")

------周级别上rmse：28509758.358424224
------周级别上mape：45.69616339372893%
[26662057.59217376 26588263.70059847 27994352.68542106 26841023.59176973
 31542944.64722073 32361439.69816523 30799393.24757595 29605893.66765099
 26430625.72222625 27645814.86065626 36006646.34076471 29841013.36635747
 22625375.73083143 24611807.09134269 28480725.0477804  32614919.93420494
 28965460.90027209 25657170.17901597 26086085.03533568 26609628.2004769
 26578519.36534303 31687022.11520366 37917960.33714967 35988694.24042961
 34027527.85298485]
[2.52426364e+07 2.72181972e+07 1.99695038e+07 6.51806993e+07
 3.56002621e+07 3.85186998e+07 2.90262781e+07 1.74253923e+07
 3.19215282e+07 1.08240457e+08 1.55837699e+07 1.00200353e+07
 2.26727225e+07 3.24685145e+07 5.82880648e+07 1.93945481e+07
 1.87965796e+07 2.14440879e+07 2.21580539e+07 2.15122107e+07
 6.39448340e+07 9.44706167e+07 5.29647342e+07 5.47208260e+07
 9.75810444e+07]


'E:\\py_\\out_work\\YJF_jp_work\\rnn模型在测试集上预测结果.html'