In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the raw dataset from worksheet 'Sheet1' of the Excel workbook.
df = pd.read_excel('dataframe2.xlsx', sheet_name='Sheet1')
# Show the loaded frame as the cell output.
df

In [None]:
# Drop the columns that are not used, order the rows by contest number,
# then discard that sort key and renumber the index from zero.
df = (
    df.drop(columns=['Unnamed: 0', 'percentage of hard mode', '%', 'easywords',
                     'hardwords', 'easywordsband', 'hardwordsband'])
      .sort_values('Contest number', ascending=True)
      .drop(columns='Contest number')
      .reset_index(drop=True)
)
# Keep the columns at positions 1-9. reset_index() without drop=True
# prepends the row numbers as an extra 'index' column.
data = df.iloc[:, 1:10].copy().reset_index()
data

In [None]:
# First, forecast 'Number of  reported results'.
# The column is selected by name rather than by position so that re-running
# this cell is idempotent: a positional 1:2 slice applied to the
# already-reduced single-column frame would return an empty DataFrame.
data = data[['Number of  reported results']].copy()
data

In [None]:
# Open a small figure for a quick look at the raw target series.
plt.figure(figsize=(5, 3))

# Series.plot() draws the values against the row index on the current figure.
data['Number of  reported results'].plot()
plt.show()

In [None]:
def difference(data_set,interval=1):
    """Return the lagged differences of a sequence as a pandas Series.

    Element k of the result is ``data_set[k + interval] - data_set[k]``, so
    the result has ``len(data_set) - interval`` entries.

    Args:
        data_set: Indexable sequence of values supporting subtraction.
        interval: Distance between the two elements being subtracted.

    Returns:
        pandas.Series holding the differences in their original order.
    """
    deltas = [
        data_set[pos] - data_set[pos - interval]
        for pos in range(interval, len(data_set))
    ]
    return pd.Series(deltas)
 
# `data` is the single-column DataFrame produced by the preprocessing above.
raw_value=data.values
# data.values has shape (n, 1). Differencing it row by row would yield an
# object-dtype Series of 1-element arrays, and the later float conversion
# would rely on NumPy's deprecated size-1-array-to-scalar behaviour. Flatten
# to 1-D for differencing so data_diff is a plain numeric Series; raw_value
# itself keeps its original 2-D shape for the cells that use it later.
data_diff=difference(raw_value.ravel(),1)

In [None]:
def timeseries_to_supervised(data,lag=1):
    """Frame a series as a supervised-learning table of lagged inputs and target.

    The output columns are the series shifted by 1, 2, ..., ``lag`` steps
    (the inputs), followed by the unshifted series (the target). Leading
    rows that have no history are filled with 0.

    Args:
        data: Series, array or other input accepted by ``pd.DataFrame``.
        lag: Number of lagged copies to use as inputs; must be >= 1.

    Returns:
        pandas.DataFrame with the lagged input columns first and the target
        column(s) last, with the same number of rows as ``data``.

    Raises:
        ValueError: If ``lag`` is smaller than 1.
    """
    if lag < 1:
        raise ValueError("lag must be a positive integer")
    df = pd.DataFrame(data)
    # Honour `lag`; the shift used to be hard-coded to 1 regardless of it.
    columns = [df.shift(step) for step in range(1, lag + 1)]
    columns.append(df)
    framed = pd.concat(columns, axis=1)
    # Shifting introduces NaN in the first rows; replace them with 0.
    return framed.fillna(0)
 
# Frame the differenced series as (previous value, current value) rows using a one-step lag.
seq=timeseries_to_supervised(data_diff, 1)
# Continue with the underlying NumPy array rather than the DataFrame.
data_seq=seq.values
data_seq

In [None]:
# Hold out the trailing 20% of the supervised rows as the test set.
# The split is by position (no shuffling), so row order is preserved.
test_ratio = 0.2
train_size = int((1 - test_ratio) * len(data_seq))
test_size = len(data_seq) - train_size
data_train = data_seq[:train_size]
data_test = data_seq[train_size:]


In [None]:
from sklearn.preprocessing import MinMaxScaler
def scale(train,test):
    """Scale the train and test sets to [-1, 1] using training statistics only.

    Args:
        train: 2-D array of training rows; the scaler is fitted on this.
        test: 2-D array of test rows with the same number of columns.

    Returns:
        Tuple ``(scaler, train_scaled, test_scaled)``. ``scaler`` is the
        MinMaxScaler fitted on ``train``, so it can later be used to
        inverse-transform values back to the training scale.
    """
    # Scaler that maps each column into the range [-1, 1].
    scaler = MinMaxScaler(feature_range=(-1, 1))
    # Fit on the training set only.
    train_scaled = scaler.fit_transform(train)
    # Only transform the test set. Refitting here would leak test statistics
    # and overwrite the fit, so the returned scaler would no longer match
    # the scale the training data was put on.
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled
 
# Scale both splits; the scaler is kept because the prediction loop passes it to invert_scale.
scaler,train_scaled,test_scaled=scale(data_train,data_test)

In [None]:
# Split the training rows into inputs (all but the last column) and target
# (last column), then reshape the inputs to 3-D [samples, 1, features].
# NOTE(review): this uses the unscaled `data_train`, whereas the model is
# trained on `train_scaled`. These X / y values are not consumed by any later
# cell in this notebook (fit_lstm builds its own, and the prediction loop
# reassigns them), so this looks like a scratch check. Confirm before reuse.
X = data_train[:, :-1]
y = data_train[:, -1]
X = X.reshape(X.shape[0], 1, X.shape[1])

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM

def fit_lstm(data_train,batch_size,nb_epoch,neurons):
    """Build and train a single-layer stateful LSTM followed by a Dense(1) output.

    Args:
        data_train: 2-D array whose last column is the target and whose
            remaining columns are the input features.
        batch_size: Batch size used both for the model's fixed batch input
            shape and for fitting.
        nb_epoch: Number of passes over the training data.
        neurons: Number of units in the LSTM layer.

    Returns:
        The compiled and trained Keras model.
    """
    # Separate the inputs (all but the last column) from the target (last column).
    features = data_train[:, :-1]
    target = data_train[:, -1]
    # Turn the 2-D inputs into 3-D data of shape [N, 1, n_features].
    features = features.reshape(features.shape[0], 1, features.shape[1])

    model = Sequential()
    model.add(LSTM(neurons,
                   batch_input_shape=(batch_size, features.shape[1], features.shape[2]),
                   stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # One fit() call per epoch so the LSTM state can be reset in between.
    for _ in range(nb_epoch):
        # shuffle=False keeps the samples in their original order.
        model.fit(features, target, batch_size=batch_size, verbose=1, shuffle=False)
        # Reset the network state after every pass; the state is distinct
        # from the learned weights.
        model.reset_states()
    return model
 
# Build and train the LSTM model: batch size 1, 10 training epochs, 4 units in the LSTM layer.
lstm_model=fit_lstm(train_scaled,batch_size=1,nb_epoch=10,neurons=4)

In [None]:
def forecast_lstm(model,batch_size,X):
    """Run a one-sample forecast and return the scalar prediction.

    Args:
        model: Object exposing ``predict(inputs, batch_size=...)``.
        batch_size: Batch size forwarded to ``model.predict``.
        X: 1-D array holding the input features of a single sample.

    Returns:
        The element at position [0, 0] of the model's prediction.
    """
    # Reshape the 1-D feature vector into a 3-D tensor of shape [1, 1, len(X)].
    sample = X.reshape(1, 1, len(X))
    # The prediction is indexed as a 2-D array; only its first entry is needed.
    prediction = model.predict(sample, batch_size=batch_size)
    return prediction[0, 0]
 
# # Take one sample from the test set and split it into X and y
# X,y=data_test[i,0:-1],data_test[i,-1]
# # Pass the trained model and the test sample to the forecast function
# yhat=forecast_lstm(lstm_model,1,X)

In [None]:
# These two helpers are called by the prediction loop, so they must be real
# definitions. As commented-out code they caused a NameError on a fresh run.

def invert_difference(history,yhat,interval=1):
    """Undo differencing for one predicted value.

    Args:
        history: Indexable sequence of the original (undifferenced) values.
        yhat: Predicted difference.
        interval: Offset from the end of ``history`` of the value to add back.

    Returns:
        ``yhat + history[-interval]``.
    """
    return yhat+history[-interval]


def invert_scale(scaler,X,y):
    """Map a scaled prediction back to the original scale.

    The scaler works on whole rows, so the input features ``X`` and the
    value ``y`` are joined into a single row before the inverse transform.

    Args:
        scaler: Fitted scaler exposing ``inverse_transform``.
        X: 1-D iterable with the scaled input features of the sample.
        y: Scaled value to invert (a scalar).

    Returns:
        The last entry of the inverse-transformed row, i.e. the unscaled ``y``.
    """
    # Join X and y into one flat row.
    row = np.array(list(X) + [y])
    # The scaler expects a 2-D array of shape [1, n_columns].
    row = row.reshape(1, len(row))
    inverted = scaler.inverse_transform(row)
    # Only the y value (last column) is needed.
    return inverted[0, -1]

# Example usage for a single test sample i (this is what the prediction loop does):
# # Undo the scaling of the predicted value
# yhat=invert_scale(scaler,X,yhat)
# # Undo the differencing of the predicted value
# yhat=invert_difference(raw_value,yhat,len(test_scaled)+1-i)

In [None]:
# Forecast every test sample one step ahead and convert each prediction back
# to the original scale and level.
predictions=list()
for i in range(len(test_scaled)):
    # Split the i-th *scaled* test row into X and y. The model was trained on
    # scaled data and invert_scale expects scaled inputs, so the raw
    # data_test rows must not be used here.
    X,y=test_scaled[i,0:-1],test_scaled[i,-1]
    # Pass the trained model and the test sample to the forecast function.
    yhat=forecast_lstm(lstm_model,1,X)
    # Undo the scaling of the predicted value.
    yhat=invert_scale(scaler,X,yhat)
    # Undo the differencing: the offset len(test_scaled)+1-i counts back from
    # the end of raw_value to the observation just before the predicted one.
    yhat=invert_difference(raw_value,yhat,len(test_scaled)+1-i)
    # Store the prediction.
    predictions.append(yhat)

In [None]:
# Compare the last `test_size` observed values with the model's predictions.
plt.plot(raw_value[-test_size:], label='true')
plt.plot(predictions, label='pred')
plt.legend()
plt.show()