In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
import numpy as np
import pandas as pd
import re

In [16]:
# Load the raw interval data. `df_extracted` is an independent copy (not an
# alias of `df`), so later column edits on the working frame never change the
# raw frame that was read from disk.
df = pd.read_csv("Interval_Data.csv")
df_extracted = df.copy()
df_extracted.head()

Unnamed: 0,Date,Interval_Price,midpoint
0,2022-07-01,"[135.660004, 139.039993]",137.349998
1,2022-07-05,"[136.929993, 141.610001]",139.269997
2,2022-07-06,"[141.080002, 144.119995]",142.599998
3,2022-07-07,"[143.279999, 146.550003]",144.915001
4,2022-07-08,"[145.0, 147.550003]",146.275002


In [17]:
def str_to_interval(interval_str):
    """Parse a string such as ``"[135.66, 139.04]"`` into a ``pd.Interval``.

    Parameters
    ----------
    interval_str : str or any
        Text containing the two interval bounds. Non-string values are
        returned unchanged.

    Returns
    -------
    pd.Interval, None, or the input itself
        ``pd.Interval(first, second)`` when exactly two numbers are found in
        the text, ``None`` for any other count, and the untouched input when
        it is not a string.

    Raises
    ------
    ValueError
        Raised by ``pd.Interval`` when the first number is greater than the
        second.
    """
    if not isinstance(interval_str, str):
        return interval_str

    # One pattern for every numeric form so that an optional sign is kept for
    # integers as well as decimals ("-5", "3.14", ".5"), and an exponent stays
    # attached to its mantissa ("1e-3") instead of being read as a second number.
    number_pattern = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    numbers = re.findall(number_pattern, interval_str)
    if len(numbers) == 2:
        return pd.Interval(float(numbers[0]), float(numbers[1]))
    return None

# Parse every "[low, high]" string into a pd.Interval. The parsed values are
# kept in a separate Series instead of being written back into the frame, so
# the frame read from the CSV stays untouched and this cell gives the same
# result every time it is re-run.
intervals = df['Interval_Price'].apply(str_to_interval)

# str_to_interval returns None when it cannot find exactly two numbers; fail
# here with the offending row labels rather than with an AttributeError below.
unparsed = intervals.isna()
if unparsed.any():
    raise ValueError(
        f"Could not parse Interval_Price for rows: {list(intervals.index[unparsed])}"
    )

low = intervals.apply(lambda iv: iv.left)
high = intervals.apply(lambda iv: iv.right)

# Replace the raw interval text and the CSV's own 'midpoint' column with
# numeric 'Low' / 'High' columns and a 'Mid' recomputed from those two bounds.
df_extracted = (
    df.drop(columns=['Interval_Price', 'midpoint'])
      .assign(Low=low, High=high, Mid=(low + high) / 2)
)
df_extracted.head()

Unnamed: 0,Date,Low,High,Mid
0,2022-07-01,135.660004,139.039993,137.349998
1,2022-07-05,136.929993,141.610001,139.269997
2,2022-07-06,141.080002,144.119995,142.599998
3,2022-07-07,143.279999,146.550003,144.915001
4,2022-07-08,145.0,147.550003,146.275002


In [19]:
from sklearn.preprocessing import MinMaxScaler
# Fit a min-max scaler with target range [0, 1] on the 'Low' column and keep
# the scaled values as a single-column 2-D array. The fitted `scaler` is the
# object the plotting helpers call `inverse_transform` on.
# NOTE(review): the scaler is fitted on the whole series; if a train/test split
# is applied afterwards, test-period min/max leak into the scaling - confirm.
scaler = MinMaxScaler(feature_range=(0, 1))
data = df_extracted
scaled_Low = scaler.fit_transform(data['Low'].to_numpy().reshape(-1, 1))

In [20]:
## Split a 2-D array into a leading train part and a trailing test part
def train_test_split_portion(data, train_portion):
    """Cut ``data`` row-wise into a train block followed by a test block.

    Parameters
    ----------
    data : 2-D array
        Indexed as ``data[rows, cols]``.
    train_portion : float
        Fraction of the rows given to the train block; the row count is
        ``int(len(data) * train_portion)`` (truncated).

    Returns
    -------
    (train_data, test_data)
        ``train_data`` keeps every column of the first rows. ``test_data``
        holds the remaining rows but only the first column (``:1``).
        NOTE(review): the original carried a comment meaning "delete 1" on the
        test slice, so this column asymmetry may be unintended - confirm
        before passing multi-column data.
    """
    split_at = int(len(data) * train_portion)
    head_rows = data[:split_at, :]
    tail_rows = data[split_at:, :1]
    return head_rows, tail_rows

In [21]:
# Convert a 2-D array of values into supervised-learning windows
def create_dataset(dataset, time_step=1, include_last=False):
    """Build sliding windows over column 0 of ``dataset``.

    Sample ``i`` pairs the window ``dataset[i:i + time_step, 0]`` with the
    target ``dataset[i + time_step, 0]`` (the value right after the window).

    Parameters
    ----------
    dataset : 2-D numpy array
        Only column 0 is read.
    time_step : int, default 1
        Window length; must be at least 1.
    include_last : bool, default False
        With the default, ``len(dataset) - time_step - 1`` samples are
        produced, which leaves out the final window whose target is the last
        row. That count is kept as the default because the index arithmetic in
        ``Ploting_Stacked_LSTMprediction`` is written for it. Pass ``True`` to
        get all ``len(dataset) - time_step`` samples.

    Returns
    -------
    (X, y) : tuple of numpy arrays
        ``X`` has shape ``(n_samples, time_step)`` and ``y`` has shape
        ``(n_samples,)``. When the data is too short for a single sample the
        arrays are empty but keep those shapes, i.e. ``(0, time_step)`` and
        ``(0,)``.

    Raises
    ------
    ValueError
        If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError(f"time_step must be >= 1, got {time_step}")

    n_samples = len(dataset) - time_step - (0 if include_last else 1)
    if n_samples <= 0:
        # Keep X two-dimensional so callers can still read X.shape[1].
        dtype = np.asarray(dataset).dtype
        return np.empty((0, time_step), dtype=dtype), np.empty((0,), dtype=dtype)

    dataX = [dataset[i:i + time_step, 0] for i in range(n_samples)]
    dataY = [dataset[i + time_step, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

In [22]:
from matplotlib import pyplot as plt


### Plot the real series together with the train and test predictions
def Ploting_Stacked_LSTMprediction (original_data, scaled_data, train_predict, test_predict, look_back):
    """Draw the real series with train and test predictions laid over it.

    Two NaN-filled arrays shaped like ``scaled_data`` are created and the
    predictions are written into the row ranges they refer to, so all three
    curves share ``original_data.index`` on the x-axis.

    Parameters
    ----------
    original_data : object with an ``.index``
        Supplies the index used for all plotted frames.
    scaled_data : 2-D array
        Passed through the module-level ``scaler.inverse_transform`` to obtain
        the real curve.
    train_predict, test_predict : arrays
        Values written into the NaN canvases.
        # presumably already in original (inverse-transformed) units, since
        # the real curve is inverse-transformed before plotting - confirm
    look_back : int
        Window length; it determines where each prediction run starts.

    Notes
    -----
    Uses the module-level ``scaler`` and ``plt``. The test slice runs from
    ``len(train_predict) + 2 * look_back + 1`` to ``len(scaled_data) - 1``,
    which fits predictions made on windows from ``create_dataset`` with its
    ``len - time_step - 1`` sample count.
    """
    n_train = len(train_predict)
    shared_index = original_data.index

    # Train predictions start after the first `look_back` rows.
    train_canvas = np.empty_like(scaled_data)
    train_canvas.fill(np.nan)
    train_canvas[look_back:look_back + n_train, :] = train_predict

    # Test predictions occupy the tail, ending one row before the last.
    test_canvas = np.empty_like(scaled_data)
    test_canvas.fill(np.nan)
    test_start = n_train + (look_back * 2) + 1
    test_canvas[test_start:len(scaled_data) - 1, :] = test_predict

    train_frame = pd.DataFrame(train_canvas, index=shared_index)
    test_frame = pd.DataFrame(test_canvas, index=shared_index)
    real_frame = pd.DataFrame(scaler.inverse_transform(scaled_data), index=shared_index)

    # Baseline first, then both prediction runs on top of it.
    plt.figure(figsize=(18, 10))
    plt.plot(real_frame)
    plt.plot(train_frame, label="Train Predict")
    plt.plot(test_frame, label="Test Predict")
    plt.legend(loc='best')
    plt.show()

In [23]:
def plot_Stacked_LSTM_prediction_30days (original_data, scaled_data, test_data, look_back, n_days=30):
    """Forecast the last ``n_days`` points recursively and plot them.

    The ``look_back`` values of ``test_data`` that come right before its final
    ``n_days`` rows seed a sliding window. The module-level ``model`` predicts
    one step from the window, the prediction is appended and the oldest value
    dropped, and this repeats ``n_days`` times. The forecasts are passed
    through the module-level ``scaler.inverse_transform`` and plotted against
    the real series on the last ``n_days`` index labels of ``original_data``.

    Parameters
    ----------
    original_data : pandas object
        Supplies the index for the plotted frames.
    scaled_data : 2-D array
        Inverse-transformed with ``scaler`` to draw the real series.
    test_data : array
        Source of the seed window; the window must flatten to exactly
        ``look_back`` values.
        # presumably in the same scaled units as `scaled_data`, since the
        # forecasts are inverse-transformed with the same scaler - confirm
    look_back : int
        Window length fed to the model as shape ``(1, look_back, 1)``.
    n_days : int, default 30
        Number of steps to forecast and of trailing rows treated as held out.

    Returns
    -------
    None
        The figure is drawn with matplotlib; nothing is returned.

    Raises
    ------
    ValueError
        If ``look_back`` or ``n_days`` is smaller than 1, if ``test_data`` has
        fewer than ``look_back + n_days`` rows, or if the seed window does not
        contain exactly ``look_back`` values.

    Notes
    -----
    ``model``, ``scaler`` and ``plt`` are read from module scope; ``model`` is
    expected to expose ``predict(x, verbose=0)``.
    """
    if look_back < 1 or n_days < 1:
        raise ValueError(
            f"look_back and n_days must be >= 1, got look_back={look_back}, n_days={n_days}"
        )
    n_test = len(test_data)
    if n_test < look_back + n_days:
        # Without this check the slice start would be negative and wrap around.
        raise ValueError(
            f"test_data has {n_test} rows but look_back + n_days = {look_back + n_days} are required"
        )

    seed = np.asarray(test_data[n_test - (look_back + n_days):n_test - n_days])
    window = seed.reshape(-1).tolist()
    if len(window) != look_back:
        raise ValueError(
            f"seed window holds {len(window)} values, expected look_back={look_back}"
        )

    # Recursive forecast: every prediction becomes the newest window entry.
    lst_output = []
    for _ in range(n_days):
        x_input = np.array(window).reshape((1, look_back, 1))
        yhat = model.predict(x_input, verbose=0)
        window.extend(yhat[0].tolist())
        window = window[-look_back:]
        lst_output.extend(yhat.tolist())

    actual = pd.DataFrame(scaler.inverse_transform(scaled_data), index=original_data.index)
    history = actual.iloc[:-n_days]
    held_out = actual.iloc[-n_days:]
    predicted = pd.DataFrame(
        scaler.inverse_transform(lst_output),
        index=original_data.iloc[-n_days:].index,
    )

    plt.figure(figsize=(20, 10))
    plt.plot(history, label='History')
    plt.plot(held_out, label='Actual')
    plt.plot(predicted, label='Prediction')
    plt.legend(loc='best')