In [137]:
import os
import datetime

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from datetime import datetime as dt
from dateutil import relativedelta
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.metrics import mean_squared_error

# fix random seed for reproducibility
tf.random.set_seed(7)

In [138]:
def calc_nmse(y, yhat) -> float:
    assert len(y) == len(yhat)
    mse_xy = np.sum(np.square(np.asarray(y) - np.asarray(yhat)))
    mse_x = np.sum(np.square(np.asarray(y)))
    nmse = mse_xy / mse_x
    return nmse

In [139]:
url = 'https://cexa.oceanprotocol.io/ohlc?exchange=binance&pair=ETH/USDT&period=1h'
df = pd.read_json(url)
df.columns = ['dt1', 'open', 'high', 'low', 'close', 'volume']

df['dt1'] = df.dt1.apply(lambda x: dt.utcfromtimestamp( x / 1000 ) )
df.set_index('dt1', inplace = True)

In [140]:
train_df, test_df = df[:-16],  df[-16:]

In [141]:
test_df

Unnamed: 0_level_0,open,high,low,close,volume
dt1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-04-05 08:00:00,1909.96,1915.02,1903.51,1911.0,19474.6335
2023-04-05 09:00:00,1911.0,1914.87,1908.03,1909.3,10767.8588
2023-04-05 10:00:00,1909.29,1913.87,1908.72,1913.87,8152.0747
2023-04-05 11:00:00,1913.87,1921.7,1910.5,1914.56,17216.2973
2023-04-05 12:00:00,1914.56,1942.98,1893.93,1921.74,54213.5896
2023-04-05 13:00:00,1921.73,1924.99,1906.16,1912.9,38446.3193
2023-04-05 14:00:00,1912.91,1917.51,1889.45,1897.45,49855.0942
2023-04-05 15:00:00,1897.46,1905.26,1882.31,1897.2,49990.1835
2023-04-05 16:00:00,1897.21,1900.79,1887.55,1893.28,25585.0444
2023-04-05 17:00:00,1893.28,1902.26,1891.1,1898.14,16400.8915


In [142]:
y_index = 3
columnsN = len(df.columns)

scaler = MinMaxScaler()
train_df.iloc[:,  [i for i in range(0, columnsN) if i != y_index]] = scaler.fit_transform(train_df.iloc[:,  [i for i in range(0,columnsN) if i!=y_index]])
test_df.iloc[:,  [i for i in range(0, columnsN) if i != y_index]] = scaler.transform(test_df.iloc[:,  [i for i in range(0, columnsN) if i!=y_index]])

y_scaler = MinMaxScaler(feature_range=(-1, 1))
train_df[['close']] = y_scaler.fit_transform(train_df[['close']])
test_df[['close']] = y_scaler.transform(test_df[['close']])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

In [143]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range( len(dataset) - look_back - 1 ):
        a = dataset[ i : ( i + look_back ), 0]
        dataX.append( a )
        dataY.append( dataset[i + look_back, y_index] )
    return np.array(dataX), np.array(dataY)

In [144]:
def create_dataset_prediction(dataset, look_back, n_forecast):
    dataX, dataY = [], []
    for i in range(look_back, len(dataset) -  look_back - 1 ):
        dataX.append( dataset[ i - look_back : i , 0] )
        dataY.append( dataset[ i: i+n_forecast, y_index] )
    return np.array(dataX), np.array(dataY)

In [145]:
x, y = create_dataset_prediction(train_df.values, 30, 14,)

In [146]:
look_back = 14
trainX, trainY = create_dataset(train_df.values, look_back)
testX,  testY  = create_dataset(test_df.values, look_back)

In [147]:
# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [148]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(32, input_shape=(1, look_back)))
model.add(Dense(14))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs = 30, batch_size = 8, verbose = 2)

# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# invert predictions
trainPredictReal = y_scaler.inverse_transform(trainPredict)
trainYreal = y_scaler.inverse_transform([trainY])
testPredictReal = y_scaler.inverse_transform(testPredict)
testYreal = y_scaler.inverse_transform([testY])

# calculate root mean squared error
trainScore = np.sqrt(mean_squared_error(trainYreal[0], trainPredictReal[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = np.sqrt(mean_squared_error(testYreal[0], testPredictReal[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
calc_nmse(testYreal[0], testPredictReal[:, 0])

Epoch 1/30
121/121 - 2s - loss: 0.1351 - 2s/epoch - 17ms/step
Epoch 2/30
121/121 - 0s - loss: 0.0647 - 267ms/epoch - 2ms/step
Epoch 3/30
121/121 - 0s - loss: 0.0205 - 245ms/epoch - 2ms/step
Epoch 4/30
121/121 - 0s - loss: 0.0133 - 245ms/epoch - 2ms/step
Epoch 5/30
121/121 - 0s - loss: 0.0110 - 253ms/epoch - 2ms/step
Epoch 6/30
121/121 - 0s - loss: 0.0098 - 263ms/epoch - 2ms/step
Epoch 7/30
121/121 - 0s - loss: 0.0088 - 265ms/epoch - 2ms/step
Epoch 8/30
121/121 - 0s - loss: 0.0076 - 271ms/epoch - 2ms/step
Epoch 9/30
121/121 - 0s - loss: 0.0068 - 279ms/epoch - 2ms/step
Epoch 10/30
121/121 - 0s - loss: 0.0062 - 259ms/epoch - 2ms/step
Epoch 11/30
121/121 - 0s - loss: 0.0055 - 264ms/epoch - 2ms/step
Epoch 12/30
121/121 - 0s - loss: 0.0055 - 260ms/epoch - 2ms/step
Epoch 13/30
121/121 - 0s - loss: 0.0053 - 281ms/epoch - 2ms/step
Epoch 14/30
121/121 - 0s - loss: 0.0049 - 293ms/epoch - 2ms/step
Epoch 15/30
121/121 - 0s - loss: 0.0049 - 271ms/epoch - 2ms/step
Epoch 16/30
121/121 - 0s - loss: 0.0

2.5661242287872837e-05

In [149]:
testPredictReal.shape

(1, 14)

In [150]:
testPredictReal[:, 0]

array([1900.2748], dtype=float32)

In [151]:
testPredict

array([[0.9494759 , 0.9498935 , 0.9572679 , 0.9738289 , 0.9767861 ,
        0.9687265 , 0.9817941 , 0.9567774 , 0.9809471 , 0.9644564 ,
        0.97352743, 0.98437434, 0.9819929 , 0.9717314 ]], dtype=float32)

In [152]:
x_out = testPredictReal[-1][-12:]

In [153]:
x_out = str(x_out.round(3)).replace(' ','')

In [154]:
x_out

'[1902.3561906.781907.571905.4171908.9071902.2251908.6811904.276\n1906.6991909.5961908.961906.219]'

In [155]:
filename = 'eth_predict.csv'
fout = open(filename, 'w')
fout.write(x_out)
fout.close()

https://stackoverflow.com/questions/69906416/forecast-future-values-with-lstm-in-python