# In [1]:
import pandas as pd
import ts_toolbox as tst
import numpy as np

# Load the daily BTC candle file from the working directory.
# (Alternative location used previously: 'data/BTC_data_1d_2019-01-01.csv'.)
data = pd.read_csv('BTC_data_1d_2019-01-01.csv', index_col=0)

# 'Open time' is parsed and then rendered back to text, so the resulting
# 'date' index holds formatted strings, not datetime objects.
data['date'] = pd.to_datetime(data['Open time']).dt.strftime('%Y-%m-%d %H:%M:%S')

# Keep only the closing price, keyed by the formatted date.
data = data[['date', 'Close']].set_index('date')

# Number of past observations fed to the model for each prediction.
lag = 7

# Generate the X and y
def dp_generate_X_y(data,col,lag):
    """Turn one column of ``data`` into a standardised sliding-window dataset.

    The column is z-scored with its own mean and standard deviation
    (``np.mean`` / ``np.std`` applied to the column), then cut into
    overlapping windows of ``lag`` consecutive values.

    Args:
        data: table whose ``shape[0]`` is the number of observations and
            which supports ``data[col]`` (a pandas DataFrame in this file).
        col: label of the column to use.
        lag: window length, i.e. number of past values per sample.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(nrow - lag, lag)`` and row ``i``
        holds standardised values ``i .. i+lag-1``; ``y`` has shape
        ``(-1, 1)`` and holds the standardised values from position ``lag``
        onwards (the value following each window).
    """
    n_obs = data.shape[0]
    series = data[col]

    # z-score using statistics of the whole column
    standardised = (series - np.mean(series)) / np.std(series)
    flat_values = standardised.to_numpy()

    n_windows = n_obs - lag
    X = np.zeros((n_windows, lag))
    if n_windows > 0:
        # offsets[i, j] == i + j, so one fancy-index pulls out every window
        offsets = np.arange(n_windows)[:, None] + np.arange(lag)[None, :]
        X[:] = flat_values[offsets]

    y = np.array(standardised[lag:]).reshape((-1, 1))
    return (X, y)

# Windowed, standardised samples built from the closing price.
X, y = dp_generate_X_y(data, 'Close', lag=lag)
X = pd.DataFrame(X)

# Two successive splits with train_rate=0.8: first hold out the test part,
# then carve a validation part out of what remains.
# NOTE(review): presumably dp_train_test_split keeps chronological order and
# yields roughly 64% / 16% / 20% train / val / test -- confirm in ts_toolbox.
X_train_val, y_train_val, X_test, y_test = tst.dp_train_test_split(
    X, y, train_rate=0.8)
X_train, y_train, X_val, y_val = tst.dp_train_test_split(
    X_train_val, y_train_val, train_rate=0.8)

# Downstream code reshapes and slices these as plain NumPy arrays.
X_train, X_val, X_test = (
    frame.to_numpy() for frame in (X_train, X_val, X_test)
)


from keras import models
from keras import layers
from tensorflow.keras import optimizers
from keras import metrics
from keras import losses
def training_performance(model,training_history,epochs):
    """Plot the training history of ``model`` against the naive benchmarks.

    Top panel: log of the per-epoch training and validation MAE, with
    horizontal lines for the validation drift/mean benchmarks and the test
    MAE. Bottom panel: test MAE next to the test drift/mean benchmarks.

    Besides its arguments this function reads module-level names:
    ``X_test``, ``y_test``, ``lag``, ``drift_validate_benchmark``,
    ``mean_validate_benchmark``, ``drift_test_benchmark``,
    ``mean_test_benchmark`` and ``plt``.
    NOTE(review): ``plt`` is not imported in the visible part of this file
    (presumably ``matplotlib.pyplot``) -- confirm it is in scope before use.

    Args:
        model: fitted model exposing ``predict``.
        training_history: object whose ``history`` mapping contains the
            per-epoch ``'mae'`` and ``'val_mae'`` sequences.
        epochs: number of epochs; the x axis runs from 1 to ``epochs``.
    """
    test_predictions = model.predict(X_test.reshape(-1, lag, 1))
    test_MAE = np.mean(np.abs(y_test - test_predictions))

    epoch_axis = range(1, epochs + 1)

    def draw_level(level, colour, caption):
        # Horizontal reference line spanning the whole epoch axis.
        plt.hlines(level, xmin=epoch_axis[0], xmax=epoch_axis[-1],
                   colors=colour, label=caption)

    plt.figure(figsize=(10, 8), facecolor='ghostwhite')

    # --- top panel: learning curves on a log scale ---
    plt.subplot(2, 1, 1)
    plt.plot(epoch_axis, np.log(training_history.history['val_mae']), 'b',
             label='Validation MAE')
    plt.plot(epoch_axis, np.log(training_history.history['mae']), 'bo',
             label='Training MAE')
    draw_level(np.log(drift_validate_benchmark), 'coral',
               'Validation Drift Benchmark')
    draw_level(np.log(mean_validate_benchmark), 'lightblue',
               'Validation Mean Benchmark')
    draw_level(np.log(test_MAE), 'purple', 'Testing MAE')
    plt.ylabel('logged MAE')
    plt.xlabel('Epoch')
    plt.legend(loc='upper right')

    # --- bottom panel: test-set comparison on the raw MAE scale ---
    plt.subplot(2, 1, 2)
    draw_level(test_MAE, 'purple', 'Testing MAE')
    draw_level(drift_test_benchmark, 'coral', 'Test Drift Benchmark')
    draw_level(mean_test_benchmark, 'lightblue', 'Test Mean Benchmark')
    plt.ylabel('MAE')
    plt.legend(loc='right')
    plt.show()

def forecast(model,length,lastsample):
    """Roll the model forward ``length`` steps, feeding predictions back in.

    Each step reshapes the current window to ``(-1, lag, 1)`` (``lag`` is the
    module-level window length), takes the first row of ``model.predict``,
    records its first element, then slides the window: the oldest value is
    dropped and the new prediction appended.

    Args:
        model: object exposing ``predict``.
        length: number of steps to forecast.
        lastsample: NumPy array holding the most recent window of inputs.

    Returns:
        NumPy array with the ``length`` successive predictions.
    """
    window = lastsample
    predictions = []
    for _ in range(length):
        step_output = model.predict(window.reshape(-1, lag, 1))[0]
        predictions.append(step_output[0])
        # slide forward: drop the oldest value, append the newest prediction
        window = np.array([*window[1:], *step_output])
    return np.array(predictions)
def get_Mae_benchmark(X,y):
    """Return the MAE of two naive baselines as ``(mean_benchmark, drift_benchmark)``.

    Args:
        X: 2-D array of windows, one sample per row; the last column is the
            most recent observation of each window.
        y: targets, either shape ``(n,)`` or ``(n, 1)``.

    Returns:
        Tuple ``(mean_benchmark, drift_benchmark)``:

        * ``mean_benchmark`` -- mean absolute difference between every target
          and every per-column mean of ``X`` (``np.mean(X, 0)``), averaged
          over all sample/column pairs.
          NOTE(review): if a per-window mean forecast was intended this
          would need ``axis=1``; behaviour is kept as it was.
        * ``drift_benchmark`` -- MAE of the "last value carries forward"
          forecast, i.e. ``X[i, -1]`` compared with ``y[i]``.
    """
    X = np.asarray(X)
    # Column form keeps the (n, 1) vs (lag,) broadcast of the mean benchmark.
    y_column = np.asarray(y).reshape(-1, 1)
    mean_benchmark = np.mean(np.abs(np.mean(X, 0) - y_column))

    # Compare element-wise: subtracting an (n, 1) target from the (n,) last
    # column would broadcast to an (n, n) matrix and average over every
    # pairing of samples instead of matching each window with its own target.
    drift_benchmark = np.mean(np.abs(X[:, -1] - y_column[:, 0]))
    return (mean_benchmark, drift_benchmark)

# Naive-baseline MAEs for the validation and test splits. These four
# module-level values are also read by training_performance() when plotting.
mean_validate_benchmark, drift_validate_benchmark = get_Mae_benchmark(X_val, y_val)
mean_test_benchmark, drift_test_benchmark = get_Mae_benchmark(X_test, y_test)

print(
    'Mean Validate MAE Benchmark:', mean_validate_benchmark,
    '; ',
    'Drift Validate MAE Benchmark:', drift_validate_benchmark,
)
print(
    'Mean Test MAE Benchmark:', mean_test_benchmark,
    '; ',
    'Drift Test MAE Benchmark:', drift_test_benchmark,
)


# Minimal recurrent network: one SimpleRNN unit over the `lag` past values
# (one feature per time step), followed by a single linear output.
model = models.Sequential([
    layers.SimpleRNN(1, input_shape=(lag, 1), activation='relu'),
    layers.Dense(1),
])

# MAE is used both as the training loss and as the reported metric, so the
# history exposes 'mae' and 'val_mae'.
optimizer = optimizers.RMSprop()
model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])

# Inputs are reshaped to (samples, lag, 1); targets are flattened to 1-D.
model_fit = model.fit(
    X_train.reshape(-1, lag, 1),
    y_train.flatten(),
    batch_size=16,
    epochs=200,
    validation_data=(X_val.reshape(-1, lag, 1), y_val.flatten()),
)

# training_performance(model,model_fit,200)
# forecast_value=forecast(model,10,X_test[-1])
# plot_prediction2(model,'USD/CNY',forecast_value)

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error


import plotly.express as px

# ---------------------------------------------------------------------------
# Evaluate the fitted RNN on the test split, in the original price scale.
# ---------------------------------------------------------------------------
tmp = data['Close']

# Statistics computed the same way dp_generate_X_y() standardised the series
# (np.mean / np.std on the column), so the transform below is its inverse.
# One pair of values is used for predictions, targets and the forecast alike.
price_mean = np.mean(tmp)
price_std = np.std(tmp)

prediction=model.predict(X_test.reshape(-1,lag,1))
prediction=prediction*price_std+price_mean
y_true=y_test*price_std+price_mean

# Fix: `forecast_value` used to be read here without ever being assigned
# (NameError). It is now computed from the last test window with a 10-step
# horizon. The rescaled result gets its own name so the `forecast()` function
# is not overwritten by an array.
forecast_value=forecast(model,10,X_test[-1])
forecast_rescaled=forecast_value*price_std+price_mean

# Naive "drift" baseline: the previous true value predicts the current one.
# The first entry is NaN after the shift and is skipped in the metrics below.
y_drift = pd.DataFrame(y_true).shift(1).iloc[:,0]


mse_test =mean_squared_error(y_true, prediction)
mae_test = mean_absolute_error(y_true, prediction)
mape_test = mean_absolute_percentage_error(y_true, prediction)

print('-----RNN-----')
print('MAE test set', round(mae_test, 2))
print('MSE test set', round(mse_test, 2))
print('MAPE test set', round(mape_test, 4))

# Same metrics for the drift baseline (these reuse, and therefore overwrite,
# the metric variables computed for the RNN above).
mse_test =mean_squared_error(y_true[1:], y_drift[1:])
mae_test = mean_absolute_error(y_true[1:], y_drift[1:])
mape_test = mean_absolute_percentage_error(y_true[1:], y_drift[1:])

print('-----drift-----')
print('MAE test set', round(mae_test, 2))
print('MSE test set', round(mse_test, 2))
print('MAPE test set', round(mape_test, 4))

# Long-format table (index, variable, value) so plotly draws one line per series.
result = pd.DataFrame(data = {
                       'y_true':y_true.reshape(1,-1)[0],
                       'y_pred':prediction.reshape(1,-1)[0],
                       'y_drift':y_drift
                       }).reset_index(drop = False).melt(id_vars = ['index'])
fig = px.line(result, x="index", y="value", color='variable')
fig.show()

# Google colab