In [1]:
# Standard library
import sys
import os

# Data handling and plotting
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Parquet I/O
import pyarrow.parquet as pq

import datetime

# Data splitting and feature scaling
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# NOTE(review): blanket filter - suppresses every warning category from this point on
# (including warnings emitted by the imports below) unless a later filter overrides it.
# Consider narrowing it to specific categories.
import warnings
warnings.filterwarnings('ignore')

# Regression metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Keras model container, recurrent/dense layers and model loading
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, GRU
from keras.models import load_model

# Draw a grid on every matplotlib axes by default
plt.rcParams['axes.grid'] = True

In [2]:
# Project root. Overridable through the CRYPTO_V2_ROOT environment variable so the
# notebook can run on another machine; falls back to the original location.
root_dir = os.environ.get(
    'CRYPTO_V2_ROOT',
    'C:/Users/delst/OneDrive/Desktop/Code/Workspace/Crypto_V2',
)

# Guarded so that re-running this cell does not stack duplicate sys.path entries.
if root_dir not in sys.path:
    sys.path.append(root_dir)

# Project-local import, placed after the sys.path tweak
# (presumably the package lives under root_dir - verify).
from A_Main.Configurations.setup_env import setup_environment
config = setup_environment(root_dir)

In [3]:
from Modularisation.feature_eng import FeatureEngineer

---

In [4]:
# os.listdir returns entries in arbitrary order; sort them so that any positional
# lookup into this list gives the same symbol on every machine and every run.
crypto_dir = sorted(os.listdir(os.path.join(config.raw_data, 'raw_data')))
crypto_dir

['ADAUSDT', 'BNBUSDT', 'BTCUSDT', 'DOGEUSDT', 'ETHUSDT', 'LTCUSDT', 'XRPUSDT']

In [5]:
# Select the trading pair by name instead of by list position: a positional index
# silently picks a different symbol if the directory listing changes.
select_crypto = 'BTCUSDT'
if select_crypto not in crypto_dir:
    raise ValueError(f'{select_crypto} not found in available symbols: {crypto_dir}')
select_crypto

'BTCUSDT'

In [6]:
# Read the selected symbol's raw parquet file into pandas and discard the SYMBOL column.
files = os.path.join(config.sdo_parq, f'{select_crypto}_raw.parquet')
df = (
    pq.read_table(files)
    .to_pandas()
    .drop(columns=['SYMBOL'])
)
df.head()

Unnamed: 0_level_0,OPEN,HIGH,LOW,CLOSE,VOLUME,QUOTE_ASSET_VOLUME,NUMBER_OF_TRADES,TAKER_BUY_BASE_ASSET_VOLUME,TAKER_BUY_QUOTE_ASSET_VOLUME,TARGET
OPEN_TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-09-16 10:00:00,3753.29,3753.29,3470.66,3480.0,175.251826,627740.727773,778,58.976979,209869.879391,0.393885
2017-09-16 11:00:00,3490.0,3678.22,3490.0,3630.06,113.728279,409919.978386,475,68.053148,246000.247708,0.390215
2017-09-16 12:00:00,3630.06,3667.99,3565.47,3630.0,51.172541,185265.726418,347,23.788116,86067.342374,0.360434
2017-09-16 13:00:00,3610.0,3729.99,3531.01,3674.98,60.480745,219448.905188,413,36.841835,134235.540313,0.39865
2017-09-16 14:00:00,3674.98,3800.0,3630.0,3630.0,58.984953,218953.333668,664,14.245711,53040.500428,0.451718


In [7]:
# Ordered hold-out: shuffle=False keeps the row order, so the last 20% of rows become
# df_test and df is rebound to the leading 80%.
# NOTE(review): df is overwritten here, so re-running this cell without reloading the
# data splits the already-reduced frame again.
df, df_test = train_test_split(
    df,
    shuffle=False,
    test_size=0.2,
)

---

In [8]:
# Columns handed to apply_feature_engineering as the features to engineer.
feature_cols_for_eng = [
    'OPEN',
    'HIGH',
    'LOW',
    'CLOSE',
    'VOLUME',
]

# Column passed as ror_col to apply_feature_engineering.
ror_col = 'CLOSE'

In [9]:
# Inspect the training portion produced by the split above
# (the rendered table is truncated to the leading and trailing rows).
df

Unnamed: 0_level_0,OPEN,HIGH,LOW,CLOSE,VOLUME,QUOTE_ASSET_VOLUME,NUMBER_OF_TRADES,TAKER_BUY_BASE_ASSET_VOLUME,TAKER_BUY_QUOTE_ASSET_VOLUME,TARGET
OPEN_TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-09-16 10:00:00,3753.29,3753.29,3470.66,3480.00,175.251826,6.277407e+05,778,58.976979,2.098699e+05,0.393885
2017-09-16 11:00:00,3490.00,3678.22,3490.00,3630.06,113.728279,4.099200e+05,475,68.053148,2.460002e+05,0.390215
2017-09-16 12:00:00,3630.06,3667.99,3565.47,3630.00,51.172541,1.852657e+05,347,23.788116,8.606734e+04,0.360434
2017-09-16 13:00:00,3610.00,3729.99,3531.01,3674.98,60.480745,2.194489e+05,413,36.841835,1.342355e+05,0.398650
2017-09-16 14:00:00,3674.98,3800.00,3630.00,3630.00,58.984953,2.189533e+05,664,14.245711,5.304050e+04,0.451718
...,...,...,...,...,...,...,...,...,...,...
2022-01-27 11:00:00,36318.22,36781.79,36290.01,36679.98,1707.196590,6.245030e+07,51605,867.932470,3.175429e+07,0.407396
2022-01-27 12:00:00,36679.98,36784.27,36423.46,36668.23,1846.062900,6.759869e+07,56132,958.906500,3.510487e+07,0.391630
2022-01-27 13:00:00,36668.23,36849.37,36543.16,36830.89,1852.852890,6.803013e+07,50496,997.422680,3.662981e+07,0.385286
2022-01-27 14:00:00,36830.88,37199.00,36679.99,36882.03,2826.177830,1.043480e+08,70171,1519.428810,5.612421e+07,0.408789


In [10]:
def apply_feature_engineering(
    df: pd.DataFrame,
    feature_cols_for_eng: list,
    ror_col: str,
    mmean_periods=None,
    ) -> pd.DataFrame:
    """Run the FeatureEngineer pipeline on ``df`` and return the resulting frame.

    The steps are executed in this order: ``get_rate_of_return``,
    ``get_market_means_ratios``, ``get_sma_ema``, ``get_percentage_change``.
    The value returned by the last step is what this function returns.

    Parameters
    ----------
    df : pd.DataFrame
        Frame handed to ``FeatureEngineer``.
    feature_cols_for_eng : list
        Column names forwarded to ``FeatureEngineer`` as the features to engineer.
    ror_col : str
        Column name forwarded to ``FeatureEngineer`` as its last argument.
    mmean_periods : optional
        Periods forwarded to ``FeatureEngineer``. When omitted (``None``) the
        notebook-level ``config.mmean_periods`` is used, which is the original behaviour.

    Returns
    -------
    pd.DataFrame
        Whatever ``get_percentage_change`` returns (rendered as a DataFrame in
        this notebook's output).

    Notes
    -----
    NOTE(review): whether ``FeatureEngineer`` also mutates the input frame in
    place cannot be determined from this notebook; callers should use the return value.
    """
    # Resolve the default at call time so later changes to `config` are honoured.
    if mmean_periods is None:
        mmean_periods = config.mmean_periods

    feature_engineer = FeatureEngineer(
        df,
        feature_cols_for_eng,
        mmean_periods,
        ror_col
    )

    # Each step returns a frame; only the last one is handed back to the caller.
    df = feature_engineer.get_rate_of_return()
    df = feature_engineer.get_market_means_ratios()
    df = feature_engineer.get_sma_ema()
    df = feature_engineer.get_percentage_change()

    return df

# Bind the returned frames: previously the training result was discarded and the test
# result was only displayed, so the engineered columns reached df/df_test only if
# FeatureEngineer happens to mutate its input in place.
df = apply_feature_engineering(df, feature_cols_for_eng, ror_col)
df_test = apply_feature_engineering(df_test, feature_cols_for_eng, ror_col)

# NOTE(review): in the rendered output the first rows of the test frame share a single
# CLOSE_sma / VOLUME_sma value, which looks like warm-up gaps being back-filled -
# confirm inside FeatureEngineer that this does not leak future values.
df_test

Unnamed: 0_level_0,OPEN,HIGH,LOW,CLOSE,VOLUME,QUOTE_ASSET_VOLUME,NUMBER_OF_TRADES,TAKER_BUY_BASE_ASSET_VOLUME,TAKER_BUY_QUOTE_ASSET_VOLUME,TARGET,...,LOW_ema,CLOSE_sma,CLOSE_ema,VOLUME_sma,VOLUME_ema,OPEN_pct_change,HIGH_pct_change,LOW_pct_change,CLOSE_pct_change,VOLUME_pct_change
OPEN_TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-27 16:00:00,36639.43,36787.77,36327.13,36528.86,1843.62877,6.735977e+07,49362,895.84934,3.273223e+07,0.445180,...,36327.130000,40257.191986,36528.860000,1762.927083,1843.628770,-0.003018,-0.005104,-0.004562,-0.006757,-0.189705
2022-01-27 17:00:00,36528.85,36600.00,36161.41,36282.03,1493.88279,5.423506e+07,44022,753.80997,2.736865e+07,0.454244,...,36326.670305,40257.191986,36528.175312,1762.927083,1842.658601,-0.003018,-0.005104,-0.004562,-0.006757,-0.189705
2022-01-27 18:00:00,36282.02,36345.76,36022.07,36215.49,1606.52202,5.803644e+07,43875,785.74886,2.838682e+07,0.457998,...,36325.825367,40257.191986,36527.307946,1762.927083,1842.003576,-0.006757,-0.006946,-0.003853,-0.001834,0.075400
2022-01-27 19:00:00,36215.49,36327.72,35883.82,36209.20,1971.02943,7.124669e+07,50461,1006.06489,3.637845e+07,0.519622,...,36324.599277,40257.191986,36526.425539,1762.927083,1842.361484,-0.001834,-0.000496,-0.003838,-0.000174,0.226892
2022-01-27 20:00:00,36209.19,36435.70,35557.96,35583.50,2461.62784,8.859789e+07,58612,1073.48806,3.865635e+07,0.471609,...,36322.472677,40257.191986,36523.809934,1762.927083,1844.079282,-0.000174,0.002972,-0.009081,-0.017280,0.248905
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-28 19:00:00,23481.63,23492.84,23205.06,23264.36,18237.24695,4.254090e+08,530354,8778.67607,2.047670e+08,0.448246,...,22980.663503,23288.049153,23063.977651,12774.414002,12584.032107,-0.001882,-0.002427,-0.009891,-0.009253,0.714836
2023-02-28 20:00:00,23263.38,23343.55,23166.35,23272.05,15233.07563,3.543993e+08,423062,7509.37862,1.747199e+08,0.470401,...,22981.178583,23287.456653,23064.554828,12773.837411,12591.380356,-0.009294,-0.006355,-0.001668,0.000331,-0.164727
2023-02-28 21:00:00,23270.59,23277.98,23020.97,23139.89,19752.93941,4.570602e+08,544036,9619.11094,2.225759e+08,0.468764,...,22981.288962,23286.547889,23064.763802,12788.353905,12611.245984,0.000310,-0.002809,-0.006275,-0.005679,0.296714
2023-02-28 22:00:00,23138.89,23240.82,23094.69,23161.38,9845.54159,2.282454e+08,375501,5060.07730,1.173096e+08,0.474988,...,22981.603528,23285.708236,23065.031808,12794.050210,12603.574127,-0.005660,-0.001596,0.003202,0.000929,-0.501566


---