### Temporary YAML loader

In [1]:
import yaml
def load_yaml(path):
    """Parse the YAML file at ``path`` and return the resulting Python object.

    Args:
        path: Location of the YAML file to read.

    Returns:
        Whatever ``yaml.load`` builds from the document (a nested dict for the
        configuration file loaded in this notebook).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding.
    with open(path, mode='r', encoding='utf-8') as file:
        # NOTE(review): FullLoader can construct more than plain data types;
        # prefer yaml.safe_load if this is ever pointed at untrusted files.
        return yaml.load(file, Loader=yaml.FullLoader)
    
# Load the configuration that the cells below pass to get_data, add_features
# and the split helpers. The path is relative to the current working directory.
config = load_yaml('extra/dataextractor.yaml')

In [2]:
# Display the parsed configuration.
config

{'data': {'useYfinance': False,
  'yfinance': {'stock': 'NVDA',
   'start': datetime.date(2019, 1, 8),
   'end': datetime.date(2020, 1, 8),
   'interval': '1d'},
  'binance': {'coin': 'BTCUSDT', 'interval': '1d'}},
 'processing': {'resample': True,
  'sampling': {'time': 'D',
   'aggregate': {'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum'}},
  'label': {'source': 'Close', 'shift': -1}},
 'features': {'window': 14,
  'type': 0,
  'filter': ['Label',
   'Open',
   'High',
   'Low',
   'Volume',
   'stocK',
   'stocD',
   'stocSD',
   'Momentum',
   'ROC',
   'LWR',
   'AOosci',
   'Disp5',
   'Disp10',
   'OSCP',
   'CCI',
   'RSI',
   'OBV',
   'MA',
   'BIAS6',
   'PSY12',
   'ASY5',
   'ASY4',
   'ASY3',
   'ASY2',
   'ASY1']},
 'split': {'scalerName': 'MM',
  'trainTestPercentage': 0.8,
  'trainTestValidate': {'train': 0.7, 'test': 0.15, 'validate': 0.15}}}

### IMPORTS

In [3]:
# Run the helper notebooks. get_data, add_features, train_test_split and
# train_test_validate_split are called below but not defined in this notebook,
# so they presumably come from these files — TODO confirm which defines what.
%run dataProcessing.ipynb
%run dataFeatures.ipynb
%run dataSplitScale.ipynb

## STEP 1: Import data 

In [4]:
# Fetch the raw data described by config. The output below reports a 1d
# BTCUSDT download, which matches config['data']['binance'] and
# config['data']['useYfinance'] being False.
df = get_data(config)

Downloading all available 1d data for BTCUSDT. Be patient..!
All caught up..!


In [5]:
# Preview the first five rows returned by get_data.
df.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-08-17,4261.48,4485.39,4200.74,4285.08,795.150377,4108.37
2017-08-18,4285.08,4371.52,3938.77,4108.37,1199.888264,4139.98
2017-08-19,4108.37,4184.69,3850.0,4139.98,381.309763,4086.29
2017-08-20,4120.98,4211.08,4032.62,4086.29,467.083022,4016.0
2017-08-21,4069.13,4119.62,3911.79,4016.0,691.74306,4040.0


## STEP 2: Create features

In [6]:
# Build the feature columns configured under config['features'].
# NOTE(review): df is rebound to the result, so re-running this cell feeds the
# already-processed frame back into add_features; restart and run all cells to
# reproduce. The preview below starts at 2017-09-03 rather than 2017-08-17, so
# the leading rows are evidently dropped by this step.
df = add_features(df, config['features'])

In [7]:
# Preview the first five rows after the feature step.
df.head(5)

Unnamed: 0_level_0,Label,Open,High,Low,Volume,stocK,stocD,stocSD,Momentum,ROC,...,RSI,OBV,MA,BIAS6,PSY12,ASY5,ASY4,ASY3,ASY2,ASY1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-03,4100.11,4508.5,4714.76,4298.33,691.216198,72.056081,79.434454,87.853957,422.79,10.346549,...,56.229448,1360.24698,4619.232,-0.022727,58.3,-0.344754,-0.254078,-1.558371,-3.488469,0.82261
2017-09-04,4366.47,4505.0,4527.49,3972.51,1394.644614,45.485613,62.399271,76.691393,84.11,2.094373,...,40.473369,-34.397634,4528.226,-0.09544,58.3,-2.104849,-3.545762,-5.494957,-4.342662,-9.507934
2017-09-05,4619.77,4106.97,4484.99,3603.0,1228.938157,57.137832,58.226509,66.686744,326.47,8.080941,...,50.250949,1194.540523,4456.542,-0.029946,66.7,-1.577788,-2.54769,-0.797071,-1.606912,6.29411
2017-09-06,4691.61,4366.49,4662.87,4335.26,807.363726,76.094717,59.572721,60.066167,505.76,12.293602,...,57.414561,2001.904249,4413.514,0.030337,66.7,-0.910352,0.811947,0.808393,5.966556,5.639002
2017-09-07,4282.8,4619.77,4788.59,4438.19,500.429975,81.471198,71.567916,63.122382,375.6,8.702482,...,59.208599,2502.334224,4457.408,0.051963,66.7,0.958175,0.992067,4.492067,3.591045,1.543089


## STEP 3: Split data

In [8]:
# Two-way split driven by config['split'].
# NOTE(review): the next cell rebinds x_train, x_test, y_train, y_test and
# scaler, so the values produced here are overwritten before any visible use.
x_train, x_test, y_train, y_test, scaler = train_test_split(df, config['split'])

In [9]:
# Three-way split driven by config['split']; this rebinds the names assigned
# by the previous cell and adds x_validate / y_validate.
# NOTE(review): presumably uses config['split']['trainTestValidate'] — confirm
# in dataSplitScale.ipynb.
x_train, x_test, x_validate, y_train, y_test, y_validate, scaler = train_test_validate_split(df, config['split'])

In [10]:
# Show the scaler returned by the split (a MinMaxScaler per the output below).
scaler

MinMaxScaler(copy=True, feature_range=(0, 1))

In [11]:
# Inspect the training features; the values shown below lie in [0, 1].
# NOTE(review): x_train.shape or x_train[:5] would give a more compact check.
x_train

array([[0.08288194, 0.08705026, 0.09439432, ..., 0.36175962, 0.33583208,
        0.53618144],
       [0.08266226, 0.0757158 , 0.07363219, ..., 0.17651865, 0.30878581,
        0.29011799],
       [0.05767949, 0.0731435 , 0.05008603, ..., 0.39758355, 0.3954077 ,
        0.66650722],
       ...,
       [0.2651489 , 0.33424766, 0.28955585, ..., 0.5546153 , 0.68011345,
        0.88574411],
       [0.3431966 , 0.42933196, 0.36024852, ..., 0.76764766, 0.79347682,
        0.66979055],
       [0.37923184, 0.39452905, 0.39873447, ..., 0.82923664, 0.59874095,
        0.59275692]])

In [12]:
# Inspect the training labels.
# NOTE(review): this dumps a long array into the output; y_train[:5] or
# y_train.shape would keep the notebook readable.
y_train

array([0.05725214, 0.07398999, 0.08990715, 0.09442152, 0.06873223,
       0.06722472, 0.05915366, 0.0640614 , 0.06124934, 0.04748568,
       0.        , 0.03210956, 0.03304901, 0.03210893, 0.05316131,
       0.0453083 , 0.0446774 , 0.02645341, 0.02556612, 0.0371367 ,
       0.02959725, 0.04598131, 0.04356828, 0.06308927, 0.06192675,
       0.06193869, 0.07474657, 0.07474468, 0.0748402 , 0.07044146,
       0.06406894, 0.06933737, 0.07414897, 0.07754228, 0.09117839,
       0.1004126 , 0.10016816, 0.1025793 , 0.14082133, 0.15464469,
       0.16846994, 0.15841567, 0.16155952, 0.1511898 , 0.14597792,
       0.1567762 , 0.17726868, 0.17820876, 0.17349896, 0.17135677,
       0.14377666, 0.15715889, 0.1679534 , 0.16211313, 0.15902207,
       0.18732107, 0.18421178, 0.2057342 , 0.22401914, 0.24073436,
       0.24670786, 0.26191808, 0.26115898, 0.23697281, 0.24350306,
       0.25851911, 0.24450534, 0.20849787, 0.1920384 , 0.16476494,
       0.20592209, 0.21277156, 0.25456401, 0.2938033 , 0.28341