### Temporary YAML loader

In [15]:
import yaml
def load_yaml(path):
    """Parse the YAML file at ``path`` and return the resulting Python object.

    Args:
        path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.load`` produces for the document.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8: without ``encoding`` open() falls back to the
    # platform locale, which can mis-decode non-ASCII YAML (e.g. on Windows).
    with open(path, mode='r', encoding='utf-8') as file:
        # NOTE(security): FullLoader is kept so existing configs load exactly
        # as before; prefer yaml.safe_load if the file may come from an
        # untrusted source.
        return yaml.load(file, Loader=yaml.FullLoader)
    
# Load the pipeline configuration. The path is relative, so it is resolved
# against the kernel's current working directory.
config = load_yaml('extra/dataextractor.yaml')

In [16]:
# Echo the parsed configuration so the loaded values can be checked by eye.
config

{'data': {'useyfinance': True,
  'yfinance': {'stock': 'NVDA',
   'start': datetime.date(2019, 1, 8),
   'end': datetime.date(2020, 1, 8),
   'interval': '1d'},
  'binance': {'coin': 'BTCUSDT', 'interval': '1d'}},
 'processing': {'resample': True,
  'sampling': {'time': 'D',
   'aggregate': {'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum'}},
  'label': {'source': 'Close', 'shift': -1}},
 'features': {'window': 14,
  'type': 0,
  'filter': ['label',
   'open',
   'high',
   'low',
   'volume',
   'stock',
   'stocd',
   'stocsd',
   'momentum',
   'roc',
   'lwr',
   'aoosci',
   'disp5',
   'disp10',
   'oscp',
   'cci',
   'rsi',
   'obv',
   'ma',
   'bias6',
   'psy12',
   'asy5',
   'asy4',
   'asy3',
   'asy2',
   'asy1']},
 'split': {'scalername': 'MM',
  'traintestpercentage': 0.8,
  'traintestvalidate': {'train': 0.7, 'test': 0.15, 'validate': 0.15}}}

### Imports (run helper notebooks)

In [17]:
# Execute the helper notebooks so the names they define are available here.
# NOTE(review): get_data, add_features, train_test_split and
# train_test_validate_split are used below but not defined in this notebook —
# presumably they come from these three notebooks (TODO confirm which).
%run dataProcessing.ipynb
%run dataFeatures.ipynb
%run dataSplitScale.ipynb

## STEP 1: Import data 

In [18]:
# Build the raw frame from the full config.
# NOTE(review): get_data is defined outside this notebook; presumably it picks
# the data source from config['data'] — confirm against its definition.
df = get_data(config)

[*********************100%***********************]  1 of 1 completed


In [19]:
# Preview the first five rows of the loaded frame.
df.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-07,138.5,144.889999,136.429993,143.399994,17729000,139.830002
2019-01-08,146.690002,146.779999,136.899994,139.830002,19650400,142.580002
2019-01-09,141.899994,144.490005,139.860001,142.580002,15431500,145.229996
2019-01-10,141.800003,145.580002,139.360001,145.229996,13078900,148.830002
2019-01-11,144.330002,149.75,143.210007,148.830002,21869100,150.440002


## STEP 2: Create features

In [20]:
# Compute the feature columns using the 'features' section of the config.
# NOTE(review): rebinding df makes this cell non-idempotent — re-running it
# would pass the already-featured frame back into add_features. Consider
# binding the result to a separate name.
df = add_features(df, config['features'])

In [21]:
# Preview the first five rows after feature creation.
df.head(5)

Unnamed: 0_level_0,label,open,high,low,volume,stock,stocd,stocsd,momentum,roc,...,rsi,obv,ma,bias6,psy12,asy5,asy4,asy3,asy2,asy1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-31,144.729996,137.259995,145.190002,136.380005,21071300,42.670676,22.021422,26.155683,-1.479996,-1.01907,...,48.256657,49040500,142.179999,-0.007183,66.7,-1.870123,-2.700879,1.358319,4.415431,4.525209
2019-02-01,149.179993,144.5,146.789993,142.580002,15626200,45.950447,36.668886,24.769444,-4.100006,-2.754825,...,49.183378,64666700,139.095999,0.014901,75.0,-2.024819,1.188595,3.170095,2.602316,0.679423
2019-02-04,149.949997,145.369995,150.679993,144.479996,13214800,60.843339,49.821487,36.170598,-1.26001,-0.83755,...,53.275596,77881500,141.329999,0.059693,75.0,1.556549,3.134663,2.744333,1.853895,3.028367
2019-02-05,153.0,149.660004,151.429993,148.300003,13560600,63.420327,56.738038,47.742804,0.080002,0.053381,...,53.966386,91442100,144.999997,0.050315,75.0,2.610696,2.186957,1.40754,1.771599,0.51483
2019-02-06,147.419998,151.289993,155.600006,151.070007,17561600,73.627833,65.963833,57.507786,4.160004,2.79495,...,56.697306,109003700,148.121997,0.045558,75.0,2.152287,1.559056,1.852267,1.264217,2.013604


## STEP 3: Split data

In [22]:
# Two-way split driven by config['split'].
# NOTE(review): x_train, x_test, y_train, y_test and scaler are all reassigned
# by the train_test_validate_split call in the next cell, so this cell's
# results are discarded when the notebook runs top to bottom.
# NOTE(review): train_test_split here takes (df, config) — presumably a project
# helper rather than scikit-learn's function of the same name; confirm.
x_train, x_test, y_train, y_test, scaler = train_test_split(df, config['split'])

In [23]:
# Three-way split driven by config['split']; unpacks train/test/validate
# inputs and targets plus the scaler returned by the helper.
# NOTE(review): train_test_validate_split is defined outside this notebook;
# how it orders, scales and proportions the splits cannot be seen here.
x_train, x_test, x_validate, y_train, y_test, y_validate, scaler = train_test_validate_split(df, config['split'])

In [24]:
# Show the scaler object returned by the split above.
scaler

MinMaxScaler(copy=True, feature_range=(0, 1))

In [25]:
# Inspect the training inputs.
# NOTE(review): x_train.shape or x_train[:5] would give a more compact check.
x_train

array([[0.02360536, 0.13785716, 0.06624604, ..., 0.63363034, 0.78173289,
        0.81884541],
       [0.15020101, 0.16642843, 0.17490354, ..., 0.85857761, 0.62237113,
        0.53754429],
       [0.16541338, 0.2358927 , 0.20820172, ..., 0.80571568, 0.55658951,
        0.70935851],
       ...,
       [0.71673349, 0.72107152, 0.69540824, ..., 0.2688726 , 0.21193324,
        0.31001241],
       [0.64329414, 0.69999995, 0.70014018, ..., 0.3450694 , 0.34117314,
        0.57835151],
       [0.70274525, 0.71107129, 0.67472838, ..., 0.36052664, 0.38956969,
        0.39056359]])

In [26]:
# Inspect the training targets.
# NOTE(review): this prints every element; y_train[:5] or y_train.shape would
# keep the output short.
y_train

array([0.18775713, 0.26406022, 0.27726331, 0.32956102, 0.23388199,
       0.24674207, 0.2172496 , 0.29818239, 0.32750349, 0.35579557,
       0.40397796, 0.39197527, 0.42472567, 0.37705766, 0.4356996 ,
       0.42712621, 0.3998629 , 0.37088481, 0.35116586, 0.38871734,
       0.39437581, 0.38991774, 0.31327164, 0.26543199, 0.28909462,
       0.46913575, 0.49279839, 0.59739355, 0.54492446, 0.61779826,
       0.60305202, 0.71896438, 0.69650189, 0.86008226, 0.74965699,
       0.68587097, 0.73885444, 0.73251022, 0.7453703 , 0.7849793 ,
       0.83161855, 0.84396425, 0.94032904, 0.93415619, 0.98028105,
       0.99468428, 0.95130296, 1.        , 0.99039759, 0.96416304,
       0.87311371, 0.93329906, 0.9175238 , 0.90054866, 0.93775713,
       0.97547998, 0.98405336, 0.9110082 , 0.75977353, 0.78103562,
       0.8096707 , 0.80058293, 0.84722218, 0.84413562, 0.78995202,
       0.67438266, 0.68827151, 0.62431411, 0.60082312, 0.42301089,
       0.48456775, 0.44238683, 0.45284637, 0.39008911, 0.30812