### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.features as features
import ipynb.fs.full.training as training
import ipynb.fs.full.analysis as analysis
import ipynb.fs.full.storage as storage
import ipynb.fs.full.visualize as visualize

In [2]:
import pandas as pd

### DESERIALIZE THE YAML CONFIG

In [3]:
# LOAD THE PIPELINE CONFIGURATION FROM THE YAML FILE VIA THE PROJECT'S STORAGE HELPER
config = storage.load_yaml('extra/config.yaml')

In [4]:
# DISPLAY THE LOADED CONFIG (DATA PATH, PROCESSING, FEATURES & ENSEMBLE SETTINGS)
config

{'data': 'extra/dataset.csv',
 'processing': {'resample': True,
  'time': 'D',
  'aggregate': {'Open': 'first',
   'High': 'max',
   'Low': 'min',
   'Close': 'last',
   'Volume': 'sum'},
  'label': {'from': 'Close', 'shift': 1}},
 'features': {'add': ['SK', 'SD', 'Momentum'],
  'window': 14,
  'filter': ['Close', 'Momentum', 'SD', 'Label']},
 'ensemble': {'models': [{'linreg': {'split': {'train': 0.8, 'test': 0.2}}},
   {'lstm': {'split': {'train': 0.6, 'test': 0.2, 'validation': 0.2},
     'layers': [{'lstm': {'value': 120, 'activation': 'relu'}},
      {'dropout': {'value': 0.15}},
      {'dense': {'value': 50, 'activation': 'relu'}},
      {'dense': {'value': 1}}],
     'epochs': 20,
     'batch': 8,
     'validation': 25,
     'loss': 'mean_squared_error',
     'optimizer': 'rmsprop'}},
   {'lstm': {'split': {'train': 0.6, 'test': 0.2, 'validation': 0.2},
     'layers': [{'lstm': {'value': 120, 'activation': 'relu'}},
      {'dropout': {'value': 0.15}},
      {'dense': {'value': 5

### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [5]:
# BUILD THE BASE DATAFRAME FROM THE CONFIG
# THE PREVIEW BELOW SHOWS OPEN/HIGH/LOW/CLOSE/VOLUME PLUS A LABEL COLUMN, INDEXED BY DATE
# NOTE(review): PRESUMABLY APPLIES THE RESAMPLE/AGGREGATE/LABEL SETTINGS UNDER config['processing'] -- CONFIRM
dataframe = processing.create_dataframe(config)

In [6]:
# PREVIEW THE FIRST FIVE ROWS
dataframe.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [7]:
# DERIVE THE MODEL DATASET FROM THE BASE DATAFRAME USING THE 'features' CONFIG SECTION
# THE PREVIEW BELOW SHOWS ONLY Close, Momentum, SD & Label, MATCHING THE CONFIGURED 'filter' LIST
dataset = features.add(dataframe, config['features'])

In [8]:
# PREVIEW THE FIRST FIVE ROWS
dataset.head(5)

Unnamed: 0_level_0,Close,Momentum,SD,Label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-02-01,1.392,-0.04649,14.842308,1.39737
2010-02-02,1.39737,-0.03928,13.972612,1.38969
2010-02-03,1.38969,-0.0507,11.302307,1.37395
2010-02-04,1.37395,-0.05393,11.014429,1.36763
2010-02-05,1.36763,-0.04356,11.636518,1.36409


### STEP 3: TRAIN MODEL ENSEMBLE

In [9]:
# RESERVE ONE SLOT PER CONFIGURED MODEL; THE TRAINING LOOP FILLS EACH SLOT BY INDEX
ensemble = [None] * len(config['ensemble']['models'])

In [None]:
# TRAIN EVERY MODEL REQUESTED IN THE CONFIG
for index, item in enumerate(config['ensemble']['models']):

    # EACH ENTRY IS A SINGLE-KEY DICT: {MODEL NAME: MODEL PARAMS}
    name, params = list(item.items())[0]

    # PREPARE THE TRAINING DATA FOR THIS MODEL TYPE
    # NOTE(review): PRESUMABLY FORMATS, SPLITS & NORMALIZES THE DATASET -- CONFIRM IN features.split
    data = features.split(dataset, name, params)

    # FIT THE MODEL
    result = training.start(data, name, params)

    # STORE THE OUTCOME IN THE SLOT RESERVED FOR THIS MODEL
    ensemble[index] = dict(name=name, result=result, scaler=data['scaler'])

### STEP 4: PREDICT WITH ENSEMBLE USING MEAN

In [None]:
# HOLDS ONE ENSEMBLE MEAN PER PREDICTION INDEX
averages = []

In [None]:
# NUMBER OF PREDICTIONS, TAKEN FROM THE FIRST MODEL ONLY
# NOTE(review): THE OTHER MODELS ARE ASSUMED TO YIELD THE SAME COUNT -- CONFIRM
predictions = len(ensemble[0]['result']['predictions'])
# NUMBER OF MODELS IN THE ENSEMBLE
models = len(ensemble)

In [None]:
# LOOP THROUGH EACH PREDICTION INDEX
for index in range(predictions):
    value = 0
    
    # QUERY EVERY MODELS PREDICTION
    for item in ensemble:
        value += item['result']['predictions'][index]
        
    # CALCULATE THE MEAN VALUE & APPEND
    mean = value / models
    averages.append(mean)

### STEP 5: VISUALIZE DIFFERENCES

In [None]:
# START THE COMPARISON TABLE WITH THE TRUE LABELS FOR THE LAST `predictions` ROWS OF THE DATASET
# NOTE(review): THIS REBINDS `analysis`, SHADOWING THE `analysis` MODULE IMPORTED AT THE TOP OF THE NOTEBOOK
analysis = pd.DataFrame(dataset['Label'], index=dataset.index[-predictions:])

In [None]:
# THE LABEL COLUMN HOLDS THE GROUND TRUTH, SO CALL IT 'actual'
analysis = analysis.rename(columns={'Label': 'actual'})

In [None]:
# ADD THE ENSEMBLE MEAN AS ITS OWN COLUMN (LENGTH MUST MATCH THE TABLE'S ROW COUNT)
analysis['ensemble'] = averages

In [None]:
# ADD ONE COLUMN PER ENSEMBLE MEMBER, NAMED '<MODEL NAME>_<POSITION>'
for index, foo in enumerate(ensemble):

    # THE POSITION SUFFIX KEEPS COLUMNS UNIQUE WHEN A MODEL TYPE APPEARS MORE THAN ONCE
    analysis['{}_{}'.format(foo['name'], index)] = foo['result']['predictions']

In [None]:
# DISPLAY THE COMPARISON TABLE (ACTUAL, ENSEMBLE MEAN & ONE COLUMN PER MODEL)
analysis

In [None]:
# PASS THE COMPARISON TABLE TO THE PROJECT'S VISUALIZE HELPER
# NOTE(review): PRESUMABLY PLOTS ACTUAL VS. PREDICTED VALUES -- CONFIRM IN visualize.differences
visualize.differences(analysis)

### STEP 6: SAVE ENSEMBLE MODELS, PREDICTIONS & BUILD CONFIG

In [None]:
# HAND THE CONFIG, THE COMPARISON TABLE & THE TRAINED ENSEMBLE TO THE STORAGE HELPER
# NOTE: `analysis` HERE IS THE DATAFRAME BUILT IN STEP 5, NOT THE IMPORTED MODULE
storage.save_ensemble({
    'config': config,
    'predictions': analysis,
    'ensemble': ensemble
})