### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.training as training
import ipynb.fs.full.storage as storage
import ipynb.fs.full.visualize as visualize
import ipynb.fs.full.misc as misc

In [2]:
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.table as table
import ipynb.fs.full.decide as decide

In [3]:
import ipynb.fs.full.features as features

In [4]:
import numpy as np

### SERIALIZE THE YAML CONFIG

In [5]:
config = storage.load_yaml('extra/config.yaml')

In [6]:
config

{'data': 'extra/dataset.csv',
 'processing': {'resample': True,
  'time': 'D',
  'aggregate': {'Open': 'first',
   'High': 'max',
   'Low': 'min',
   'Close': 'last',
   'Volume': 'sum'},
  'label': {'from': 'Close', 'shift': 1}},
 'features': {'category': 'all', 'window': 14},
 'splitting': {'train_split': 0.8, 'validation_folds': 5},
 'regression_ensemble': {'models': [{'linreg': None},
   {'lstm': {'morph': {'window': 4, 'batch': 30},
     'layers': [{'lstm': {'units': 120}},
      {'dropout': {'rate': 0.15}},
      {'dense': {'units': 50, 'activation': 'relu'}},
      {'dense': {'units': 1}}],
     'epochs': 15,
     'loss': 'mean_squared_error',
     'optimizer': 'rmsprop'}},
   {'tcn': {'morph': {'window': 4, 'batch': 30},
     'layers': [{'tcn': {'nb_filters': 64,
        'nb_stacks': 1,
        'dilations': [1, 2, 4, 8, 16, 32],
        'padding': 'causal',
        'use_skip_connections': False,
        'dropout_rate': 0.1,
        'return_sequences': False,
        'activation

### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [7]:
dataframe = processing.create_dataframe(config)

In [8]:
dataframe.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [9]:
regression_dataset = features.add(dataframe, config['features'])

In [10]:
regression_dataset.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,label,stock,stocd,stocsd,momentum,...,rsi,obv,ma,bias6,psy12,asy5,asy4,asy3,asy2,asy1
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-21,1.41115,1.41422,1.40247,1.40979,78980.100041,1.41361,13.189189,9.578919,19.849579,-0.02655,...,29.873956,-159980.600046,1.42518,-0.012336,41.7,-0.403064,-0.471831,-0.715772,-0.637503,-0.099256
2010-01-22,1.40977,1.41805,1.40837,1.41361,76660.300037,1.41517,20.072072,13.339429,14.445538,-0.02644,...,33.983332,-83320.300009,1.420572,-0.006774,41.7,-0.323346,-0.46918,-0.334804,0.08567,0.270596
2010-01-24,1.41557,1.41705,1.41396,1.41517,3128.600004,1.41458,22.882883,18.714715,13.877688,-0.01638,...,35.641946,-80191.700005,1.415528,-0.003171,33.3,-0.353285,-0.223529,0.093878,0.190445,0.110295
2010-01-25,1.41498,1.41939,1.41248,1.41458,79577.200048,1.40808,21.81982,21.591592,17.881912,-0.02625,...,35.280916,-159768.900053,1.412868,-0.000558,33.3,-0.187163,0.059984,0.113064,0.034298,-0.0417
2010-01-26,1.41454,1.4178,1.40397,1.40808,79232.600053,1.4028,10.108108,18.27027,19.525526,-0.03823,...,31.495786,-239001.500106,1.412246,-0.002826,25.0,-0.044125,-0.030342,-0.130655,-0.25113,-0.460559


### STEP 3: SPLIT & SCALE PRIMARY DATASET

In [11]:
primary_dataset, scaler = splitting.general(regression_dataset, config['splitting']['train_split'])

### STEP 4: CREATE CROSS VALIDATION FOLDS & TRAIN REGRESSION MODELS

In [12]:
regression_ensemble = [None] * len(config['regression_ensemble']['models'])

In [13]:
# LOOP THROUGH REQUESTED MOEDLS
for index, item in enumerate(config['regression_ensemble']['models']):
    
    # MODEL PROPS
    name, settings = misc.key_value(item)
    
    # FOLD CONTAINER
    folds = []
    
    # IF THE MODEL HAS EXTRA SETTINGS
    if settings:
        folds = splitting.timeseries(
            primary_dataset['train'],
            config['splitting']['validation_folds'],
            window=settings['morph']['window']
        )
    
    # OTHERWISE
    else:
        folds = splitting.timeseries(
            primary_dataset['train'],
            config['splitting']['validation_folds']
        )
        
    # FOLD PREDICTIONS
    temp_predictions = []
    temp_labels = []
    temp_models = []
    
    # TRAIN & PREDICT WITH EACH FOLD
    for fold in folds:
        model, predictions = training.start(fold, name, settings)
        
        # APPEND TO COLLECTIONS
        temp_predictions.append(predictions)
        temp_labels.append(fold['test']['labels'])
        temp_models.append(model)
        
    # APPEND RESULTS TO ENSEMBLE
    regression_ensemble[index] = {
        'name': '{}_{}'.format(name, index),
        'predictions': np.concatenate(temp_predictions),
        'labels': np.concatenate(temp_labels),
        'models': temp_models,
        'settings': settings
    }

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


### STEP 5: CONSTRUCT TABLE FOR ENSEMBLE PREDICTIONS

In [14]:
ensemble_table = table.ensemble(regression_ensemble)

In [15]:
ensemble_table.head(5)

Unnamed: 0,linreg_0,lstm_1,tcn_2,label
0,1.415415,1.517122,1.541764,1.40686
1,1.411078,1.507079,1.320315,1.41436
2,1.419329,1.508753,1.37714,1.43163
3,1.433975,1.512904,1.376547,1.42894
4,1.422772,1.496153,1.371484,1.43328


### STEP 6: CLASSIFY LABELS WITH REQUESTED METHOD

In [16]:
# BUY   = 0
# SELL  = 1
# HOLD  = 2

In [17]:
classification_dataset = decide.label({
    'predictions': ensemble_table,
    'settings': config['classification_ensemble']['decision']
})

In [18]:
classification_dataset.head(5)

Unnamed: 0,linreg_0,lstm_1,tcn_2,label
0,1.415415,1.517122,1.541764,2
1,1.411078,1.507079,1.320315,1
2,1.419329,1.508753,1.37714,1
3,1.433975,1.512904,1.376547,2
4,1.422772,1.496153,1.371484,1


#### DECISION BREAKDOWN

In [19]:
classification_dataset['label'].value_counts(normalize=True)

2    0.500483
1    0.249758
0    0.249758
Name: label, dtype: float64

### SPLIT CLASSIFIER DATAFRAME

In [20]:
secondary_dataset, scaler = splitting.general(classification_dataset, config['splitting']['train_split'])

### TRAIN THE CLASSIFIER ENSEMBLE

In [21]:
classification_ensemble = [None] * len(config['classification_ensemble']['models'])

In [22]:
for index, item in enumerate(config['classification_ensemble']['models']):
    
    # MODEL PARAMS
    name, settings = misc.key_value(item)

    # TRAIN & PREDICT WITH THE MODEL
    model, predictions = training.start(secondary_dataset, name, settings)
        
    # APPEND RESULTS TO ENSEMBLE
    classification_ensemble[index] = {
        'name': '{}_{}'.format(name, index),
        'predictions': predictions,
        'labels': secondary_dataset['test']['labels'],
        'model': model,
        'settings': settings
    }

### CREATE CLASSIFICATION TABLE

In [23]:
classification_table = table.ensemble(classification_ensemble)

In [24]:
classification_table

Unnamed: 0,logreg_0,svc_1,label
0,0,2,1
1,0,2,0
2,1,2,2
3,0,2,2
4,0,2,2
...,...,...,...
409,1,2,1
410,1,2,2
411,1,2,1
412,1,2,0


### PREDICT USING PRIMARY TEST DATA & CLASSIFIER ENSEMBLE

In [25]:
import numpy as np

In [48]:
foo_dataset = []

In [49]:
for index, item in enumerate(regression_ensemble):
    
    # PARAMS
    models = item['models']
    settings = item['settings']
    
    # RESULT CONTAINER
    all_predictions = []
    
    # LOOP THROUGH MODELS
    for model in item['models']:
        
        # PLACEHOLDER
        all_predictions = []

        # IF THE DATASET HAS TO BE MORPHED
        if settings != None and 'morph' in settings:
            
            # CREATE A WINDOWED DATASET
            windowed = splitting.windowed(
                primary_dataset,
                settings['morph']['window']
            )
        
            # CREATE A GENERATOR
            generator = splitting.generator(
                windowed,
                settings['morph'],
                shuffle=False
            )
            
            # PREDICT WITH IT
            predictions = model.predict(generator)
            
        # OTHERWISE, PREDICT AS IS
        else:
            predictions = model.predict(primary_dataset['test']['features'])
            
        # APPEND TO CONTAINER
        all_predictions.append(predictions)
            
    # CALCULATE AVERAGE PREDICTION
    average_predictions = np.ndarray.flatten(np.mean(all_predictions, axis=0))
    
    # APPEND RESULT
    foo_dataset.append(average_predictions)

In [55]:
foo_dataset[2]

array([0.952427  , 0.96110064, 0.9268561 , 0.9300386 , 0.94256794,
       0.9527898 , 0.9662841 , 0.95687383, 1.0458897 , 1.1061842 ,
       1.1492535 , 1.0720521 , 1.0669695 , 1.030001  , 0.987394  ,
       0.98650926, 0.9421344 , 0.9282774 , 1.0568801 , 1.1162877 ,
       1.0784506 , 1.0868793 , 1.0880693 , 1.0533451 , 1.0110635 ,
       1.0318277 , 1.0496094 , 1.0625324 , 1.0351976 , 1.0860819 ,
       1.159286  , 1.0777433 , 1.1180282 , 1.0856544 , 1.0988973 ,
       1.1507746 , 1.1945015 , 1.208746  , 1.1663144 , 1.05433   ,
       1.0495996 , 1.0361747 , 1.0050905 , 1.0120798 , 1.1240513 ,
       1.1313297 , 1.156934  , 1.1856443 , 1.1537558 , 1.1033417 ,
       1.1816919 , 1.2283163 , 1.1314911 , 1.021478  , 1.0102576 ,
       1.0542512 , 1.003619  , 1.0189654 , 1.0025207 , 0.9958636 ,
       1.0299578 , 1.063835  , 1.0487922 , 1.1157405 , 1.1479828 ,
       1.1595947 , 1.0966895 , 1.1414154 , 1.09712   , 1.1010128 ,
       0.9823786 , 1.068364  , 1.1214677 , 1.0789762 , 1.06423

In [None]:
foo = np.array([
    [5, 2, 3, 4, 5],
    [1, 2, 3, 4, 5],
    [1, 2, 3, 4, 5]
])

In [None]:
foo.mean(axis = 0)