### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.training as training
import ipynb.fs.full.storage as storage
import ipynb.fs.full.misc as misc
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.features as features
import ipynb.fs.full.ensemble as ensemble

### LOAD THE YAML CONFIG

In [2]:
# Read the pipeline settings from the YAML file into `config`; the later cells
# pass it (or sub-sections of it) to each pipeline stage.
config = storage.load_yaml('extra/config.yaml')

In [3]:
# Echo the parsed config so the settings used for this run are visible in the notebook.
config

{'data': 'extra/dataset.csv',
 'processing': {'resample': True,
  'time': 'D',
  'aggregate': {'Open': 'first',
   'High': 'max',
   'Low': 'min',
   'Close': 'last',
   'Volume': 'sum'},
  'label': {'from': 'Close', 'shift': 1}},
 'features': {'category': 'all', 'window': 14},
 'splitting': {'train_split': 0.8, 'validation_folds': 5},
 'regression_ensemble': {'models': [{'linreg': None},
   {'lstm': {'morph': {'window': 4, 'batch': 30},
     'layers': [{'lstm': {'units': 120}},
      {'dropout': {'rate': 0.15}},
      {'dense': {'units': 50, 'activation': 'relu'}},
      {'dense': {'units': 1}}],
     'epochs': 15,
     'loss': 'mean_squared_error',
     'optimizer': 'rmsprop'}},
   {'tcn': {'morph': {'window': 4, 'batch': 30},
     'layers': [{'tcn': {'nb_filters': 64,
        'nb_stacks': 1,
        'dilations': [1, 2, 4, 8, 16, 32],
        'padding': 'causal',
        'use_skip_connections': False,
        'dropout_rate': 0.1,
        'return_sequences': False}},
      {'dropout':

### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [4]:
# Build the base dataframe from the run config. Per the preview in the next cell,
# the result carries Open/High/Low/Close/Volume plus a `label` column, indexed by
# Date_Timestamp; in that preview each label equals the following row's Close
# (consistent with config['processing']['label'] = {'from': 'Close', 'shift': 1}).
dataframe = processing.create_dataframe(config)

In [5]:
# Preview the first five rows of the processed dataframe.
dataframe.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [6]:
# Add the feature columns selected by config['features'] (category 'all',
# window 14 in this run). The preview in the next cell shows lower-cased
# OHLCV names plus indicator columns (rsi, obv, ma, ...), and starts at
# 2010-01-21 rather than 2010-01-01.
# NOTE(review): presumably the leading rows are dropped as indicator warm-up —
# confirm in the features notebook.
regression_dataset = features.add(dataframe, config['features'])

In [7]:
# Preview the first five rows of the feature-enriched dataset.
regression_dataset.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,label,stock,stocd,stocsd,momentum,...,rsi,obv,ma,bias6,psy12,asy5,asy4,asy3,asy2,asy1
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-21,1.41115,1.41422,1.40247,1.40979,78980.100041,1.41361,13.189189,9.578919,19.849579,-0.02655,...,29.873956,-159980.600046,1.42518,-0.012336,41.7,-0.403064,-0.471831,-0.715772,-0.637503,-0.099256
2010-01-22,1.40977,1.41805,1.40837,1.41361,76660.300037,1.41517,20.072072,13.339429,14.445538,-0.02644,...,33.983332,-83320.300009,1.420572,-0.006774,41.7,-0.323346,-0.46918,-0.334804,0.08567,0.270596
2010-01-24,1.41557,1.41705,1.41396,1.41517,3128.600004,1.41458,22.882883,18.714715,13.877688,-0.01638,...,35.641946,-80191.700005,1.415528,-0.003171,33.3,-0.353285,-0.223529,0.093878,0.190445,0.110295
2010-01-25,1.41498,1.41939,1.41248,1.41458,79577.200048,1.40808,21.81982,21.591592,17.881912,-0.02625,...,35.280916,-159768.900053,1.412868,-0.000558,33.3,-0.187163,0.059984,0.113064,0.034298,-0.0417
2010-01-26,1.41454,1.4178,1.40397,1.40808,79232.600053,1.4028,10.108108,18.27027,19.525526,-0.03823,...,31.495786,-239001.500106,1.412246,-0.002826,25.0,-0.044125,-0.030342,-0.130655,-0.25113,-0.460559


### STEP 3: SPLIT PRIMARY DATA INTO TRAIN & TEST SECTIONS

In [8]:
# Split the feature-enriched dataset using the configured train fraction
# (config['splitting']['train_split'], 0.8 in this run). Later cells read the
# held-out part back as primary_dataset['test'].
primary_dataset = splitting.general(regression_dataset, config['splitting']['train_split'])

### STEP 4: CREATE CROSS VALIDATION FOLDS & TRAIN REGRESSION MODELS

In [9]:
# Train every regression model listed in the config. The log below shows each
# model (linreg_0, lstm_1, tcn_2) being fitted once per validation fold (5 folds).
# Returns the fitted ensemble and a table which, per the preview in the next
# cell, holds one prediction column per model alongside the `label` column.
# NOTE(review): no random seed is set in the visible cells, so the LSTM/TCN
# numbers may not reproduce between runs unless the helper notebooks seed
# internally — confirm.
regression_ensemble, regression_table = ensemble.regression(primary_dataset, config)

TRAINING LINREG_0 FOLD #1
TRAINING LINREG_0 FOLD #2
TRAINING LINREG_0 FOLD #3
TRAINING LINREG_0 FOLD #4
TRAINING LINREG_0 FOLD #5

TRAINING LSTM_1 FOLD #1
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
TRAINING LSTM_1 FOLD #2
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
TRAINING LSTM_1 FOLD #3
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
TRAINING LSTM_1 FOLD #4
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
TRAINING LSTM_1 FOLD #5
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15

Epoch 14/15
Epoch 15/15

TRAINING TCN_2 FOLD #1
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
TRAINING TCN_2 FOLD #2
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
TRAINING TCN_2 FOLD #3
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
TRAINING TCN_2 FOLD #4
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
TRAINING TCN_2 FOLD #5
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15


Epoch 13/15
Epoch 14/15
Epoch 15/15


In [10]:
# Preview the per-model predictions next to the regression label.
regression_table.head(5)

Unnamed: 0,linreg_0,lstm_1,tcn_2,label
0,1.415414,1.356091,1.206921,1.40686
1,1.411078,1.352623,1.138005,1.41436
2,1.419328,1.354105,1.182025,1.43163
3,1.433976,1.367597,1.171626,1.42894
4,1.422776,1.368676,1.159061,1.43328


### STEP 6: CREATE DECISION MACHINE & PUT LABELS THROUGH IT

In [11]:
# Instantiate the decision machine; the next cell calls its calibrate() method
# on the regression table to produce the class labels.
decision_machine = misc.decision_machine()

In [12]:
# Calibrate the decision machine on the regression table using the settings
# under config['classification_ensemble']['decision']; the result is used in
# the next cell to replace the table's numeric `label` column.
regression_labels = decision_machine.calibrate(
    regression_table, config['classification_ensemble']['decision'],
)

In [13]:
# Swap the numeric regression label for the decision-machine class labels.
# The model prediction columns are unchanged (compare the two table previews).
labeled_regression_table = misc.replace_labels(regression_table, regression_labels)

In [14]:
# Integer encoding of the classes found in the `label` column of
# labeled_regression_table (author's legend):
# BUY   = 0
# SELL  = 1
# HOLD  = 2

In [15]:
# Preview the table now that `label` holds class codes instead of prices.
labeled_regression_table.head(5)

Unnamed: 0,linreg_0,lstm_1,tcn_2,label
0,1.415414,1.356091,1.206921,2
1,1.411078,1.352623,1.138005,1
2,1.419328,1.354105,1.182025,1
3,1.433976,1.367597,1.171626,2
4,1.422776,1.368676,1.159061,1


### DECISION BREAKDOWN

In [16]:
# Class balance of the labels: normalize=True returns each class's share of
# rows (fractions summing to 1) instead of raw counts.
labeled_regression_table['label'].value_counts(normalize=True)

2    0.500483
1    0.249758
0    0.249758
Name: label, dtype: float64

In [17]:
# Save the labelled regression table to 'extra/cls-train.pickle' via the
# storage helper.
storage.save_pickle(labeled_regression_table, 'extra/cls-train.pickle')

### STEP 7: TRAIN THE CLASSIFIER ENSEMBLE

In [18]:
# Fit the classifier ensemble on the labelled regression table. The log below
# shows one training pass each for randforest_0, logreg_1 and svc_2.
classifier_ensemble = ensemble.classifier(labeled_regression_table, config)

TRAINING RANDFOREST_0 MODEL
TRAINING LOGREG_1 MODEL
TRAINING SVC_2 MODEL


### STEP 8: REGRESSION PREDICT ON TEST DATASET

In [25]:
# Run the trained regression ensemble on the held-out test section. Per the
# preview in the next cell, the result has one prediction column per model
# (linreg_0, lstm_1, tcn_2).
test_predictions = regression_ensemble.predict(primary_dataset['test'])

In [26]:
# Preview the per-model regression predictions on the test section.
test_predictions.head(5)

Unnamed: 0,linreg_0,lstm_1,tcn_2
0,1.196712,1.193464,1.100978
1,1.200233,1.188077,1.114284
2,1.208944,1.194373,1.114921
3,1.220007,1.205162,1.103823
4,1.218342,1.225968,1.197954


### STEP 9: CLASSIFIER PREDICT ON REGRESSION TEST PREDICTIONS

In [21]:
# Package the regression ensemble's test-set predictions as classifier input.
# `test_predictions` (Step 8) has the linreg_0 / lstm_1 / tcn_2 columns, which
# are the same feature columns the classifiers were trained on in
# labeled_regression_table. Referencing it by name keeps this cell runnable
# on a fresh kernel (Restart & Run All).
mash_dataset = {
    'features': test_predictions.to_numpy(),
    # Left empty: only features are supplied for this prediction pass.
    'labels': []
}

In [22]:
# Run every classifier in the ensemble on the packaged features. Per the
# preview in the next cell, the result has one class-code column per
# classifier (randforest_0, logreg_1, svc_2).
cls_dataset = classifier_ensemble.predict(mash_dataset)

In [23]:
# Preview the per-classifier class predictions.
cls_dataset.head(5)

Unnamed: 0,randforest_0,logreg_1,svc_2
0,0,0,2
1,0,1,2
2,0,1,2
3,0,1,2
4,2,2,2


### STEP 10: CREATE REGRESSION TEST PREDICTION TABLE

In [49]:
# Work on a copy so that adding the `label` column in the next cell leaves
# test_predictions itself unchanged.
test_results = test_predictions.copy()

In [50]:
# Attach the ground-truth labels, taking only the last len(test_results)
# entries so the label count matches the number of prediction rows.
# NOTE(review): presumably the predictions are shorter than the test labels
# because the windowed models (morph window = 4) drop leading rows — confirm.
# NOTE(review): if test_results were empty, the slice becomes [-0:], which
# selects ALL labels rather than none.
test_results['label'] = primary_dataset['test']['labels'][-len(test_results):]

### STEP X: SAVE EVERYTHING

In [24]:
# Persist the run's artefacts in a single call: both fitted ensembles, the
# regression training table, the regression test results and the config
# that produced them.
storage.save_pipeline(
    {
        'regression_ensemble': regression_ensemble,
        'regression_training': regression_table,
        'regression_testing': test_results,
        'classifier_ensemble': classifier_ensemble,
        'config': config,
    }
)