### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.features as features
import ipynb.fs.full.training as training
import ipynb.fs.full.analysis as analysis
import ipynb.fs.full.storage as storage
import ipynb.fs.full.visualize as visualize
import ipynb.fs.full.misc as misc

In [2]:
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.table as table
import ipynb.fs.full.decide as decide

In [3]:
import numpy as np

### SERIALIZE THE YAML CONFIG

In [4]:
config = storage.load_yaml('extra/config.yaml')

In [5]:
config

{'data': 'extra/dataset.csv',
 'processing': {'resample': True,
  'time': 'D',
  'aggregate': {'Open': 'first',
   'High': 'max',
   'Low': 'min',
   'Close': 'last',
   'Volume': 'sum'},
  'label': {'from': 'close', 'shift': 1}},
 'features': {'add': ['sk', 'sd', 'momentum'],
  'window': 14,
  'filter': ['close', 'momentum', 'sd', 'label']},
 'splitting': {'train_split': 0.8, 'validation_folds': 5},
 'ensemble': {'models': [{'linreg': None},
   {'lstm': {'morph': {'window': 4, 'batch': 30},
     'layers': [{'lstm': {'value': 120}},
      {'dropout': {'value': 0.15}},
      {'dense': {'value': 50, 'activation': 'relu'}},
      {'dense': {'value': 1}}],
     'epochs': 15,
     'loss': 'mean_squared_error',
     'optimizer': 'rmsprop'}}]}}

In [30]:
for model in config['ensemble']['models']:
    name = list(model)[0]
    if model[name] and 'morph' in model[name]:
        model[name]['morph']

None
{'morph': {'window': 4, 'batch': 30}, 'layers': [{'lstm': {'value': 120}}, {'dropout': {'value': 0.15}}, {'dense': {'value': 50, 'activation': 'relu'}}, {'dense': {'value': 1}}], 'epochs': 15, 'loss': 'mean_squared_error', 'optimizer': 'rmsprop'}


### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [7]:
dataframe = processing.create_dataframe(config)

In [8]:
dataframe.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [9]:
dataset = features.add(dataframe, config['features'])

In [10]:
dataset.head(5)

Unnamed: 0_level_0,close,momentum,sd,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-02-01,1.392,-0.04649,14.842308,1.39737
2010-02-02,1.39737,-0.03928,13.972612,1.38969
2010-02-03,1.38969,-0.0507,11.302307,1.37395
2010-02-04,1.37395,-0.05393,11.014429,1.36763
2010-02-05,1.36763,-0.04356,11.636518,1.36409


### STEP 3: SPLIT & SCALE PRIMARY DATASET

In [11]:
features = dataset.loc[:, dataset.columns != 'label'].to_numpy()

In [12]:
labels = dataset[['label']].to_numpy()

In [13]:
primary, scaler = splitting.primary(features, labels, config['splitting']['train_split'])

### STEP 4: CREATE CROSS VALIDATION FOLDS & TRAIN REGRESSION MODELS

In [14]:
ensemble = [None] * len(config['ensemble']['models'])

In [None]:
# LOOP THROUGH REQUESTED MOEDLS
for index, item in enumerate(config['ensemble']['models']):
    
    # MODEL PROPS
    name = list(item)[0]
    settings = item[name]
    
    # FOLD PREDICTIONS
    temp_predictions = []
    temp_labels = []
    
    # CONTAINER
    folds = []
    
    # IF THE MODEL HAS EXTRA SETTINGS
    if settings:
        folds = splitting.timeseries(
            primary['train'],
            config['splitting']['validation_folds'],
            window=settings['morph']['window']
        )
    
    # OTHERWISE
    else:
        folds = splitting.timeseries(
            primary['train'],
            config['splitting']['validation_folds']
        )
    
    # TRAIN & PREDICT WITH EACH FOLD
    for fold in folds:
        predictions = training.start(fold, name, settings)
        
        # APPEND TO COLLECTIONS
        temp_predictions.append(predictions)
        temp_labels.append(fold['test']['labels'])
        
    # APPEND RESULTS TO ENSEMBLE
    ensemble[index] = {
        'name': name + '_' + str(index),
        'predictions': np.concatenate(temp_predictions),
        'labels': np.concatenate(temp_labels)
    }

### STEP 5: CONSTRUCT TABLE FOR ENSEMBLE PREDICTIONS

In [16]:
ensemble_table = table.ensemble(ensemble)

In [25]:
ensemble_table.tail(25)

Unnamed: 0,linreg_0,lstm_1,label
2040,1.180461,1.155557,1.17725
2041,1.177331,1.153381,1.1772
2042,1.177291,1.151082,1.17675
2043,1.176844,1.14875,1.17762
2044,1.17772,1.147977,1.17422
2045,1.174327,1.147254,1.18361
2046,1.183674,1.146684,1.17745
2047,1.177526,1.150661,1.17492
2048,1.175003,1.151161,1.17432
2049,1.17441,1.149593,1.17837


### STEP 6: CLASSIFY LABELS

In [18]:
label_names = {
    'buy': 0,
    'sell': 1,
    'hold': 2
}

In [19]:
threshold = 0.003

In [20]:
classification_dataset = decide.label({
    'predictions': ensemble_table,
    'label_names': label_names,
    'threshold': threshold
})

#### DECISION BREAKDOWN

In [21]:
classification_dataset['label'].value_counts(normalize=True)

3    0.548668
2    0.232930
1    0.218402
Name: label, dtype: float64

In [22]:
classification_dataset.head(5)

Unnamed: 0,linreg_0,lstm_1,label
0,1.440334,1.38996,3
1,1.432932,1.404075,1
2,1.447101,1.397953,1
3,1.461763,1.398027,3
4,1.463757,1.401691,2
