### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.features as features
import ipynb.fs.full.training as training
import ipynb.fs.full.analysis as analysis
import ipynb.fs.full.storage as storage
import ipynb.fs.full.visualize as visualize
import ipynb.fs.full.misc as misc
import ipynb.fs.full.decision as decision

### LOAD THE YAML CONFIG

In [2]:
# PARSED SETTINGS: config['features'] FEEDS STEP 2, config['ensemble']['models'] FEEDS STEP 3
config = storage.load_yaml('extra/config.yaml')

### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [3]:
# BUILD THE BASE DATAFRAME FROM THE CONFIG
# NOTE(review): IN THE PREVIEW BELOW, 'label' MATCHES THE NEXT ROW'S 'close' -- CONFIRM IN processing
dataframe = processing.create_dataframe(config)

In [4]:
# PREVIEW THE FIRST 5 ROWS
dataframe.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [5]:
# ADD THE FEATURES REQUESTED UNDER config['features'] (HERE `features` IS STILL THE IMPORTED MODULE)
dataset = features.add(dataframe, config['features'])

In [6]:
# PREVIEW THE FIRST 5 ROWS
dataset.head(5)

Unnamed: 0_level_0,close,momentum,sd,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-02-01,1.392,-0.04649,14.842308,1.39737
2010-02-02,1.39737,-0.03928,13.972612,1.38969
2010-02-03,1.38969,-0.0507,11.302307,1.37395
2010-02-04,1.37395,-0.05393,11.014429,1.36763
2010-02-05,1.36763,-0.04356,11.636518,1.36409


### STEP 3: TRAIN MODEL ENSEMBLE

In [7]:
# ONE EMPTY SLOT PER REQUESTED MODEL; THE TRAINING LOOP FILLS EACH SLOT BY INDEX
ensemble = [None] * len(config['ensemble']['models'])

In [8]:
# LOOP THROUGH REQUESTED MODELS
for index, item in enumerate(config['ensemble']['models']):

    # MODEL PROPS -- THE FIRST KEY IS THE MODEL NAME, ITS VALUE THE PARAMS
    # (PRESUMABLY EACH ENTRY IS A SINGLE-KEY MAPPING; VERIFY AGAINST config.yaml)
    name, params = list(item.items())[0]

    # GENERATE FORMATTED, SPLIT & NORMALIZED DATA FOR TRAINING
    data = features.split(dataset, name, params)

    # TRAIN THE MODEL
    result = training.start(data, name, params)

    # STORE THE MODEL, TOGETHER WITH ITS SCALER, IN ITS PRE-ALLOCATED ensemble SLOT
    ensemble[index] = dict(
        name=name,
        result=result,
        scaler=data['scaler'],
    )

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 1861 samples, validate on 620 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 1861 samples, validate on 620 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### STEP 4: STITCH ALL REGRESSION PREDICTIONS TOGETHER

In [9]:
# BUILD ONE TABLE FROM THE DATASET AND THE TRAINED ENSEMBLE
# (THE PREVIEW BELOW SHOWS THE LABEL, THE ENSEMBLE COLUMN AND ONE COLUMN PER MODEL)
regression_predictions = misc.regression_table({
    'dataset': dataset,
    'ensemble': ensemble
})

In [10]:
# PREVIEW THE FIRST 5 ROWS
regression_predictions.head(5)

Unnamed: 0_level_0,label,ensemble,linreg_0,lstm_1,lstm_2
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-07,0.341122,0.36462,0.369891,0.339692,0.384278
2018-01-08,0.331619,0.346368,0.354823,0.318775,0.365504
2018-01-09,0.337909,0.333407,0.34555,0.305409,0.349262
2018-01-10,0.357856,0.343724,0.351705,0.319956,0.359511
2018-01-11,0.393802,0.369435,0.371198,0.352352,0.384756


### STEP 5: VISUALIZE DIFFERENCES

In [11]:
# visualize.differences(regression_predictions)  # DISABLED; THE ORIGINAL ARGUMENT 'regression_table' IS NOT DEFINED ABOVE

### STEP 6: CREATE DATASET FOR CLASSIFICATION

In [12]:
# CLASS NAME -> INTEGER, PASSED TO decision.classify
# NOTE(review): PRESUMABLY THE INTEGER IS THE CLASS ID / COLUMN ORDER -- CONFIRM IN decision
label_names = {
    'buy': 0,
    'sell': 1,
    'hold': 2
}

In [13]:
# PASSED TO decision.classify
# NOTE(review): IN THE PREVIEW BELOW, ROWS WITH |ensemble| < threshold ARE FLAGGED 'hold' -- CONFIRM IN decision
threshold = 0.02

In [14]:
# TURN THE REGRESSION PREDICTIONS INTO A CLASSIFICATION TABLE
# (THE PREVIEW BELOW SHOWS ADDED buy / sell / hold FLAG COLUMNS)
classification_dataset = decision.classify({
    'predictions': regression_predictions,
    'label_names': label_names,
    'threshold': threshold
})

In [15]:
# PREVIEW THE FIRST 10 ROWS
classification_dataset.head(10)

Unnamed: 0_level_0,label,ensemble,linreg_0,lstm_1,lstm_2,buy,sell,hold
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-08,sell,-0.051356,-0.041589,-0.063554,-0.050088,0,1,0
2018-01-09,hold,-0.038136,-0.026482,-0.042833,-0.045456,0,1,0
2018-01-10,buy,0.030475,0.017655,0.046532,0.028924,1,0,0
2018-01-11,buy,0.072136,0.053943,0.096446,0.067863,1,0,0
2018-01-12,hold,0.120347,0.090405,0.160923,0.110838,1,0,0
2018-01-14,buy,-0.012746,-0.004094,-0.022659,-0.011462,0,0,1
2018-01-15,hold,0.045565,0.040518,0.055359,0.040978,1,0,0
2018-01-16,sell,-0.007767,0.002228,-0.018622,-0.006901,0,0,1
2018-01-17,buy,-0.064413,-0.049238,-0.083887,-0.060698,0,1,0
2018-01-18,hold,0.031075,0.029585,0.03862,0.025444,1,0,0


### TRAIN A CLASSIFIER

In [96]:
# MODEL OUTPUT COLUMNS AS A 2D ARRAY OF CLASSIFIER INPUTS
# NOTE(review): THIS REBINDS THE NAME `features`, SHADOWING THE MODULE IMPORTED IN THE
# IMPORTS CELL. AFTER THIS CELL RUNS, features.add / features.split FAIL UNTIL THE KERNEL
# IS RESTARTED -- RENAME THIS ARRAY (AND ITS USES IN THE SPLIT CELLS BELOW).
features = classification_dataset[['ensemble', 'linreg_0', 'lstm_1', 'lstm_2']].to_numpy()

In [97]:
# buy / sell / hold FLAG COLUMNS AS A 2D ARRAY (ONE ROW PER SAMPLE)
labels = classification_dataset[['buy', 'sell', 'hold']].to_numpy()

In [98]:
# PREVIEW THE FIRST 10 LABEL ROWS
labels[:10]

array([[0, 1, 0],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0]], dtype=int64)

#### CONVERT ONE-HOT LABEL ROWS TO STRING CLASS LABELS

In [99]:
def foofoofoo(lblz):
    """Collapse each row of ``lblz`` into a single string.

    Every value in a row is converted with ``str`` and the pieces are
    concatenated in order, so the row ``[0, 1, 0]`` becomes ``'010'``.

    Args:
        lblz: Iterable of rows, each row itself an iterable of values.

    Returns:
        list: One concatenated string per row, in input order.
    """
    return [''.join(map(str, row)) for row in lblz]

In [100]:
# REBINDS labels: 2D ARRAY OF FLAG ROWS -> LIST OF STRINGS SUCH AS '010'
labels = foofoofoo(labels)

In [101]:
import numpy as np

In [102]:
# PREVIEW THE FIRST 50 STRING LABELS
np.array(labels[:50])

array(['010', '010', '100', '100', '100', '001', '100', '001', '010',
       '100', '001', '100', '001', '100', '100', '001', '001', '001',
       '010', '100', '001', '100', '010', '001', '010', '001', '010',
       '001', '001', '001', '100', '100', '100', '001', '010', '001',
       '001', '010', '010', '100', '001', '001', '001', '010', '010',
       '100', '100', '001', '100', '100'], dtype='<U3')

#### SPLIT & SCALE THE DATA

In [26]:
import math
from sklearn.preprocessing import MinMaxScaler

In [27]:
# SCALER FOR THE CLASSIFIER INPUTS, TARGET RANGE [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))

In [28]:
# SPLIT INDEX: THE FIRST 80% OF ROWS (ROUNDED UP) ARE USED FOR TRAINING, IN ROW ORDER
limit = math.ceil(len(features) * 0.8)

In [29]:
# FIT THE SCALER ON THE TRAINING ROWS ONLY, THEN SCALE THEM
x_train = scaler.fit_transform(features[:limit])
y_train = labels[:limit]

In [30]:
# SCALE THE REMAINING ROWS WITH THE SCALER FITTED ON THE TRAINING ROWS (NO REFIT)
x_test = scaler.transform(features[limit:])
y_test = labels[limit:]

In [31]:
# from sklearn.preprocessing import MultiLabelBinarizer
# labels = MultiLabelBinarizer().fit_transform(labels)

#### TRAIN

In [32]:
from sklearn.svm import SVC

In [112]:
# SVM CLASSIFIER; kernel='rbf' AND gamma='scale' DO NOT APPEAR IN THE REPR BELOW
model = SVC(
    C=0.05,
    kernel='rbf',
    gamma='scale',
    decision_function_shape='ovo',
)
# NOTE(review): IN THE classification_dataset PREVIEW THE buy/sell/hold FLAGS TRACK THE SIGN
# AND SIZE OF `ensemble`, WHICH IS ALSO AN INPUT FEATURE -- IF SO, THE SCORE MOSTLY MEASURES
# RECOVERY OF THAT THRESHOLD RULE. CONFIRM BEFORE TRUSTING THE ACCURACY.
model.fit(x_train, y_train)

SVC(C=0.05, decision_function_shape='ovo')

In [113]:
# MEAN ACCURACY ON THE HELD-OUT ROWS
model.score(x_test, y_test)

0.959349593495935

In [114]:
# PREDICTED STRING LABELS FOR THE HELD-OUT ROWS
predictions = model.predict(x_test)

In [115]:
# FIRST 150 PREDICTIONS, FOR SIDE-BY-SIDE COMPARISON WITH y_test BELOW
predictions[:150]

array(['001', '001', '001', '010', '010', '010', '001', '001', '010',
       '100', '010', '001', '100', '100', '010', '001', '001', '001',
       '010', '100', '010', '001', '100', '001', '001', '001', '100',
       '001', '010', '100', '001', '001', '010', '100', '010', '001',
       '010', '001', '010', '100', '010', '010', '100', '001', '010',
       '100', '100', '001', '100', '001', '001', '001', '100', '100',
       '100', '001', '001', '001', '100', '100', '100', '010', '001',
       '010', '001', '010', '010', '001', '001', '100', '100', '001',
       '001', '001', '010', '010', '001', '001', '010', '100', '100',
       '010', '001', '100', '100', '001', '100', '001', '001', '001',
       '010', '001', '001', '001', '010', '001', '001', '001', '100',
       '001', '001', '100', '010', '001', '100', '100', '100', '100',
       '010', '001', '001', '100', '010', '001', '010', '001', '100',
       '001', '001', '001', '100', '001', '100'], dtype='<U3')

In [116]:
# FIRST 150 TRUE LABELS OF THE HELD-OUT ROWS
np.array(y_test)[:150]

array(['001', '001', '001', '010', '010', '010', '001', '001', '010',
       '100', '010', '001', '100', '100', '010', '001', '001', '010',
       '010', '100', '010', '001', '100', '001', '001', '001', '100',
       '001', '010', '100', '001', '100', '010', '100', '010', '100',
       '010', '001', '010', '100', '010', '010', '100', '001', '010',
       '100', '100', '001', '100', '001', '001', '010', '100', '100',
       '100', '001', '001', '001', '100', '100', '100', '010', '001',
       '010', '001', '010', '010', '001', '001', '100', '100', '001',
       '001', '001', '010', '010', '001', '001', '010', '100', '100',
       '010', '001', '100', '100', '001', '100', '001', '001', '001',
       '010', '001', '001', '001', '010', '100', '001', '001', '100',
       '001', '001', '100', '010', '001', '100', '100', '100', '100',
       '010', '001', '001', '100', '010', '001', '010', '001', '100',
       '001', '001', '001', '100', '001', '100'], dtype='<U3')

### STEP X: SAVE ENSEMBLE MODELS, PREDICTIONS & BUILD CONFIG

# PERSIST THE CONFIG, THE TRAINED ENSEMBLE & THE PREDICTIONS.
# 'predictions' PREVIOUSLY RECEIVED `analysis`, WHICH IS THE MODULE IMPORTED IN THE
# IMPORTS CELL RATHER THAN ANY PREDICTION DATA.
# NOTE(review): regression_predictions IS ASSUMED TO BE THE INTENDED VALUE --
# CONFIRM AGAINST WHAT storage.save_ensemble EXPECTS.
storage.save_ensemble({
    'config': config,
    'ensemble': ensemble,
    'predictions': regression_predictions
})