### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.features as features
import ipynb.fs.full.training as training
import ipynb.fs.full.analysis as analysis
import ipynb.fs.full.storage as storage
import ipynb.fs.full.visualize as visualize
import ipynb.fs.full.misc as misc
import ipynb.fs.full.decision as decision

### LOAD THE YAML CONFIG

In [2]:
# Load the run configuration; later cells read config['features'] and
# config['ensemble']['models'] from it.
config = storage.load_yaml('extra/config.yaml')

### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [3]:
# Build the base dataframe; config is the only input passed in.
dataframe = processing.create_dataframe(config)

In [4]:
# Preview the first five rows (the recorded output shows open/high/low/close, volume and label columns).
dataframe.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [5]:
# Derive the feature dataset from the base dataframe, driven by config['features'].
dataset = features.add(dataframe, config['features'])

In [6]:
# Preview the feature dataset. In the recorded output it starts at 2010-02-01,
# later than the base dataframe's 2010-01-01 -- presumably leading rows are
# dropped while the features warm up (TODO confirm in features.add).
dataset.head(5)

Unnamed: 0_level_0,close,momentum,sd,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-02-01,1.392,-0.04649,14.842308,1.39737
2010-02-02,1.39737,-0.03928,13.972612,1.38969
2010-02-03,1.38969,-0.0507,11.302307,1.37395
2010-02-04,1.37395,-0.05393,11.014429,1.36763
2010-02-05,1.36763,-0.04356,11.636518,1.36409


### STEP 3: TRAIN MODEL ENSEMBLE

In [7]:
# Pre-allocate one slot per configured model; the training loop fills each slot by index.
ensemble = [None] * len(config['ensemble']['models'])

In [8]:
# TRAIN EVERY MODEL REQUESTED IN THE CONFIG
for index, item in enumerate(config['ensemble']['models']):

    # EACH ENTRY IS KEYED BY THE MODEL NAME, WITH ITS PARAMS AS THE VALUE
    name = list(item)[0]
    params = item[name]

    # PREPARE THIS MODEL'S TRAINING DATA (IT CARRIES A 'scaler' ENTRY), THEN TRAIN
    data = features.split(dataset, name, params)
    result = training.start(data, name, params)

    # STORE THE OUTCOME IN THE SLOT RESERVED FOR THIS MODEL
    ensemble[index] = dict(
        name=name,
        result=result,
        scaler=data['scaler'],
    )

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 1861 samples, validate on 620 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 1861 samples, validate on 620 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### STEP 4: STITCH ALL REGRESSION PREDICTIONS TOGETHER

In [9]:
# Hand the feature dataset and the trained ensemble to misc.regression_table.
# The recorded result has a label column, an ensemble column and one column per model.
regression_predictions = misc.regression_table(
    dict(dataset=dataset, ensemble=ensemble)
)

In [10]:
# Preview the stitched table: label, ensemble and one column per trained model.
regression_predictions.head(5)

Unnamed: 0_level_0,label,ensemble,linreg_0,lstm_1,lstm_2
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-07,0.341122,0.347565,0.369891,0.323474,0.34933
2018-01-08,0.331619,0.328441,0.354823,0.30185,0.328649
2018-01-09,0.337909,0.315712,0.34555,0.28809,0.313497
2018-01-10,0.357856,0.325799,0.351705,0.303569,0.322124
2018-01-11,0.393802,0.352287,0.371198,0.336557,0.349107


### STEP 5: VISUALIZE DIFFERENCES

In [11]:
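# Disabled. NOTE: the STEP 4 table is named regression_predictions (regression_table is undefined here).
# visualize.differences(regression_predictions)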

### STEP 6: CREATE DATASET FOR CLASSIFICATION

In [12]:
# Class name -> integer code lookup for the classification labels.
# NOTE(review): the recorded classification labels further down are 1/2/3,
# which does not line up with these 0/1/2 codes -- confirm the encoding.
label_names = dict(buy=0, sell=1, hold=2)

In [13]:
# NOTE(review): threshold is not referenced by any cell visible in this notebook;
# presumably it was consumed by the missing In [14] cell -- confirm or remove.
threshold = 0.02

In [15]:
# NOTE(review): classification_dataset is not created by any cell shown in this
# notebook (execution jumps from In [13] to In [15]), so this fails on
# Restart & Run All until the cell that builds it is restored.
# The recorded labels are 1/2/3, which does not match label_names (0/1/2) -- confirm the encoding.
classification_dataset.head(5)

Unnamed: 0_level_0,label,ensemble,linreg_0,lstm_1,lstm_2
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-08,2,-0.056596,-0.041589,-0.069189,-0.061028
2018-01-09,3,-0.039525,-0.026482,-0.046656,-0.047201
2018-01-10,1,0.03145,0.017655,0.052335,0.027149
2018-01-11,1,0.078164,0.053943,0.103156,0.080441
2018-01-12,3,0.140595,0.090405,0.180504,0.15331


### STEP 7: TRAIN A CLASSIFIER

In [16]:
features = classification_dataset[['ensemble', 'linreg_0', 'lstm_1', 'lstm_2']].to_numpy()

In [17]:
labels = classification_dataset['label'].to_numpy()

In [18]:
labels[:10]

array([2, 3, 1, 1, 3, 1, 3, 2, 1, 3], dtype=int64)

#### DSFDSF

In [19]:
import math
from sklearn.preprocessing import MinMaxScaler

In [20]:
scaler = MinMaxScaler(feature_range=(0, 1))

In [21]:
limit = math.ceil(len(features) * 0.8)

In [22]:
x_train = scaler.fit_transform(features[:limit])
y_train = labels[:limit]

In [23]:
x_test = scaler.transform(features[limit:])
y_test = labels[limit:]

In [24]:
# from sklearn.preprocessing import MultiLabelBinarizer
# labels = MultiLabelBinarizer().fit_transform(labels)

#### TRAIN

In [25]:
from sklearn.svm import SVC
import numpy as np

In [26]:
# RBF-kernel support vector classifier with a one-vs-one decision function.
model = SVC(
    kernel='rbf',
    gamma='scale',
    C=0.05,
    decision_function_shape='ovo',
)
# Fit on the training split; as the cell's last expression this also displays the fitted estimator.
model.fit(x_train, y_train)

SVC(C=0.05, decision_function_shape='ovo')

In [27]:
# Accuracy of the classifier on the held-out split.
model.score(x_test, y_test)

0.42276422764227645

In [28]:
# Predicted class for every held-out sample.
predictions = model.predict(x_test)

In [29]:
# Inspect the predictions (the recorded output holds 123 values, so the 150 bound shows all of them).
# NOTE(review): the recorded output is class 3 for every sample.
predictions[:150]

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], dtype=int64)

In [30]:
# True labels for the same held-out samples, for side-by-side comparison with
# the predictions. y_test is already a NumPy array slice, so no np.array copy is needed.
y_test[:150]

array([3, 3, 2, 2, 2, 2, 3, 3, 1, 3, 3, 1, 3, 2, 3, 3, 2, 2, 3, 2, 3, 1,
       3, 3, 3, 1, 3, 2, 1, 1, 1, 2, 1, 2, 1, 2, 3, 2, 1, 2, 2, 1, 3, 2,
       1, 1, 1, 3, 3, 2, 2, 1, 1, 1, 3, 3, 3, 1, 1, 1, 2, 3, 2, 3, 2, 2,
       3, 1, 1, 1, 3, 3, 3, 2, 2, 3, 2, 2, 3, 1, 2, 3, 3, 1, 3, 1, 3, 3,
       2, 2, 3, 2, 1, 2, 3, 3, 3, 1, 3, 3, 1, 2, 3, 3, 1, 1, 1, 2, 3, 3,
       1, 2, 3, 2, 3, 3, 3, 3, 3, 1, 3, 1, 1], dtype=int64)

### FINAL STEP: SAVE ENSEMBLE MODELS, PREDICTIONS & BUILD CONFIG

# Persist the config, the trained ensemble and its stitched predictions.
# FIX: 'predictions' used to receive `analysis`, which in this notebook is only
# the imported ipynb.fs.full.analysis module, not prediction data.
# NOTE(review): regression_predictions (STEP 4) is assumed to be the intended
# payload -- confirm against storage.save_ensemble.
storage.save_ensemble({
    'config': config,
    'ensemble': ensemble,
    'predictions': regression_predictions
})