### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.features as features
import ipynb.fs.full.training as training
import ipynb.fs.full.analysis as analysis
import ipynb.fs.full.storage as storage
import ipynb.fs.full.visualize as visualize
import ipynb.fs.full.misc as misc

In [2]:
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.table as table
import ipynb.fs.full.decide as decide

In [3]:
import numpy as np

### LOAD THE YAML CONFIG

In [4]:
# Read the run configuration (data path plus processing, features, splitting
# and ensemble settings) from the YAML file via the project's storage helper.
config = storage.load_yaml('extra/config.yaml')

In [5]:
# Display the parsed configuration for reference.
config

{'data': 'extra/dataset.csv',
 'processing': {'resample': True,
  'time': 'D',
  'aggregate': {'Open': 'first',
   'High': 'max',
   'Low': 'min',
   'Close': 'last',
   'Volume': 'sum'},
  'label': {'from': 'close', 'shift': 1}},
 'features': {'add': ['sk', 'sd', 'momentum'],
  'window': 14,
  'filter': ['close', 'momentum', 'sd', 'label']},
 'splitting': {'train_split': 0.8, 'validation_folds': 5},
 'ensemble': {'models': [{'linreg': None},
   {'linreg': None},
   {'linreg': None},
   {'linreg': None},
   {'linreg': None},
   {'lstm': {'layers': [{'lstm': {'value': 120, 'activation': 'relu'}},
      {'dropout': {'value': 0.15}},
      {'dense': {'value': 50, 'activation': 'relu'}},
      {'dense': {'value': 1}}],
     'epochs': 5,
     'batch': 8,
     'loss': 'mean_squared_error',
     'optimizer': 'rmsprop'}}]}}

### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [6]:
# Build the price dataframe from the settings in `config`. Per the preview
# below it has lowercase OHLCV columns plus a `label` column.
# NOTE(review): `label` looks like the next row's close (config: from 'close',
# shift 1) — confirm in processing.create_dataframe.
dataframe = processing.create_dataframe(config)

In [7]:
dataframe.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [8]:
# Add the configured features to the dataframe. Per the preview below, the
# result keeps only the columns close, momentum, sd and label.
dataset = features.add(dataframe, config['features'])

In [9]:
dataset.head(5)

Unnamed: 0_level_0,close,momentum,sd,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-02-01,1.392,-0.04649,14.842308,1.39737
2010-02-02,1.39737,-0.03928,13.972612,1.38969
2010-02-03,1.38969,-0.0507,11.302307,1.37395
2010-02-04,1.37395,-0.05393,11.014429,1.36763
2010-02-05,1.36763,-0.04356,11.636518,1.36409


### STEP 3: SPLIT & SCALE PRIMARY DATASET

In [10]:
features = dataset.loc[:, dataset.columns != 'label'].to_numpy()

In [11]:
labels = dataset[['label']].to_numpy()

In [12]:
primary, scaler = splitting.primary(features, labels, config['splitting']['train_split'])

### STEP 4: CROSS VALIDATION FOLD SPLITTING

In [13]:
# Partition the training portion into cross-validation folds; the fold count
# comes from the config.
folds = splitting.timeseries(primary['train'], config['splitting']['validation_folds'])

### STEP 5: TRAIN REGRESSION MODELS USING CROSS VALIDATION

In [14]:
ensemble = [None] * len(config['ensemble']['models'])

In [15]:
# LOOP THROUGH REQUESTED MOEDLS
for index, item in enumerate(config['ensemble']['models']):
    
    # MODEL PROPS
    name = list(item)[0]
    settings = item[name]
    
    # FOLD PREDICTIONS
    temp_predictions = []
    temp_labels = []
    
    # TRAIN & PREDICT WITH EACH FOLD
    for fold in folds:
        predictions = training.start(fold, name, settings)
        
        # APPEND TO COLLECTIONS
        temp_predictions.append(predictions)
        temp_labels.append(fold['test']['labels'])
        
    # APPEND RESULTS TO ENSEMBLE
    ensemble[index] = {
        'name': name + '_' + str(index),
        'predictions': np.concatenate(temp_predictions),
        'labels': np.concatenate(temp_labels)
    }

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 415 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 828 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 1241 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 1654 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train on 2067 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### STEP 6: CONSTRUCT TABLE FOR ENSEMBLE PREDICTIONS

In [16]:
# Assemble the per-model fold predictions and the true labels into one table
# (one column per model plus `label`, per the preview below).
ensemble_table = table.ensemble(ensemble)

In [17]:
ensemble_table.tail(5)

Unnamed: 0,linreg_0,linreg_1,linreg_2,linreg_3,linreg_4,lstm_5,label
2060,1.199858,1.199858,1.199858,1.199858,1.199858,1.191003,1.20106
2061,1.201056,1.201056,1.201056,1.201056,1.201056,1.191208,1.20614
2062,1.206117,1.206117,1.206117,1.206117,1.206117,1.195975,1.20107
2063,1.20107,1.20107,1.20107,1.20107,1.20107,1.192323,1.20664
2064,1.206629,1.206629,1.206629,1.206629,1.206629,1.197794,1.20304


### STEP 7: CLASSIFY LABELS

In [18]:
# Integer code assigned to each trading decision.
label_names = dict(buy=0, sell=1, hold=2)

In [19]:
# Decision threshold handed to decide.label below.
# NOTE(review): whether this is an absolute or a relative price change is not
# visible here — confirm in decide.label.
threshold = 0.003

In [20]:
# Turn the regression prediction table into a classification dataset.
# NOTE(review): the saved value_counts output lists class codes 1, 2 and 3,
# while label_names maps to 0, 1 and 2 — confirm how decide.label encodes
# classes, or re-run to refresh the output.
classification_dataset = decide.label({
    'predictions': ensemble_table,
    'label_names': label_names,
    'threshold': threshold
})

#### DECISION BREAKDOWN

In [21]:
# Relative frequency of each class code, to check class balance.
classification_dataset['label'].value_counts(normalize=True)

3    0.548668
2    0.232930
1    0.218402
Name: label, dtype: float64

In [22]:
# Preview: one prediction column per model, with the class code in `label`.
classification_dataset.head(5)

Unnamed: 0,linreg_0,linreg_1,linreg_2,linreg_3,linreg_4,lstm_5,label
0,1.440334,1.440334,1.440334,1.440334,1.440334,1.534178,3
1,1.432932,1.432932,1.432932,1.432932,1.432932,1.505156,1
2,1.447101,1.447101,1.447101,1.447101,1.447101,1.520116,1
3,1.461763,1.461763,1.461763,1.461763,1.461763,1.548405,3
4,1.463757,1.463757,1.463757,1.463757,1.463757,1.541343,2


#### RESHAPE LINREG PREDICTIONS (SCRATCH)

In [23]:
# Pull four of the linear-regression prediction columns into a NumPy array
# (linreg_4 and lstm_5 are not included).
# NOTE(review): `xaxa` is a placeholder name — consider a descriptive one.
xaxa = classification_dataset[['linreg_0', 'linreg_1', 'linreg_2', 'linreg_3']].to_numpy()

In [24]:
xaxa

array([[1.44033369, 1.44033369, 1.44033369, 1.44033369],
       [1.4329319 , 1.4329319 , 1.4329319 , 1.4329319 ],
       [1.44710112, 1.44710112, 1.44710112, 1.44710112],
       ...,
       [1.20611703, 1.20611703, 1.20611703, 1.20611703],
       [1.20107019, 1.20107019, 1.20107019, 1.20107019],
       [1.20662924, 1.20662924, 1.20662924, 1.20662924]])

In [25]:
# (rows, columns) of the extracted array.
xaxa.shape

(2065, 4)

In [26]:
    # Insert a length-1 middle axis: (rows, cols) -> (rows, 1, cols).
    # NOTE(review): this line carries stray leading indentation. IPython ran it
    # (see the shape output below), but it is an IndentationError in a plain
    # Python script — consider dedenting.
    reshaped = xaxa.reshape(xaxa.shape[0], 1, xaxa.shape[1])

In [27]:
reshaped.shape

(2065, 1, 4)

#### TRAIN A CLASSIFIER ON THE ENSEMBLE PREDICTIONS

In [28]:
import math
from sklearn.preprocessing import MinMaxScaler

In [29]:
features = classification_dataset[['predictions']]

KeyError: "None of [Index(['predictions'], dtype='object')] are in the [columns]"

In [None]:
labels = classification_dataset['label'].to_numpy()

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
limit = math.ceil(len(features) * 0.8)

In [None]:
x_train = scaler.fit_transform(features[:limit])
y_train = labels[:limit].ravel()

In [None]:
y_train[:50]

In [None]:
x_test = scaler.transform(features[limit:])
y_test = labels[limit:].ravel()

In [None]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import numpy as np

In [None]:
# Logistic-regression classifier on the scaled inputs. class_weight='balanced'
# reweights classes inversely to their frequency; random_state pins the seed.
model = LogisticRegression(random_state=0, class_weight='balanced')
model.fit(x_train, y_train)

In [None]:
# Mean accuracy on the held-out rows.
model.score(x_test, y_test)

In [None]:
# Predicted class codes for the held-out rows.
predictions = model.predict(x_test)

In [None]:
predictions

In [None]:
# Ground-truth class codes, shown for comparison with the predictions above.
np.array(y_test)