### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.features as features
import ipynb.fs.full.training as training
import ipynb.fs.full.analysis as analysis
import ipynb.fs.full.storage as storage
import ipynb.fs.full.visualize as visualize
import ipynb.fs.full.misc as misc

In [2]:
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.table as table
import ipynb.fs.full.decide as decide

In [3]:
import numpy as np

### LOAD THE YAML CONFIG

In [4]:
config = storage.load_yaml('extra/config.yaml')

In [5]:
config

{'data': 'extra/dataset.csv',
 'processing': {'resample': True,
  'time': 'D',
  'aggregate': {'Open': 'first',
   'High': 'max',
   'Low': 'min',
   'Close': 'last',
   'Volume': 'sum'},
  'label': {'from': 'close', 'shift': 1}},
 'features': {'add': ['sk', 'sd', 'momentum'],
  'window': 14,
  'filter': ['close', 'momentum', 'sd', 'label']},
 'splitting': {'train_split': 0.8, 'validation_folds': 5},
 'ensemble': {'models': [{'linreg': None},
   {'linreg': None},
   {'lstm': {'morph': {'batch': 8, 'window': 4, 'offset': 1},
     'layers': [{'lstm': {'value': 120, 'activation': 'relu'}},
      {'dropout': {'value': 0.15}},
      {'dense': {'value': 50, 'activation': 'relu'}},
      {'dense': {'value': 1}}],
     'epochs': 5,
     'batch': 8,
     'loss': 'mean_squared_error',
     'optimizer': 'rmsprop'}}]}}

### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [6]:
dataframe = processing.create_dataframe(config)

In [7]:
dataframe.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [8]:
dataset = features.add(dataframe, config['features'])

In [9]:
dataset.head(5)

Unnamed: 0_level_0,close,momentum,sd,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-02-01,1.392,-0.04649,14.842308,1.39737
2010-02-02,1.39737,-0.03928,13.972612,1.38969
2010-02-03,1.38969,-0.0507,11.302307,1.37395
2010-02-04,1.37395,-0.05393,11.014429,1.36763
2010-02-05,1.36763,-0.04356,11.636518,1.36409


### STEP 3: SPLIT & SCALE PRIMARY DATASET

In [10]:
features = dataset.loc[:, dataset.columns != 'label'].to_numpy()

In [11]:
labels = dataset[['label']].to_numpy()

In [12]:
primary, scaler = splitting.primary(features, labels, config['splitting']['train_split'])

### STEP 4: CROSS VALIDATION FOLD SPLITTING

In [13]:
folds = splitting.timeseries(primary['train'], config['splitting']['validation_folds'])

## LSTM TEST STARTS

In [133]:
# USE THE PUBLIC tensorflow.keras PATH FOR EVERYTHING; tensorflow.python.* IS A
# PRIVATE NAMESPACE AND SHOULD NOT BE MIXED WITH THE PUBLIC API.
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [160]:
foo_dataset = folds[4]

In [161]:
foo_dataset['train']['features'][:10]

array([[0.79319489, 0.34691755, 0.12074242],
       [0.80526339, 0.38261946, 0.11112053],
       [0.78800342, 0.32607081, 0.08157757],
       [0.75262945, 0.31007675, 0.07839263],
       [0.73842593, 0.3614261 , 0.08527512],
       [0.73047015, 0.35082941, 0.08250577],
       [0.73339177, 0.33835108, 0.07597503],
       [0.76193366, 0.39351325, 0.08361401],
       [0.75424757, 0.37949988, 0.0882472 ],
       [0.74053848, 0.38148056, 0.09626104]])

In [162]:
foo_dataset['train']['labels'][:10]

array([[1.39737],
       [1.38969],
       [1.37395],
       [1.36763],
       [1.36409],
       [1.36539],
       [1.37809],
       [1.37467],
       [1.36857],
       [1.36305]])

In [170]:
morphed = TimeseriesGenerator(
    foo_dataset['train']['features'],
    foo_dataset['train']['labels'],
    length=4,
    batch_size=15,
    shuffle=True
)

In [171]:
len(morphed)

138

In [165]:
2063 / 4

515.75

In [166]:
# STRIDE == OFFSET

In [167]:
x, y = morphed[0]

In [168]:
x

array([[[0.79319489, 0.34691755, 0.12074242],
        [0.80526339, 0.38261946, 0.11112053],
        [0.78800342, 0.32607081, 0.08157757],
        [0.75262945, 0.31007675, 0.07839263]]])

In [169]:
y

array([[1.36409]])

In [144]:
morphed[0][0].shape

(1, 4, 3)

In [145]:
# SCRATCH MODEL: ONE LSTM LAYER -> RELU DENSE LAYER -> SINGLE-UNIT OUTPUT
model = Sequential()
# input_shape IS TAKEN FROM DIMS 1 AND 2 OF THE FIRST BATCH'S INPUT ARRAY (DIM 0 IS SKIPPED)
model.add(LSTM(100, input_shape=(morphed[0][0].shape[1], morphed[0][0].shape[2])))
model.add(Dense(100, activation='relu'))
model.add(Dense(1))
# ADAM OPTIMIZER WITH MEAN SQUARED ERROR LOSS
model.compile(optimizer='adam', loss='mse')

In [146]:
# TEST BIDIRECTIONAL LSTM

In [147]:
# Model.fit ACCEPTS GENERATORS / Sequence OBJECTS DIRECTLY; fit_generator IS DEPRECATED.
# NOTE(review): steps_per_epoch=1 CONSUMES A SINGLE BATCH PER EPOCH. len(morphed) WOULD
# COVER THE WHOLE GENERATOR EACH EPOCH -- CONFIRM WHICH ONE IS INTENDED.
model.fit(morphed, steps_per_epoch=1, epochs=200, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x277affa7108>

In [148]:
# STEPS = LENGTH OF MORPHED

## LSTM TEST ENDS

### STEP 5: TRAIN REGRESSION MODELS USING CROSS VALIDATION

In [14]:
ensemble = [None] * len(config['ensemble']['models'])

In [15]:
# TRAIN EVERY REQUESTED MODEL ON EVERY FOLD
for index, item in enumerate(config['ensemble']['models']):

    # EACH ENTRY IS A ONE-KEY DICT: {MODEL NAME: MODEL SETTINGS}
    name = list(item.keys())[0]
    settings = item[name]

    # PREDICT EACH FOLD, KEEPING FOLD ORDER
    fold_predictions = [training.start(fold, name, settings) for fold in folds]

    # MATCHING GROUND TRUTH FOR THE SAME FOLDS
    fold_labels = [fold['test']['labels'] for fold in folds]

    # STITCH THE FOLDS TOGETHER AND STORE THEM IN THE MODEL'S SLOT
    ensemble[index] = {
        'name': '{}_{}'.format(name, index),
        'predictions': np.concatenate(fold_predictions),
        'labels': np.concatenate(fold_labels)
    }

ValueError: Input 0 of layer lstm is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: [None, 8, 4, 3]

### STEP 6: CONSTRUCT TABLE FOR ENSEMBLE PREDICTIONS

In [None]:
ensemble_table = table.ensemble(ensemble)

In [None]:
ensemble_table.tail(5)

### STEP 7: CLASSIFY LABELS

In [None]:
label_names = {
    'buy': 0,
    'sell': 1,
    'hold': 2
}

In [None]:
threshold = 0.003

In [None]:
classification_dataset = decide.label({
    'predictions': ensemble_table,
    'label_names': label_names,
    'threshold': threshold
})

#### DECISION BREAKDOWN

In [None]:
classification_dataset['label'].value_counts(normalize=True)

In [None]:
classification_dataset.head(5)

#### SCRATCH: MORPH CLASSIFICATION DATASET INTO SEQUENCES

In [None]:
# NOTE(review): ONLY ONE ARGUMENT IS PASSED AND THE TRAILING COMMA SUGGESTS THE REST WERE
# NEVER FILLED IN -- CONFIRM WHAT splitting.morph EXPECTS BEFORE RELYING ON THIS CELL.
testingzz = splitting.morph(classification_dataset, )

In [None]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
def morph(features, labels, params):
    """Wrap features and labels in a Keras TimeseriesGenerator.

    Args:
        features: Sequence of input rows, passed through as the generator's data.
        labels: Sequence of targets, passed through as the generator's targets.
        params: Dict with the keys
            'batch'  -- number of windows per batch (batch_size),
            'window' -- number of timesteps per window (length),
            'offset' -- step between the starts of consecutive windows (stride).

    Returns:
        A TimeseriesGenerator yielding (inputs, targets) batches.

    Raises:
        KeyError: If 'batch', 'window' or 'offset' is missing from params.
    """

    # DECONSTRUCT PARAMS
    batch = params['batch']
    window = params['window']
    offset = params['offset']

    # GENERATE & RETURN
    # THE OFFSET IS THE DISTANCE BETWEEN WINDOWS, WHICH IS `stride`.
    # `sampling_rate` WOULD INSTEAD SKIP ROWS *INSIDE* EACH WINDOW AND SHRINK
    # IT TO window / offset TIMESTEPS. BOTH ARE IDENTICAL WHEN offset == 1.
    return TimeseriesGenerator(
        features,
        labels,
        length=window,
        stride=offset,
        batch_size=batch
    )

In [None]:
foo_features = classification_dataset.loc[:, classification_dataset.columns != 'label'].to_numpy()

In [None]:
foo_labels = classification_dataset['label'].to_numpy()

In [None]:
foo_test = morph(foo_features, foo_labels, {
    'window': 4,
    'offset': 1,
    'batch': 5
})

In [None]:
foo_test[0][0].shape

In [None]:
len(foo_test)

In [None]:
xaxa = classification_dataset[['linreg_0', 'linreg_1', 'linreg_2', 'linreg_3']].to_numpy()

In [None]:
xaxa

In [None]:
xaxa.shape

In [None]:
    reshaped = xaxa.reshape(xaxa.shape[0], 1, xaxa.shape[1])

In [None]:
reshaped.shape

#### SCRATCH: LOGISTIC REGRESSION ON CLASSIFIED LABELS

In [None]:
import math
from sklearn.preprocessing import MinMaxScaler

In [None]:
# NOTE(review): REBINDS `features`, THE NAME THE features MODULE WAS IMPORTED UNDER IN THE
# FIRST CELL -- AFTER THIS CELL RUNS, features.add(...) NO LONGER WORKS. CONFIRM THE SHADOWING IS INTENDED.
features = classification_dataset[['predictions']]

In [None]:
labels = classification_dataset['label'].to_numpy()

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
limit = math.ceil(len(features) * 0.8)

In [None]:
x_train = scaler.fit_transform(features[:limit])
y_train = labels[:limit].ravel()

In [None]:
y_train[:50]

In [None]:
x_test = scaler.transform(features[limit:])
y_test = labels[limit:].ravel()

In [None]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import numpy as np

In [None]:
model = LogisticRegression(random_state=0, class_weight='balanced')
model.fit(x_train, y_train)

In [None]:
model.score(x_test, y_test)

In [None]:
predictions = model.predict(x_test)

In [None]:
predictions

In [None]:
np.array(y_test)