### IMPORTS

In [1]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.features as features
import ipynb.fs.full.training as training
import ipynb.fs.full.analysis as analysis
import ipynb.fs.full.storage as storage
import ipynb.fs.full.visualize as visualize
import ipynb.fs.full.misc as misc
import ipynb.fs.full.decision as decision

### LOAD THE YAML CONFIG

In [2]:
# Load the YAML config via the storage helper. Later cells pass it to
# processing.create_dataframe and read its 'features' entry.
config = storage.load_yaml('extra/config.yaml')

### STEP 1: PROCESS RAW DATA TO DATAFRAME

In [3]:
# Build the base frame from the config. The preview below shows date-indexed
# open/high/low/close/volume columns plus 'label'; in those rows 'label'
# matches the following row's close (presumably the next-step target; confirm
# in processing.create_dataframe).
dataframe = processing.create_dataframe(config)

In [4]:
# Peek at the first five rows to sanity-check the processed frame.
dataframe.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-01,1.43327,1.43356,1.43207,1.43335,39761.000053,1.43141
2010-01-03,1.43024,1.43359,1.42951,1.43141,3001.600003,1.44244
2010-01-04,1.43143,1.44556,1.42559,1.44244,80019.400094,1.43634
2010-01-05,1.44238,1.44834,1.43445,1.43634,79887.100067,1.44005
2010-01-06,1.43638,1.44342,1.42807,1.44005,80971.800085,1.43155


### STEP 2: ADD FEATURES TO DATAFRAME

In [5]:
# Add the engineered columns described by the config's 'features' entry.
# NOTE: `features` here is the imported ipynb.fs.full.features module. STEP 3
# rebinds that name to a NumPy array, so this cell cannot be re-run after
# STEP 3 unless the imports cell is executed again first.
dataset = features.add(dataframe, config['features'])

In [6]:
# Preview the feature-augmented frame (close, momentum, sd, label).
dataset.head(5)

Unnamed: 0_level_0,close,momentum,sd,label
Date_Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-02-01,1.392,-0.04649,14.842308,1.39737
2010-02-02,1.39737,-0.03928,13.972612,1.38969
2010-02-03,1.38969,-0.0507,11.302307,1.37395
2010-02-04,1.37395,-0.05393,11.014429,1.36763
2010-02-05,1.36763,-0.04356,11.636518,1.36409


### STEP 3: EXTRACT FEATURES & LABELS AS NUMPY ARRAYS

In [7]:
# Feature matrix: one row per date, columns in the order close, momentum, sd.
# NOTE(review): this rebinds `features`, which up to this point referred to the
# imported ipynb.fs.full.features module (used as features.add in STEP 2).
# After this cell the module is no longer reachable under that name; consider
# renaming this array (and its uses in STEP 4) to avoid the shadowing.
features = dataset[['close', 'momentum', 'sd']].to_numpy()

In [8]:
# Target values. The double brackets select a one-column DataFrame, so the
# result is a 2-D array of shape (n_rows, 1) rather than a flat vector;
# linear_regression flattens it again before returning.
labels = dataset[['label']].to_numpy()

### STEP 4: GENERATE CROSS VALIDATION FOLDS

In [17]:
from sklearn.model_selection import TimeSeriesSplit

In [18]:
# Time-ordered splitter with 5 folds; used below via tss.split(features) to
# obtain (train_index, test_index) pairs without shuffling the rows.
tss = TimeSeriesSplit(n_splits=5)

In [34]:
# Container for the per-fold train/test splits built in the next cell.
folds = []

In [35]:
# Build one {'train': ..., 'test': ...} dict per TimeSeriesSplit fold, each
# holding the matching slices of the feature matrix and the label array.
# `folds` is rebuilt from scratch here, so re-running this cell on its own
# never appends duplicate folds to an already-populated list.
folds = [
    {
        'train': {
            'features': features[train_index],
            'labels': labels[train_index],
        },
        'test': {
            'features': features[test_index],
            'labels': labels[test_index],
        },
    }
    for train_index, test_index in tss.split(features)
]

### STEP 5: TRAIN A LINEAR REGRESSION MODEL ON EACH FOLD

In [77]:
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd

In [78]:
def linear_regression(data):
    """Fit a linear regression on one fold and predict that fold's test set.

    Args:
        data: Fold dictionary with 'train' and 'test' entries, each holding
            'features' and 'labels' (NumPy arrays or other array-likes).

    Returns:
        dict with two 1-D NumPy arrays:
            'predictions': model output for the fold's test features.
            'labels': the fold's actual test labels, flattened.
    """
    train, test = data['train'], data['test']

    # TRAIN WITH TRAIN DATA (fit() hands back the fitted estimator itself)
    model = LinearRegression().fit(train['features'], train['labels'])

    predictions = model.predict(test['features'])

    # RETURN PREDICTIONS & REAL LABELS
    # np.asarray lets list-backed folds work as well; flatten() yields fresh
    # 1-D copies, so mutating the result never touches the fold's own arrays.
    return {
        'predictions': np.asarray(predictions).flatten(),
        'labels': np.asarray(test['labels']).flatten()
    }

In [80]:
# Empty frame that collects one column of test-set predictions per fold.
df = pd.DataFrame()

In [81]:
# Fit one model per fold and store its test-set predictions as column
# 'fold_<index>'. The true labels returned in blob['labels'] are not kept.
# NOTE(review): assigning columns this way needs every fold's test set to have
# the same length. Presumably TimeSeriesSplit guarantees that; confirm.
for index, fold in enumerate(folds):
    blob = linear_regression(fold)
    df['fold_' + str(index)] = blob['predictions']

In [82]:
# Show the per-fold prediction table (pandas elides the middle rows).
df

Unnamed: 0,fold_0,fold_1,fold_2,fold_3,fold_4
0,1.335568,1.293398,1.156981,1.124198,1.186807
1,1.319120,1.293243,1.160268,1.122078,1.184770
2,1.331265,1.284466,1.155369,1.124659,1.191456
3,1.333961,1.294553,1.162219,1.124835,1.194028
4,1.342050,1.303989,1.135720,1.115283,1.195311
...,...,...,...,...,...
511,1.289133,1.184234,1.125729,1.195133,1.109637
512,1.291943,1.178855,1.124956,1.198931,1.110321
513,1.284246,1.179543,1.127928,1.195581,1.117317
514,1.292067,1.165029,1.124277,1.195521,1.118316
