In [1]:
import sys
sys.path.append("..")
import Data as dt
import ChevalParesseux_lib as lib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
# I. Load data
# `data` is used below as a mapping: ticker -> frame that has a 'date' column.
data = dt.load_dataList(ticker_list=['A', 'AAPL', 'GME', 'META'])

# II. Making Samples
# Chronological split into three disjoint windows (train / test / embargo).
# `.loc` label slices on a DatetimeIndex include BOTH endpoints, so each window
# ends on the last day before the next one starts. Reusing the next window's
# start date as the end bound would place that day's rows in two sets.
training_data = {}
testing_data = {}
embargo_data = {}

for ticker in data.keys():
    # Work on a copy so the loaded frame keeps its original index.
    full_df = data[ticker].copy()
    full_df.index = pd.to_datetime(full_df['date'])
    training_data[ticker] = full_df.loc['2005-01-01':'2018-12-31']
    testing_data[ticker] = full_df.loc['2019-01-01':'2022-12-31']
    embargo_data[ticker] = full_df.loc['2023-01-01':]

# Stack every ticker's training window row-wise, in dict insertion order.
full_training_data = pd.concat(list(training_data.values()), axis=0)

In [3]:
# Columns the strategy is told not to treat as features.
non_feature_columns = [
    'date', 'code', 'exchange',
    'open', 'high', 'low', 'close',
    'volume', 'count_trx', 'label',
]
date_name = 'date'
price_name = 'close'
# Bid and ask opens both point at the same 'open' column.
bid_open_name = ask_open_name = 'open'
n_jobs = 9

# Build the strategy and register the column configuration in one chain.
strategy = lib.ML_strategy(
    n_jobs=n_jobs,
    date_name=date_name,
    bid_open_name=bid_open_name,
    ask_open_name=ask_open_name,
).set_params(
    non_feature_columns=non_feature_columns,
    price_name=price_name,
)

In [15]:
# ======= I. Models used by the training-processing stages =======
# One model each for the sampling, labelling and cleaning stages.
sampling_model, labeller_model, cleaner_model = (
    lib.DataSampler,
    lib.tripleBarrier_labeller,
    lib.FeaturesCleaner,
)

# Feature extractors, kept in the order they are handed to the strategy.
features_model = [
    lib.average_feature,
    lib.minimum_feature,
    lib.volatility_feature,
    lib.quantile_feature,
    lib.Z_momentum_feature,
    lib.nonlinear_tempReg_feature,
    lib.hurst_exponent_feature,
]

# ------- I.1 Register the models on the strategy -------
stage_models = {
    "sampling_model": sampling_model,
    "labeller_model": labeller_model,
    "features_model": features_model,
    "cleaner_model": cleaner_model,
}
strategy = strategy.set_models(**stage_models)

In [23]:
# ======= I. Parameter sets for the training-processing stages =======
# Per-column aggregation rules, nested into the sampler parameters below.
bar_aggregation = {
    "open": "first", "high": "max", "low": "min", "close": "last",
    "volume": "sum",
    "date": "first", "code": "first", "exchange": "first",
    "count_trx": "sum",
}
sampling_params = {
    "sampling_method": "daily_volBars",
    "column_name": "volume",
    "grouping_column": "code",
    "new_cols_methods": "mean",
    "vol_threshold": 0.005,
    "aggregation_dict": bar_aggregation,
}

# Labeller parameters: every value is wrapped in a one-element list.
labeller_params = {
    name: [value]
    for name, value in (
        ("upper_barrier", 1),
        ("lower_barrier", 1.5),
        ("vertical_barrier", 20),
        ("vol_window", 20),
        ("smoothing_method", "ewma"),
        ("window_smooth", 5),
        ("lambda_smooth", 0.2),
    )
}

# Feature parameters: a list of candidate values per key.
features_params = {
    "window": [5, 10, 30, 60, 120],
    "power": [3, 4, 5],
    "quantile": [0.1, 0.9],
    "smoothing_method": [None, "ewma"],
    "window_smooth": [10],
    "lambda_smooth": [0.2],
}

# Thresholds handed to the features cleaner.
cleaner_params = {"stationarity_threshold": 0.05, "outliers_threshold": 5}

# ------- I.1 Push the four parameter sets into the strategy -------
stage_params = {
    "sampling_params": sampling_params,
    "labeller_params": labeller_params,
    "features_params": features_params,
    "cleaner_params": cleaner_params,
}
strategy = strategy.set_params(**stage_params)

In [24]:
# Run the training pipeline on the stacked training frame; the call returns
# three objects, unpacked in the order given below.
training_outputs = strategy.get_training_data(training_data=full_training_data)
stacked_data, processed_data, features_informations = training_outputs

Labelling data...


100%|██████████| 4/4 [00:07<00:00,  1.77s/it]


Extracting features...


100%|██████████| 4/4 [02:53<00:00, 43.37s/it]


In [None]:
# Built in three groups; insertion order matches the flat literal it replaces.
# Column names.
set_params = {'labels_name': 'label', 'price_name': 'close'}
# presumably resampling settings — confirm against the consumer of this dict
set_params.update({'n_samples': 5000, 'replacement': True})
# Same values as the single entries of labeller_params for these three keys.
# NOTE(review): there is no 'lower_barrier' here — confirm that is intended.
set_params.update({'vol_window': 20, 'upper_barrier': 1, 'vertical_barrier': 20})

In [None]:
# Decision-tree hyper-parameters.
# NOTE(review): the keys match scikit-learn's DecisionTreeClassifier arguments —
# confirm against the estimator that actually consumes this dict.
dTree_params = dict([
    ('criterion', 'gini'),
    ('max_depth', 4),
    ('min_samples_split', 100),
    ('min_samples_leaf', 100),
    ('max_features', 50),
])