#### Using TuneTA with pandas-ta indicators

In [1]:
import os
from pathlib import Path


# Move the working directory to the freqtrade project root.
# Edit `project_root` in this cell to ensure that the printed path is the repository root;
# all other paths in this notebook are given relative to the directory shown in the output.
project_root = "/Users/liuqiaowei/SourceCode/freqtrade/user_data/notebooks"
i = 0  # number of parent-directory hops taken so far
try:
    os.chdir(project_root)
    # Relative path on purpose: it is resolved against the current directory on every check.
    license_file = Path("LICENSE")
    if not license_file.is_file():
        # Climb at most four levels, stopping at the first directory that contains LICENSE.
        while i < 4:
            os.chdir(Path.cwd().joinpath(".."))
            i += 1
            if license_file.is_file():
                break
        project_root = Path.cwd()
except FileNotFoundError:
    print("Please define the project root relative to the current directory")
print(Path.cwd())

/Users/liuqiaowei/SourceCode/freqtrade


In [2]:
from freqtrade.configuration import Configuration


# Notebook settings - adjust these to your own setup.

# Build the freqtrade configuration from the existing user config file.
config = Configuration.from_files(["./user_data/config.json"])

# Override the timeframe: this notebook works on 1h candles.
config["timeframe"] = "1h"

# Directory that holds the downloaded candle data.
data_location = config["datadir"]

# The pairs to analyse are read from config['exchange']['pair_whitelist'].

In [3]:
from freqtrade.data.history import load_pair_history
from freqtrade.enums import CandleType
from pandas_ta import percent_return

# Load the candle history of every whitelisted pair, keyed by pair name.
candles_dict = {}
for pair in config['exchange']['pair_whitelist']:
    raw_candles = load_pair_history(
        pair=pair,
        timeframe=config["timeframe"],
        datadir=data_location,
        candle_type=CandleType.FUTURES,
        data_format="feather",  # Make sure this matches the format of your downloaded data
    )
    # Tag every row with its pair and index the frame by (date, sym).
    candles = raw_candles.assign(sym=pair).set_index(['date', 'sym'])
    # Prediction target for TuneTA.
    # NOTE(review): only `offset=-4` is passed, so this relies on percent_return's default
    # length - confirm whether a 4-candle forward return (length=4) was intended.
    candles['return'] = percent_return(candles['close'], offset=-4)
    # Keep only the most recent 1000 candles of each pair.
    candles_dict[pair] = candles.iloc[-1000:]

In [4]:
import pandas_ta as ta

# List the indicator categories that pandas-ta exposes through `ta.Category`.
ta.Category.keys()

dict_keys(['candles', 'cycles', 'momentum', 'overlap', 'performance', 'statistics', 'trend', 'volatility', 'volume'])

In [5]:
# Build the list of indicator names that is handed to TuneTA
target_categories = ['statistics']
exclude_indicators = ['stoch', 'tsignals', 'xsignals']

# Every indicator of the selected categories, minus the exclusions, prefixed with 'pta.'
indicator_names = [
    f'pta.{indicator}'
    for target_category in target_categories
    for indicator in ta.Category[target_category]
    if indicator not in exclude_indicators
]

indicator_names

['pta.entropy',
 'pta.kurtosis',
 'pta.mad',
 'pta.median',
 'pta.quantile',
 'pta.skew',
 'pta.stdev',
 'pta.tos_stdevall',
 'pta.variance',
 'pta.zscore']

In [6]:
from tuneta.tune_ta import TuneTA
import pandas as pd

# Stack the per-pair candles into one frame, sort it by its index and drop every row
# that contains a missing value.
candles_all = pd.concat(list(candles_dict.values()), axis=0).sort_index().dropna()

# Target: the 'return' column. Features: every remaining column.
y = candles_all['return']
X = candles_all.drop(columns=['return'])

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [7]:
from sklearn.model_selection import train_test_split

# 70/30 split without shuffling, so the original row order is preserved.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=False,
)

In [8]:
# Create the TuneTA optimizer.
# NOTE(review): n_jobs is presumably the number of parallel workers - confirm in the TuneTA README.
tt = TuneTA(
    verbose=True,
    n_jobs=4,
)

In [9]:
# Search settings passed to TuneTA.fit
fit_settings = dict(
    indicators=indicator_names,
    # Parameter search ranges; the fit log shows one study per indicator and range.
    ranges=[(4, 12), (12, 24)],
    trials=300,
    early_stop=50,
)
tt.fit(X_train, y_train, **fit_settings)

# Show the fit time per indicator
tt.fit_times()

[I 2024-11-13 22:37:27,393] A new study created in memory with name: pta.entropy(X.close, length=trial.suggest_int('length', 4, 12), lookahead=False, )
[I 2024-11-13 22:37:27,420] A new study created in memory with name: pta.kurtosis(X.close, length=trial.suggest_int('length', 4, 12), lookahead=False, )
[I 2024-11-13 22:37:27,472] A new study created in memory with name: pta.mad(X.close, length=trial.suggest_int('length', 4, 12), lookahead=False, )
[I 2024-11-13 22:37:27,498] A new study created in memory with name: pta.median(X.close, length=trial.suggest_int('length', 4, 12), lookahead=False, )
[I 2024-11-13 22:37:28,675] Trial 0 finished with value: 0.042511476160917196 and parameters: {'length': 7}. Best is trial 0 with value: 0.042511476160917196.
[I 2024-11-13 22:37:28,676] Trial 0 finished with value: 0.04136713184097311 and parameters: {'length': 7}. Best is trial 0 with value: 0.04136713184097311.
[I 2024-11-13 22:37:28,681] Trial 0 finished with value: 0.039538356470589585 an

    Indicator           Times
--  ----------------  -------
10  pta.mad             54.11
11  pta.mad             52.51
 7  pta.quantile        45.21
19  pta.kurtosis        41.25
 6  pta.median          38.26
 5  pta.quantile        37.55
 9  pta.stdev           35.19
15  pta.variance        34.68
 3  pta.entropy         34.1
 8  pta.stdev           31.95
16  pta.skew            30.98
14  pta.zscore          30.05
 4  pta.median          29.9
12  pta.zscore          29.03
 2  pta.entropy         28.45
18  pta.kurtosis        28.09
17  pta.skew            27.98
13  pta.variance        27.26
 1  pta.tos_stdevall    19.89
 0  pta.tos_stdevall    19.81


In [10]:
# Report each tuned indicator's correlation to the target; the indicator-to-indicator table is off.
tt.report(
    features_corr=False,
    target_corr=True,
)


Indicator Correlation to Target:

                                      Correlation
----------------------------------  -------------
pta_tos_stdevall_length_13_ddof_21       0.055304
pta_tos_stdevall_length_11_ddof_9        0.053432
pta_entropy_length_16                    0.04548
pta_entropy_length_8                     0.041774
pta_median_length_22                     0.041516
pta_quantile_length_21_q_14              0.041483
pta_median_length_8                      0.041475
pta_quantile_length_9_q_11               0.041418
pta_stdev_length_11_ddof_7               0.039382
pta_stdev_length_19_ddof_16              0.039323
pta_mad_length_14                        0.039287
pta_mad_length_8                         0.039187
pta_zscore_length_8                      0.034201
pta_variance_length_21_ddof_18           0.028499
pta_zscore_length_19                     0.028
pta_variance_length_11_ddof_10           0.027698
pta_skew_length_17                       0.026425
pta_skew_length_8  

In [11]:
# Prune the tuned indicators with a maximum inter-correlation of 0.8.
# NOTE(review): presumably this drops indicators that correlate too strongly with a better one - confirm.
tt.prune(
    max_inter_correlation=0.8,
)

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [12]:
# Report again after pruning, this time including the indicator-to-indicator correlations.
tt.report(
    features_corr=True,
    target_corr=True,
)


Indicator Correlation to Target:

                                      Correlation
----------------------------------  -------------
pta_tos_stdevall_length_13_ddof_21       0.055304
pta_entropy_length_16                    0.04548
pta_entropy_length_8                     0.041774
pta_zscore_length_8                      0.034201
pta_zscore_length_19                     0.028
pta_skew_length_17                       0.026425
pta_skew_length_8                        0.0251
pta_kurtosis_length_8                    0.023199
pta_kurtosis_length_16                   0.018614

Indicator Correlation to Each Other:

                                      pta_tos_stdevall_length_13_ddof_21    pta_entropy_length_16    pta_entropy_length_8    pta_zscore_length_8    pta_zscore_length_19    pta_skew_length_17    pta_skew_length_8    pta_kurtosis_length_8    pta_kurtosis_length_16
----------------------------------  ------------------------------------  -----------------------  --------------------

In [14]:
# Keep the indicators whose correlation to the target is above the threshold.
min_target_correlation = 0.02
target_corr = tt.t_corr
indicators_corr_df = target_corr.loc[target_corr['Correlation'] > min_target_correlation]
indicators_corr_df

Unnamed: 0,Correlation
pta_tos_stdevall_length_13_ddof_21,0.055304
pta_entropy_length_16,0.04548
pta_entropy_length_8,0.041774
pta_zscore_length_8,0.034201
pta_zscore_length_19,0.028
pta_skew_length_17,0.026425
pta_skew_length_8,0.0251
pta_kurtosis_length_8,0.023199


In [17]:
# Names of the selected indicators (name plus tuned parameters), e.g. 'pta_entropy_length_16'.
selected_index = indicators_corr_df.index
indicator_with_params_list = selected_index.tolist()

In [ ]:
# Dynamically calculate the selected indicators for every pair
import pandas_ta as ta


def parse_indicator_column(column_name, prefix='pta_'):
    """Split a TuneTA column name into an indicator name and keyword arguments.

    Example: 'pta_tos_stdevall_length_13_ddof_21' -> ('tos_stdevall', {'length': 13, 'ddof': 21}).

    The first numeric token marks the value of the first parameter, and the single
    token in front of it is taken as that parameter's name. Everything before that
    (minus `prefix`) is the indicator name. A column without any numeric token is an
    indicator without parameters.

    Raises:
        ValueError: if the name cannot be split into name=value pairs.
    """
    name = column_name[len(prefix):] if column_name.startswith(prefix) else column_name
    tokens = name.split('_')
    first_value_pos = next((pos for pos, token in enumerate(tokens) if token.isdigit()), None)
    if first_value_pos is None:
        return name, {}
    if first_value_pos < 2:
        raise ValueError(f'Cannot find an indicator name in {column_name!r}')

    indicator_name = '_'.join(tokens[:first_value_pos - 1])
    indicator_kwargs = {}
    pending_name = []
    for token in tokens[first_value_pos - 1:]:
        if not token.isdigit():
            pending_name.append(token)
            continue
        if not pending_name:
            raise ValueError(f'Value {token!r} has no parameter name in {column_name!r}')
        indicator_kwargs['_'.join(pending_name)] = int(token)
        pending_name = []
    if pending_name:
        raise ValueError(f'Parameter {"_".join(pending_name)!r} has no value in {column_name!r}')
    return indicator_name, indicator_kwargs


factors_df_dict = {}
# NOTE(review): open_df is created here but not populated yet.
open_df = pd.DataFrame()

for pair, pair_candles in candles_dict.items():
    # The frames in candles_dict are indexed by (date, sym), so 'date' is no longer a
    # column and set_index('date') would fail. Drop the constant 'sym' level instead;
    # reset_index returns a new frame, so the cached candles stay untouched.
    candles = pair_candles.reset_index(level='sym', drop=True)

    factor_columns = {}
    for indicator_with_params in indicator_with_params_list:
        indicator_name, indicator_kwargs = parse_indicator_column(indicator_with_params)
        # TuneTA tuned these indicators on X.close (see the study names in the fit log).
        factor = getattr(ta, indicator_name)(candles['close'], **indicator_kwargs)
        if factor is None:
            # NOTE(review): assumes pandas-ta signals "not enough data" with None - confirm.
            continue
        if isinstance(factor, pd.DataFrame):
            # NOTE(review): multi-column result; the first column is kept here -
            # confirm which column TuneTA actually scored.
            factor = factor.iloc[:, 0]
        factor_columns[indicator_with_params] = factor

    # One frame per pair: rows are dates, columns are the selected indicators.
    factors_df_dict[pair] = pd.DataFrame(factor_columns, index=candles.index)

In [29]:
def split_indicator_column(column_name, prefix='pta_'):
    """Split a TuneTA column name into the indicator name and a call-argument string.

    Example: 'pta_tos_stdevall_length_13_ddof_21' -> ('tos_stdevall', 'length=13,ddof=21').

    The name is split on '_' and handled token by token, so a parameter name that
    also occurs inside the indicator name (e.g. 'pta_stdev_std_2') cannot confuse it.
    The first numeric token marks the value of the first parameter, and the single
    token in front of it is taken as that parameter's name. A column without any
    numeric token is an indicator without parameters.

    Args:
        column_name: column name as produced by TuneTA.
        prefix: leading marker that is stripped from the name.

    Returns:
        Tuple of (indicator name, 'name=value,...' string; empty if no parameters).

    Raises:
        ValueError: if the prefix is missing or the name cannot be split into
            name=value pairs.
    """
    if not column_name.startswith(prefix):
        raise ValueError(f'{column_name!r} does not start with {prefix!r}')
    tokens = column_name[len(prefix):].split('_')
    first_value_pos = next((pos for pos, token in enumerate(tokens) if token.isdigit()), None)

    if first_value_pos is None:
        # No numeric token at all: the indicator takes no parameters.
        name_tokens, param_tokens = tokens, []
    else:
        name_tokens = tokens[:max(first_value_pos - 1, 0)]
        param_tokens = tokens[max(first_value_pos - 1, 0):]

    name = '_'.join(name_tokens)
    if not name:
        raise ValueError(f'Cannot find an indicator name in {column_name!r}')

    params = []
    pending_name = []
    for token in param_tokens:
        if not token.isdigit():
            # Consecutive non-numeric tokens form one parameter name (e.g. fastk_period).
            pending_name.append(token)
            continue
        if not pending_name:
            raise ValueError(f'Value {token!r} has no parameter name in {column_name!r}')
        params.append(f"{'_'.join(pending_name)}={token}")
        pending_name = []
    if pending_name:
        raise ValueError(f'Parameter {"_".join(pending_name)!r} has no value in {column_name!r}')

    return name, ','.join(params)


s = 'pta_tos_stdevall_length_13_ddof_21'
indicator_name, indicator_params_str = split_indicator_column(s)

f'ta.{indicator_name}({indicator_params_str})'


'ta.tos_stdevall(length=13,ddof=21)'