# Functionality Demo — gp_cross_factor

## Load Data and Packages

In [1]:
import warnings
import numpy as np
import pandas as pd

from gp_cross_factor import genetic

# Seed NumPy's legacy global random generator so np.random draws repeat between runs.
np.random.seed(10)
# Suppress warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Optional pandas display settings, left disabled:
# pd.set_option('display.max_columns', None)
# pd.set_option('expand_frame_repr', True)
# pd.set_option('display.unicode.ambiguous_as_wide', True)

In [2]:
# Read the Y array from the local data folder and report its dimensions.
# NOTE(review): presumably the prediction target; axis meaning is not shown here — confirm.
Y = np.load("./data/Y.npy")
print(np.shape(Y))

(728, 4984)


In [3]:
# Read the X array from the local data folder and report its dimensions.
# NOTE(review): the recorded shape has a middle axis of length 6, the same as the
# number of entries in feature_names — presumably one slice per feature; confirm.
X = np.load("./data/X.npy")
print(np.shape(X))

(728, 6, 4984)


In [4]:
# Labels handed to the transformer as feature_names, one string per input feature.
feature_names = [
    "open",
    "close",
    "high",
    "low",
    "vwap",
    "volume",
]

## Set Sample Weights and Function Set

In [5]:
# Fraction of rows along X's first axis that keep a weight of 1.
max_samples = 0.8
# Index of the first zero-weighted row (truncated towards zero).
num_div = int(X.shape[0] * max_samples)
# Weight 1.0 for rows before num_div and 0.0 from num_div onwards.
# NOTE(review): presumably the zero-weighted tail acts as a held-out segment — confirm
# against how fit_3D uses sample_weight.
sample_weight = np.where(np.arange(X.shape[0]) < num_div, 1.0, 0.0)

In [6]:
# Operator names that are left out of the function set used for training.
remove_list = ['tan', 'sin', 'cos', 'neg']
# Every key of the package's function dictionary, in its stored order.
function_set_all = list(genetic._all_func_dictionary.keys())
# Keep the remaining names, preserving their original order.
function_set = list(filter(lambda name: name not in remove_list, function_set_all))

## Train GP Model

In [7]:
# Show the name of every fitness metric available in genetic._extra_map.
[*genetic._extra_map.keys()]

['rank_ic', 'rank_icir', 'quantile_max', 'quantile_mono']

In [13]:
# Configure the symbolic transformer; every setting is passed by keyword.
gp_sample = genetic.SymbolicTransformer(
    # Population and generation settings.
    generations=3,
    population_size=20,
    tournament_size=10,
    init_depth=(1, 3),
    hall_of_fame=5,
    n_components=2,
    # Inputs to the search: allowed operators, feature labels, constants.
    function_set=function_set,
    feature_names=feature_names,
    const_range=None,
    # Fitness metric; "rank_icir" is one of the keys listed from genetic._extra_map.
    metric="rank_icir",
    # raw_fitness - parsimony_coefficient * len(program)
    parsimony_coefficient=0.001,
    # p_crossover + p_hoist_mutation + p_subtree_mutation + p_point_mutation
    # + p_reproduction = 1.0
    p_crossover=0.5,
    p_hoist_mutation=0.1,
    p_subtree_mutation=0.1,
    p_point_mutation=0.1,
    p_reproduction=0.1,
    p_point_replace=0.2,
    # Sampling fraction, log verbosity, seed and worker count.
    max_samples=max_samples,
    verbose=2,
    random_state=1234,
    n_jobs=5,
)

In [14]:
# Fit the transformer on X and Y with the prepared sample weights.
# NOTE(review): the meaning of baseline=0.2 is not visible here — presumably a
# fitness threshold for keeping programs; confirm against fit_3D.
gp_sample.fit_3D(
    X,
    Y,
    sample_weight=sample_weight,
    baseline=0.2,
    need_parallel=True,
)

     |        Population Metrics         |             Best Individual Metrics              |Time Remaining |
----- ----------------------------------- -------------------------------------------------- ---------------
 Gen     Length      Depth      Fitness       Length        Depth      Fitness  OOB_Fitness         Time


[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  20 out of  20 | elapsed:   29.5s finished


  0       3.70       1.90       0.1281         2.00         1.00        0.335       0.4260        3.30m


[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  20 out of  20 | elapsed:   14.2s finished


  1       2.60       1.35       0.2356         2.00         1.00        0.335       0.4260        1.00m


[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  20 out of  20 | elapsed:   20.5s finished


  2       2.55       1.25       0.2487         2.00         1.00        0.335       0.4260        0.00s


In [10]:
# Collect the program summary from the fitted transformer.
best_result = gp_sample.show_program(baseline=True)
# One row per distinct expression, highest Fitness first.
res = (
    pd.DataFrame(best_result)
    .drop_duplicates(subset="Expression")
    .sort_values(by="Fitness", ascending=False)
)
# Save the table; the index is written as a column labelled "idx".
res.to_csv("./Demo_Result.csv", index=True, index_label="idx")
res

Unnamed: 0,Expression,Fitness,OOB Fitness,Length,Depth
0,ts_std_20(volume),0.334846,0.425952,2,1
9,volume,0.311036,0.452586,1,0
26,ts_max_60(volume),0.276911,0.261959,2,1
13,"delta_2(ts_argmax_60(ts_correlation_10(vwap, o...",0.244533,0.079844,5,3
27,"delta_2(ts_argmax_60(ts_correlation_10(vwap, d...",0.229625,0.19996,6,4
1,delta_2(close),0.221062,0.333318,2,1
2,"delta_2(ts_argmax_60(max(vwap, open)))",0.202969,0.222684,5,3


## Visualize Result

In [11]:
# Take the first entry of the transformer's _satisfied_programs and display its
# string form.
target_fac = gp_sample._satisfied_programs[0]
str(target_fac)

'ts_std_20(volume)'

In [12]:
# Print the text returned by export_graphviz (the recorded output is a Graphviz digraph).
dot_source = target_fac.export_graphviz()
print(dot_source)

digraph program {
node [style=filled]
0 [label="ts_std_20", fillcolor="#136ed4"] ;
1 [label="volume", fillcolor="#60a6f6"] ;
0 -> 1 ;
}
