# Functionality Demo — gp_cross_factor

## Load Data and Packages

In [1]:
import sys
sys.path.append('./gp_cross_factor/')

import warnings
import numpy as np
import pandas as pd
import genetic

# Fix NumPy's global random seed for this session.
np.random.seed(10)

# Apply the pandas display options used by the tables rendered below.
for option_name, option_value in (
    ('display.max_columns', None),
    ('expand_frame_repr', True),
    ('display.unicode.ambiguous_as_wide', True),
):
    pd.set_option(option_name, option_value)

# Suppress all warnings for the rest of the session.
warnings.filterwarnings('ignore')

In [2]:
# Read the target array from disk and report its dimensions.
y_path = "./data/Y.npy"
Y = np.load(y_path)
print(Y.shape)

(728, 4984)


In [6]:
# Read the input array from disk and report its dimensions.
# NOTE(review): the middle axis presumably indexes the entries of
# `feature_names` defined in the next cell — confirm.
x_path = "./data/X.npy"
X = np.load(x_path)
print(X.shape)

(728, 6, 4984)


In [7]:
# Names handed to the transformer via its `feature_names` argument.
feature_names = [
    "open",
    "close",
    "high",
    "low",
    "vwap",
    "volume",
]

## Set Sample Weights and Function Set

In [8]:
# Fraction of the first axis of X that keeps a non-zero weight.
max_samples = 0.8

# Split point along the first axis: rows before it get weight 1, the rest 0.
n_rows = X.shape[0]
num_div = int(n_rows * max_samples)
sample_weight = np.concatenate([np.ones(num_div), np.zeros(n_rows - num_div)])

In [9]:
# Every function name registered in the library's function dictionary.
function_set_all = [*genetic._all_func_dictionary.keys()]

# Names left out of this demo's function set.
remove_list = ['tan', 'sin', 'cos', 'neg']
excluded_names = frozenset(remove_list)

# Keep the library's ordering, minus the excluded names.
function_set = [
    func_name
    for func_name in function_set_all
    if func_name not in excluded_names
]

## Train GP Model

In [10]:
# Show the names of all fitness metrics registered in the library.
[*genetic._extra_map.keys()]

['rank_ic', 'rank_icir', 'quantile_max', 'quantile_mono']

In [11]:
# Keyword arguments for the demo transformer, collected in one place so they
# can be inspected or tweaked before the model object is built.
gp_params = dict(
    # Population and tree-size settings.
    generations=4,
    population_size=50,
    tournament_size=20,
    init_depth=(1, 3),
    hall_of_fame=20,
    n_components=10,
    # Building blocks and fitness metric.
    function_set=function_set,
    metric="rank_icir",
    const_range=None,
    # p_* settings.
    # NOTE(review): confirm against the library which of these are expected
    # to sum to 1 and how any remainder is treated.
    p_crossover=0.5,
    p_hoist_mutation=0.1,
    p_subtree_mutation=0.1,
    p_point_mutation=0.1,
    p_point_replace=0.2,
    p_reproduction=0.1,
    # raw_fitness - parsimony_coefficient * len(program)
    parsimony_coefficient=0.001,
    # Data description and run settings.
    feature_names=feature_names,
    max_samples=max_samples,
    verbose=1,
    random_state=0,
    n_jobs=5,
)

gp_sample = genetic.SymbolicTransformer(**gp_params)

In [12]:
# Run the evolution on the loaded arrays, using the sample weights built
# earlier; progress is reported once per generation (verbose=1).
gp_sample.fit_3D(
    X,
    Y,
    sample_weight=sample_weight,
    baseline=0.02,
    need_parallel=True,
)

     |        Population Metrics         |             Best Individual Metrics              |Time Remaining |
----- ----------------------------------- -------------------------------------------------- ---------------
 Gen     Length      Depth      Fitness       Length        Depth      Fitness  OOB_Fitness         Time
  0       3.58       2.10       0.0948         5.00         2.00        0.297       0.3188       15.04m
  1       4.16       1.94       0.2373         3.00         1.00        0.311       0.4526       48.16m
  2       3.70       1.74       0.2786         3.00         1.00        0.311       0.4526        1.16m
  3       2.62       1.04       0.2703         2.00         1.00        0.314       0.3975        0.00s


In [26]:
# Collect the programs reported by the fitted model into a table.
best_result = gp_sample.show_program(baseline=True)
programs_table = pd.DataFrame(best_result)

# One row per distinct expression, best fitness first.
res = (
    programs_table
    .drop_duplicates(subset="Expression")
    .sort_values(by='Fitness', ascending=False)
)

# Persist the table; the row labels are written under the column name 'idx'.
res.to_csv("./Demo_Result.csv", index=True, index_label='idx')
res

Unnamed: 0,Expression,Fitness,OOB Fitness,Length,Depth
176,ts_max_20(volume),0.314419,0.397478,2,1
117,"max(vwap, volume)",0.311144,0.452588,3,1
143,"sub(open, volume)",0.311051,0.452598,3,1
173,"max(low, volume)",0.311035,0.452588,3,1
78,"max(open, volume)",0.311035,0.452588,3,1
...,...,...,...,...,...
39,ts_min_40(high),0.038796,0.085256,2,1
23,log(ts_min_40(power_3(close))),0.037759,0.084755,4,3
33,ts_min_40(low),0.036218,0.083113,2,1
43,ts_argmax_10(ts_argmin_10(ts_std_60(low))),0.028684,0.007302,4,3


## Visualize Result

In [28]:
# Row label of the top-ranked expression in the results table of this run.
# NOTE(review): presumably the table's row labels are positions in
# `gp_sample._satisfied_programs` — confirm, and update this value after
# re-training, since the ranking depends on the run.
TARGET_PROGRAM_IDX = 176

target_fac = gp_sample._satisfied_programs[TARGET_PROGRAM_IDX]

# Use the built-in str() rather than calling the dunder method directly.
str(target_fac)

'ts_max_20(volume)'

In [29]:
# Emit the selected program as Graphviz DOT text.
dot_source = target_fac.export_graphviz()
print(dot_source)

digraph program {
node [style=filled]
0 [label="ts_max_20", fillcolor="#136ed4"] ;
1 [label="volume", fillcolor="#60a6f6"] ;
0 -> 1 ;
}
