### Import library

In [1]:
import os

# Move the working directory two levels up so the project-local modules
# (ace_lib, helpful_functions) can be imported.
# The target is resolved only once per kernel: a plain os.chdir('../../')
# climbs two more levels every time this cell is re-run.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
os.chdir(PROJECT_ROOT)
os.getcwd()

'C:\\Users\\csia7\\OneDrive\\문서\\GitHub\\WQBrain_2024_API'

In [2]:
import ace_lib as ace
import helpful_functions as hf
import pandas as pd
import requests
import plotly.express as px
import pygwalker as pyg
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

### Start session
Enter your credentials once; they will be saved to a local folder and loaded automatically on each subsequent run.

In [3]:
# Open the API session; `s` is passed as the first argument to every ace/hf call below.
s = ace.start_session()

Complete biometrics authentication and press any key to continue: 
https://api.worldquantbrain.com/authentication/persona?inquiry=inq_wRbJSB8KavfDKU41fara2NHzDBBQ

 


## Global region Alpha Template

In [7]:
# Worked example of the alpha this notebook generalises: one concrete grouping
# field and one concrete fundamental field plugged into the expression.
# Plain string literal: there are no placeholders here, so no f-prefix is needed.
expression_template = '''
group = (country+1)*group_max(pv13_52_minvol_1m_all_delay_1_sector, market) + pv13_52_minvol_1m_all_delay_1_sector;
group_neutralize(ts_scale(group_backfill(fnd23_intfvalld1_ecns, industry, 252, std=1), 252), densify(group))
'''

#### Step 1. Download datasets

In [4]:
# Download the dataset catalogue for the GLB region / MINVOL1M universe.
# NOTE(review): the earlier comment said the helper defaults to USA TOP3000 delay 1
# when region/universe are omitted — confirm against helpful_functions.get_datasets.
datasets_df = hf.get_datasets(s, region = 'GLB', universe = 'MINVOL1M')
datasets_df # shows the frame (Jupyter truncates the middle rows); use datasets_df.head() for only the first 5 rows

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
0,analyst11,ESG scores,Environmental Social Governance scores that ex...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-esg', 'name': 'ESG'}",GLB,1,MINVOL1M,0.7917,,4.0,66,299,197,[],"[{'title': 'Research Paper 19: ESG Preference,..."
1,analyst14,Estimations of Key Fundamentals,This dataset reports many items from financial...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-analyst-estimates', 'name': 'A...",GLB,1,MINVOL1M,0.5186,,3.0,101,584,926,[],[{'title': 'Research Paper 10: Investor Learni...
2,analyst15,Earnings forecasts,This dataset provides bottom-up forecast data ...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-analyst-estimates', 'name': 'A...",GLB,1,MINVOL1M,0.9929,,2.0,157,1489,288,[],[]
3,analyst16,Real Time Estimates,This dataset provides real-time access to the ...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-crowdsourced-estimates', 'name...",GLB,1,MINVOL1M,0.9013,,2.0,134,807,42,[],[]
4,analyst35,ESG Model,The dataset provide ESG related information ba...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-esg', 'name': 'ESG'}",GLB,1,MINVOL1M,0.4375,,3.0,63,201,23,[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,shortinterest6,SmartHoldings Model,This dataset is a global stock selection model...,"{'id': 'shortinterest', 'name': 'Short Interest'}","{'id': 'shortinterest-short-sale-models', 'nam...",GLB,1,MINVOL1M,0.9711,,2.0,91,382,11,[],[]
84,shortinterest7,Short Selling Model,The dataset combines features obtained from sh...,"{'id': 'shortinterest', 'name': 'Short Interest'}","{'id': 'shortinterest-short-sale-models', 'nam...",GLB,1,MINVOL1M,1.0000,,2.0,73,331,16,[],[{'title': 'Research Paper 04: Strategic Rebal...
85,socialmedia12,Sentiment Data for Equity,This dataset provides sentiment data with diff...,"{'id': 'socialmedia', 'name': 'Social Media'}","{'id': 'socialmedia-social-media', 'name': 'So...",GLB,1,MINVOL1M,0.6731,,3.0,17,25,2,[],[{'title': 'Research Paper 01: Textual Sentime...
86,socialmedia5,Lexical Breakdown Data,Sentiment scores derived from social media. So...,"{'id': 'socialmedia', 'name': 'Social Media'}","{'id': 'socialmedia-social-media', 'name': 'So...",GLB,1,MINVOL1M,0.3753,,5.0,2,3,12,[],[]


In [20]:
# Keep the datasets whose name contains 'relationship data for equity'
# (case-insensitive), highest valueScore first.
name_matches_relationship = datasets_df["name"].str.contains('relationship data for equity', case=False)
selected_datasets_df_1 = (
    datasets_df
    .loc[name_matches_relationship]
    .sort_values(by=['valueScore'], ascending=False)
)
selected_datasets_df_1

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
73,pv13,Relationship Data for Equity,The dataset outputs various classifications an...,"{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,0.9152,,1.0,127,1924,29,[],[]


In [21]:
# Keep the datasets whose name contains 'fundamental point in time'
# (case-insensitive), highest valueScore first.
name_matches_fundamental = datasets_df["name"].str.contains('fundamental point in time', case=False)
selected_datasets_df_2 = (
    datasets_df
    .loc[name_matches_fundamental]
    .sort_values(by=['valueScore'], ascending=False)
)
selected_datasets_df_2

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
21,fundamental23,Fundamental Point in Time Data,This dataset gives point in time access to fun...,"{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,0.6704,,2.0,257,2041,886,[],[{'title': 'Research Paper 01: Textual Sentime...


#### Step 2. Select the needed datafields

##### Step 2-1. equity relationship datafield extraction

In [23]:
# Id of the first row of the selection (the selection above holds a single dataset).
dataset_id_1 = selected_datasets_df_1['id'].iloc[0]
dataset_id_1

'pv13'

In [24]:
# Download all data fields of the selected relationship dataset (dataset_id_1) for GLB / MINVOL1M.
datafields_df_1 = hf.get_datafields(s, region = 'GLB', universe = 'MINVOL1M', dataset_id=dataset_id_1)
datafields_df_1

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
0,pv13_10_f2_g3_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,27,146,[]
1,pv13_10_f2_g4_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,16,102,[]
2,pv13_10_f3_g2_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,17,98,[]
3,pv13_10_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,36,120,[]
4,pv13_10_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,4,10,[]
5,pv13_1l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.877,,31,120,[]
6,pv13_20_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,36,234,[]
7,pv13_2_f3_g2_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,19,75,[]
8,pv13_2_f4_g3_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,21,88,[]
9,pv13_2_minvol_1m_all_delay_1_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,26,88,[]


In [30]:
# Keep only GROUP-type fields used by more than 5 users, most-used first.
is_group_field = datafields_df_1['type'] == 'GROUP'
group_has_enough_users = datafields_df_1['userCount'] > 5
selected_datafields_df_1 = (
    datafields_df_1
    .loc[is_group_field & group_has_enough_users]
    .sort_values(by=['userCount'], ascending=False)
)
selected_datafields_df_1

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
21,pv13_6l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,39,372,[]
6,pv13_20_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,36,234,[]
3,pv13_10_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,36,120,[]
20,pv13_5l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,34,288,[]
5,pv13_1l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.877,,31,120,[]
0,pv13_10_f2_g3_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,27,146,[]
9,pv13_2_minvol_1m_all_delay_1_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,26,88,[]
13,pv13_3l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,25,59,[]
12,pv13_2l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,23,125,[]
16,pv13_52_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,23,104,[]


In [34]:
# Number of GROUP fields that survived the userCount filter.
len(selected_datafields_df_1)

19

In [36]:
# Plain Python list of the selected grouping-field ids, still ordered by userCount (descending).
selected_datafields_df_1_id = selected_datafields_df_1['id'].tolist()
selected_datafields_df_1_id

['pv13_6l_scibr',
 'pv13_20_minvol_1m_sector',
 'pv13_10_minvol_1m_sector',
 'pv13_5l_scibr',
 'pv13_1l_scibr',
 'pv13_10_f2_g3_minvol_1m_sector',
 'pv13_2_minvol_1m_all_delay_1_sector',
 'pv13_3l_scibr',
 'pv13_2l_scibr',
 'pv13_52_minvol_1m_sector',
 'pv13_52_minvol_1m_all_delay_1_sector',
 'pv13_2_f4_g3_minvol_1m_sector',
 'pv13_2_minvol_1m_sector',
 'pv13_4l_scibr',
 'pv13_2_f3_g2_minvol_1m_sector',
 'pv13_5_f3_g2_minvol_1m_sector',
 'pv13_10_f3_g2_minvol_1m_sector',
 'pv13_10_f2_g4_minvol_1m_sector',
 'pv13_5_minvol_1m_sector']

##### Step 2-2. fundamental time datafields extraction

In [27]:
# Id of the first row of the selection (the selection above holds a single dataset).
dataset_id_2 = selected_datasets_df_2['id'].iloc[0]
dataset_id_2

'fundamental23'

In [28]:
# Download all data fields of the selected fundamental dataset (dataset_id_2) for GLB / MINVOL1M.
datafields_df_2 = hf.get_datafields(s, region = 'GLB', universe = 'MINVOL1M', dataset_id=dataset_id_2)
datafields_df_2

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
0,fnd23_acc_payable,accounts payable,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.8848,,11,30,[]
1,fnd23_annfv_item,integer index of the item description field. Y...,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,VECTOR,0.4969,,1,1,[]
2,fnd23_annfv_value,annual financial values.,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,VECTOR,0.4969,,2,2,[]
3,fnd23_annfvmfm2_acta,total current assets. Sum of cash and short te...,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.9223,,16,30,[]
4,fnd23_annfvmfm2_amao,Amortization of Acquisition Costs,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.3231,,1,2,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
881,fnd23_topic3,third tier topic mapped to parent topic.,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,VECTOR,0.7647,,0,0,[]
882,fnd23_tot_assets,total assets.,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,1.0000,,31,84,[]
883,fnd23_tot_inventory,total inventory.,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.9832,,6,8,[]
884,fnd23_tot_revenue,total revenue.,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.9928,,12,18,[]


In [31]:
# Keep only MATRIX-type fields used by more than 5 users, most-used first.
is_matrix_field = datafields_df_2['type'] == 'MATRIX'
matrix_has_enough_users = datafields_df_2["userCount"] > 5
selected_datafields_df_2 = (
    datafields_df_2
    .loc[is_matrix_field & matrix_has_enough_users]
    .sort_values(by=['userCount'], ascending=False)
)
selected_datafields_df_2

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
14,fnd23_annfvmfm2_fcos,Changes in Working Capital,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,1.0000,,70,167,[]
23,fnd23_annfvmfm2_olto,includes diverse cash flows that are reported ...,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,1.0000,,41,101,[]
882,fnd23_tot_assets,total assets.,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,1.0000,,31,84,[]
875,fnd23_roe,return on equity,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,1.0000,,29,47,[]
870,fnd23_net_income,net income,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,1.0000,,28,74,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19,fnd23_annfvmfm2_lctl,represents current liabilities for industrial ...,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.9225,,6,8,[]
302,fnd23_intfvalld1_ipos,Operating Income.,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.9922,,6,13,[]
883,fnd23_tot_inventory,total inventory.,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.9832,,6,8,[]
132,fnd23_intfvalld1_cdav,Deferred Revenue - Current,"{'id': 'fundamental23', 'name': 'Fundamental P...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",GLB,1,MINVOL1M,MATRIX,0.3804,,6,15,[]


In [37]:
# Plain Python list of the selected fundamental-field ids, still ordered by userCount (descending).
selected_datafields_df_2_id = selected_datafields_df_2['id'].tolist()
selected_datafields_df_2_id

['fnd23_annfvmfm2_fcos',
 'fnd23_annfvmfm2_olto',
 'fnd23_tot_assets',
 'fnd23_roe',
 'fnd23_net_income',
 'fnd23_intfvalld1_iscs',
 'fnd23_cash_n_equivalents',
 'fnd23_aor',
 'fnd23_ebitda',
 'fnd23_intfvmfm2_olto',
 'fnd23_net_debt',
 'fnd23_ofc',
 'fnd23_intfvalld1_dtns',
 'fnd23_annfvmfm2_iscs',
 'fnd23_intfvmfm_olto',
 'fnd23_ve',
 'fnd23_capex',
 'fnd23_curr_assets',
 'fnd23_annfvmfm2_acta',
 'fnd23_annfvmfm2_cnin',
 'fnd23_intfvalld1_ecns',
 'fnd23_intfvmalld1_olto',
 'fnd23_ebit',
 'fnd23_annfvmfm2_bpao',
 'fnd23_intfvmalld1_sedv',
 'fnd23_annfvmfm_olto',
 'fnd23_tot_revenue',
 'fnd23_annfvmfm2_bloo',
 'fnd23_acc_payable',
 'fnd23_curr_liabilities',
 'fnd23_ffcf',
 'fnd23_intfvalld1_fbds',
 'fnd23_intfvalld1_ccns',
 'fnd23_annfvmfm2_ltio',
 'fnd23_debt_issuance',
 'fnd23_intfvmfm2_fcos',
 'fnd23_intfvmalld1_xbin',
 'fnd23_intfvmalld1_ipos',
 'fnd23_intfvalld1_fcos',
 'fnd23_annfvmfm2_eltq',
 'fnd23_annfvmfm2_inds',
 'fnd23_cogs',
 'fnd23_lt_debt',
 'fnd23_intfvmalld1_ccns',
 'f

#### Step 4. Apply generate_alpha function to the expression list
In the `generate_alpha` function you can specify the region, universe, decay, delay and other simulation settings.

이제 만들어둔 expressions 을 사용해 alpha simulation 을 해줍니다.

In [61]:
# Tip: run `?ace.generate_alpha` in a cell to view the function's docstring and parameters.

In [32]:
# Reusable alpha template. This is a plain string with named placeholders, not
# an f-string: an f-string is evaluated the moment it is defined, which would
# bake blank values into the expression instead of leaving slots to fill.
# Fill it with expression_template.format(equity=<grouping field id>, time=<fundamental field id>).
expression_template = '''
group = (country+1)*group_max({equity}, market) + {equity};
group_neutralize(ts_scale(group_backfill({time}, industry, 252, std=1), 252), densify(group))
'''

In [35]:
# Re-display the selected grouping fields for reference before building the parameter grid.
selected_datafields_df_1

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
21,pv13_6l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,39,372,[]
6,pv13_20_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,36,234,[]
3,pv13_10_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,36,120,[]
20,pv13_5l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,34,288,[]
5,pv13_1l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.877,,31,120,[]
0,pv13_10_f2_g3_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,27,146,[]
9,pv13_2_minvol_1m_all_delay_1_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,26,88,[]
13,pv13_3l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,25,59,[]
12,pv13_2l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,23,125,[]
16,pv13_52_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,23,104,[]


In [39]:
# How many of the top-ranked (by userCount) fields to take from each list;
# 10 x 10 gives at most 100 [grouping field, fundamental field] pairs.
N_GROUP_FIELDS = 10
N_FUNDAMENTAL_FIELDS = 10

# Cartesian product of the two shortlists, grouping field varying slowest.
# Slicing instead of indexing with range(10) means a shortlist with fewer than
# 10 entries yields fewer pairs rather than raising IndexError.
parameters = [
    [group_field, fundamental_field]
    for group_field in selected_datafields_df_1_id[:N_GROUP_FIELDS]
    for fundamental_field in selected_datafields_df_2_id[:N_FUNDAMENTAL_FIELDS]
]
parameters

[['pv13_6l_scibr', 'fnd23_annfvmfm2_fcos'],
 ['pv13_6l_scibr', 'fnd23_annfvmfm2_olto'],
 ['pv13_6l_scibr', 'fnd23_tot_assets'],
 ['pv13_6l_scibr', 'fnd23_roe'],
 ['pv13_6l_scibr', 'fnd23_net_income'],
 ['pv13_6l_scibr', 'fnd23_intfvalld1_iscs'],
 ['pv13_6l_scibr', 'fnd23_cash_n_equivalents'],
 ['pv13_6l_scibr', 'fnd23_aor'],
 ['pv13_6l_scibr', 'fnd23_ebitda'],
 ['pv13_6l_scibr', 'fnd23_intfvmfm2_olto'],
 ['pv13_20_minvol_1m_sector', 'fnd23_annfvmfm2_fcos'],
 ['pv13_20_minvol_1m_sector', 'fnd23_annfvmfm2_olto'],
 ['pv13_20_minvol_1m_sector', 'fnd23_tot_assets'],
 ['pv13_20_minvol_1m_sector', 'fnd23_roe'],
 ['pv13_20_minvol_1m_sector', 'fnd23_net_income'],
 ['pv13_20_minvol_1m_sector', 'fnd23_intfvalld1_iscs'],
 ['pv13_20_minvol_1m_sector', 'fnd23_cash_n_equivalents'],
 ['pv13_20_minvol_1m_sector', 'fnd23_aor'],
 ['pv13_20_minvol_1m_sector', 'fnd23_ebitda'],
 ['pv13_20_minvol_1m_sector', 'fnd23_intfvmfm2_olto'],
 ['pv13_10_minvol_1m_sector', 'fnd23_annfvmfm2_fcos'],
 ['pv13_10_minvol_1m_

In [40]:
# Build one single-line alpha expression per [grouping field, fundamental field] pair.
expressions = []
for params in parameters:
    equity, time = params[0], params[1]
    # First statement: build the custom group from the grouping field.
    group_definition = f'group = (country+1)*group_max({equity}, market) + {equity};'
    # Second statement: neutralize the scaled, backfilled fundamental field within that group.
    neutralized_signal = f'group_neutralize(ts_scale(group_backfill({time}, industry, 252, std=1), 252), densify(group))'
    expressions.append(group_definition + neutralized_signal)

In [41]:
# Display the generated expressions (one string per parameter pair); use expressions[:5] for a shorter preview.
expressions

['group = (country+1)*group_max(pv13_6l_scibr, market) + pv13_6l_scibr;group_neutralize(ts_scale(group_backfill(fnd23_annfvmfm2_fcos, industry, 252, std=1), 252), densify(group))',
 'group = (country+1)*group_max(pv13_6l_scibr, market) + pv13_6l_scibr;group_neutralize(ts_scale(group_backfill(fnd23_annfvmfm2_olto, industry, 252, std=1), 252), densify(group))',
 'group = (country+1)*group_max(pv13_6l_scibr, market) + pv13_6l_scibr;group_neutralize(ts_scale(group_backfill(fnd23_tot_assets, industry, 252, std=1), 252), densify(group))',
 'group = (country+1)*group_max(pv13_6l_scibr, market) + pv13_6l_scibr;group_neutralize(ts_scale(group_backfill(fnd23_roe, industry, 252, std=1), 252), densify(group))',
 'group = (country+1)*group_max(pv13_6l_scibr, market) + pv13_6l_scibr;group_neutralize(ts_scale(group_backfill(fnd23_net_income, industry, 252, std=1), 252), densify(group))',
 'group = (country+1)*group_max(pv13_6l_scibr, market) + pv13_6l_scibr;group_neutralize(ts_scale(group_backfill(fn

In [43]:
# All alphas sent together in one simulation list must share the same settings;
# alphas with different settings belong in a separate list. The settings are
# therefore defined once and applied to every expression.
simulation_settings = {
    'region': "GLB",
    'universe': "MINVOL1M",
    'neutralization': 'COUNTRY',
    'truncation': 0.01,
    'delay': 1,
    'decay': 0,
}

alpha_list = [
    ace.generate_alpha(expression, **simulation_settings)
    for expression in expressions
]
alpha_list[0]

{'type': 'REGULAR',
 'settings': {'instrumentType': 'EQUITY',
  'region': 'GLB',
  'universe': 'MINVOL1M',
  'delay': 1,
  'decay': 0,
  'neutralization': 'COUNTRY',
  'truncation': 0.01,
  'pasteurization': 'ON',
  'testPeriod': 'P0Y0M0D',
  'unitHandling': 'VERIFY',
  'nanHandling': 'OFF',
  'language': 'FASTEXPR',
  'visualization': False},
 'regular': 'group = (country+1)*group_max(pv13_6l_scibr, market) + pv13_6l_scibr;group_neutralize(ts_scale(group_backfill(fnd23_annfvmfm2_fcos, industry, 252, std=1), 252), densify(group))'}

### Simulate alpha list, get simulation result

`simulate_alpha_list_multi` runs a multi-simulation when the list contains more than 10 alphas, which is the case here.

the returned object will contain simulation results for all alphas as a list

In [44]:
# Only the first 50 alphas are simulated here to keep the run time manageable
# (the recorded progress bar for this run shows roughly 1h15m).

result = ace.simulate_alpha_list_multi(s, alpha_list[:50])

100%|███████████████████████████████████████████████████████████████████████████████| 17/17 [1:14:59<00:00, 264.67s/it]


위 코드를 실행하면 시뮬레이션이 시작됩니다. 100개의 알파에 대략 1시간 정도 소요되니, 인터넷 연결이 끊기지 않게 주의하시고, 너무 많은 알파를 한 번에 돌리기보다는 몇개씩 끊어서 돌려도 좋습니다.

In [45]:
# hf.prettify_result turns the raw simulation result into a table with one row
# per alpha, holding its IS stats and the PASS/FAIL outcome of each test.

result_st1 = hf.prettify_result(result, detailed_tests_view=False)
result_st1

Unnamed: 0,pnl,book_size,long_count,short_count,turnover,returns,drawdown,margin,fitness,sharpe,start_date,alpha_id,expression,concentrated_weight,high_turnover,is_ladder_sharpe,low_fitness,low_sharpe,low_sub_universe_sharpe,low_turnover
0,4142182,20000000,4013,3967,0.0469,0.04,0.0361,0.001704,1.28,2.26,2012-01-22,ZpO36rQ,group = (country+1)*group_max(pv13_20_minvol_1...,PASS,PASS,PASS,PASS,PASS,PASS,PASS
1,4122365,20000000,3988,3989,0.0474,0.0398,0.0362,0.001679,1.25,2.21,2012-01-22,g1nOo3K,group = (country+1)*group_max(pv13_10_minvol_1...,PASS,PASS,FAIL,PASS,PASS,PASS,PASS
2,4350382,20000000,4019,3911,0.0481,0.042,0.0612,0.001745,1.24,2.14,2012-01-22,ZpO3lOd,group = (country+1)*group_max(pv13_20_minvol_1...,PASS,PASS,FAIL,PASS,PASS,PASS,PASS
3,4631670,20000000,3825,3617,0.0374,0.0447,0.069,0.002389,1.2,2.01,2012-01-22,9wN15bq,"group = (country+1)*group_max(pv13_1l_scibr, m...",PASS,PASS,FAIL,PASS,PASS,PASS,PASS
4,4333680,20000000,3758,3722,0.0375,0.0418,0.0601,0.002233,1.17,2.03,2012-01-22,p69rxkj,"group = (country+1)*group_max(pv13_1l_scibr, m...",PASS,PASS,FAIL,PASS,PASS,PASS,PASS
5,4074932,20000000,4029,3898,0.0486,0.0393,0.0559,0.001618,1.15,2.05,2012-01-22,WpvQAeG,group = (country+1)*group_max(pv13_10_minvol_1...,PASS,PASS,FAIL,PASS,PASS,PASS,PASS
6,3493294,20000000,3936,4085,0.0485,0.0337,0.0726,0.00139,1.03,1.98,2012-01-22,WpvQXvZ,group = (country+1)*group_max(pv13_20_minvol_1...,PASS,PASS,FAIL,PASS,PASS,PASS,PASS
7,3178647,20000000,3854,3679,0.0319,0.0307,0.0271,0.001926,1.02,2.06,2012-01-22,dxNA2EY,"group = (country+1)*group_max(pv13_1l_scibr, m...",PASS,PASS,FAIL,PASS,PASS,PASS,PASS
8,3246982,20000000,3994,4098,0.0483,0.0314,0.0219,0.001297,1.01,2.01,2012-01-22,AwoZM1Y,group = (country+1)*group_max(pv13_20_minvol_1...,PASS,PASS,FAIL,PASS,PASS,PASS,PASS
9,3950050,20000000,3633,3891,0.0411,0.0381,0.0911,0.001855,1.01,1.83,2012-01-22,ZpO3Vqj,"group = (country+1)*group_max(pv13_1l_scibr, m...",PASS,PASS,FAIL,PASS,PASS,PASS,PASS


prettify 함수를 이용해 dataframe 형식으로 변환하면 결과를 보기 더 편합니다.

In [46]:
# Save the summary table to a CSV file; the relative path resolves against the current working directory.
result_st1.to_csv('2024_09_06_GLB.csv')

### How to submit?

Create a list of submittable alphas - alphas that have no FAIL in is_tests

시뮬레이션을 마친 알파 중 제출 가능한 알파들을 제출해 봅시다.

In [47]:
# Combine the IS test results of all simulated alphas into one long table
# (one row per alpha and test). Note that the input is the raw `result`,
# not the prettified table.

is_tests_df = hf.concat_is_tests(result)
is_tests_df

Unnamed: 0,alpha_id,endDate,limit,message,name,result,startDate,themes,value,year
0,PpkWbgp,,1.58,,LOW_SHARPE,FAIL,,,1.2000,
1,PpkWbgp,,1.00,,LOW_FITNESS,FAIL,,,0.4700,
2,PpkWbgp,,0.01,,LOW_TURNOVER,PASS,,,0.0425,
3,PpkWbgp,,0.70,,HIGH_TURNOVER,PASS,,,0.0425,
4,PpkWbgp,,,,CONCENTRATED_WEIGHT,PASS,,,,
...,...,...,...,...,...,...,...,...,...,...
645,Lp0APkL,,,,DATA_DIVERSITY,PENDING,,,,
646,Lp0APkL,,,,PROD_CORRELATION,PENDING,,,,
647,Lp0APkL,,,,REGULAR_SUBMISSION,PENDING,,,,
648,Lp0APkL,2020-01-25,1.58,,IS_LADDER_SHARPE,FAIL,2022-01-24,,1.4700,2.0


In [48]:
# Alphas with at least one FAIL row in the IS tests (numpy array, in order of first appearance).
failed_alphas = is_tests_df.query('result=="FAIL"')['alpha_id'].unique()

# Alphas with no FAIL row. Rows whose result is not "FAIL" (e.g. PENDING) do not
# disqualify an alpha here. The list keeps the order in which the alphas first
# appear in is_tests_df: going through a set would make the order, and therefore
# passed_alphas[0] and the submission order, vary from run to run.
failed_alpha_ids = set(failed_alphas)
passed_alphas = [
    alpha_id
    for alpha_id in is_tests_df['alpha_id'].unique()
    if alpha_id not in failed_alpha_ids
]

print(f'Failed alphas:{failed_alphas}\nPassed alphas:{passed_alphas}')

Failed alphas:['PpkWbgp' '2730xmx' 'm8E3N6E' '5wPb1Yz' 'bLekgAm' '9wNv32K' 'NpYwdMq'
 'bLekmpq' 'Lp0jvem' 'm8ERpqp' 'RpGQMN1' 'VpAj9rb' '7webAp1' 'NpYmwYo'
 'KbWdjKk' '273zb1x' 'AwoZM1Y' 'ZpO3lOd' 'WpvQXvZ' 'jZqRd5W' 'g1nOo3K'
 'wR0xJeQ' 'OplP5pY' 'Jp6EgjO' 'z9l6PqE' 'AwoZgjd' '7webzN5' 'WpvQAeG'
 'ad5Z1x9' 'ngLR75d' 'OplP7L1' 'g1nOjlM' 'm8ERgw6' '0713Rqv' 'Lp0a9Wm'
 '17rbGPX' 'GwZAQ9J' 'ZpO3XNQ' 'GwZAX7O' 'VpAjZLJ' 'p69rxkj' 'Ew9WzgL'
 'Qawkznr' 'ngLRqnq' '9wN15bq' 'ZpO3Vqj' 'dxNA2EY' 'k3z2xnL' 'Lp0APkL']
Passed alphas:['ZpO36rQ']


위 함수들을 이용해 All Pass (correlation 제외) 가 나온 알파들의 id 를 쉽게 구별해 낼 수 있습니다. 주의하셔야 할 점은 is_test 함수에 넣는 input 값은 prettify 하지 않은, alpha simulation 을 마쳤을 때 출력되는 원본 형태를 입력해야 된다는 겁니다.

In [14]:
# Fetch the check-submission result for the first passed alpha.
# NOTE(review): the recorded run of this cell ended in an InvalidIndexError, and
# passed_alphas[0] raises IndexError when the list is empty — confirm before relying on it.
ace.get_check_submission(s, passed_alphas[0])

InvalidIndexError: You can only assign a scalar value not a <class 'list'>

위 함수로 각 알파가 correlation 을 포함한 모든 기준에서 통과를 했는지 확인 해 볼 수 있습니다.

In [83]:
# Print the check-submission table of every passed alpha (one call per alpha).
for alpha_id in passed_alphas:
    print(ace.get_check_submission(s, alpha_id))

                      name result  limit   value  \
0               LOW_SHARPE   PASS   1.25  2.1700   
1              LOW_FITNESS   PASS   1.00  1.1900   
2             LOW_TURNOVER   PASS   0.01  0.3861   
3            HIGH_TURNOVER   PASS   0.70  0.3861   
4      CONCENTRATED_WEIGHT   PASS    NaN     NaN   
5  LOW_SUB_UNIVERSE_SHARPE   PASS   0.76  0.9300   
6         SELF_CORRELATION   PASS   0.70  0.5198   
7         PROD_CORRELATION   FAIL   0.70  0.7659   
8       REGULAR_SUBMISSION   PASS   4.00  0.0000   
9      MATCHES_COMPETITION   PASS    NaN     NaN   

                                 competitions alpha_id  
0                                         NaN  z92n2P8  
1                                         NaN  z92n2P8  
2                                         NaN  z92n2P8  
3                                         NaN  z92n2P8  
4                                         NaN  z92n2P8  
5                                         NaN  z92n2P8  
6                           

                      name result  limit   value  \
0               LOW_SHARPE   PASS   1.25  2.4700   
1              LOW_FITNESS   PASS   1.00  1.3400   
2             LOW_TURNOVER   PASS   0.01  0.4417   
3            HIGH_TURNOVER   PASS   0.70  0.4417   
4      CONCENTRATED_WEIGHT   PASS    NaN     NaN   
5  LOW_SUB_UNIVERSE_SHARPE   PASS   0.86  1.0800   
6         SELF_CORRELATION   PASS   0.70  0.3222   
7         PROD_CORRELATION   FAIL   0.70  0.7670   
8       REGULAR_SUBMISSION   PASS   4.00  0.0000   
9      MATCHES_COMPETITION   PASS    NaN     NaN   

                                 competitions alpha_id  
0                                         NaN  xxG0den  
1                                         NaN  xxG0den  
2                                         NaN  xxG0den  
3                                         NaN  xxG0den  
4                                         NaN  xxG0den  
5                                         NaN  xxG0den  
6                           

                      name result  limit   value  \
0               LOW_SHARPE   PASS   1.25  2.3300   
1              LOW_FITNESS   PASS   1.00  1.3900   
2             LOW_TURNOVER   PASS   0.01  0.3689   
3            HIGH_TURNOVER   PASS   0.70  0.3689   
4      CONCENTRATED_WEIGHT   PASS    NaN     NaN   
5  LOW_SUB_UNIVERSE_SHARPE   PASS   0.82  1.0300   
6         SELF_CORRELATION   PASS   0.70  0.3848   
7         PROD_CORRELATION   FAIL   0.70  0.8931   
8       REGULAR_SUBMISSION   PASS   4.00  0.0000   
9      MATCHES_COMPETITION   PASS    NaN     NaN   

                                 competitions alpha_id  
0                                         NaN  8zJ1aRV  
1                                         NaN  8zJ1aRV  
2                                         NaN  8zJ1aRV  
3                                         NaN  8zJ1aRV  
4                                         NaN  8zJ1aRV  
5                                         NaN  8zJ1aRV  
6                           

                      name result  limit   value  \
0               LOW_SHARPE   PASS   1.25  2.5800   
1              LOW_FITNESS   PASS   1.00  1.5600   
2             LOW_TURNOVER   PASS   0.01  0.3700   
3            HIGH_TURNOVER   PASS   0.70  0.3700   
4      CONCENTRATED_WEIGHT   PASS    NaN     NaN   
5  LOW_SUB_UNIVERSE_SHARPE   PASS   0.90  1.2600   
6         SELF_CORRELATION   PASS   0.70  0.3681   
7         PROD_CORRELATION   FAIL   0.70  0.8606   
8       REGULAR_SUBMISSION   PASS   4.00  0.0000   
9      MATCHES_COMPETITION   PASS    NaN     NaN   

                                 competitions alpha_id  
0                                         NaN  wRGgxYl  
1                                         NaN  wRGgxYl  
2                                         NaN  wRGgxYl  
3                                         NaN  wRGgxYl  
4                                         NaN  wRGgxYl  
5                                         NaN  wRGgxYl  
6                           

                      name result  limit   value  \
0               LOW_SHARPE   PASS   1.25  2.6100   
1              LOW_FITNESS   PASS   1.00  1.6900   
2             LOW_TURNOVER   PASS   0.01  0.3402   
3            HIGH_TURNOVER   PASS   0.70  0.3402   
4      CONCENTRATED_WEIGHT   PASS    NaN     NaN   
5  LOW_SUB_UNIVERSE_SHARPE   PASS   0.91  1.2500   
6         SELF_CORRELATION   PASS   0.70  0.4079   
7         PROD_CORRELATION   FAIL   0.70  0.8244   
8       REGULAR_SUBMISSION   PASS   4.00  0.0000   
9      MATCHES_COMPETITION   PASS    NaN     NaN   

                                 competitions alpha_id  
0                                         NaN  dx9YMjE  
1                                         NaN  dx9YMjE  
2                                         NaN  dx9YMjE  
3                                         NaN  dx9YMjE  
4                                         NaN  dx9YMjE  
5                                         NaN  dx9YMjE  
6                           

위 함수로 통과한 알파들을 한꺼번에 돌려볼 수 있습니다. 다만 여러개를 돌리면 시간이 꽤 걸리는 점 주의해 주시길 바랍니다.

In [49]:
# Attach the tag 'Sep06_GLB' to every passed alpha.
for alpha_id in passed_alphas:
    hf.set_alpha_properties(s, alpha_id, tags = ['Sep06_GLB'])

위 함수로 통과한 알파들에 태그를 붙일 수 있습니다.

Once you have a list of submittable alphas, you can call the function `submit_alpha()`.

In [50]:
# Call submit_alpha for every alpha in passed_alphas (no FAIL in the IS tests)
# and collect the return values in a dict keyed by alpha id.

submit_result = {alpha_id: ace.submit_alpha(s, alpha_id) for alpha_id in passed_alphas}

위 함수로 알파를 제출할 수 있습니다.

알파의 id 로 제출하는 것이기 때문에, 그 알파를 시뮬레이션 한 날짜가 중요합니다. 가령, 1주일 전에 시뮬레이션 해 결과를 저장해 두었던 alpha id로 제출을 한다면 제출 날짜가 1주일 전이 됩니다. 그러니 오늘 제출하고 싶은 alpha 가 있다면 시뮬레이션을 다시 돌려 새로 생긴 alpha id 를 이용해야 합니다. 이 과정은 alpha 에 특정 태그를 달아 Brain 플랫폼에서 직접 하는게 훨씬 수월합니다. (다만, 플랫폼에서는 alpha id 로 알파를 검색할 수 없습니다.)

In [51]:
# submit_result maps each alpha id to the value returned by ace.submit_alpha

submit_result

{'ZpO36rQ': False}

주의하셔야 하실 점은, 하루 최대 알파 제출 한도인 4개를 루프중에 이미 넘기면 그 뒤의 알파들이 제출 가능하더라도 자동으로 제출에 실패하게 됩니다.

### Library Functions

The following are some other functions that you can use for your own analysis:

**get_alpha_pnl(s, alpha_id)** - to get the pnl for an alpha

**get_alpha_yearly_stats(s, alpha_id)** - to get yearly statistics for an alpha

**get_self_corr(s, alpha_id)** - to get self correlation results for an alpha

**get_prod_corr(s, alpha_id)** - to get prod correlation results for an alpha

**get_check_submission(s, alpha_id)** - to get check submission result for an alpha

**check_self_corr_test(s, alpha_id)** - to check if alpha passes self correlation test (self_corr<0.7)

**check_prod_corr_test(s, alpha_id)** - to check if alpha passes prod correlation test (prod_corr<0.7)

**perfomance_comparison(s, alpha_id)** - to get the result of performance comparison for an alpha merged performance

유용한 추가 함수들입니다. prod correlation 혹은 self correlation 만을 확인하고 싶을 때, 혹은 알파의 pnl 을 확인하고 싶을때 유용히 쓰입니다.