### Import library

In [1]:
# Move the working directory two levels up; presumably this is the project root
# that holds the local modules imported in the next cell (ace_lib,
# helpful_functions) — verify against the repository layout.
# NOTE(review): the path is relative, so every re-run of this cell climbs two
# more directories. Run it exactly once per kernel session.
import os
os.chdir('../../')
# Echo the resulting directory as a sanity check. The saved output is an
# absolute local path; clear it before sharing the notebook.
os.getcwd()

'C:\\Users\\csia7\\OneDrive\\문서\\GitHub\\WQBrain_2024_API'

In [2]:
import ace_lib as ace
import helpful_functions as hf
import pandas as pd
import requests
import plotly.express as px
import pygwalker as pyg
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

### Start session
Enter credentials once - they will be saved to local folder and loaded each time

In [3]:
# Open the Brain API session; `s` is passed to every ace/hf call further down.
# NOTE(review): the saved output of this cell contains a personal
# authentication link — clear it before sharing the notebook.
s = ace.start_session()

Complete biometrics authentication and press any key to continue: 
https://api.worldquantbrain.com/authentication/persona?inquiry=inq_bjYNzoqQ7cYkHX47aUpkD4GfXAbM

 


## Global TOP3000 region Alpha Template

In [4]:
# Reference alpha expression that the rest of the notebook generalises.
# It is assembled line by line; the leading and trailing empty entries keep the
# same surrounding newlines as a triple-quoted block would have.
_reference_expression_lines = [
    "",
    "signal1 = ts_zscore (ts_backfill (vec_avg(mdl139_score), 252), 252)",
    "signal2 = ts_zscore (ts_backfill (vec_avg(oth193_shield2), 252), 252)",
    "group = pv13_6l_scibr",
    "group_zscore(signal1, signal2, group)",
    "",
    "",
]
expression_template = "\n".join(_reference_expression_lines)

#### Step 1. Download datasets

In [5]:
# Fetch the dataset catalogue for region GLB / universe MINVOL1M. Both are
# passed explicitly here rather than relying on get_datasets' defaults.
datasets_df = hf.get_datasets(s, region = 'GLB', universe = 'MINVOL1M')
# Display the frame (no .head() is applied, so the notebook shows its truncated view).
datasets_df

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
0,analyst11,ESG scores,Environmental Social Governance scores that ex...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-esg', 'name': 'ESG'}",GLB,1,MINVOL1M,0.7917,,4.0,66,304,197,[],"[{'title': 'Research Paper 19: ESG Preference,..."
1,analyst14,Estimations of Key Fundamentals,This dataset reports many items from financial...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-analyst-estimates', 'name': 'A...",GLB,1,MINVOL1M,0.5186,,3.0,101,589,926,[],[{'title': 'Research Paper 10: Investor Learni...
2,analyst15,Earnings forecasts,This dataset provides bottom-up forecast data ...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-analyst-estimates', 'name': 'A...",GLB,1,MINVOL1M,0.9929,,2.0,158,1497,288,[],[]
3,analyst16,Real Time Estimates,This dataset provides real-time access to the ...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-crowdsourced-estimates', 'name...",GLB,1,MINVOL1M,0.9013,,2.0,134,821,42,[],[]
4,analyst35,ESG Model,The dataset provide ESG related information ba...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-esg', 'name': 'ESG'}",GLB,1,MINVOL1M,0.4375,,3.0,63,202,23,[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,shortinterest6,SmartHoldings Model,This dataset is a global stock selection model...,"{'id': 'shortinterest', 'name': 'Short Interest'}","{'id': 'shortinterest-short-sale-models', 'nam...",GLB,1,MINVOL1M,0.9711,,2.0,92,389,11,[],[]
84,shortinterest7,Short Selling Model,The dataset combines features obtained from sh...,"{'id': 'shortinterest', 'name': 'Short Interest'}","{'id': 'shortinterest-short-sale-models', 'nam...",GLB,1,MINVOL1M,1.0000,,2.0,73,331,16,[],[{'title': 'Research Paper 04: Strategic Rebal...
85,socialmedia12,Sentiment Data for Equity,This dataset provides sentiment data with diff...,"{'id': 'socialmedia', 'name': 'Social Media'}","{'id': 'socialmedia-social-media', 'name': 'So...",GLB,1,MINVOL1M,0.6731,,3.0,17,25,2,[],[{'title': 'Research Paper 01: Textual Sentime...
86,socialmedia5,Lexical Breakdown Data,Sentiment scores derived from social media. So...,"{'id': 'socialmedia', 'name': 'Social Media'}","{'id': 'socialmedia-social-media', 'name': 'So...",GLB,1,MINVOL1M,0.3753,,5.0,2,3,12,[],[]


In [6]:
# Keep the datasets whose name mentions "inflation" (case-insensitive),
# ordered from highest to lowest valueScore.
inflation_name_mask = datasets_df["name"].str.contains('inflation', case=False)
selected_datasets_df_1 = datasets_df[inflation_name_mask].sort_values(
    by=['valueScore'], ascending=False
)
selected_datasets_df_1

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
35,model139,Inflation based stock selection model,The model is expected to generate signal in an...,"{'id': 'model', 'name': 'Model'}","{'id': 'model-risk-based-models', 'name': 'Ris...",GLB,1,MINVOL1M,0.5965,,1.0,121,891,2,[],[]


In [7]:
# Keep the datasets whose name mentions "systematic hedging" (case-insensitive),
# ordered from highest to lowest valueScore.
hedging_name_mask = datasets_df["name"].str.contains('systematic hedging', case=False)
selected_datasets_df_2 = datasets_df[hedging_name_mask].sort_values(
    by=['valueScore'], ascending=False
)
selected_datasets_df_2

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
61,other193,Systematic Hedging for Investors to Evade Larg...,Machine learning based stock selection model. ...,"{'id': 'other', 'name': 'Other'}","{'id': 'other-aiml-data', 'name': 'AI/ML Data'}",GLB,1,MINVOL1M,0.5667,,2.0,125,609,3,[],[]


In [8]:
# Keep the datasets whose name mentions "relationship data for equity"
# (case-insensitive), ordered from highest to lowest valueScore.
relationship_name_mask = datasets_df["name"].str.contains(
    'relationship data for equity', case=False
)
selected_datasets_df_3 = datasets_df[relationship_name_mask].sort_values(
    by=['valueScore'], ascending=False
)
selected_datasets_df_3

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
73,pv13,Relationship Data for Equity,The dataset outputs various classifications an...,"{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,0.9152,,1.0,130,1964,29,[],[]


#### Step 2. Select the needed datafields

### Data 1

In [9]:
# Take the id of the first row of the filtered datasets as a single value
# (not a list); in the saved run the filter result has exactly one row.
dataset_id_1 = selected_datasets_df_1.id.values.tolist()[0]
dataset_id_1

'model139'

In [10]:
# Download the data fields of the dataset chosen above (dataset_id_1) for GLB / MINVOL1M.
datafields_df_1 = hf.get_datafields(s, region = 'GLB', universe = 'MINVOL1M', dataset_id=dataset_id_1)
datafields_df_1

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
0,mdl139_mktcap,Market Cap,"{'id': 'model139', 'name': 'Inflation based st...","{'id': 'model', 'name': 'Model'}","{'id': 'model-risk-based-models', 'name': 'Ris...",GLB,1,MINVOL1M,VECTOR,0.5965,,7,18,[]
1,mdl139_score,Composite Score (value between 0 and 1),"{'id': 'model139', 'name': 'Inflation based st...","{'id': 'model', 'name': 'Model'}","{'id': 'model-risk-based-models', 'name': 'Ris...",GLB,1,MINVOL1M,VECTOR,0.5965,,119,888,[]


In [11]:
# Collect the ids of all fields of this dataset as a plain list (no filtering is applied).
selected_datafields_df_1_id = datafields_df_1.id.values.tolist()
selected_datafields_df_1_id

['mdl139_mktcap', 'mdl139_score']

### Data 2

In [12]:
# Take the id of the first row of the filtered datasets as a single value
# (not a list); in the saved run the filter result has exactly one row.
dataset_id_2 = selected_datasets_df_2.id.values.tolist()[0]
dataset_id_2

'other193'

In [13]:
# Download the data fields of the dataset chosen above (dataset_id_2) for GLB / MINVOL1M.
datafields_df_2 = hf.get_datafields(s, region = 'GLB', universe = 'MINVOL1M', dataset_id=dataset_id_2)
datafields_df_2

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
0,oth193_shield,The field gives the score which comes from the...,"{'id': 'other193', 'name': 'Systematic Hedging...","{'id': 'other', 'name': 'Other'}","{'id': 'other-aiml-data', 'name': 'AI/ML Data'}",GLB,1,MINVOL1M,VECTOR,0.5667,,59,223,[]
1,oth193_shield2,The field gives the score which comes from a m...,"{'id': 'other193', 'name': 'Systematic Hedging...","{'id': 'other', 'name': 'Other'}","{'id': 'other-aiml-data', 'name': 'AI/ML Data'}",GLB,1,MINVOL1M,VECTOR,0.5667,,109,431,[]
2,oth193_wolfe_shield_global_mktcap,Martket cap values in dollar,"{'id': 'other193', 'name': 'Systematic Hedging...","{'id': 'other', 'name': 'Other'}","{'id': 'other-aiml-data', 'name': 'AI/ML Data'}",GLB,1,MINVOL1M,VECTOR,0.5667,,29,52,[]


In [14]:
# Collect the ids of all fields of this dataset as a plain list (no filtering is applied).
selected_datafields_df_2_id = datafields_df_2.id.values.tolist()
selected_datafields_df_2_id

['oth193_shield', 'oth193_shield2', 'oth193_wolfe_shield_global_mktcap']

### Data 3

In [15]:
# Take the id of the first row of the filtered datasets as a single value
# (not a list); in the saved run the filter result has exactly one row.
dataset_id_3 = selected_datasets_df_3.id.values.tolist()[0]
dataset_id_3

'pv13'

In [16]:
# Download the data fields of the dataset chosen above (dataset_id_3) for GLB / MINVOL1M.
datafields_df_3 = hf.get_datafields(s, region = 'GLB', universe = 'MINVOL1M', dataset_id=dataset_id_3)
datafields_df_3

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
0,pv13_10_f2_g3_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,27,147,[]
1,pv13_10_f2_g4_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,16,102,[]
2,pv13_10_f3_g2_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,17,99,[]
3,pv13_10_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,36,124,[]
4,pv13_10_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,4,10,[]
5,pv13_1l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.877,,31,120,[]
6,pv13_20_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,38,247,[]
7,pv13_2_f3_g2_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,19,78,[]
8,pv13_2_f4_g3_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,21,89,[]
9,pv13_2_minvol_1m_all_delay_1_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,26,94,[]


In [17]:
# Keep only the fields whose type contains "group" (case-insensitive) and rank
# them by userCount, highest first.
is_group_field = datafields_df_3['type'].str.contains('group', case=False)
selected_datafields_df_3 = datafields_df_3[is_group_field].sort_values(
    by=['userCount'], ascending=False
)
selected_datafields_df_3

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,turnover,userCount,alphaCount,themes
21,pv13_6l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,40,377,[]
6,pv13_20_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,38,247,[]
3,pv13_10_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,36,124,[]
20,pv13_5l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,34,291,[]
5,pv13_1l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.877,,31,120,[]
0,pv13_10_f2_g3_minvol_1m_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,27,147,[]
9,pv13_2_minvol_1m_all_delay_1_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,26,94,[]
13,pv13_3l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,25,60,[]
12,pv13_2l_scibr,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,0.9238,,23,128,[]
15,pv13_52_minvol_1m_all_delay_1_sector,grouping fields,"{'id': 'pv13', 'name': 'Relationship Data for ...","{'id': 'pv', 'name': 'Price Volume'}","{'id': 'pv-relationship', 'name': 'Relationship'}",GLB,1,MINVOL1M,GROUP,1.0,,23,132,[]


In [18]:
# Ids of the grouping fields, in the userCount-descending order produced by the previous cell.
selected_datafields_df_3_id = selected_datafields_df_3.id.values.tolist()


#### Step 4. Apply generate_alpha function to the expression list
In the `generate_alpha` function you can specify the region, universe, decay, delay and other simulation settings.

이제 만들어둔 expressions 을 사용해 alpha simulation 을 해줍니다.

In [19]:
#?ace.generate_alpha

In [20]:
# Template for the generated alpha expressions.
#
# The {data1}/{data2}/{data3} placeholders are intentionally left unfilled;
# fill them with expression_template.format(data1=..., data2=..., data3=...).
#
# This must be a plain string rather than an f-string: an f-string is
# interpolated on the spot, which previously required dummy values
# (data1 = data2 = data3 = 0) and baked "vec_avg(0)" / "group = 0" into the
# template, leaving nothing to substitute afterwards.
expression_template = '''
s1 = ts_zscore (ts_backfill (vec_avg({data1}), 252), 252)
s2 = ts_zscore (ts_backfill (vec_avg({data2}), 252), 252)
group = {data3}
group_zscore(s1, s2, group)
'''

In [21]:
# Every (dataset-1 field, dataset-2 field, grouping field) combination, using
# only the first five grouping fields of the userCount-ranked list.
top_group_field_ids = selected_datafields_df_3_id[:5]
parameters = [
    [field_1, field_2, group_field]
    for field_1 in selected_datafields_df_1_id
    for field_2 in selected_datafields_df_2_id
    for group_field in top_group_field_ids
]
parameters[:5]

[['mdl139_mktcap', 'oth193_shield', 'pv13_6l_scibr'],
 ['mdl139_mktcap', 'oth193_shield', 'pv13_20_minvol_1m_sector'],
 ['mdl139_mktcap', 'oth193_shield', 'pv13_10_minvol_1m_sector'],
 ['mdl139_mktcap', 'oth193_shield', 'pv13_5l_scibr'],
 ['mdl139_mktcap', 'oth193_shield', 'pv13_1l_scibr']]

In [22]:
# Build one single-line alpha expression per parameter triple. The individual
# statements are joined with ';' (no newlines).
expressions = []
for data1, data2, data3 in parameters:
    statements = (
        f's1 = ts_zscore (ts_backfill (vec_avg({data1}), 252), 252)',
        f's2 = ts_zscore (ts_backfill (vec_avg({data2}), 252), 252)',
        f'group = {data3}',
        'group_zscore(-vector_neut(s1,s2), group)',
    )
    expressions.append(';'.join(statements))

expressions[:5]

['s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mktcap), 252), 252);s2 = ts_zscore (ts_backfill (vec_avg(oth193_shield), 252), 252);group = pv13_6l_scibr;group_zscore(-vector_neut(s1,s2), group)',
 's1 = ts_zscore (ts_backfill (vec_avg(mdl139_mktcap), 252), 252);s2 = ts_zscore (ts_backfill (vec_avg(oth193_shield), 252), 252);group = pv13_20_minvol_1m_sector;group_zscore(-vector_neut(s1,s2), group)',
 's1 = ts_zscore (ts_backfill (vec_avg(mdl139_mktcap), 252), 252);s2 = ts_zscore (ts_backfill (vec_avg(oth193_shield), 252), 252);group = pv13_10_minvol_1m_sector;group_zscore(-vector_neut(s1,s2), group)',
 's1 = ts_zscore (ts_backfill (vec_avg(mdl139_mktcap), 252), 252);s2 = ts_zscore (ts_backfill (vec_avg(oth193_shield), 252), 252);group = pv13_5l_scibr;group_zscore(-vector_neut(s1,s2), group)',
 's1 = ts_zscore (ts_backfill (vec_avg(mdl139_mktcap), 252), 252);s2 = ts_zscore (ts_backfill (vec_avg(oth193_shield), 252), 252);group = pv13_1l_scibr;group_zscore(-vector_neut(s1,s2), group)']

In [23]:
# Sanity check on the number of generated expressions
# (2 x 3 x 5 field combinations in the saved run).
len(expressions)

30

In [24]:
# All alphas sent in one simulation list must share the same settings; alphas
# with different settings belong in a separate list. The shared settings are
# collected once and applied to every expression.
shared_simulation_settings = dict(
    region="GLB",
    universe="MINVOL1M",
    neutralization='COUNTRY',
    truncation=0.01,
    delay=1,
    decay=2,
)

alpha_list = [
    ace.generate_alpha(expression, **shared_simulation_settings)
    for expression in expressions
]
alpha_list[0]

{'type': 'REGULAR',
 'settings': {'instrumentType': 'EQUITY',
  'region': 'GLB',
  'universe': 'MINVOL1M',
  'delay': 1,
  'decay': 2,
  'neutralization': 'COUNTRY',
  'truncation': 0.01,
  'pasteurization': 'ON',
  'testPeriod': 'P0Y0M0D',
  'unitHandling': 'VERIFY',
  'nanHandling': 'OFF',
  'language': 'FASTEXPR',
  'visualization': False},
 'regular': 's1 = ts_zscore (ts_backfill (vec_avg(mdl139_mktcap), 252), 252);s2 = ts_zscore (ts_backfill (vec_avg(oth193_shield), 252), 252);group = pv13_6l_scibr;group_zscore(-vector_neut(s1,s2), group)'}

### Simulate alpha list, get simulation result

`simulate_alpha_list_multi` will run a multi-simulation if the list contains more than 10 alphas, which is the case here

the returned object will contain simulation results for all alphas as a list

In [25]:
# Simulate every alpha in alpha_list. The full list is passed; no slicing is applied here.

result = ace.simulate_alpha_list_multi(s, alpha_list)

100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [22:24<00:00, 134.44s/it]


위 코드를 실행하면 시뮬레이션이 시작됩니다. 100개의 알파에 대략 1시간 정도 소요되니, 인터넷 연결이 끊기지 않게 주의하시고, 너무 많은 알파를 한 번에 돌리기보다는 몇개씩 끊어서 돌려도 좋습니다.

In [26]:
# hf.prettify_result converts the raw simulation results into a DataFrame for
# inspecting the in-sample (IS) statistics of every simulated alpha.

result_st1 = hf.prettify_result(result, detailed_tests_view=False)
result_st1

Unnamed: 0,pnl,book_size,long_count,short_count,turnover,returns,drawdown,margin,fitness,sharpe,start_date,alpha_id,expression,concentrated_weight,high_turnover,is_ladder_sharpe,low_fitness,low_sharpe,low_sub_universe_sharpe,low_turnover
0,-988123,20000000,3156,3179,0.1036,-0.0095,0.2389,-0.000184,-0.06,-0.21,2012-01-22,XnrVMNx,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
1,-1120314,20000000,3156,3179,0.1029,-0.0108,0.2501,-0.00021,-0.07,-0.23,2012-01-22,llqE1v2,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
2,-1250454,20000000,3215,3237,0.1028,-0.0121,0.2499,-0.000235,-0.08,-0.25,2012-01-22,vlqZPjd,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
3,-1368491,20000000,3214,3237,0.1021,-0.0132,0.2605,-0.000259,-0.09,-0.28,2012-01-22,MQWdVna,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
4,-1916392,20000000,3478,3500,0.1022,-0.0185,0.3023,-0.000362,-0.13,-0.35,2012-01-22,OZYRgMJ,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
5,-2054220,20000000,3478,3501,0.1015,-0.0198,0.3146,-0.000391,-0.15,-0.37,2012-01-22,anYeGg9,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
6,-2407059,20000000,3476,3502,0.1006,-0.0232,0.3478,-0.000462,-0.18,-0.42,2012-01-22,8QoVXVX,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,FAIL,PASS
7,-2552634,20000000,3475,3502,0.0999,-0.0246,0.3607,-0.000493,-0.2,-0.45,2012-01-22,OZYRg2v,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,FAIL,PASS
8,-2265098,20000000,3149,3186,0.1464,-0.0219,0.2864,-0.000299,-0.3,-0.78,2012-01-22,xk62Kpl,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,FAIL,PASS
9,-2586950,20000000,3202,3250,0.1445,-0.025,0.3133,-0.000346,-0.35,-0.85,2012-01-22,EE7N9LR,s1 = ts_zscore (ts_backfill (vec_avg(mdl139_mk...,PASS,PASS,FAIL,FAIL,FAIL,FAIL,PASS


prettify 함수를 이용해 dataframe 형식으로 변환하면 결과를 보기 더 편합니다.

In [47]:
# Save the summary table as a CSV file (relative path, so it lands in the current working directory).
# NOTE(review): the file name says TOP3000, but the alphas above were generated
# with universe MINVOL1M — confirm the intended name.
result_st1.to_csv('2024_09_13_GLBTOP3000.csv')

### How to submit?

Create a list of submittable alphas - alphas that have no FAIL in is_tests

시뮬레이션을 마친 알파 중 제출 가능한 알파들을 제출해 봅시다.

In [27]:
# Combine the IS test results of all simulated alphas into one frame
# (one row per alpha and test, as the displayed output shows).

is_tests_df = hf.concat_is_tests(result)
is_tests_df

Unnamed: 0,alpha_id,endDate,limit,name,result,startDate,themes,value,year
0,vlqZPjd,,1.58,LOW_SHARPE,FAIL,,,-0.2500,
1,vlqZPjd,,1.00,LOW_FITNESS,FAIL,,,-0.0800,
2,vlqZPjd,,0.01,LOW_TURNOVER,PASS,,,0.1028,
3,vlqZPjd,,0.70,HIGH_TURNOVER,PASS,,,0.1028,
4,vlqZPjd,,,CONCENTRATED_WEIGHT,PASS,,,,
...,...,...,...,...,...,...,...,...,...
355,VP9mlQ5,,,DATA_DIVERSITY,PENDING,,,,
356,VP9mlQ5,,,PROD_CORRELATION,PENDING,,,,
357,VP9mlQ5,,,REGULAR_SUBMISSION,PENDING,,,,
358,VP9mlQ5,2020-01-25,1.58,IS_LADDER_SHARPE,FAIL,2022-01-24,,-3.2200,2.0


In [28]:
# Ids of alphas that have at least one IS test with result FAIL.
failed_alphas = is_tests_df.query('result=="FAIL"')['alpha_id'].unique()

# Every other id counts as passed. Only FAIL excludes an alpha, so alphas whose
# remaining tests are still PENDING are included.
# The ids are sorted to make the order reproducible: a bare set has arbitrary
# iteration order, and later cells slice passed_alphas by position.
passed_alphas = sorted(set(is_tests_df['alpha_id']).difference(failed_alphas))

print(f'Failed alphas:{failed_alphas}\nPassed alphas:{passed_alphas}')

Failed alphas:['vlqZPjd' '71A3lZQ' 'llqE1v2' 'XnrVMNx' '8QoVXVX' 'OZYRgMJ' 'OZYRg2v'
 'anYeGg9' 'MQWdVna' 'e0pNXvp' 'EE7N9LR' 'K90vWZx' 'e0pVlPE' 'xk62Kpl'
 'R83lJxe' 'olpJm5E' 'd0X35AY' '0ElNmv8' 'MQWem3a' 'VP9mJdY' 'ql1YqE2'
 '0ElNWVK' 'QVJNd8M' '2L0XkYY' 'Y0rEwev' '698oP5Y' 'xk629Kq' '1d9lmAJ'
 'NrwJjEw' 'VP9mlQ5']
Passed alphas:[]


In [5]:
# Tag a hand-picked set of alpha ids with 'pass_Sep07_mixing'.
sep07_mixing_alpha_ids = [
    'ad59v2O', 'k3z6qqk', 'Lp0YPva', '9wNOzNr',
    '7weR901', '6w5Ln6E', 'QawpLe5', '17rAQ6m',
    '273obEY', 'xxJpKLg', 'Wpv2EZo', 'm8EWMk1',
]
for alpha_id in sep07_mixing_alpha_ids:
    hf.set_alpha_properties(s, alpha_id, tags=['pass_Sep07_mixing'])

In [27]:
# Tag every alpha currently listed in passed_alphas with 'Sep07_GLB'.
for alpha_id in passed_alphas:
    hf.set_alpha_properties(s, alpha_id, tags = ['Sep07_GLB'])

## Temp

In [50]:
# Manually maintained list of alpha ids. It replaces whatever passed_alphas
# held before; the order matters because a later cell slices it by position.
passed_alphas = [
    '3RQeOx0', 'Xnm8ObX', 'zmKRARo', 'VP1Gdn0', 'Xnm8OJX',
    '1dkzRmX', 'Y0LgRZo', 'wlKake6', 'vlwNR83', 'ql9656v',
    'Y0LvopW', '2L8pN9P', 'EEOGRwG', 'L1VGOEe', '0EOMaXq',
    '5OnkGk1', 'k0EPrOg', '1dkp7YJ', 'vlwNmYz', 'K9lE7oj',
    '2L8prr5', 'ql9NrPE', 'GL1GAzQ', 'OZQGP5J', 'QVZ9wmG',
    'plYK9Jq', 'Y0Lvqqo', 'JOMGP0E', '8Q5pxpa', 'mbwVjGK',
    'anjE5N5', 'QVZ97RW', 'APj7LPg', 'd0kRPOK', 'VP137L5',
    'Xnm8Npz', 'EEOe2pR', 'MQML7E8', 'GL1el7Z', '1dkzGdX',
    'P3QOAgJ', 'GL1elRG', 'gMk8wnv', 'k0EPqxk', 'llpWXZn',
    '69mpoPE', 'Xnm8N8m', 'n2xNr1w', 'P3QOkn7', 'vlwNPRG',
    'vlwNap3', '3RQelNQ', 'ZnPK71n', 'ZnPKXKQ', 'EEOe9qJ',
    '5OnkGO1', '717znG8', 'j0z6Wqe',
]

In [51]:
# Number of alpha ids in the manually maintained list.
len(passed_alphas)

58

위 함수로 통과한 알파들에 태그를 붙일 수 있습니다.

Once you have a list of submittable alphas, you can call the function `submit_alpha()`

In [52]:
# Submit each alpha from position 40 onward in passed_alphas and record what
# ace.submit_alpha returns for it, keyed by alpha id.

submit_result = {}
for alpha_id in passed_alphas[40:]:
    submit_result[alpha_id] = ace.submit_alpha(s, alpha_id)

위 함수로 알파를 제출할 수 있습니다.

알파의 id 로 제출하는 것이기 때문에, 그 알파를 시뮬레이션 한 날짜가 중요합니다. 가령, 1주일 전에 시뮬레이션 해 결과를 저장해 두었던 alpha id로 제출을 한다면 제출 날짜가 1주일 전이 됩니다. 그러니 오늘 제출하고 싶은 alpha 가 있다면 시뮬레이션을 다시 돌려 새로 생긴 alpha id 를 이용해야 합니다. 이 과정은 alpha 에 특정 태그를 달아 Brain 플랫폼에서 직접 하는게 훨씬 수월합니다. (다만, 플랫폼에서는 alpha id 로 알파를 검색할 수 없습니다.)

In [53]:
# submit_result maps each alpha id to the value returned by ace.submit_alpha for it.

submit_result

{'P3QOAgJ': False,
 'GL1elRG': False,
 'gMk8wnv': False,
 'k0EPqxk': False,
 'llpWXZn': False,
 '69mpoPE': False,
 'Xnm8N8m': False,
 'n2xNr1w': False,
 'P3QOkn7': False,
 'vlwNPRG': False,
 'vlwNap3': False,
 '3RQelNQ': False,
 'ZnPK71n': False,
 'ZnPKXKQ': False,
 'EEOe9qJ': False,
 '5OnkGO1': False,
 '717znG8': False,
 'j0z6Wqe': False}

주의하셔야 하실 점은, 하루 최대 알파 제출 한도인 4개를 루프중에 이미 넘기면 그 뒤의 알파들이 제출 가능하더라도 자동으로 제출에 실패하게 됩니다.

### Library Functions

The following are some other functions that you can use for your own analysis:

**get_alpha_pnl(s, alpha_id)** - to get the pnl for an alpha

**get_alpha_yearly_stats(s, alpha_id)** - to get yearly statistics for an alpha

**get_self_corr(s, alpha_id)** - to get self correlation results for an alpha

**get_prod_corr(s, alpha_id)** - to get prod correlation results for an alpha

**get_check_submission(s, alpha_id)** - to get check submission result for an alpha

**check_self_corr_test(s, alpha_id)** - to check if alpha passes self correlation test (self_corr<0.7)

**check_prod_corr_test(s, alpha_id)** - to check if alpha passes prod correlation test (prod_corr<0.7)

**perfomance_comparison(s, alpha_id)** - to get the result of performance comparison for an alpha merged performance

유용한 추가 함수들입니다. prod correlation 혹은 self correlation 만을 확인하고 싶을 때, 혹은 알파의 pnl 을 확인하고 싶을때 유용히 쓰입니다.