### Import library

In [1]:
# Move the working directory two levels up from the notebook's folder
# (presumably to the repository root so the project-local modules imported below resolve — confirm for your checkout).
# NOTE: the path is relative, so re-running this cell moves up two more levels each time.
import os
os.chdir('../../')
os.getcwd()

'C:\\Users\\csia7\\OneDrive\\문서\\GitHub\\WQBrain_2024_API'

In [2]:
import ace_lib as ace
import helpful_functions as hf
import pandas as pd
import requests
import plotly.express as px
import pygwalker as pyg
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

### Start session
Enter your credentials once - they will be saved to a local folder and loaded each time you start a session.

In [3]:
# Open a session; `s` is the session handle passed to the ace_lib / helpful_functions API calls below.
s = ace.start_session()

Complete biometrics authentication and press any key to continue: 
https://api.worldquantbrain.com/authentication/persona?inquiry=inq_RyTLo2zyNjjwPtYyrQQVyDazqaep

 


## Global region Alpha Template

In [4]:
# Example alpha expression for the GLB region, written out for one concrete data field
# (fnd23_intfvalld1_ecns). The text contains no placeholders, so a plain string literal
# is used instead of an f-string; the resulting value is unchanged.
expression_template = '''
group = (country+1)*group_max(pv13_52_minvol_1m_all_delay_1_sector, market) + pv13_52_minvol_1m_all_delay_1_sector;
group_neutralize(ts_scale(group_backfill(fnd23_intfvalld1_ecns, industry, 252, std=1), 252), densify(group))
'''

#### Step 1. Download datasets

In [6]:
# Load the dataset catalogue for region GLB / universe MINVOL1M (both passed explicitly here).
datasets_df = hf.get_datasets(s, region = 'GLB', universe = 'MINVOL1M')
datasets_df # display the DataFrame; use datasets_df.head() instead to show only the first 5 rows

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
0,analyst11,ESG scores,Environmental Social Governance scores that ex...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-esg', 'name': 'ESG'}",GLB,1,MINVOL1M,0.7917,,4.0,66,304,197,[],"[{'title': 'Research Paper 19: ESG Preference,..."
1,analyst14,Estimations of Key Fundamentals,This dataset reports many items from financial...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-analyst-estimates', 'name': 'A...",GLB,1,MINVOL1M,0.5186,,3.0,101,589,926,[],[{'title': 'Research Paper 10: Investor Learni...
2,analyst15,Earnings forecasts,This dataset provides bottom-up forecast data ...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-analyst-estimates', 'name': 'A...",GLB,1,MINVOL1M,0.9929,,2.0,158,1497,288,[],[]
3,analyst16,Real Time Estimates,This dataset provides real-time access to the ...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-crowdsourced-estimates', 'name...",GLB,1,MINVOL1M,0.9013,,2.0,134,821,42,[],[]
4,analyst35,ESG Model,The dataset provide ESG related information ba...,"{'id': 'analyst', 'name': 'Analyst'}","{'id': 'analyst-esg', 'name': 'ESG'}",GLB,1,MINVOL1M,0.4375,,3.0,63,202,23,[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,shortinterest6,SmartHoldings Model,This dataset is a global stock selection model...,"{'id': 'shortinterest', 'name': 'Short Interest'}","{'id': 'shortinterest-short-sale-models', 'nam...",GLB,1,MINVOL1M,0.9711,,2.0,92,389,11,[],[]
84,shortinterest7,Short Selling Model,The dataset combines features obtained from sh...,"{'id': 'shortinterest', 'name': 'Short Interest'}","{'id': 'shortinterest-short-sale-models', 'nam...",GLB,1,MINVOL1M,1.0000,,2.0,73,331,16,[],[{'title': 'Research Paper 04: Strategic Rebal...
85,socialmedia12,Sentiment Data for Equity,This dataset provides sentiment data with diff...,"{'id': 'socialmedia', 'name': 'Social Media'}","{'id': 'socialmedia-social-media', 'name': 'So...",GLB,1,MINVOL1M,0.6731,,3.0,17,25,2,[],[{'title': 'Research Paper 01: Textual Sentime...
86,socialmedia5,Lexical Breakdown Data,Sentiment scores derived from social media. So...,"{'id': 'socialmedia', 'name': 'Social Media'}","{'id': 'socialmedia-social-media', 'name': 'So...",GLB,1,MINVOL1M,0.3753,,5.0,2,3,12,[],[]


In [8]:
# Keep only datasets whose valueScore lies strictly between 3 and 6,
# ordered from the highest valueScore to the lowest.
selected_datasets_df = (
    datasets_df
    .loc[lambda df: df["valueScore"].gt(3) & df["valueScore"].lt(6)]
    .sort_values(by="valueScore", ascending=False)
)
selected_datasets_df

Unnamed: 0,id,name,description,category,subcategory,region,delay,universe,coverage,turnover,valueScore,userCount,alphaCount,fieldCount,themes,researchPapers
86,socialmedia5,Lexical Breakdown Data,Sentiment scores derived from social media. So...,"{'id': 'socialmedia', 'name': 'Social Media'}","{'id': 'socialmedia-social-media', 'name': 'So...",GLB,1,MINVOL1M,0.3753,,5.0,2,3,12,[],[]
58,option2,Implied Volatility and Pricing for Equity Options,Dataset based on security type and option pric...,"{'id': 'option', 'name': 'Option'}","{'id': 'option-option-volatility', 'name': 'Op...",GLB,1,MINVOL1M,0.4789,,5.0,1,3,1,[],[]
71,other83,Insider Transaction Analytics,Data of transactions carried out by the key co...,"{'id': 'other', 'name': 'Other'}","{'id': 'other-event-data', 'name': 'Event Data'}",GLB,1,MINVOL1M,0.4802,,5.0,9,15,15,[],[]
68,other47,Web Intelligence Data,This dataset provides budget expenditure on o...,"{'id': 'other', 'name': 'Other'}","{'id': 'other-employee-data', 'name': 'Employe...",GLB,1,MINVOL1M,0.621,,5.0,7,11,4,[],[]
30,macro27,Job records from job posting,This data set contains data found in the descr...,"{'id': 'macro', 'name': 'Macro'}","{'id': 'macro-macroeconomic-activities', 'name...",GLB,1,MINVOL1M,0.4335,,5.0,9,22,23,[],[{'title': 'Research Paper 75: The Stock Marke...
31,macro4,Macroeconomic Indicators,This dataset encompasses a comprehensive colle...,"{'id': 'macro', 'name': 'Macro'}","{'id': 'macro-macroeconomic-activities', 'name...",GLB,1,MINVOL1M,0.0,,5.0,3,5,1,[],[]
51,news23,MnA Deals Data,The dataset provides information on various tr...,"{'id': 'news', 'name': 'News'}","{'id': 'news-news', 'name': 'News'}",GLB,1,MINVOL1M,0.3279,,5.0,3,34,28,[],[]
65,other452,Earnings Tax Data,This dataset comprehensively studies a wide ra...,"{'id': 'other', 'name': 'Other'}","{'id': 'other-dividend-models', 'name': 'Divid...",GLB,1,MINVOL1M,0.3273,,5.0,14,74,23,[],[]
56,news76,Textual News Feed Data,The dataset is composed of different subscript...,"{'id': 'news', 'name': 'News'}","{'id': 'news-news', 'name': 'News'}",GLB,1,MINVOL1M,1.0,,5.0,1,6,1,[],[]
82,shortinterest2,Short Interest Model,This is a model dataset that gives short inter...,"{'id': 'shortinterest', 'name': 'Short Interest'}","{'id': 'shortinterest-short-sale-models', 'nam...",GLB,1,MINVOL1M,0.344,,4.0,15,44,8,[],[]


#### Step 2. Select the needed datafields

In [11]:
dataset_ids = selected_datasets_df.id.values.tolist() # plain Python list with the ids of all selected datasets, in the row order of selected_datasets_df
dataset_ids

['socialmedia5',
 'option2',
 'other83',
 'other47',
 'macro27',
 'macro4',
 'news23',
 'other452',
 'news76',
 'shortinterest2',
 'pv37',
 'pv132',
 'other455',
 'other450',
 'other351',
 'option4',
 'analyst11',
 'news87',
 'analyst40',
 'news54',
 'news52',
 'news3',
 'model211',
 'fundamental7',
 'fundamental22',
 'fundamental21',
 'fundamental1',
 'analyst44',
 'news66']

In [12]:
# For every selected dataset, collect the ids of (at most) its first 10 MATRIX-type
# data fields after sorting by userCount in descending order.
selected_datafields_full = []
for dataset_id in dataset_ids:
    datafields_df = hf.get_datafields(s, region='GLB', universe='MINVOL1M', dataset_id=dataset_id)
    # MATRIX fields only, highest userCount first
    selected_datafields_df = (
        datafields_df
        .loc[datafields_df['type'].eq('MATRIX')]
        .sort_values(by='userCount', ascending=False)
    )
    selected_datafields_df_id = selected_datafields_df['id'].tolist()
    selected_datafields_full.extend(selected_datafields_df_id[:10])
selected_datafields_full

['opt2_securitymap',
 'mcr4_value',
 'oth452_accrualsratio_d1_asset_change',
 'oth452_deferred_tax_expense_11',
 'oth452_deferred_tax_expense_31',
 'oth452_deferred_tax_liability_11',
 'oth452_deferred_tax_liability_12',
 'oth452_liability_d1_mean',
 'oth452_deferred_tax_expense_21',
 'oth452_deferred_tax_liability_31',
 'oth452_liability_d1_max',
 'oth452_liability_d1_length',
 'star_si_shortsqueeze_rank',
 'star_si_country_rank_unadj',
 'star_si_insown_pct',
 'shrt2_t12m_volatility_rank',
 'shrt2_t3m_volatility_rank',
 'star_si_country_rank',
 'pv37_close_global',
 'pv37_task_mean',
 'pv37_low_global30m',
 'pv37_vwap_13',
 'pv37_taks_mean',
 'pv37_lstp_mean',
 'pv37_volume_global2',
 'pv37_low_13',
 'pv37_close_global30m',
 'pv37_close_global2h',
 'oth455_relation_roam_w5_pca_fact3_value',
 'oth455_relation_roam_w1_pca_fact3_value',
 'oth455_relation_n2v_p10_q200_w1_pca_fact1_value',
 'oth455_partner_roam_w1_pca_fact2_value',
 'oth455_partner_n2v_p10_q50_w2_pca_fact1_value',
 'oth455

#### Step 4. Apply generate_alpha function to the expression list
In the generate_alpha function you can specify region, universe, decay, delay and other simulation settings.

이제 만들어둔 expressions 을 사용해 alpha simulation 을 해줍니다.

In [61]:
#?ace.generate_alpha

In [32]:

# Reusable alpha-expression template. `{data}` is a placeholder for a data field id and is
# filled in later with `expression_template.format(data=<field id>)`.
# This is deliberately NOT an f-string: an f-string would interpolate `data` right here,
# raising NameError on a fresh kernel or silently baking in a stale `data` value left over
# from another cell.
expression_template = '''
group = (country+1)*group_max(pv13_52_minvol_1m_all_delay_1_sector, market) + pv13_52_minvol_1m_all_delay_1_sector;
group_neutralize(ts_scale(group_backfill({data}, industry, 252, std=1), 252), densify(group))
'''

In [23]:
# Build one alpha expression per selected data field. Each expression is a single line:
# the custom `group` definition followed directly by the neutralized, scaled signal.
expressions = []
for data in selected_datafields_full:
    expressions.append(
        'group = (country+1)*group_max(pv13_52_minvol_1m_all_delay_1_sector, market) + pv13_52_minvol_1m_all_delay_1_sector;'
        f'group_neutralize(ts_scale(group_backfill({data}, industry, 252, std=1), 252), densify(group))'
    )

In [24]:
# Number of generated expressions (one per entry of selected_datafields_full).
len(expressions)

111

In [25]:
# All alphas sent together in one simulation list must share the same settings;
# alphas with different settings belong in a separate list. Every alpha built here
# uses the identical settings below.

alpha_list = [
    ace.generate_alpha(
        expression,
        region="GLB",
        universe="MINVOL1M",
        neutralization='COUNTRY',
        truncation=0.01,
        delay=1,
        decay=3,
    )
    for expression in expressions
]
alpha_list[0]

{'type': 'REGULAR',
 'settings': {'instrumentType': 'EQUITY',
  'region': 'GLB',
  'universe': 'MINVOL1M',
  'delay': 1,
  'decay': 3,
  'neutralization': 'COUNTRY',
  'truncation': 0.01,
  'pasteurization': 'ON',
  'testPeriod': 'P0Y0M0D',
  'unitHandling': 'VERIFY',
  'nanHandling': 'OFF',
  'language': 'FASTEXPR',
  'visualization': False},
 'regular': 'group = (country+1)*group_max(pv13_52_minvol_1m_all_delay_1_sector, market) + pv13_52_minvol_1m_all_delay_1_sector;group_neutralize(ts_scale(group_backfill(opt2_securitymap, industry, 252, std=1), 252), densify(group))'}

### Simulate alpha list, get simulation result

simulate_alpha_list_multi will do a multi-simulation if the list contains more than 10 alphas, which is the case here

the returned object will contain simulation results for all alphas as a list

In [30]:
#only a slice of the alpha list is simulated in this run: indices 20 to 72 (53 alphas), not the whole list

result = ace.simulate_alpha_list_multi(s, alpha_list[20:73])

100%|███████████████████████████████████████████████████████████████████████████████| 18/18 [1:04:38<00:00, 215.49s/it]


위 코드를 실행하면 시뮬레이션이 시작됩니다. 100개의 알파에 대략 1시간 정도 소요되니, 인터넷 연결이 끊기지 않게 주의하시고, 너무 많은 알파를 한 번에 돌리기보다는 몇개씩 끊어서 돌려도 좋습니다.

In [31]:
#prettify_result from the helpful_functions library gives a tabular view of the IS stats of all simulated alphas;
#detailed_tests_view=False is passed here (the table below shows one PASS/FAIL column per test)

result_st1 = hf.prettify_result(result, detailed_tests_view=False)
result_st1

Unnamed: 0,pnl,book_size,long_count,short_count,turnover,returns,drawdown,margin,fitness,sharpe,start_date,alpha_id,expression,concentrated_weight,high_turnover,is_ladder_sharpe,low_fitness,low_sharpe,low_sub_universe_sharpe,low_turnover
0,4601334,20000000,4066,3941,0.0762,0.0444,0.1188,0.001166,0.77,1.29,2012-01-22,K90KLRp,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
1,5814975,20000000,3133,5255,0.4043,0.0562,0.069,0.000278,0.61,1.63,2012-01-22,gM6nNPJ,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,PASS,PASS,PASS
2,4804502,20000000,4073,4292,0.0501,0.0464,0.1519,0.001853,0.52,0.86,2012-01-22,71AkdOx,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
3,4949151,20000000,3525,4866,0.1171,0.0478,0.2145,0.000816,0.46,0.74,2012-01-22,Y06R3mw,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,FAIL,PASS
4,4693902,20000000,3452,4941,0.1202,0.0453,0.2135,0.000754,0.42,0.7,2012-01-22,xkqJ79N,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
5,4631501,20000000,3439,4951,0.1203,0.0447,0.2115,0.000743,0.42,0.7,2012-01-22,k0dzpXL,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
6,4656866,20000000,4037,4357,0.0855,0.045,0.1322,0.001052,0.38,0.63,2012-01-22,R83J8jd,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,FAIL,PASS
7,2069033,20000000,4205,4184,0.0309,0.02,0.0678,0.001294,0.31,0.78,2012-01-22,bqkYO0N,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
8,1713533,20000000,4075,4295,0.0171,0.0165,0.0661,0.001938,0.28,0.76,2012-01-22,8Qomdvl,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,PASS,PASS
9,2909273,20000000,4516,3875,0.0518,0.0281,0.1392,0.001084,0.28,0.6,2012-01-22,APWRXAY,group = (country+1)*group_max(pv13_52_minvol_1...,PASS,PASS,FAIL,FAIL,FAIL,FAIL,PASS


prettify 함수를 이용해 dataframe 형식으로 변환하면 결과를 보기 더 편합니다.

In [23]:
# Save the summary table as CSV; the path is relative, so the file is written to the current working directory.
result_st1.to_csv('2024_09_013_GLB.csv')

### How to submit?

Create a list of submittable alphas - alphas that have no FAIL in is_tests

시뮬레이션을 마친 알파 중 제출 가능한 알파들을 제출해 봅시다.

In [32]:
#combine the IS test results of all simulated alphas into one table (one row per alpha and test, see output below)

is_tests_df = hf.concat_is_tests(result)
is_tests_df

Unnamed: 0,alpha_id,date,endDate,limit,message,name,result,startDate,themes,value,year
0,xkqJ79N,,,1.58,,LOW_SHARPE,FAIL,,,0.7000,
1,xkqJ79N,,,1.00,,LOW_FITNESS,FAIL,,,0.4200,
2,xkqJ79N,,,0.01,,LOW_TURNOVER,PASS,,,0.1202,
3,xkqJ79N,,,0.70,,HIGH_TURNOVER,PASS,,,0.1202,
4,xkqJ79N,,,,,CONCENTRATED_WEIGHT,PASS,,,,
...,...,...,...,...,...,...,...,...,...,...,...
691,WGMvJnx,,,,,DATA_DIVERSITY,PENDING,,,,
692,WGMvJnx,,,,,PROD_CORRELATION,PENDING,,,,
693,WGMvJnx,,,,,REGULAR_SUBMISSION,PENDING,,,,
694,WGMvJnx,,2020-01-25,1.58,,IS_LADDER_SHARPE,FAIL,2022-01-24,,-2.5700,2.0


In [33]:
#alphas with at least one FAIL among their IS tests (array of unique ids, in order of first appearance)
failed_alphas = is_tests_df.query('result=="FAIL"')['alpha_id'].unique()

#alphas without any FAIL result; note that tests whose result is not "FAIL" (e.g. PENDING) do not exclude an alpha.
#The ids keep their order of first appearance in is_tests_df instead of the arbitrary order of a set,
#so the tagging/submission loops that iterate over this list run in a reproducible order.
failed_alpha_ids = set(failed_alphas)
passed_alphas = [
    alpha_id
    for alpha_id in is_tests_df['alpha_id'].unique()
    if alpha_id not in failed_alpha_ids
]

print(f'Failed alphas:{failed_alphas}\nPassed alphas:{passed_alphas}')

Failed alphas:['xkqJ79N' 'k0dzpXL' 'QVMwRjp' 'Y06R3mw' 'gM6nNPJ' 'ol7xjzE' 'R8MGx6d'
 'an65mVv' 'vl8W7Or' 'j0E5bAo' 'OZY1E8Y' 'QVJ1LKQ' 'j0E5be5' '8Qomdvl'
 'ql1KoL1' 'd0X2wwx' 'MQWjAAr' 'vlqKV0d' 'ql1KXZP' 'K90KLRp' '71AkdOx'
 'wlm8er1' 'L1jPRQM' 'wlm8e01' 'GLaqL1P' 'R83J8jd' '5ObZOn5' 'bqkYO0N'
 'OZY1831' 'P3WJgVq' 'plXqabX' 'e0plaMM' 'APWRxpQ' 'xk6Klnw' '71AkGw8'
 'llqLgle' '698n2WL' 'anYrgpW' 'OZY123d' 'e0plM1N' 'R83Jx9a' '71AkPvb'
 'e0plMME' 'VP9vQr0' 'APWRXAY' 'APW11aw' 'Y0rPPW6' '1d9YYnW' 'AP9oEbg'
 '8QbMX6m' 'rl3KYrJ' 'n2Jp9Ol' 'WGMvJnx']
Passed alphas:[]


In [5]:
# Tag a hard-coded list of alpha ids with 'pass_Sep07_mixing'.
# NOTE(review): these ids do not appear in the simulation results shown in this notebook — confirm where they come from.
for alpha_id in ['ad59v2O', 'k3z6qqk', 'Lp0YPva', '9wNOzNr', '7weR901', '6w5Ln6E', 'QawpLe5', '17rAQ6m', '273obEY', 'xxJpKLg', 'Wpv2EZo', 'm8EWMk1']:
    hf.set_alpha_properties(s, alpha_id, tags = ['pass_Sep07_mixing'])

In [27]:
# Tag every alpha in passed_alphas with 'Sep07_GLB'; does nothing when passed_alphas is empty.
for alpha_id in passed_alphas:
    hf.set_alpha_properties(s, alpha_id, tags = ['Sep07_GLB'])

위 함수로 통과한 알파들에 태그를 붙일 수 있습니다.

Once you have a list of submittable alphas, you can call the function submit_alpha()

In [50]:
#call submit_alpha once for every alpha in passed_alphas;
#submit_result maps each alpha_id to the value returned by ace.submit_alpha for it

submit_result = {alpha_id: ace.submit_alpha(s, alpha_id) for alpha_id in passed_alphas}

위 함수로 알파를 제출할 수 있습니다.

알파의 id 로 제출하는 것이기 때문에, 그 알파를 시뮬레이션 한 날짜가 중요합니다. 가령, 1주일 전에 시뮬레이션 해 결과를 저장해 두었던 alpha id로 제출을 한다면 제출 날짜가 1주일 전이 됩니다. 그러니 오늘 제출하고 싶은 alpha 가 있다면 시뮬레이션을 다시 돌려 새로 생긴 alpha id 를 이용해야 합니다. 이 과정은 alpha 에 특정 태그를 달아 Brain 플랫폼에서 직접 하는게 훨씬 수월합니다. (다만, 플랫폼에서는 alpha id 로 알파를 검색할 수 없습니다.)

In [51]:
#submit_result holds the return values of the submit_alpha calls, keyed by alpha_id

submit_result

{'ZpO36rQ': False}

주의하셔야 하실 점은, 하루 최대 알파 제출 한도인 4개를 루프중에 이미 넘기면 그 뒤의 알파들이 제출 가능하더라도 자동으로 제출에 실패하게 됩니다.

### Library Functions

The following are some other functions that you can use for your own analysis:

**get_alpha_pnl(s, alpha_id)** - to get the pnl for an alpha

**get_alpha_yearly_stats(s, alpha_id)** - to get yearly statistics for an alpha

**get_self_corr(s, alpha_id)** - to get self correlation results for an alpha

**get_prod_corr(s, alpha_id)** - to get prod correlation results for an alpha

**get_check_submission(s, alpha_id)** - to get check submission result for an alpha

**check_self_corr_test(s, alpha_id)** - to check if alpha passes self correlation test (self_corr<0.7)

**check_prod_corr_test(s, alpha_id)** - to check if alpha passes prod correlation test (prod_corr<0.7)

**perfomance_comparison(s, alpha_id)** - to get the result of performance comparison for an alpha merged performance

유용한 추가 함수들입니다. prod correlation 혹은 self correlation 만을 확인하고 싶을 때, 혹은 알파의 pnl 을 확인하고 싶을때 유용히 쓰입니다.