In [5]:
import bql, pandas as pd, numpy as np, datetime as dt
from scipy.stats import mstats
from tqdm import tqdm
import calendar
# ============================================
# BQuant Multi-Factor Framework (US Equities)
# ============================================


In [65]:
# ============== Function to normalize DATES ==================
def normalize_bql_dates(df):
    """
    Return a copy of a BQL result frame that carries one canonical 'DATE' column.

    - When the frame has no 'DATE' column, the first column found among
      AS_OF_DATE / ID_DATE / REVISION_DATE / PERIOD_END_DATE (in that order)
      is renamed to 'DATE'.
    - Every remaining date-alias column, and 'CURRENCY', is dropped.

    The input frame is left untouched. Rows are neither de-duplicated nor sorted.
    """
    date_aliases = ['AS_OF_DATE', 'ID_DATE', 'REVISION_DATE', 'PERIOD_END_DATE']
    cleaned = df.copy()

    if 'DATE' not in cleaned.columns:
        # List order is the priority order: promote the first alias that exists.
        promoted = next((col for col in date_aliases if col in cleaned.columns), None)
        if promoted is not None:
            cleaned = cleaned.rename(columns={promoted: 'DATE'})

    # Anything still carrying an alias name is redundant now, as is CURRENCY.
    redundant = [col for col in date_aliases + ['CURRENCY'] if col in cleaned.columns]
    return cleaned.drop(columns=redundant)

def third_friday(year: int, month: int) -> pd.Timestamp:
    """Return the date of the third Friday of the given month as a Timestamp."""
    weeks = calendar.monthcalendar(year, month)
    # monthcalendar pads days outside the month with 0. If the first calendar
    # row already contains a Friday, the third Friday sits in row 2; otherwise
    # the first Friday is in row 1 and the third one is in row 3.
    week_idx = 2 if weeks[0][calendar.FRIDAY] != 0 else 3
    return pd.Timestamp(year=year, month=month, day=weeks[week_idx][calendar.FRIDAY])

def effective_next_business_day(ts: pd.Timestamp) -> pd.Timestamp:
    """Return the business day (Mon-Fri) that follows `ts`.

    Uses pandas' plain business-day offset, so exchange holidays are not skipped.
    """
    one_business_day = pd.offsets.BDay(1)
    return ts + one_business_day

def effective_previous_business_day(ts: pd.Timestamp) -> pd.Timestamp:
    """Return the business day (Mon-Fri) that precedes `ts`.

    Uses pandas' plain business-day offset, so exchange holidays are not skipped.
    """
    one_business_day = pd.offsets.BDay(1)
    return ts - one_business_day

# ========== Parameters ==========
# Index whose constituents are requested by the member-fetching cell.
UNIVERSE = "SPX Index"
# Bounds of the benchmark price request, as ISO date strings.
start_date = '2015-01-01'
end_date   = '2025-10-24'
# pandas frequency alias for the date grid built below.
freq = '1D'
# BQL service handle; the later cells issue their requests through this object.
bq = bql.Service()

# ========== Date series ==========
# Date grid from start_date to end_date at `freq`.
# NOTE: the member-fetching cell reassigns `dates` to the quarterly rebalance
# dates, so this cell must be re-run before reusing `dates` as the daily grid.
dates = pd.date_range(start=start_date, end=end_date, freq=freq)
# Debug alternative: a two-date index for quick test requests.
# dates = pd.DatetimeIndex(['2024-08-01', '2024-12-31'], dtype='datetime64[ns]')

# ============== Benchmark SPX ====================
# Request PX_LAST for the index on the date grid and drop rows with missing values.
spx = bq.execute(bql.Request("SPX Index", bq.data.px_last(dates=dates)))[0].df().dropna()
# Turn the frame's index into a regular column so it can be used by pivot() below.
spx = spx.reset_index()
# The value column's label contains 'PX_LAST'; rename the first match to exactly that.
# cols[0] raises IndexError if no column label contains 'PX_LAST'.
cols = [c for c in spx.columns if 'PX_LAST' in c]
spx = spx.rename(columns={cols[0]: 'PX_LAST'})
# Wide layout: one row per DATE, one column per ID (here only 'SPX Index').
spx_close = spx.pivot(index="DATE",columns="ID",values="PX_LAST")

# ============== Standardize dates ================
# Dates for which a benchmark close survived the dropna() above.
spx_dates = spx_close.index


In [66]:
spx_close

ID,SPX Index
DATE,Unnamed: 1_level_1
2015-01-02,2058.20
2015-01-05,2020.58
2015-01-06,2002.61
2015-01-07,2025.90
2015-01-08,2062.14
...,...
2025-10-20,6735.13
2025-10-21,6735.35
2025-10-22,6699.40
2025-10-23,6738.44


In [61]:
# ========== Quarterly rebalance schedule ==========
# Each quarter starts on the business day after the third Friday of
# Mar/Jun/Sep/Dec and ends on the business day before the next such start.
years = np.arange(2015, 2026, step=1)
q_months = [3, 6, 9, 12]
dates = sorted({
    effective_next_business_day(third_friday(y, m))
    for y in years for m in q_months
})
today = pd.Timestamp.today().normalize()

# One long-format frame per quarter. They are concatenated once after the loop
# instead of growing a DataFrame inside it (repeated concat is quadratic).
# If the loop is interrupted, the quarters fetched so far remain in this list.
quarter_frames = []

stop_after_this_loop = False  # set when the current (unfinished) quarter is reached
# ========== Main loop ==========
# Iterate over consecutive (start, next start) pairs: the last rebalance date
# only serves as an end marker, so there are len(dates) - 1 quarters. Indexing
# dates[i + 1] over the full range would raise IndexError once `today` is past
# the final rebalance date.
for q_idx in tqdm(range(len(dates) - 1), desc='Fetching SP500 daily members and prices'):
    # Step 1: Find the start/end dates of the current quarter
    q_start = dates[q_idx]
    q_end = effective_previous_business_day(dates[q_idx + 1])
    if q_end > today:
        # Current quarter: only fetch up to the last completed business day.
        q_end = effective_previous_business_day(today)
        stop_after_this_loop = True
    if q_end < q_start:
        # The quarter has only just started; there is no completed day to fetch.
        break
    q_start_str = q_start.strftime('%Y-%m-%d')
    q_end_str = q_end.strftime('%Y-%m-%d')

    # Step 2: Get the index members as of the quarter start
    members = bq.univ.members(UNIVERSE, dates=q_start_str)

    # Step 3: Build the data items for this quarter's date range
    dts = bq.func.range(start=q_start_str, end=q_end_str)
    params = {'dates': dts, 'fill': 'prev'}

    fields = {
        'CLOSE': bq.data.px_last(**params),
        'OPEN': bq.data.px_open(**params),
        'HIGH': bq.data.px_high(**params),
        'LOW': bq.data.px_low(**params),
        'VOL': bq.data.px_volume(**params),
        'BID_ASK_SPR': bq.data.average_bid_ask_spread_pct(**params),  # (ask-bid)/mid
        "VWAP": bq.data.vwap(**params),
        "VWAP_TOratio": bq.data.vwap_turnover(**params) / bq.data.cur_mkt_cap(**params),
        "VWAP_BID_VOL": bq.data.vwap_bid_vol_percentage(**params),
        "VWAP_ASK_VOL": bq.data.vwap_ask_vol_percentage(**params),

        # fundamental
        'TargetPx': bq.data.best_target_price(**params),
        'EPS': bq.data.is_eps(**params, fa_period_type='q'),           # earn_yld
        'PB': bq.data.px_to_book_ratio(**params),
        'PE': bq.data.pe_ratio(**params),
        'PS': bq.data.px_to_sales_ratio(**params),
        'ROE': bq.data.return_com_eqy(**params),
        'ROA': bq.data.return_on_asset(**params),
        'ROC': bq.data.return_on_cap(**params),
        'GM': bq.data.gross_margin(**params),
        'OM': bq.data.oper_margin(**params),
        'MV': bq.data.cur_mkt_cap(**params) / 1E9,                     # TotShares*close
        'TO': bq.data.turnover(**params),                              # vol*close
        'TOratio': bq.data.px_volume(**params) / bq.data.eqy_sh_out(**params),  # vol/TotShares = turnover/MV
        'FFratio': bq.data.eqy_free_float_pct(**params) / 100.0,
        'AssetTO': bq.data.asset_turnover(**params),
        'FCFyield': bq.data.free_cash_flow_yield(**params),
        'DE': bq.data.tot_debt_to_com_eqy(**params),
        'DIVyield': bq.data.dividend_yield(**params),
        'D_EBITDA': bq.data.net_debt_to_ebitda(**params),
        'Rating': bq.data.best_analyst_rating(**params),
    }

    # Step 4: Fetch each item separately and join them on (ID, DATE).
    # The loop variables are deliberately distinct from the outer quarter index.
    tech = pd.DataFrame()
    for name, item in fields.items():
        res = bq.execute(bql.Request(members, item))
        res_date_normed = normalize_bql_dates(res[0].df())
        # After normalisation the value column is the last one; give it the short name.
        res_date_normed = res_date_normed.rename(columns={res_date_normed.columns[-1]: name})
        d = res_date_normed[['DATE', name]]
        if tech.empty:
            tech = d
        else:
            # Inner join: an (ID, DATE) pair missing from any item is dropped.
            tech = pd.merge(tech, d, on=['ID', 'DATE'])

    # Step 5: Keep this quarter's data
    quarter_frames.append(tech)

    if stop_after_this_loop:
        break

if not quarter_frames:
    raise RuntimeError('No quarter could be fetched: check the rebalance dates against today.')

all_data = pd.concat(quarter_frames)
all_data = all_data.reset_index().set_index(['DATE', 'ID'])

Fetching SP500 daily members and prices:  95%|█████████▌| 42/44 [41:31<01:58, 59.31s/it]


In [63]:
all_data

Unnamed: 0_level_0,Unnamed: 1_level_0,CLOSE,OPEN,HIGH,LOW,VOL,BID_ASK_SPR,VWAP,VWAP_TOratio,VWAP_BID_VOL,VWAP_ASK_VOL,...,MV,TO,TOratio,FFratio,AssetTO,FCFyield,DE,DIVyield,D_EBITDA,Rating
DATE,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2015-03-23,1436513D UN Equity,50.59,50.49,50.850,50.48,536517.0,0.028688,50.612060,0.000817,29.896375,70.103625,...,33.252498,27154230.0,0.000816,0.996371,,,,,,3.64000
2015-03-24,1436513D UN Equity,49.85,50.32,50.600,49.84,616241.0,0.026687,50.030720,0.000941,72.463688,27.536313,...,32.766100,30830940.0,0.000938,0.996371,,,,,,3.64000
2015-03-25,1436513D UN Equity,48.82,49.91,50.120,48.82,667641.0,0.026625,49.280590,0.001025,69.628875,30.371125,...,32.089087,32901740.0,0.001016,0.996371,,,,,,3.64000
2015-03-26,1436513D UN Equity,49.19,48.64,49.285,48.42,618763.0,0.024813,49.061500,0.000939,65.237813,34.762188,...,32.332286,30357440.0,0.000941,0.996371,,,,,,3.64000
2015-03-27,1436513D UN Equity,49.08,49.09,49.310,48.93,662526.0,0.026562,49.088030,0.001008,38.273812,61.726188,...,32.259984,32522090.0,0.001008,0.996371,,,,,,3.64000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-10-20,ZTS UN Equity,145.06,143.87,145.610,143.55,619030.0,0.090244,145.000764,0.001236,92.873945,7.126055,...,64.288194,89764460.0,0.001397,0.915179,0.655286,3.451645,136.447659,0.013319,1.349458,4.68421
2025-10-21,ZTS UN Equity,146.09,144.83,147.150,144.83,590111.0,0.084092,146.048537,0.001137,8.954053,91.045947,...,64.744673,86209500.0,0.001332,0.915179,0.655286,3.427309,136.447659,0.013225,1.349458,4.68421
2025-10-22,ZTS UN Equity,147.11,146.40,148.270,145.89,539090.0,0.097857,147.155315,0.000998,90.971084,9.028916,...,65.196721,79360770.0,0.001216,0.915179,0.655286,3.403545,136.447659,0.013133,1.349458,4.68421
2025-10-23,ZTS UN Equity,146.39,146.00,147.360,145.39,396074.0,0.093246,146.281761,0.000738,11.019231,88.980769,...,64.877628,57929470.0,0.000894,0.915179,0.655286,3.420285,136.447659,0.013198,1.349458,4.68421


In [64]:
def preprocess(df_f):
    """
    Clean the raw (DATE, ID)-indexed panel and return a new frame.

    Steps:
    1. Object-dtype columns are converted to numeric; values that cannot be
       parsed become NaN. If the conversion itself raises, the column is
       stored as strings instead (best effort).
    2. Rows whose 'DATE' index level is missing are removed.
    3. Rows are sorted by ID, then DATE.
    4. Rows without a 'CLOSE' value are removed.

    Parameters
    ----------
    df_f : pandas.DataFrame
        Frame with a MultiIndex containing levels named 'DATE' and 'ID' and a
        'CLOSE' column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The cleaned copy.
    """
    # Work on a copy so the caller's frame (e.g. the one displayed in an
    # earlier cell) keeps its original columns.
    out = df_f.copy()

    # unify all column data types
    for col in out.columns:
        if out[col].dtype == 'object':
            try:  # convert to float
                out[col] = pd.to_numeric(out[col], errors='coerce')
            except Exception:
                # if not, to string
                out[col] = out[col].astype(str)

    # clean dates
    out = out[out.index.get_level_values('DATE').notna()]
    out = out.sort_index(level=['ID', 'DATE'])
    out = out.dropna(subset=['CLOSE'])
    return out

# Cleaned stock panel, indexed by (DATE, ID).
df_f = preprocess(all_data)
# Sorted unique dates present in the cleaned stock panel.
dates_f = df_f.index.get_level_values('DATE').unique().sort_values()
# Benchmark closes on exactly those dates; dates missing from spx_close become NaN.
df_b_aligned = spx_close.reindex(dates_f)
# Benchmark return between consecutive rows of the aligned series.
bench_ret = df_b_aligned['SPX Index'].pct_change()


# Window lengths, in rows (one row per ID per date).
MOM_WIN = 252   # momentum formation window
RSI_WIN = 14    # not referenced by the visible code (the RSI call hard-codes 14)
MA_LONG = 200   # long moving-average window
TD_SETUP = 9    # not referenced by the visible code
REV_WIN = 21    # not referenced by the visible code (the reversal uses SKIP)
SKIP = 21       # most recent rows excluded from momentum; also the reversal window

# === Target yield ===
# Target price relative to the close, minus 1.
df_f['Target_yield'] = df_f['TargetPx']/df_f['CLOSE'] - 1


# Per-ID return between consecutive rows of CLOSE (first row of each ID is NaN).
df_f['DailyRet'] = df_f.groupby('ID')['CLOSE'].pct_change()

# === Momentum (12_1) ===
def momentum_func(x):
    """Compound return over the MOM_WIN returns that precede the last SKIP ones.

    `x` is an array of per-period returns. Returns NaN when fewer than
    MOM_WIN + SKIP values are supplied.
    """
    lookback = MOM_WIN + SKIP
    if len(x) < lookback:
        return np.nan
    # Formation window: the oldest MOM_WIN of the last `lookback` values.
    formation = x[-lookback:-SKIP]
    return np.prod(1 + formation) - 1

# Per-ID rolling momentum: every window holds MOM_WIN+SKIP consecutive rows of
# DailyRet, and momentum_func compounds all but the most recent SKIP of them.
df_f['Momentum_12_1'] = (
    df_f.groupby('ID')['DailyRet']
        .rolling(MOM_WIN+SKIP)
        # raw=True hands momentum_func a NumPy array instead of a Series
        .apply(momentum_func, raw=True)
        # drop the outermost index level (the group key added by groupby-rolling)
        # so the result lines up with df_f's own index on assignment
        .reset_index(level=0, drop=True)
)

# === Reversal (1 month) ===
def reversal_func(x):
    """Return the negated compound return of the per-period returns in `x`."""
    compounded = np.prod(1 + x) - 1
    return -compounded
# Per-ID short-term reversal: negated compound return over the last REV_WIN rows.
# REV_WIN is the constant defined for this window; it currently equals SKIP, so
# the values are unchanged, but the two can now be tuned independently.
df_f['Reversal_1M'] = (
    df_f.groupby('ID')['DailyRet']
        .rolling(REV_WIN)
        # raw=True hands reversal_func a NumPy array instead of a Series
        .apply(reversal_func, raw=True)
        # drop the outermost index level (the group key added by groupby-rolling)
        # so the result lines up with df_f's own index on assignment
        .reset_index(level=0, drop=True)
)

# === Moving Average Cross ===
# Per-ID simple moving average of CLOSE over MA_LONG rows (NaN until the
# window is full).
ma_long = (
    df_f.groupby('ID')['CLOSE']
        .rolling(MA_LONG)
        .mean()
        # drop the outermost index level (the group key added by groupby-rolling)
        # so the series lines up with df_f's own index
        .reset_index(level=0, drop=True)
)
# Relative distance of the close from its long moving average.
df_f['MACross'] = (df_f['CLOSE'] / ma_long) - 1

# === RSI ===
def rsi(series, window=14):
    """Relative Strength Index of `series`, on a 0-100 scale.

    Gains and losses are smoothed with an exponential mean using
    alpha = 1/window (adjust=False). A tiny constant in the denominator avoids
    division by zero when there are no losses.
    """
    def _smooth(moves):
        return moves.ewm(alpha=1/window, adjust=False).mean()

    change = series.diff()
    avg_gain = _smooth(change.clip(lower=0))
    avg_loss = _smooth(-change.clip(upper=0))
    relative_strength = avg_gain / (avg_loss + 1e-12)
    return 100 - (100 / (1 + relative_strength))

# Per-ID RSI of the close. The window comes from RSI_WIN (the constant defined
# for it) instead of a second hard-coded 14; the values are unchanged while
# RSI_WIN == 14. The column keeps its historical name 'RSI14'.
# group_keys=False keeps the original row index so the result aligns on assignment.
df_f['RSI14'] = (
    df_f.groupby('ID', group_keys=False)['CLOSE']
        .apply(rsi, window=RSI_WIN)
)

# === MACD ===
def macd_hist(series, f=12, s=26, sig=9):
    """Return the MACD line and its signal line for `series`.

    Despite the name, the histogram itself is not returned; callers obtain it
    as `macd - signal`.

    f, s : spans of the fast and slow exponential means of `series`.
    sig  : span of the exponential mean applied to the MACD line.
    """
    def _ema(values, span):
        return values.ewm(span=span, adjust=False).mean()

    macd_line = _ema(series, f) - _ema(series, s)
    signal_line = _ema(macd_line, sig)
    return macd_line, signal_line

# MACD histogram (MACD line minus signal line) and its one-row change, per ID.
macd_all = []
for _, g in df_f.groupby('ID'):
    # Work on an explicit copy: adding columns to the slice yielded by groupby
    # is the chained-assignment pattern pandas warns about.
    g = g.copy()
    macd_line, signal_line = macd_hist(g['CLOSE'])
    g['MACD'] = macd_line - signal_line
    g['MACD_diff'] = g['MACD'].diff()
    macd_all.append(g)
# Rebuild the panel from the per-ID pieces (groups come back ordered by ID).
df_f = pd.concat(macd_all)

# === Volatility ===
# Per-ID rolling standard deviation of DailyRet over 20 rows (not annualised).
# Note: 'VOL20'/'VOL60' are return volatilities; the 'VOL' column is traded volume.
df_f['VOL20'] = (
    df_f.groupby('ID')['DailyRet']
        .rolling(20)
        .std()
        # drop the outermost index level (the group key added by groupby-rolling)
        # so the result lines up with df_f's own index on assignment
        .reset_index(level=0, drop=True)
)
# Same measure over 60 rows.
df_f['VOL60'] = (
    df_f.groupby('ID')['DailyRet']
        .rolling(60)
        .std()
        .reset_index(level=0, drop=True)
)
    

# ==== Market Value =====
# log10 of MV (the fetch cell divides the market cap by 1E9).
df_f['MV_normed'] = np.log10(df_f['MV'])

# === Candlestick data ===
# Shadows and body as fractions of the day's high-low range.
upper_shadow = df_f['HIGH'] - np.maximum(df_f['OPEN'], df_f['CLOSE'])
lower_shadow = np.minimum(df_f['OPEN'], df_f['CLOSE']) - df_f['LOW']
# A zero range (HIGH == LOW) would turn the ratios into +/-inf; use NaN instead.
candle_range = (df_f['HIGH'] - df_f['LOW']).replace(0, np.nan)
df_f['Upp_Shad'] = upper_shadow / candle_range
df_f['Low_Shad'] = lower_shadow / candle_range
df_f['Body'] = abs(df_f['CLOSE'] - df_f['OPEN']) / candle_range

# Buy/sell pressure
df_f['BS_Pressure'] = (df_f['CLOSE'] - df_f['VWAP']) / df_f['VWAP'] # Positive means buying pressure; negative means selling pressure

# Volume spread
# Previous-row values must be taken per ID: a plain shift(1) over the stacked
# panel compares the first row of each ticker with the last row of the ticker
# stored before it. Rows are expected to be in date order within each ID.
prev_vol = df_f.groupby('ID')['VOL'].shift(1)
prev_close = df_f.groupby('ID')['CLOSE'].shift(1)
df_f['Volume_Spread'] = ((df_f['VOL'] - prev_vol) / prev_vol) * np.sign(df_f['CLOSE'] - prev_close)

# Liquidity Strength
df_f['Liquid_Strength'] = df_f['BS_Pressure'] / (1 + df_f['BID_ASK_SPR'])

# === Rolling beta ===
def rolling_beta(asset_ret, bench_ret, win=252):
    """
    Rolling beta of an asset against a benchmark.

    beta = rolling cov(asset, bench) / rolling var(bench), both over the last
    `win` observations of the asset.

    Parameters
    ----------
    asset_ret : pandas.Series
        Asset returns.
    bench_ret : pandas.Series
        Benchmark returns; aligned onto `asset_ret`'s index before any window
        is formed.
    win : int, default 252
        Window length in rows.

    Returns
    -------
    pandas.Series
        Beta on `asset_ret`'s index; NaN until `win` complete observations exist.
    """
    # Put the benchmark on the asset's own rows first. Otherwise the covariance
    # is taken over the asset's observations while the variance is taken over
    # the benchmark's full calendar, and the two windows cover different dates
    # whenever the asset has gaps in its history.
    _, bench_on_asset = asset_ret.align(bench_ret, join='left')
    cov = asset_ret.rolling(win).cov(bench_on_asset)
    var = bench_on_asset.rolling(win).var()
    # The small constant avoids division by zero for a constant benchmark window.
    return cov / (var + 1e-12)

# Per-ID rolling beta of DailyRet against bench_ret, using rolling_beta's
# default window. group_keys=False keeps the original row index so the result
# aligns with df_f on assignment.
df_f['Beta252'] = (
    df_f.groupby('ID', group_keys=False)['DailyRet']
        .apply(lambda x: rolling_beta(x, bench_ret))
)




In [67]:
# Save cleaned data
# Stock panel with all derived factor columns, written to the working directory.
df_f.to_parquet("SP500_members_2015-2025_data_cleaned.parquet")
# Benchmark closes reindexed to the stock panel's dates.
df_b_aligned.to_parquet("SPX_2015-2025_data_cleaned.parquet")