In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from polygon import RESTClient
import datetime as dt

import stock_data_functions
from matplotlib.dates import MonthLocator, DateFormatter
from stock_data_functions import TickerComparison
from typing import List, Optional, Dict, Any, Tuple
import requests 
import time
import json
import seaborn as sns
import math
import importlib
importlib.reload(stock_data_functions)

import os

# Credentials are read from the environment so that no secret is stored in the
# notebook (or in its version history). Set POLYGON_API_KEY and FRED_API_KEY
# before starting the kernel; a missing variable raises KeyError here.
# NOTE(review): the keys that were previously committed in this cell should be
# treated as leaked and rotated.
API_key = os.environ["POLYGON_API_KEY"]
client = RESTClient(API_key)


import scipy.stats as stats
from fredapi import Fred
fred_api = Fred(os.environ["FRED_API_KEY"])

In [53]:
def missing_per_day(df):
    """Count non-null observations per time-of-day bucket.

    A ``'time'`` column holding the index formatted as ``HH:MM`` is added to a
    copy of ``df`` (the input is left untouched) and the frame is grouped on
    it. The result has one row per ``HH:MM`` label and, for every original
    column, the number of non-null values seen at that time of day.
    """
    hhmm = df.index.strftime('%H:%M')
    stamped = df.assign(time=hhmm)
    per_bucket = stamped.groupby('time')
    return per_bucket.count()

def select_time(df):
    """Keep rows whose index time lies strictly between 09:30 and 16:30.

    Both bounds are exclusive: a bar stamped exactly 09:30 or exactly 16:30 is
    dropped.
    """
    clock = df.index.time
    after_start = clock > dt.time(9, 30)
    before_end = clock < dt.time(16, 30)
    in_window = after_start & before_end
    return df[in_window]

def test(df):
    df = df.copy()
    df = df.isnull().sum()
    return df

def missing_by_time(df):
    """Fraction of missing values per column for each time-of-day bucket.

    Rows are bucketed by the index formatted as ``HH:MM``. For each bucket the
    per-column null count (computed by ``test``) is divided by the bucket's
    row count. The input frame is not modified.
    """
    stamped = df.copy()
    stamped['time'] = stamped.index.strftime('%H:%M')

    null_counts = stamped.groupby('time').apply(test)
    bucket_sizes = stamped.groupby('time').agg(len)

    return null_counts / bucket_sizes

### Backtest 1

Idea:
- If we see a large intraday move (e.g., 5% or more), does it revert? Does it revert in the post-market session?
- Categorise based on news
- Categorise based on market cap
- Categorise based on volume, etc.
- Adjust for beta: calculate an EWMA beta and use the beta-adjusted move as the signal

Time frame : last 6 months

In [1169]:
import factor_analysis_functions
import importlib
importlib.reload(factor_analysis_functions)
from factor_analysis_functions import run_full_pipeline, rolling_r2_from_intraday, _drop_weekends_index, _select_ext_hours_index
importlib.reload(factor_analysis_functions)

<module 'factor_analysis_functions' from '/Users/phillip/Desktop/Moon2/factor_analysis_functions.py'>

In [None]:
def setup_clean_experiment(
    *,
    filing_date_gte: str,
    stock: List[str],
    regressor: List[str],                 # e.g., ["SPY"] or ["SPY","I:NDX"]; we use regressor[0] as the factor
    date_updated: bool = False,
    date_updated_regressor: bool = False,
    # regime lookbacks
    short_lookback: int = 20,
    long_lookback: int = 60,
    # residual z-scoring
    sigma_lookback_days: int = 20,        # days for time-of-day residual std
    event_windows: Tuple[int, ...] = (5, 10, 15),   # m-minute windows for agg residuals / z
    # minute fetch knobs (pass-through to your pipeline)
    minute_waiting_time: int = 60,
    minute_chunksize: int = 200,
    minute_fetch_in_chunks: bool = False,
    # optional date bounds
    daily_start_date: Optional[str] = None,
    daily_end_date: Optional[str] = None,
    minute_start_date: Optional[str] = None,
    minute_end_date: Optional[str] = None,
    lam: float = 0.94,
    warmup: int = 30,
) -> Dict[str, Any]:
    """
    Run ``run_full_pipeline`` once and return the raw inputs of the experiment.

    This function only loads data and builds per-bar date / time-of-day labels.
    Residuals, z-scores and regime features are NOT computed here; they are
    derived downstream from the objects returned by this function.

    Parameters
    ----------
    filing_date_gte, date_updated, date_updated_regressor, stock, regressor,
    lam, minute_waiting_time, minute_chunksize, minute_fetch_in_chunks,
    daily_start_date, daily_end_date, minute_start_date, minute_end_date
        Forwarded unchanged to ``run_full_pipeline``. ``regressor[0]`` is also
        passed as ``regressor_ticker`` and is the factor used for
        ``factor_min_ret``.
    short_lookback
        Forwarded to the pipeline as ``lookback_days``.
    long_lookback, sigma_lookback_days, event_windows, warmup
        Accepted for interface compatibility but not used by this function.

    Returns
    -------
    dict with keys
        'stock_min_prices' : pipe["stock_object_minute"].tickers_stocks_prices
        'reg_min_prices'   : pipe["regressor_object_minute"].tickers_stocks_prices
        'stock_rets_min'   : pipe["stock_returns_minute"]
        'reg_rets_min'     : pipe["regressor_returns_minute"]
        'stock_rets_day'   : pipe["stock_returns_daily"]
        'reg_rets_day'     : pipe["regressor_returns_daily"]
        'beta_daily'       : pipe["betas_df"]
        'factor_min_ret'   : copy of reg_rets_min[regressor[0]]
        'td_series'        : Series of US/Eastern calendar dates, indexed by min_index
        'tod_series'       : Series of US/Eastern "HH:MM" strings, indexed by min_index
        'min_index'        : index of stock_rets_min (tz-aware)
        'pipe'             : the full dict returned by run_full_pipeline

    Notes
    -----
    If a minute-return index is tz-naive it is localized to US/Eastern *in
    place*, i.e. the frames inside ``pipe`` are modified as well.
    """
    # ----------------------- 0) Run your pipeline once -----------------------
    factor_ticker = regressor[0]
    pipe = run_full_pipeline(
        filing_date_gte=filing_date_gte,
        date_updated=date_updated,
        date_updated_regressor=date_updated_regressor,
        regressor=regressor,
        regressor_ticker=factor_ticker,
        stock=stock,
        lam=lam,
        lookback_days=short_lookback,  # pipeline's own R2; short & long are recomputed downstream
        minute_waiting_time=minute_waiting_time,
        minute_chunksize=minute_chunksize,
        minute_fetch_in_chunks=minute_fetch_in_chunks,
        daily_start_date=daily_start_date,
        daily_end_date=daily_end_date,
        minute_start_date=minute_start_date,
        minute_end_date=minute_end_date,
    )

    # Convenience handles
    stock_min_prices = pipe["stock_object_minute"].tickers_stocks_prices     # MultiIndex: (ticker, field)
    reg_min_prices   = pipe["regressor_object_minute"].tickers_stocks_prices
    stock_rets_min   = pipe["stock_returns_minute"]                          # minute close log returns
    reg_rets_min     = pipe["regressor_returns_minute"]
    stock_rets_day   = pipe["stock_returns_daily"]                           # daily close log returns
    reg_rets_day     = pipe["regressor_returns_daily"]
    beta_daily       = pipe["betas_df"]                                      # EWMA betas (daily) vs factor_ticker

    # Minute factor return (Series)
    factor_min_ret = reg_rets_min[factor_ticker].copy()

    # ----------------------- 1) Minute meta: trade_date & time-of-day -----------------------
    def _localize_if_naive(obj) -> None:
        """Make obj.index tz-aware (US/Eastern) in place when it carries no tz."""
        idx = obj.index
        if getattr(idx, "tz", None) is None:
            obj.index = idx.tz_localize("US/Eastern")

    # Each object keeps its OWN timestamps. Assigning the stock index to the
    # regressor/factor objects (as was done before) either fails on a length
    # mismatch or silently re-labels their rows when the two indexes differ.
    for obj in (stock_rets_min, reg_rets_min, factor_min_ret):
        _localize_if_naive(obj)

    min_index = stock_rets_min.index
    eastern_index = min_index.tz_convert("US/Eastern")
    td_series = pd.Series(eastern_index.date, index=min_index, name="trade_date")
    tod_series = pd.Series(eastern_index.strftime("%H:%M"), index=min_index, name="tod")

    return {'stock_min_prices' : stock_min_prices,
            'reg_min_prices'   : reg_min_prices,
            'stock_rets_min'   : stock_rets_min,
            'reg_rets_min'     : reg_rets_min,
            'stock_rets_day'   : stock_rets_day,
            'reg_rets_day'     : reg_rets_day,
            'beta_daily'       : beta_daily,
            'factor_min_ret'   : factor_min_ret,
            'td_series'        : td_series,
            'tod_series'       : tod_series,
            'min_index'        : min_index,
            'pipe'             : pipe}

In [1337]:
# ---- Universe ---------------------------------------------------------------
stock            = ['META', 'MU','ALAB', 'TER', 'CRDO', 'AMKR', 'AMD', 'NVDA', 'INTC','PLTR']
regressor        = ['SPY', 'I:NDX']
regressor_ticker = 'SPY'

# ---- Model knobs ------------------------------------------------------------
lam                 = 0.94
warmup              = 30   # Number of Days
short_lookback      = 5
long_lookback       = 20
sigma_lookback_days = 5
event_windows       = (1, 3, 12, 24)

# ---- Data-fetch knobs -------------------------------------------------------
minute_waiting_time    = 60
minute_chunksize       = 200
minute_fetch_in_chunks = False
date_updated           = False
regressor_date_updated = False
daily_start_date       = '2023-11-01'
minute_start_date      = '2023-11-01'

# NOTE(review): regressor_ticker, daily_start_date and minute_start_date are
# defined above but are not passed to setup_clean_experiment below - confirm
# that this is intended.
experiment_kwargs = dict(
    stock=stock,
    regressor=regressor,
    date_updated=date_updated,
    date_updated_regressor=regressor_date_updated,
    short_lookback=short_lookback,
    long_lookback=long_lookback,
    sigma_lookback_days=sigma_lookback_days,
    event_windows=event_windows,
    minute_waiting_time=minute_waiting_time,
    minute_chunksize=minute_chunksize,
    minute_fetch_in_chunks=minute_fetch_in_chunks,
    lam=lam,
    warmup=warmup,
)

data = setup_clean_experiment(filing_date_gte="2023-09-01", **experiment_kwargs)

[1/10] META: start
last date saved :  2025-10-31 19:55:00-04:00
Minute level data loaded from CSV (earliest): META_5minute_2023-09_minute_level_data.csv
saving because we have the file and we do not want to update date : 2025-10-31 19:55:00-04:00
2025-10-31 19:55:00-04:00 META
[1/10] META: done
[2/10] MU: start
last date saved :  2025-10-31 19:55:00-04:00
Minute level data loaded from CSV (earliest): MU_5minute_2023-11_minute_level_data.csv
saving because we have the file and we do not want to update date : 2025-10-31 19:55:00-04:00
2025-10-31 19:55:00-04:00 MU
[2/10] MU: done
[3/10] ALAB: start
last date saved :  2025-10-31 19:50:00-04:00
Minute level data loaded from CSV (earliest): ALAB_5minute_2024-03_minute_level_data.csv
saving because we have the file and we do not want to update date : 2025-10-31 19:50:00-04:00
2025-10-31 19:50:00-04:00 ALAB
[3/10] ALAB: done
[4/10] TER: start
last date saved :  2025-10-31 19:55:00-04:00
Minute level data loaded from CSV (earliest): TER_5minute


The behavior of 'isin' with dtype=datetime64[ns] and castable values (e.g. strings) is deprecated. In a future version, these will not be considered matching by isin. Explicitly cast to the appropriate dtype before calling isin instead.



stock_cols :  Index(['META', 'MU', 'ALAB', 'TER', 'CRDO', 'AMKR', 'AMD', 'NVDA', 'INTC'], dtype='object')


In [6]:
def run_clean_experiment(
        stock            = stock,
        factor_ticker    = 'SPY',
        stock_min_prices = data['stock_min_prices'],
        reg_min_prices   = data['reg_min_prices'],
        stock_rets_min   = data['stock_rets_min'],
        reg_rets_min     = data['reg_rets_min'],
        stock_rets_day   = data['stock_rets_day'],
        reg_rets_day     = data['reg_rets_day'],
        beta_daily       = data['beta_daily'],
        factor_min_ret   = data['factor_min_ret'],
        td_series        = data['td_series'],
        tod_series       = data['tod_series'],
        min_index        = data['min_index'],
        pipe             = data['pipe']
):
    """
    Build residual, z-score, regime and volume features from the loaded data.

    Steps (numbering continues from ``setup_clean_experiment``):
      2. Shift daily betas by one row and map them to every bar via ``td_series``.
      3. Per-bar residual: ``stock_ret - beta_lag1 * factor_ret``.
      4. Rolling m-bar sums of residuals, stock returns and factor returns, and
         the explained share ``|beta_lag1 * factor_move| / |stock_move|`` clipped
         to [0, 1] (a zero stock move yields NaN).
      5. Per time-of-day rolling std of the m-bar residual over previous days
         (the current day is excluded via ``shift(1)``) and the z-score
         ``residual / sigma``.
      6. Short and long R2 from ``rolling_r2_from_intraday``, their difference,
         each shifted by one row and mapped to bars by date.
      7. Volume per bar and its lagged time-of-day z-score.
      8. Everything packaged in a dict (see the ``return`` statement for keys).

    Notes
    -----
    * All defaults are evaluated when this ``def`` is executed, so ``stock`` and
      ``data`` must already exist, and later changes to ``data`` are not picked
      up unless the cell is re-run.
    * ``event_windows``, ``sigma_lookback_days``, ``short_lookback`` and
      ``long_lookback`` are read from module-level globals at call time.
    * ``reg_min_prices``, ``reg_rets_min``, ``stock_rets_day`` and
      ``reg_rets_day`` are accepted but not used in the body.
    """
    # ----------------------- 2) Lagged betas (no look-ahead) & minute mapping -----------------------
    beta_daily_lag1 = beta_daily.shift(1)  # strict D-1
    # Map D-1 betas to minutes by trade_date
    beta_minute_lag1 = pd.DataFrame(index=min_index, columns=stock, dtype=float)
    for s in stock:
        beta_minute_lag1[s] = td_series.map(beta_daily_lag1[s]).values

    # ----------------------- 3) Minute residuals (stock - beta_{D-1} * factor) -----------------------
    residual_1m = pd.DataFrame(index=min_index, columns=stock, dtype=float)
    for s in stock:
        residual_1m[s] = stock_rets_min[s] - beta_minute_lag1[s] * factor_min_ret

    # ----------------------- 4) m-minute residual sums & explained-share -----------------------
    def _roll_sum(df: pd.DataFrame, m: int) -> pd.DataFrame:
        # min_periods=m: a window containing any missing bar yields NaN
        return df.rolling(m, min_periods=m).sum()

    residual_m: Dict[int, pd.DataFrame] = {m: _roll_sum(residual_1m, m) for m in event_windows}
    # For explained share, need stock m-min move and factor m-min move
    stock_m = {m: _roll_sum(stock_rets_min, m) for m in event_windows}
    factor_m = {m: _roll_sum(factor_min_ret.to_frame("f"), m)["f"] for m in event_windows}

    explained_share_m: Dict[int, pd.DataFrame] = {}
    for m in event_windows:
        es = pd.DataFrame(index=min_index, columns=stock, dtype=float)
        fmove = factor_m[m]
        for s in stock:
            # predicted = beta_{D-1} * factor move (use minute-mapped beta)
            pred = beta_minute_lag1[s] * fmove
            actual = stock_m[m][s]
            num = pred.abs()
            den = actual.abs().replace(0, np.nan)  # avoid division by zero
            es[s] = (num / den).clip(0.0, 1.0)
        explained_share_m[m] = es

    # ----------------------- 5) Time-of-day sigma for residual m-min (lagged, no leakage) ---------
    def _min_obs(window_days: int) -> int:
        # Require max(5, window//2) observations, but never more than the
        # window itself: pandas raises ValueError when min_periods > window,
        # which previously happened for any window shorter than 5 days.
        return min(window_days, max(5, window_days // 2))

    def _tod_sorted_frame(x: pd.Series, td: pd.Series, tod: pd.Series) -> pd.DataFrame:
        # Rows with a value, ordered by (tod, trade_date) so that each
        # time-of-day group is a day-ordered series.
        df = pd.DataFrame({"val": x, "td": td, "tod": tod}).dropna(subset=["val"])
        return df.sort_values(["tod", "td"])

    def _lagged_tod_stat(df: pd.DataFrame, window_days: int, stat: str) -> np.ndarray:
        # Rolling `stat` over the previous `window_days` values of the same
        # time-of-day bucket; shift(1) excludes the current day's value.
        # transform() returns values in the row order of `df`, so the result
        # can be assigned positionally.
        min_obs = _min_obs(window_days)
        grp = df.groupby("tod", sort=False)["val"]
        return grp.transform(
            lambda s: getattr(s.shift(1).rolling(window_days, min_periods=min_obs), stat)()
        ).to_numpy()

    def _scatter(df: pd.DataFrame, col: str, index) -> pd.Series:
        # Write df[col] back onto the full original index; bars that were
        # dropped for having no value stay NaN.
        df = df.sort_index()
        out = pd.Series(index=index, dtype=float)
        out.loc[df.index] = df[col].values
        return out

    # Helper to compute lagged TOD std by minute bucket across past N days
    def _tod_sigma_lagged(x: pd.Series, td: pd.Series, tod: pd.Series, window_days: int) -> pd.Series:
        df = _tod_sorted_frame(x, td, tod)
        df["sigma"] = _lagged_tod_stat(df, window_days, "std")
        return _scatter(df, "sigma", x.index)

    sigma_tod_m: Dict[int, pd.DataFrame] = {}
    z_m: Dict[int, pd.DataFrame] = {}
    for m in event_windows:
        # Compute sigma per stock independently
        sig_df = pd.DataFrame(index=min_index, columns=stock, dtype=float)
        z_df   = pd.DataFrame(index=min_index, columns=stock, dtype=float)
        for s in stock:
            rs = residual_m[m][s]
            sig = _tod_sigma_lagged(rs, td_series, tod_series, sigma_lookback_days)
            sig_df[s] = sig
            z_df[s] = rs / sig
        sigma_tod_m[m] = sig_df
        z_m[m] = z_df

    # ----------------------- 6) Daily regime features: R2 short/long (lagged) -----------------------
    # Recompute R² (short & long) from your minute-labelled table to keep consistency
    labelled_min = pipe["labelled_combined_returns_min"]  # has 'trade_date' already
    # short
    r2_short_daily = rolling_r2_from_intraday(
        labelled_min, beta_daily, lookback_days=short_lookback, factor_col=factor_ticker
    )
    # long
    r2_long_daily = rolling_r2_from_intraday(
        labelled_min, beta_daily, lookback_days=long_lookback, factor_col=factor_ticker
    )
    # Align index types & sort
    r2_short_daily = r2_short_daily.sort_index()
    r2_long_daily  = r2_long_daily.sort_index()
    delta_daily    = (r2_short_daily - r2_long_daily).reindex_like(r2_short_daily)

    r2_short_lag1 = r2_short_daily.shift(1)
    delta_lag1    = delta_daily.shift(1)

    # Map lagged regime features to each minute by trade_date (prior day values)
    regime_short_minute = pd.DataFrame(index=min_index, columns=stock, dtype=float)
    regime_delta_minute = pd.DataFrame(index=min_index, columns=stock, dtype=float)
    for s in stock:
        # map by date only: re-key the daily series on datetime.date objects,
        # which is what td_series holds
        r2_map   = r2_short_lag1[s].copy()
        r2_map.index = pd.to_datetime(r2_map.index).date
        dlt_map  = delta_lag1[s].copy()
        dlt_map.index = pd.to_datetime(dlt_map.index).date
        regime_short_minute[s] = td_series.map(r2_map).values
        regime_delta_minute[s] = td_series.map(dlt_map).values

    # ----------------------- 7) Volume TOD z-scores (microstructure proxy) -----------------------
    # Pull 5-min volume from minute price tables
    vol_5m = pd.DataFrame(index=min_index, columns=stock, dtype=float)
    for s in stock:
        vol_5m[s] = stock_min_prices[(s, "volume")].reindex(min_index)

    def _tod_z_lagged(x: pd.Series, td: pd.Series, tod: pd.Series, window_days: int) -> pd.Series:
        # like sigma, but z-score against the lagged time-of-day mean and std
        df = _tod_sorted_frame(x, td, tod)
        mean_ = _lagged_tod_stat(df, window_days, "mean")
        std_  = _lagged_tod_stat(df, window_days, "std")
        df["z"] = (df["val"] - mean_) / std_
        return _scatter(df, "z", x.index)

    volume_z_tod = pd.DataFrame(index=min_index, columns=stock, dtype=float)
    for s in stock:
        volume_z_tod[s] = _tod_z_lagged(vol_5m[s], td_series, tod_series, sigma_lookback_days)

    # ----------------------- 8) Package & return -----------------------
    return {
        "pipe": pipe,
        "minute_meta": {"trade_date": td_series, "tod": tod_series},
        "minute_stock_rets": stock_rets_min,
        "minute_factor_ret": factor_min_ret,
        "beta_daily": beta_daily,
        "beta_daily_lag1": beta_daily_lag1,
        "beta_minute_lag1": beta_minute_lag1,
        "residual_1m": residual_1m,
        "residual_m": residual_m,
        "sigma_tod_m": sigma_tod_m,
        "z_m": z_m,
        "explained_share_m": explained_share_m,
        "volume_5m": vol_5m,
        "volume_z_tod": volume_z_tod,
        "regime_daily": {
            "r2_short_daily": r2_short_daily,
            "r2_long_daily":  r2_long_daily,
            "delta_daily":    delta_daily,
            "r2_short_lag1":  r2_short_lag1,
            "delta_lag1":     delta_lag1,
        },
        "regime_minute_lag1": {
            "r2_short": regime_short_minute,
            "delta":    regime_delta_minute,
        },
    }

In [1338]:
# Work on copies so the objects stored in `data` are left untouched.
beta_daily     = data['beta_daily'].copy()
stock_rets_min = data['stock_rets_min'].copy()
factor_min_ret = data['factor_min_ret'].copy()

# Restrict both timestamp indexes with the two helpers
# (judging by their names: weekends removed, then extended trading hours kept).
min_index            = _select_ext_hours_index(_drop_weekends_index(data['min_index'].copy()))
factor_min_ret_index = _select_ext_hours_index(_drop_weekends_index(factor_min_ret.index))

# Keep only the bars that survived the filters above.
tod_series     = data['tod_series'].loc[min_index].copy()
td_series      = data['td_series'].loc[min_index].copy()
factor_min_ret = factor_min_ret.loc[factor_min_ret_index].copy()

Create the beta and idiosyncratic (residual) matrices


In [1339]:
# ----------------------- 2) Lagged betas (no look-ahead) & minute mapping -----------------------
beta_daily_lag1 = beta_daily.shift(1)  # strict D-1
# Map D-1 betas to minutes by trade_date
beta_minute_lag1 = pd.DataFrame(index=min_index, columns=stock, dtype=float)
for s in stock:
    beta_minute_lag1[s] = td_series.map(beta_daily_lag1[s]).values

# ----------------------- 3) Minute residuals (stock - beta_{D-1} * factor) -----------------------
residual_1m = pd.DataFrame(index=min_index, columns=stock, dtype=float)
# Iterate over a snapshot of `stock`: tickers are removed from the list inside
# the loop, and removing from the list being iterated makes Python skip the
# element that follows each removal (so two consecutive bad tickers would leave
# the second one unchecked).
for s in list(stock):
    if s not in stock_rets_min.columns or stock_rets_min[s].isnull().all():
        residual_1m = residual_1m.drop(columns=[s])
        # `stock` is deliberately edited in place: later cells pass it on as the
        # list of tickers to process.
        stock.remove(s)
        print(f'Stock {s} : Data not loaded properly - removing from backtest')
    else:
        residual_1m[s] = stock_rets_min[s] - beta_minute_lag1[s] * factor_min_ret


Stock PLTR : Data not loaded properly - removing from backtest


We have 3 data frames:
- residuals (of beta explained returns and the stock)
- stock minute returns 
- regressor minute returns

The rolling sum adds up the last m observations (the current bar plus the m-1 bars before it):
- if any value inside that m-bar window is missing, the result for that bar is NaN (`min_periods=m`)
- we do this for several window lengths m (the values in `event_windows`)

In [1340]:

# ----------------------- 4) m-minute residual sums & explained-share -----------------------
def _roll_sum(df: pd.DataFrame, m: int) -> pd.DataFrame:

    return df.rolling(m, min_periods=m).sum()
    
# One entry per window length m in `event_windows`:
#   residual_m[m] - m-bar rolling sum of the per-bar residuals
#   stock_m[m]    - m-bar rolling sum of the stock returns
#   factor_m[m]   - m-bar rolling sum of the factor return (a Series)
# The stock and factor moves are what an explained-share calculation needs.
residual_m: Dict[int, pd.DataFrame] = {}
stock_m: Dict[int, pd.DataFrame] = {}
factor_m: Dict[int, pd.Series] = {}
for m in event_windows:
    residual_m[m] = _roll_sum(residual_1m, m)
    stock_m[m] = _roll_sum(stock_rets_min, m)
    factor_m[m] = _roll_sum(factor_min_ret.to_frame("f"), m)["f"]


### Idiosyncratic Vol & Diagnostics - Event Detection 

In [1341]:
import factor_analysis_functions
import importlib
importlib.reload(factor_analysis_functions)
from factor_analysis_functions import sanity_check_sigma_and_z, _tod_sigma_lagged, build_sigma_and_z_from_tod, z_exceedance_diagnostic, plot_hourly_net_resid_for_exceedance_hours

Idiosyncratic z-score (prior-days, time-of-day approach):
- The idea is to get a score for how large the part of the stock's return that is *not* explained by the regressor (the residual) is
- Normalize it by the rolling standard deviation of idiosyncratic returns at that time of day (e.g., the 9:30-9:35 bar over the past m days)
- In the `_tod_sigma_lagged` function, we require at least max(5, half the window length) observations before a sigma value is produced
- The `_tod_sigma_lagged` function gives the volatility used for normalizing, but it uses the volatility of the hour, not of the 5-minute bar.

In [1342]:
# Time-of-day sigma and residual z-scores for every window in residual_m,
# restricted to the tickers still present in `stock`.
sigma_z_inputs = dict(
    residual_m=residual_m,
    td_series=td_series,
    tod_series=tod_series,
    stocks=stock,
    window_days=sigma_lookback_days,
)
sigma_tod_m, z_m = build_sigma_and_z_from_tod(**sigma_z_inputs)


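This checks for z-score anomalies, i.e. bars where |z| exceeds a threshold Z (e.g., Z = 3). The diagnostic:
- Groups by session (pre-market, regular, post-market), ticker and date
- Plots where the exceedances happen
- Provides a table of the top 10 occurrences of these deviations
- These are idiosyncratic movements (the factor-explained part has already been removed)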

In [1373]:
# Exceedance diagnostic for the 24-bar window at a threshold of 3.
diagnostic_kwargs = dict(
    sigma_tod_m=sigma_tod_m,
    z_m=z_m,
    residual_m=residual_m,
    m=24,
    Z=3.0,
    top_k=10,
    return_plot=True,
    integrity_atol=1e-2,
    integrity_rtol=1e-2,
)
out = z_exceedance_diagnostic(**diagnostic_kwargs)

# Unpack the tables of interest.
summary   = out["summary_wide"]      # MultiIndex columns: ticker → ['date','session','z_avg','z_count']
reversion = out["reversion_wide"]    # ticker → ['date','session','spread','count_above_spread','z_count']
events    = out["events_long"]       # per-bar exceedances with z, sigma, resid

In [1374]:
import plotly.io as pio

# Render plotly figures in a browser tab instead of inline.
pio.renderers.default = "browser"

fig = out['fig']
fig.show()

python(44384) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


In [1345]:
# Hourly net residual for the hours with 1-bar exceedances (threshold 3),
# for two hand-picked tickers and without a session filter.
hourly_kwargs = dict(
    sigma_tod_m=sigma_tod_m,
    z_m=z_m,
    residual_m=residual_m,
    m=1,
    Z=3.0,
    tickers=['ALAB', 'CRDO'],   # or None to auto-pick top-2
    session=None,
)
res = plot_hourly_net_resid_for_exceedance_hours(**hourly_kwargs)

pio.renderers.default = "browser"
fig = res["fig"]
fig.show()



'H' is deprecated and will be removed in a future version, please use 'h' instead.

python(44133) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


In [1378]:
# Correlation between the 24-bar rolling residual sums of ALAB and CRDO.
residual_m[24][['ALAB', 'CRDO']].corr()

Unnamed: 0,ALAB,CRDO
ALAB,1.0,0.353101
CRDO,0.353101,1.0


In [1332]:
# Manual cross-check: ALAB close-to-close move between 12:00 and 13:00 on
# 2025-10-31, as the sum of `calc_log_rets` applied to each column, times 100.
# NOTE(review): `calc_log_rets` is not defined or imported in any cell visible
# here - this cell depends on kernel state; confirm where it comes from.
ALAB = data['stock_min_prices']['ALAB'].copy()
(ALAB.loc['2025-10-31 12:00:00':'2025-10-31 13:00:00'].drop(columns=['session']).apply(calc_log_rets) * 100)['close'].sum()

np.float64(-1.7228011571023405)

In [1327]:
# Same cross-check for SPY over the same hour. Note that the per-bar values are
# rounded to 3 decimals BEFORE summing, unlike the ALAB cell.
((data['reg_min_prices']['SPY'].loc['2025-10-31 12:00:00':'2025-10-31 13:00:00'].drop(columns=['session']).apply(calc_log_rets) * 100 ).round(3)['close']).sum()

np.float64(-0.27699999999999997)

In [1328]:
# Manual residual for that hour: stock move - beta(2025-10-30) * factor move.
# The two literals are the outputs of the ALAB and SPY cross-check cells; they
# are pasted by hand and go stale if those cells are re-run on new data.
-1.7228011571023405 - (data['beta_daily']['ALAB'].loc['2025-10-30'] * -0.27699999999999997)

np.float64(-1.1092648278678003)

#### Sanity Check

In [1121]:
# Run the sanity checks on sigma and z for every window.
checks = sanity_check_sigma_and_z(sigma_tod_m=sigma_tod_m, z_m=z_m)

# NOTE(review): key 15 is not among the event_windows configured in this
# notebook, (1, 3, 12, 24); this lookup presumably relies on an earlier run
# with different windows - confirm or switch to a configured window.
checks[15]["sigma_summary"]    # σ NaN%, <=0% and quantiles per ticker


Unnamed: 0_level_0,nan_rate,nonpos_rate,q10,median,q90
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
META,0.062409,0.0,0.000982,0.00265,0.007243
MU,0.058766,0.0,0.001736,0.004288,0.011146
ALAB,0.247528,0.0,0.003668,0.008921,0.021341
TER,0.059347,0.0,0.001588,0.004673,0.012167
CRDO,0.059347,0.0,0.002701,0.008118,0.020233
AMKR,0.062409,0.0,0.00186,0.005581,0.014247
AMD,0.058746,0.0,0.001496,0.003993,0.011172
NVDA,0.058746,0.0,0.001534,0.003942,0.010298
INTC,0.058746,0.0,0.001516,0.004128,0.012888


In [1114]:
# Index/column alignment flags reported by the sanity check for window 15.
checks[15]['alignment']

{'aligned_index': False, 'aligned_columns': False}

In [1122]:
# Transposed so that tickers are columns and the statistics are rows.
checks[15]["z_summary"].T.head(60)        # mean/std/tails/ACF per ticker

ticker,META,MU,ALAB,TER,CRDO,AMKR,AMD,NVDA,INTC
mean,-0.009008,0.030536,0.039336,0.038779,0.031546,0.018039,0.0011,0.013116,-0.015374
std,2.020429,1.903111,1.609644,2.351334,2.252823,2.36883,1.938176,1.630511,2.866876
skew,-4.561792,0.247791,1.817089,13.289953,-2.168036,1.809939,0.70153,0.265886,22.472929
kurtosis,291.161605,200.678605,51.431564,834.663539,344.323852,164.879175,283.641393,55.815355,1588.348636
std_cv_by_hour,0.455971,0.325345,0.163068,0.471235,0.406828,0.400597,0.316975,0.22529,0.572705
p_emp(|z|>1.0),0.32275,0.314976,0.312092,0.303116,0.310817,0.29774,0.319936,0.318848,0.308649
p_emp(|z|>2.0),0.11309,0.116015,0.113357,0.1195,0.117976,0.122608,0.112276,0.108726,0.115903
p_emp(|z|>3.0),0.04807,0.051369,0.050071,0.059343,0.055595,0.063881,0.049731,0.046851,0.055621
p_norm(|z|>1.0),0.317311,0.317311,0.317311,0.317311,0.317311,0.317311,0.317311,0.317311,0.317311
p_norm(|z|>2.0),0.0455,0.0455,0.0455,0.0455,0.0455,0.0455,0.0455,0.0455,0.0455


In [1235]:
checks[15]["z_by_hour"]["CRDO"] # hour-of-day mean/std of z for CRDO

Unnamed: 0_level_0,mean,std
hod,Unnamed: 1_level_1,Unnamed: 2_level_1
4,-0.05113,2.719848
5,0.039924,2.138979
6,0.047278,2.241249
7,-0.065411,2.414209
8,0.03885,1.58659
9,-0.016071,1.499463
10,0.008087,1.317882
11,0.056182,1.490897
12,0.071775,1.360174
13,-0.000569,1.476485


Compute how much of the move in that period is related to the factor