In [26]:
import polars as pl
from datetime import date
from silverfund.datasets import *

Do all of the components of the factor model have the same barrids for each date?

In [27]:
# Reference date for the analysis; its year selects which annual Barra files
# the loading cells below read.
date_ = date(year=2023, month=12, day=29)
year = date_.year

In [28]:
def clean(df: pl.DataFrame, index_cols: list[str], value_col: str) -> pl.DataFrame:
    """Reshape a wide frame with one column per date into long form.

    Every column not listed in ``index_cols`` is renamed to the part of its
    name before the first space, then the frame is unpivoted so those columns
    become rows with a ``date`` column and a ``value_col`` column.

    Args:
        df: Wide frame to reshape.
        index_cols: Identifier columns to keep as-is on every output row.
        value_col: Name given to the column holding the unpivoted values.

    Returns:
        Long frame with ``index_cols``, ``date`` (cast to ``pl.Date``) and
        ``value_col``.
    """
    # Presumably the non-index headers look like "<date> <time>"; only the
    # leading token is kept so the cast to Date below can parse it.
    renames = {
        name: name.split(" ")[0]
        for name in df.columns
        if name not in index_cols
    }

    return (
        df.rename(renames)
        .unpivot(index=index_cols, variable_name="date", value_name=value_col)
        .with_columns(pl.col("date").cast(pl.Date))
    )

In [29]:
# Load the year's factor exposures and reshape them to one row per
# (barrid, factor, date)
bfe = clean(BarraFactorExposures().load(year), ["barrid", "factor"], "exposure")

bfe

barrid,factor,date,exposure
str,str,date,f64
"""ARGA1B1""","""BETA""",2023-01-03,0.818
"""ARGA1B1""","""COUNTRY""",2023-01-03,1.0
"""ARGA1B1""","""DIVYILD""",2023-01-03,-0.909
"""ARGA1B1""","""EARNQLTY""",2023-01-03,-1.011
"""ARGA1B1""","""EARNYILD""",2023-01-03,-0.601
…,…,…,…
"""USBPQG1""","""PROFIT""",2023-12-29,-2.455
"""USBPQG1""","""PROSPECT""",2023-12-29,0.234
"""USBPQG1""","""RESVOL""",2023-12-29,-0.721
"""USBPQG1""","""SIZE""",2023-12-29,-4.873


In [30]:
# Load the year's factor covariances, reshape them to long form, and rename
# `factor_1` to `factor` so the column name matches the exposures frame
bfc = clean(
    BarraFactorCovariances().load(year),
    ["factor_1", "factor_2"],
    "covariance",
).rename({"factor_1": "factor"})

bfc

factor,factor_2,date,covariance
str,str,date,f64
"""AERODEF""","""AERODEF""",2023-01-03,207.383191
"""AERODEF""","""AIRLINES""",2023-01-03,96.635459
"""AERODEF""","""APPAREL""",2023-01-03,26.042705
"""AERODEF""","""AUTO""",2023-01-03,-10.064202
"""AERODEF""","""BANKS""",2023-01-03,-4.548321
…,…,…,…
"""WIRELESS""","""COUNTRY""",2023-12-29,-49.777706
"""WIRELESS""","""ELECUTIL""",2023-12-29,8.583867
"""WIRELESS""","""GASUTIL""",2023-12-29,23.611227
"""WIRELESS""","""MULTUTIL""",2023-12-29,20.326507


In [31]:
# Load the year's specific risk forecasts and reshape them to one row per
# (barrid, date)
bsrf = clean(BarraSpecificRiskForecast().load(year), ["barrid"], "spec_risk")

bsrf

barrid,date,spec_risk
str,date,f64
"""ARGA1B1""",2023-01-03,25.820439
"""ARGA1G1""",2023-01-03,
"""ARGA1K1""",2023-01-03,
"""ARGA621""",2023-01-03,66.565339
"""ARGA631""",2023-01-03,36.820819
…,…,…
"""USBPPH1""",2023-12-29,18.142673
"""USBPPS1""",2023-12-29,28.983597
"""USBPQA1""",2023-12-29,72.806297
"""USBPQD1""",2023-12-29,15.068052


Factor Exposures vs. Factor Covariances

In [32]:
# Distinct (date, factor) pairs on each side, sorted so the frames can be
# compared row for row
bfe_factors = bfe.select("date", "factor").unique().sort("date", "factor")
bfc_factors = bfc.select("date", "factor").unique().sort("date", "factor")

assert bfe_factors.equals(bfc_factors)

Factor Exposures vs. Specific Risk

In [33]:
# Distinct (date, barrid) pairs on each side, sorted for a stable display
bfe_barrids = bfe.select(["date", "barrid"]).unique().sort(["date", "barrid"])
bsrf_barrids = bsrf.select(["date", "barrid"]).unique().sort(["date", "barrid"])

display(bfe_barrids, bsrf_barrids)

# The previews above are truncated, so they cannot show whether the two sets
# differ. Anti-joins list the pairs present in one frame but not the other:
# first exposures without a specific risk forecast, then the reverse.
display(
    bfe_barrids.join(bsrf_barrids, on=["date", "barrid"], how="anti"),
    bsrf_barrids.join(bfe_barrids, on=["date", "barrid"], how="anti"),
)

date,barrid
date,str
2023-01-03,"""ARGA1B1"""
2023-01-03,"""ARGA1G1"""
2023-01-03,"""ARGA1K1"""
2023-01-03,"""ARGA621"""
2023-01-03,"""ARGA631"""
…,…
2023-12-29,"""USBPPH1"""
2023-12-29,"""USBPPS1"""
2023-12-29,"""USBPQA1"""
2023-12-29,"""USBPQD1"""


date,barrid
date,str
2023-01-03,"""ARGA1B1"""
2023-01-03,"""ARGA1G1"""
2023-01-03,"""ARGA1K1"""
2023-01-03,"""ARGA621"""
2023-01-03,"""ARGA631"""
…,…
2023-12-29,"""USBPPH1"""
2023-12-29,"""USBPPS1"""
2023-12-29,"""USBPQA1"""
2023-12-29,"""USBPQD1"""


Here we see that the factor exposures and the specific risk forecasts do not have exactly the same combinations of date and barrid.

Do the russell constituents barrids match the factor exposures and specific risk?

In [34]:
russell = RussellConstituents().load_all()

# Distinct (date, barrid) pairs for the analysis year, without rows that have
# no barrid, sorted by date and then barrid
russell_barrids = (
    russell.select("date", "barrid")
    .unique()
    .filter(pl.col("date").dt.year() == year)
    .drop_nulls("barrid")
    .sort("date", "barrid")
)

russell_barrids

date,barrid
date,str
2023-01-31,"""USA06Z1"""
2023-01-31,"""USA0C11"""
2023-01-31,"""USA0H41"""
2023-01-31,"""USA0SY1"""
2023-01-31,"""USA11I1"""
…,…
2023-12-29,"""USBPGY1"""
2023-12-29,"""USBPID1"""
2023-12-29,"""USBPJV1"""
2023-12-29,"""USBPKS1"""


In [35]:
def aggregate(df: pl.DataFrame) -> pl.DataFrame:
    """Reduce (date, barrid) rows to one row per barrid per calendar month.

    For each barrid and month, the latest date present in ``df`` is kept. A
    barrid that stops appearing mid-month therefore keeps its last observed
    date rather than the month's final date. A constant ``check`` column set
    to True is appended; after a left join onto this frame, rows without a
    match have a null ``check``.

    Args:
        df: Frame with a ``date`` column and a ``barrid`` column.

    Returns:
        Frame with columns ``barrid``, ``date`` and ``check``, sorted by date
        and then barrid.
    """
    # First day of the month each date falls in, used only as a grouping key
    month_start = pl.col("date").dt.truncate("1mo").alias("month")
    last_seen = pl.col("date").max()

    return (
        df.with_columns(month_start)
        .group_by(["month", "barrid"])
        .agg(last_seen)
        .drop("month")
        .sort(["date", "barrid"])
        .with_columns(pl.lit(True).alias("check"))
    )

In [36]:
# Aggregate
bfe_barrids_monthly = aggregate(bfe_barrids)

bfe_barrids_monthly

barrid,date,check
str,date,bool
"""ARGA1B1""",2023-01-31,true
"""ARGA1G1""",2023-01-31,true
"""ARGA1K1""",2023-01-31,true
"""ARGA621""",2023-01-31,true
"""ARGA631""",2023-01-31,true
…,…,…
"""USBPPH1""",2023-12-29,true
"""USBPPS1""",2023-12-29,true
"""USBPQA1""",2023-12-29,true
"""USBPQD1""",2023-12-29,true


In [37]:
subset = russell_barrids.join(bfe_barrids_monthly, on=["date", "barrid"], how="left")

# Russell rows with no factor exposures come back with a null `check`; turn
# them into False so the filter below can select them.
subset = subset.fill_null(False)

display(subset.filter(~pl.col("check")))

# Sanity check that the left join neither dropped nor duplicated Russell rows
assert russell_barrids.equals(subset.drop("check"))

# The actual coverage check: every Russell (date, barrid) pair found a match
assert subset["check"].all(), "Some Russell barrids have no factor exposures"

date,barrid,check
date,str,bool


It appears that every Russell constituent barrid has Barra factor exposures on the matching date.

In [38]:
# Keep each barrid's last specific-risk date per month and add the `check` flag
bsrf_barrids_monthly = bsrf_barrids.pipe(aggregate)

bsrf_barrids_monthly

barrid,date,check
str,date,bool
"""ARGA1B1""",2023-01-31,true
"""ARGA1G1""",2023-01-31,true
"""ARGA1K1""",2023-01-31,true
"""ARGA621""",2023-01-31,true
"""ARGA631""",2023-01-31,true
…,…,…
"""USBPPH1""",2023-12-29,true
"""USBPPS1""",2023-12-29,true
"""USBPQA1""",2023-12-29,true
"""USBPQD1""",2023-12-29,true


In [39]:
subset = russell_barrids.join(bsrf_barrids_monthly, on=["date", "barrid"], how="left")

# Russell rows with no specific risk forecast come back with a null `check`;
# turn them into False so the filter below can select them.
subset = subset.fill_null(False)

display(subset.filter(~pl.col("check")))

# Sanity check that the left join neither dropped nor duplicated Russell rows
assert russell_barrids.equals(subset.drop("check"))

# The actual coverage check: every Russell (date, barrid) pair found a match
assert subset["check"].all(), "Some Russell barrids have no specific risk forecast"

date,barrid,check
date,str,bool


It appears that every Russell constituent barrid also has a specific risk forecast on the matching date.

How well do the returns and risk forecasts match?

In [40]:
# Reload the full Russell constituents table (the same call as in the earlier
# Russell cell) and display it with all of its columns
russell = RussellConstituents().load_all()

russell

date,obsdate,enddate,cusip,permno,barrid,barra_ticker,r3000_wt,issue_name,ticker
date,date,date,str,f64,str,str,f64,str,str
1978-12-29,1979-01-10,1979-04-10,"""00036110""",54594.0,,,0.0019,,
1978-12-29,1979-01-10,1979-04-10,"""00080010""",10006.0,,,0.031,,
1978-12-29,1979-01-10,1979-04-10,"""00103010""",10154.0,,,0.0012,,
1978-12-29,1979-01-10,1979-04-10,"""00103210""",10162.0,,,0.0026,,
1978-12-29,1979-01-10,1979-04-10,"""00144410""",10306.0,,,0.0013,,
…,…,…,…,…,…,…,…,…,…
2024-12-31,2025-01-10,1975-12-31,"""Y2685T13""",15597.0,"""USBC8F1""",,0.00001,"""GENCO SHIPPING & TRADING LIMIT…","""USGNK"""
2024-12-31,2025-01-10,1975-12-31,"""Y4105310""",16454.0,"""USBDIX1""",,0.000026,"""INTERNATIONAL SEAWAYS INC""","""USINSW"""
2024-12-31,2025-01-10,1975-12-31,"""Y7388L10""",92679.0,"""USACPW1""",,0.000004,"""SAFE BULKERS INC""","""USSB"""
2024-12-31,2025-01-10,1975-12-31,"""Y7542C13""",93299.0,"""USALLL1""",,0.000039,"""SCORPIO TANKERS INC""","""USSTNG"""


In [41]:
# Barra returns for the analysis year
br = BarraReturns().load(year)

br

date,barrid,price,mktcap,pricesource,currency,ret
date,str,f64,f64,str,str,f64
2023-01-03,"""ARGA1B1""",3036.0,4.1254e12,"""MSCIBARRA""","""ARS""",-0.020645
2023-01-04,"""ARGA1B1""",3150.0,4.2803e12,"""MSCIBARRA""","""ARS""",0.037549
2023-01-05,"""ARGA1B1""",3165.0,4.3007e12,"""MSCIBARRA""","""ARS""",0.004762
2023-01-06,"""ARGA1B1""",3185.0,4.3279e12,"""MSCIBARRA""","""ARS""",0.006319
2023-01-09,"""ARGA1B1""",3115.0,4.2328e12,"""MSCIBARRA""","""ARS""",-0.021978
…,…,…,…,…,…,…
2023-12-28,"""USBPQA1""",2.57,3.8089e7,"""MSCIBARRA""","""USD""",-0.011538
2023-12-29,"""USBPQA1""",2.85,4.2239e7,"""MSCIBARRA""","""USD""",0.108949
2023-12-29,"""USBPQD1""",13.45,5.7439e8,"""MSCIBARRA""","""USD""",-0.023239
2023-12-28,"""USBPQG1""",6.85,6.7512e7,"""MSCIBARRA""","""USD""",


In [42]:
# Barra risk forecasts for the analysis year
brf = BarraRiskForecasts().load(year)

brf

date,barrid,div_yield,total_risk,spec_risk,histbeta,predbeta
date,str,f64,f64,f64,f64,f64
2023-01-03,"""ARGA1B1""",0.0,0.396221,0.258204,1.245569,1.314258
2023-01-03,"""ARGA621""",0.0,0.772561,0.665653,1.03035,1.545753
2023-01-03,"""ARGA631""",0.0,0.49185,0.368208,0.780698,1.206389
2023-01-03,"""ARGA641""",0.0,0.507191,0.311712,0.760347,1.218184
2023-01-03,"""ARGA651""",0.0182357,0.475045,0.364988,0.740156,1.031541
…,…,…,…,…,…,…
2023-12-29,"""USBPPH1""",,0.308507,0.181427,1.078032,0.95078
2023-12-29,"""USBPPS1""",,0.407579,0.289836,0.805638,0.983537
2023-12-29,"""USBPQA1""",,0.821866,0.728063,1.241031,1.592499
2023-12-29,"""USBPQD1""",,0.365097,0.150681,1.266551,1.112014


How well do the returns and risk data match the russell barrids?

In [43]:
# Distinct (date, barrid) pairs that have a returns row
br_barrids = br.select("date", "barrid").unique()

# Keep each barrid's last returns date per month and add the `check` flag
br_barrids_monthly = br_barrids.pipe(aggregate)

br_barrids_monthly

barrid,date,check
str,date,bool
"""GER1661""",2023-01-03,true
"""USAAWS1""",2023-01-03,true
"""USAFNM1""",2023-01-03,true
"""USASEZ1""",2023-01-03,true
"""USBEVX1""",2023-01-03,true
…,…,…
"""USBPPH1""",2023-12-29,true
"""USBPPS1""",2023-12-29,true
"""USBPQA1""",2023-12-29,true
"""USBPQD1""",2023-12-29,true


In [44]:
subset = russell_barrids.join(br_barrids_monthly, on=["date", "barrid"], how="left")

# Russell rows with no returns come back from the left join with a null
# `check`. `filter` drops rows whose predicate is null, so without this fill
# the display below would be empty even when barrids are missing.
subset = subset.fill_null(False)

display(subset.filter(~pl.col("check")))

# Only verifies that the left join neither dropped nor duplicated Russell
# rows; the display above is what reveals missing barrids.
assert russell_barrids.equals(subset.drop("check"))

date,barrid,check
date,str,bool


Looks good here!

In [45]:
# Distinct (date, barrid) pairs that have a risk forecast row
brf_barrids = brf.select("date", "barrid").unique()

# Keep each barrid's last risk-forecast date per month and add the `check` flag
brf_barrids_monthly = brf_barrids.pipe(aggregate)

brf_barrids_monthly

barrid,date,check
str,date,bool
"""GER1661""",2023-01-03,true
"""USAAWS1""",2023-01-03,true
"""USAFNM1""",2023-01-03,true
"""USASEZ1""",2023-01-03,true
"""USBEVX1""",2023-01-03,true
…,…,…
"""USBPPH1""",2023-12-29,true
"""USBPPS1""",2023-12-29,true
"""USBPQA1""",2023-12-29,true
"""USBPQD1""",2023-12-29,true


In [46]:
subset = russell_barrids.join(brf_barrids_monthly, on=["date", "barrid"], how="left")

# Russell rows with no risk forecast come back from the left join with a null
# `check`. `filter` drops rows whose predicate is null, so without this fill
# the display below would be empty even when barrids are missing.
subset = subset.fill_null(False)

display(subset.filter(~pl.col("check")))

# Only verifies that the left join neither dropped nor duplicated Russell
# rows; the display above is what reveals missing barrids.
assert russell_barrids.equals(subset.drop("check"))

date,barrid,check
date,str,bool
