In [1]:
import polars as pl
from datetime import date
from silverfund.datasets import *

Do all of the components of the factor model have the same barrids for each date?

In [2]:
# Reference date for the analysis. In the cells shown here only its year is
# used: to pick which yearly Barra files to load and to filter Russell dates.
date_ = date(2024, 12, 31)
year = date_.year

In [3]:
def clean(df: pl.DataFrame, index_cols: list[str], value_col: str) -> pl.DataFrame:
    """Reshape a wide frame with one column per date into long format.

    Args:
        df: Wide frame. Every column not listed in ``index_cols`` is treated as
            a date column; its name is cut at the first space before use
            (presumably to strip a time suffix -- TODO confirm against the raw files).
        index_cols: Identifier columns that are kept as-is.
        value_col: Name given to the column holding the unpivoted values.

    Returns:
        Long frame with ``index_cols``, a ``date`` column cast to ``pl.Date``
        and ``value_col``.
    """
    # Map each non-index column name to the text before its first space.
    date_labels = {
        name: name.split(" ")[0] for name in df.columns if name not in index_cols
    }

    return (
        df.rename(date_labels)
        .unpivot(index=index_cols, variable_name="date", value_name=value_col)
        .with_columns(pl.col("date").cast(pl.Date))
    )

In [4]:
# Load the year's factor exposures and reshape them to one row per
# (barrid, factor, date).
bfe = clean(BarraFactorExposures().load(year), ["barrid", "factor"], "exposure")

bfe

barrid,factor,date,exposure
str,str,date,f64
"""ARGA1B1""","""BETA""",2024-01-02,0.893
"""ARGA1B1""","""COUNTRY""",2024-01-02,1.0
"""ARGA1B1""","""DIVYILD""",2024-01-02,-0.775
"""ARGA1B1""","""EARNQLTY""",2024-01-02,-0.717
"""ARGA1B1""","""EARNYILD""",2024-01-02,-0.919
…,…,…,…
"""USBQWH1""","""PROSPECT""",2024-12-31,-0.068
"""USBQWH1""","""RESVOL""",2024-12-31,0.992
"""USBQWH1""","""SIZE""",2024-12-31,-4.272
"""USBQWH1""","""SPTYSTOR""",2024-12-31,1.0


In [5]:
# Load the year's factor covariances in long format. The first factor column is
# renamed to "factor" so it lines up with the exposures' column name.
bfc = clean(
    BarraFactorCovariances().load(year), ["factor_1", "factor_2"], "covariance"
).rename({"factor_1": "factor"})

bfc

factor,factor_2,date,covariance
str,str,date,f64
"""AERODEF""","""AERODEF""",2024-01-02,160.058795
"""AERODEF""","""AIRLINES""",2024-01-02,64.17249
"""AERODEF""","""APPAREL""",2024-01-02,24.872283
"""AERODEF""","""AUTO""",2024-01-02,-9.247184
"""AERODEF""","""BANKS""",2024-01-02,2.1289535
…,…,…,…
"""WIRELESS""","""COUNTRY""",2024-12-31,-34.938125
"""WIRELESS""","""ELECUTIL""",2024-12-31,10.081188
"""WIRELESS""","""GASUTIL""",2024-12-31,21.129703
"""WIRELESS""","""MULTUTIL""",2024-12-31,19.819425


In [6]:
# Load the year's specific risk forecasts and reshape them to one row per
# (barrid, date).
bsrf = clean(BarraSpecificRiskForecast().load(year), ["barrid"], "spec_risk")

bsrf

barrid,date,spec_risk
str,date,f64
"""ARGA1B1""",2024-01-02,52.431842
"""ARGA1G1""",2024-01-02,44.411108
"""ARGA1K1""",2024-01-02,40.374249
"""ARGA621""",2024-01-02,88.140518
"""ARGA631""",2024-01-02,44.171036
…,…,…
"""USBQVO1""",2024-12-31,21.379409
"""USBQVQ1""",2024-12-31,17.728612
"""USBQVX1""",2024-12-31,16.605512
"""USBQWF1""",2024-12-31,69.712413


Factor Exposures vs. Factor Covariances

In [7]:
# Distinct (date, factor) pairs on each side, sorted so the frames can be
# compared row by row.
bfe_factors = bfe.select("date", "factor").unique().sort("date", "factor")
bfc_factors = bfc.select("date", "factor").unique().sort("date", "factor")

assert bfe_factors.equals(bfc_factors)

Factor Exposures vs. Specific Risk

In [8]:
# Distinct (date, barrid) pairs on each side, sorted for display.
bfe_barrids = bfe.select(["date", "barrid"]).unique().sort(["date", "barrid"])
bsrf_barrids = bsrf.select(["date", "barrid"]).unique().sort(["date", "barrid"])

# The previews are truncated, so identical-looking heads and tails prove nothing.
# Anti-joins list the (date, barrid) pairs that exist on one side only.
only_in_bfe = bfe_barrids.join(bsrf_barrids, on=["date", "barrid"], how="anti")
only_in_bsrf = bsrf_barrids.join(bfe_barrids, on=["date", "barrid"], how="anti")

display(bfe_barrids, bsrf_barrids, only_in_bfe, only_in_bsrf)

date,barrid
date,str
2024-01-02,"""ARGA1B1"""
2024-01-02,"""ARGA1G1"""
2024-01-02,"""ARGA1K1"""
2024-01-02,"""ARGA621"""
2024-01-02,"""ARGA631"""
…,…
2024-12-31,"""USBQVO1"""
2024-12-31,"""USBQVQ1"""
2024-12-31,"""USBQVX1"""
2024-12-31,"""USBQWF1"""


date,barrid
date,str
2024-01-02,"""ARGA1B1"""
2024-01-02,"""ARGA1G1"""
2024-01-02,"""ARGA1K1"""
2024-01-02,"""ARGA621"""
2024-01-02,"""ARGA631"""
…,…
2024-12-31,"""USBQVO1"""
2024-12-31,"""USBQVQ1"""
2024-12-31,"""USBQVX1"""
2024-12-31,"""USBQWF1"""


The previews above are truncated, but the factor exposures and the specific risk forecasts do not contain exactly the same (date, barrid) combinations.

Do the russell constituents barrids match the factor exposures and specific risk?

In [9]:
russell = RussellConstituents().load_all()

# Distinct (date, barrid) pairs for the analysis year, without null barrids,
# in sorted order.
russell_barrids = (
    russell.select("date", "barrid")
    .unique()
    .filter(pl.col("date").dt.year() == year)
    .drop_nulls(subset=["barrid"])
    .sort("date", "barrid")
)

russell_barrids

date,barrid
date,str
2024-01-31,"""USA06Z1"""
2024-01-31,"""USA0771"""
2024-01-31,"""USA0BV1"""
2024-01-31,"""USA0C11"""
2024-01-31,"""USA0SY1"""
…,…
2024-12-31,"""USBQNC1"""
2024-12-31,"""USBQND1"""
2024-12-31,"""USBQO11"""
2024-12-31,"""USBQOE1"""


In [26]:
def aggregate(df: pl.DataFrame) -> pl.DataFrame:
    """Reduce (date, barrid) rows to one row per barrid and calendar month.

    For each barrid and month the latest date present in ``df`` is kept. This
    is the last observed date, which is not necessarily the calendar month end
    (a barrid seen only on 2024-01-02 keeps that date).

    Args:
        df: Frame with at least a ``date`` and a ``barrid`` column.

    Returns:
        Frame with ``barrid``, ``date`` and a constant ``check`` column set to
        ``True``, sorted by ``date`` then ``barrid``.
    """
    # First day of the month each date falls in; used only as a grouping key.
    month_start = pl.col("date").dt.truncate("1mo").alias("month")

    return (
        df.with_columns(month_start)
        .group_by(["month", "barrid"])
        .agg(pl.col("date").max())
        .drop("month")
        .sort(["date", "barrid"])
        # Marker column: after a left join, a null here means "no match".
        .with_columns(pl.lit(True).alias("check"))
    )

In [28]:
# Aggregate
bfe_barrids_monthly = aggregate(bfe_barrids)

bfe_barrids_monthly

barrid,date,check
str,date,bool
"""ARGA1B1""",2024-01-31,true
"""ARGA1G1""",2024-01-31,true
"""ARGA1K1""",2024-01-31,true
"""ARGA621""",2024-01-31,true
"""ARGA631""",2024-01-31,true
…,…,…
"""USBQVO1""",2024-12-31,true
"""USBQVQ1""",2024-12-31,true
"""USBQVX1""",2024-12-31,true
"""USBQWF1""",2024-12-31,true


In [29]:
# Left-join the monthly factor-exposure keys onto the Russell keys. "check" is
# true where a match exists and null where it does not.
subset = russell_barrids.join(bfe_barrids_monthly, on=["date", "barrid"], how="left")

# Turn unmatched rows (null) into an explicit False so they can be filtered on.
subset = subset.with_columns(pl.col("check").fill_null(False))

missing = subset.filter(~pl.col("check"))

display(missing)

# The join must not add or drop Russell rows, and every row must have matched.
# Comparing the key columns of a left join with its left input would hold
# whether or not anything matched, so assert on the unmatched rows instead.
assert subset.height == russell_barrids.height
assert missing.height == 0, (
    f"{missing.height} Russell (date, barrid) pairs have no factor exposures"
)

date,barrid,check
date,str,bool


It appears that every Russell constituent (date, barrid) pair has Barra factor exposures.

In [30]:
# Reduce the daily specific-risk keys to one row per barrid and month
# (latest date in the month) so they can be joined to the Russell dates.
bsrf_barrids_monthly = aggregate(bsrf_barrids)

bsrf_barrids_monthly

barrid,date,check
str,date,bool
"""ARGA1B1""",2024-01-31,true
"""ARGA1G1""",2024-01-31,true
"""ARGA1K1""",2024-01-31,true
"""ARGA621""",2024-01-31,true
"""ARGA631""",2024-01-31,true
…,…,…
"""USBQVO1""",2024-12-31,true
"""USBQVQ1""",2024-12-31,true
"""USBQVX1""",2024-12-31,true
"""USBQWF1""",2024-12-31,true


In [31]:
# Left-join the monthly specific-risk keys onto the Russell keys. "check" is
# true where a match exists and null where it does not.
subset = russell_barrids.join(bsrf_barrids_monthly, on=["date", "barrid"], how="left")

# Turn unmatched rows (null) into an explicit False so they can be filtered on.
subset = subset.with_columns(pl.col("check").fill_null(False))

missing = subset.filter(~pl.col("check"))

display(missing)

# The join must not add or drop Russell rows, and every row must have matched.
# Comparing the key columns of a left join with its left input would hold
# whether or not anything matched, so assert on the unmatched rows instead.
assert subset.height == russell_barrids.height
assert missing.height == 0, (
    f"{missing.height} Russell (date, barrid) pairs have no specific risk forecast"
)

date,barrid,check
date,str,bool


It appears that every Russell constituent (date, barrid) pair also has a specific risk forecast.

How well do the returns and risk forecasts match?

In [17]:
# Full Russell constituents history (the preview runs from 1978 to 2024).
# NOTE(review): this repeats the load_all() call from the cell that built
# russell_barrids -- confirm whether the reload is needed.
russell = RussellConstituents().load_all()

russell

date,obsdate,enddate,cusip,permno,barrid,barra_ticker,r3000_wt,issue_name,ticker
date,date,date,str,f64,str,str,f64,str,str
1978-12-29,1979-01-10,1979-04-10,"""00036110""",54594.0,,,0.0019,,
1978-12-29,1979-01-10,1979-04-10,"""00080010""",10006.0,,,0.031,,
1978-12-29,1979-01-10,1979-04-10,"""00103010""",10154.0,,,0.0012,,
1978-12-29,1979-01-10,1979-04-10,"""00103210""",10162.0,,,0.0026,,
1978-12-29,1979-01-10,1979-04-10,"""00144410""",10306.0,,,0.0013,,
…,…,…,…,…,…,…,…,…,…
2024-12-31,2025-01-10,1975-12-31,"""Y2685T13""",15597.0,"""USBC8F1""",,0.00001,"""GENCO SHIPPING & TRADING LIMIT…","""USGNK"""
2024-12-31,2025-01-10,1975-12-31,"""Y4105310""",16454.0,"""USBDIX1""",,0.000026,"""INTERNATIONAL SEAWAYS INC""","""USINSW"""
2024-12-31,2025-01-10,1975-12-31,"""Y7388L10""",92679.0,"""USACPW1""",,0.000004,"""SAFE BULKERS INC""","""USSB"""
2024-12-31,2025-01-10,1975-12-31,"""Y7542C13""",93299.0,"""USALLL1""",,0.000039,"""SCORPIO TANKERS INC""","""USSTNG"""


In [14]:
# Daily Barra returns for the analysis year, one row per (date, barrid).
br = BarraReturns().load(year)

br

date,barrid,currency,mktcap,price,pricesource,ret
date,str,str,f64,f64,str,f64
2024-01-02,"""ARGA1B1""","""ARS""",2.1981e13,15788.0,"""MSCIBARRA""",-0.048916
2024-01-03,"""ARGA1B1""","""ARS""",2.2653e13,16271.0,"""MSCIBARRA""",0.030593
2024-01-04,"""ARGA1B1""","""ARS""",2.4434e13,17550.0,"""MSCIBARRA""",0.078606
2024-01-05,"""ARGA1B1""","""ARS""",2.6066e13,18722.5,"""MSCIBARRA""",0.066809
2024-01-08,"""ARGA1B1""","""ARS""",2.8935e13,20783.0,"""MSCIBARRA""",0.110055
…,…,…,…,…,…,…
2024-12-30,"""USBQWF1""","""USD""",2.9302e8,9.85,"""MSCIBARRA""",0.29776
2024-12-31,"""USBQWF1""","""USD""",2.3591e8,7.93,"""MSCIBARRA""",-0.194924
2024-12-27,"""USBQWH1""","""USD""",1.06634e8,4.07,"""MSCIBARRA""",
2024-12-30,"""USBQWH1""","""USD""",1.2707e8,4.85,"""MSCIBARRA""",0.191646


In [16]:
# Daily Barra risk forecasts for the analysis year, one row per (date, barrid).
brf = BarraRiskForecasts().load(year)

brf

date,barrid,div_yield,total_risk,spec_risk,histbeta,predbeta
date,str,f64,f64,f64,f64,f64
2024-01-02,"""ARGA1B1""",0.0,0.668184,0.524318,1.395039,1.724306
2024-01-02,"""ARGA1G1""",0.023748,0.535321,0.444111,0.561386,1.04126
2024-01-02,"""ARGA1K1""",0.025574,0.463541,0.403742,0.31918,0.631024
2024-01-02,"""ARGA621""",0.0,1.007739,0.881405,1.884395,2.162143
2024-01-02,"""ARGA631""",0.0,0.567876,0.44171,1.170559,1.489842
…,…,…,…,…,…,…
2024-12-31,"""USBQVO1""",,0.448393,0.213794,0.749719,0.931585
2024-12-31,"""USBQVQ1""",,0.380211,0.177286,1.03986,0.875069
2024-12-31,"""USBQVX1""",,0.244966,0.166055,0.933242,0.864934
2024-12-31,"""USBQWF1""",,0.76331,0.697124,1.226263,1.484909


How well do the returns and risk data match the russell barrids?

In [32]:
# Distinct (date, barrid) keys of the daily returns, reduced to one row per
# barrid and month for the join against the Russell dates.
br_barrids = br.select("date", "barrid").unique()
br_barrids_monthly = aggregate(br_barrids)

br_barrids_monthly

barrid,date,check
str,date,bool
"""GERKGX1""",2024-01-02,true
"""GEROXV1""",2024-01-02,true
"""ITAAZM1""",2024-01-02,true
"""UKIML11""",2024-01-02,true
"""USALHI1""",2024-01-02,true
…,…,…
"""USBQVO1""",2024-12-31,true
"""USBQVQ1""",2024-12-31,true
"""USBQVX1""",2024-12-31,true
"""USBQWF1""",2024-12-31,true


In [33]:
# Left-join the monthly return keys onto the Russell keys. "check" is true
# where a match exists and null where it does not.
subset = russell_barrids.join(br_barrids_monthly, on=["date", "barrid"], how="left")

# Unmatched Russell rows come back with a null "check". Make them an explicit
# False, otherwise the filter below silently drops them and the result looks
# empty even when pairs are missing.
subset = subset.with_columns(pl.col("check").fill_null(False))

missing = subset.filter(~pl.col("check"))

display(missing)

# The join must not add or drop Russell rows, and every row must have matched.
assert subset.height == russell_barrids.height
assert missing.height == 0, (
    f"{missing.height} Russell (date, barrid) pairs have no Barra returns"
)

date,barrid,check
date,str,bool


Looks good here!

In [34]:
# Distinct (date, barrid) keys of the daily risk forecasts, reduced to one row
# per barrid and month for the join against the Russell dates.
brf_barrids = brf.select("date", "barrid").unique()
brf_barrids_monthly = aggregate(brf_barrids)

brf_barrids_monthly

barrid,date,check
str,date,bool
"""GERKGX1""",2024-01-02,true
"""GEROXV1""",2024-01-02,true
"""ITAAZM1""",2024-01-02,true
"""UKIML11""",2024-01-02,true
"""USALHI1""",2024-01-02,true
…,…,…
"""USBQVO1""",2024-12-31,true
"""USBQVQ1""",2024-12-31,true
"""USBQVX1""",2024-12-31,true
"""USBQWF1""",2024-12-31,true


In [35]:
# Left-join the monthly risk-forecast keys onto the Russell keys. "check" is
# true where a match exists and null where it does not.
subset = russell_barrids.join(brf_barrids_monthly, on=["date", "barrid"], how="left")

# Unmatched Russell rows come back with a null "check". Make them an explicit
# False, otherwise the filter below silently drops them and the result looks
# empty even when pairs are missing.
subset = subset.with_columns(pl.col("check").fill_null(False))

missing = subset.filter(~pl.col("check"))

display(missing)

# The join must not add or drop Russell rows, and every row must have matched.
assert subset.height == russell_barrids.height
assert missing.height == 0, (
    f"{missing.height} Russell (date, barrid) pairs have no Barra risk forecast"
)

date,barrid,check
date,str,bool


In [47]:
# Load the daily returns under a separate name so that the monthly aggregation
# in the next cell (which assigns `br`) leaves the raw frame available.
br_raw = BarraReturns().load(year)

br_raw

date,barrid,currency,mktcap,price,pricesource,ret
date,str,str,f64,f64,str,f64
2024-01-02,"""ARGA1B1""","""ARS""",2.1981e13,15788.0,"""MSCIBARRA""",-0.048916
2024-01-03,"""ARGA1B1""","""ARS""",2.2653e13,16271.0,"""MSCIBARRA""",0.030593
2024-01-04,"""ARGA1B1""","""ARS""",2.4434e13,17550.0,"""MSCIBARRA""",0.078606
2024-01-05,"""ARGA1B1""","""ARS""",2.6066e13,18722.5,"""MSCIBARRA""",0.066809
2024-01-08,"""ARGA1B1""","""ARS""",2.8935e13,20783.0,"""MSCIBARRA""",0.110055
…,…,…,…,…,…,…
2024-12-30,"""USBQWF1""","""USD""",2.9302e8,9.85,"""MSCIBARRA""",0.29776
2024-12-31,"""USBQWF1""","""USD""",2.3591e8,7.93,"""MSCIBARRA""",-0.194924
2024-12-27,"""USBQWH1""","""USD""",1.06634e8,4.07,"""MSCIBARRA""",
2024-12-30,"""USBQWH1""","""USD""",1.2707e8,4.85,"""MSCIBARRA""",0.191646


In [48]:
# Add logret column
br = br_raw.with_columns(pl.col("ret").log1p().alias("logret"))

# Add month column
br = br.with_columns(pl.col("date").dt.truncate("1mo").alias("month")).sort(["barrid", "date"])

br = br.group_by(["month", "barrid"]).agg(
    pl.col("date").last(),
    pl.col("currency").last(),
    pl.col("mktcap").last(),
    pl.col("price").last(),
    pl.col("logret").sum(),
)

# Compound up log returns
br = br.with_columns((pl.col("logret").exp() - 1).alias("ret"))

# Drop month and sort
br = br.drop("month").sort(["barrid", "date"])

br

barrid,date,currency,mktcap,price,logret,ret
str,date,str,f64,f64,f64,f64
"""ARGA1B1""",2024-01-31,"""ARS""",2.8571e13,20521.5,0.212071,0.236236
"""ARGA1B1""",2024-02-29,"""ARS""",1.6553e13,11889.5,-0.532011,-0.412577
"""ARGA1B1""",2024-03-28,"""ARS""",1.7317e13,12438.0,0.0451,0.046133
"""ARGA1B1""",2024-04-30,"""ARS""",2.2856e13,16416.5,0.277528,0.319864
"""ARGA1B1""",2024-05-31,"""ARS""",2.5971e13,18654.5,0.127799,0.136324
…,…,…,…,…,…,…
"""USBQVO1""",2024-12-31,"""USD""",8.06625e7,4.78,0.094264,0.09885
"""USBQVQ1""",2024-12-31,"""USD""",2.2792e8,0.02,0.0,0.0
"""USBQVX1""",2024-12-31,"""USD""",2.8731e9,11.75,0.006832,0.006855
"""USBQWF1""",2024-12-31,"""USD""",2.3591e8,7.93,0.146404,0.157664


In [49]:
# Load the daily risk forecasts under a separate name so that the monthly
# aggregation in the next cell (which assigns `brf`) leaves the raw frame available.
brf_raw = BarraRiskForecasts().load(year)

brf_raw

date,barrid,div_yield,total_risk,spec_risk,histbeta,predbeta
date,str,f64,f64,f64,f64,f64
2024-01-02,"""ARGA1B1""",0.0,0.668184,0.524318,1.395039,1.724306
2024-01-02,"""ARGA1G1""",0.023748,0.535321,0.444111,0.561386,1.04126
2024-01-02,"""ARGA1K1""",0.025574,0.463541,0.403742,0.31918,0.631024
2024-01-02,"""ARGA621""",0.0,1.007739,0.881405,1.884395,2.162143
2024-01-02,"""ARGA631""",0.0,0.567876,0.44171,1.170559,1.489842
…,…,…,…,…,…,…
2024-12-31,"""USBQVO1""",,0.448393,0.213794,0.749719,0.931585
2024-12-31,"""USBQVQ1""",,0.380211,0.177286,1.03986,0.875069
2024-12-31,"""USBQVX1""",,0.244966,0.166055,0.933242,0.864934
2024-12-31,"""USBQWF1""",,0.76331,0.697124,1.226263,1.484909


In [50]:
# Add month column
brf = brf_raw.with_columns(pl.col("date").dt.truncate("1mo").alias("month")).sort(["barrid", "date"])

brf = brf.group_by(["month", "barrid"]).agg(
    pl.col("date").last(),
    pl.col("div_yield").last(),
    pl.col("total_risk").last(),
    pl.col("spec_risk").last(),
    pl.col("histbeta").last(),
    pl.col("predbeta").last(),
)

# Drop month and sort
brf = brf.drop("month").sort(["barrid", "date"])

brf

barrid,date,div_yield,total_risk,spec_risk,histbeta,predbeta
str,date,f64,f64,f64,f64,f64
"""ARGA1B1""",2024-01-31,0.0,0.681455,0.537725,1.432008,1.766008
"""ARGA1B1""",2024-02-29,0.0,0.75938,0.646299,1.047491,1.586254
"""ARGA1B1""",2024-03-28,0.0,0.735188,0.625949,1.068905,1.619615
"""ARGA1B1""",2024-04-30,0.0,0.760952,0.65042,1.288579,1.758713
"""ARGA1B1""",2024-05-31,0.0,0.757707,0.638382,1.33499,1.782818
…,…,…,…,…,…,…
"""USBQVO1""",2024-12-31,,0.448393,0.213794,0.749719,0.931585
"""USBQVQ1""",2024-12-31,,0.380211,0.177286,1.03986,0.875069
"""USBQVX1""",2024-12-31,,0.244966,0.166055,0.933242,0.864934
"""USBQWF1""",2024-12-31,,0.76331,0.697124,1.226263,1.484909
