In [2]:
import polars as pl
import numpy as np

In [3]:
# Open the CRSP parquet dataset lazily; the query is only executed on collect.
df = pl.scan_parquet('./Datasets/crsp')
# Display column names and dtypes. Limiting rows does not change the schema,
# so it is resolved directly from the lazy frame.
df.collect_schema()

Schema([('PERMNO', Int64),
        ('date', String),
        ('NAMEENDT', String),
        ('SHRCD', Int64),
        ('EXCHCD', Int64),
        ('NCUSIP', String),
        ('TICKER', String),
        ('COMNAM', String),
        ('SHRCLS', String),
        ('PRIMEXCH', String),
        ('TRDSTAT', String),
        ('SECSTAT', String),
        ('PERMCO', Int64),
        ('CUSIP', String),
        ('DCLRDT', String),
        ('DLAMT', Float64),
        ('DLPDT', String),
        ('DLSTCD', Float64),
        ('PAYDT', String),
        ('RCRDDT', String),
        ('SHRFLG', Float64),
        ('DISTCD', Float64),
        ('DIVAMT', Float64),
        ('FACPR', Float64),
        ('FACSHR', Float64),
        ('ACPERM', Float64),
        ('ACCOMP', Float64),
        ('SHRENDDT', String),
        ('NWPERM', Float64),
        ('DLRETX', String),
        ('DLPRC', Float64),
        ('DLRET', String),
        ('BIDLO', Float64),
        ('ASKHI', Float64),
        ('PRC', Float64),
        ('VOL', F

In [4]:
# Daily log returns for Apple, computed from CRSP prices.
#
# Step 1: resolve the ticker to its PERMNO. `.item()` raises unless the filter
# yields exactly one distinct PERMNO, which guards against an ambiguous ticker.
aapl_permno = (
    df.filter(pl.col('TICKER') == 'AAPL')
    .select('PERMNO')
    .unique()
    .collect()
    .item()
)

# Step 2: build the price expression used for returns.
# CRSP convention (see the PRC variable definition): a negative PRC is a
# bid/ask average rather than a trade price, and 0 means no price is available.
# Using the raw signed value yields NaN (mixed signs) or +/-inf (zeros) in the
# log, so take the magnitude and turn non-positive prices into nulls.
abs_price = pl.col('PRC').abs()
valid_price = pl.when(abs_price > 0).then(abs_price)

# Step 3: log(P_t / P_{t-1}) for the selected PERMNO. The PRC column itself is
# returned unchanged (sign preserved); only LOG_RET uses the cleaned price.
# NOTE(review): shift(1) assumes the rows arrive in chronological order for this
# PERMNO -- `date` is a String column of unverified format, so no sort is
# applied here; confirm the on-disk ordering.
# NOTE(review): this is a price-only return; splits and distributions are not
# adjusted for -- confirm that is intended.
log_returns = (
    df.filter(pl.col('PERMNO') == aapl_permno)
    .select(['PERMNO', 'PRC'])
    .with_columns(
        (valid_price / valid_price.shift(1)).log().alias('LOG_RET')
    )
    .collect()
)

# Keep a CRSP-specific alias of the lazy frame alongside the generic name `df`.
df_crsp = df

In [26]:
# Compustat quarterly fundamentals: open the file lazily and keep a fixed
# subset of columns.
# NOTE(review): the original header read "compustat rolling sum", but no
# rolling computation is performed in this cell.
df_cstat = pl.scan_parquet('./Datasets/compustat/fundamentals_quarterly_all')

# Columns to retain, listed in the order they appear in the result.
cstat_columns = [
    'gvkey', 'datadate', 'gsector', 'tic', 'cusip', 'conm', 'cik',
    'fyearq', 'fqtr', 'rdq',
    'indfmt', 'datafmt', 'consol', 'curcdq', 'costat',
    'prccq', 'cshoq', 'ajexq', 'mkvaltq',
    'atq', 'actq', 'cheq', 'rectq', 'invtq', 'ppentq',
    'ltq', 'lctq', 'dlcq', 'dlttq', 'apq', 'txdbq',
    'seqq', 'ceqq', 'pstkq',
    'saleq', 'cogsq', 'xsgaq', 'xrdq', 'dpq', 'xintq',
    'oiadpq', 'piq', 'txtq', 'niq', 'ibq', 'epsfxq',
    'oancfy', 'capxy', 'dvy', 'aqcy',
]
cstat_clean = df_cstat.select(cstat_columns)

# Materialise only the trailing rows as a quick visual check.
cstat_clean.tail().collect()

gvkey,datadate,gsector,tic,cusip,conm,cik,fyearq,fqtr,rdq,indfmt,datafmt,consol,curcdq,costat,prccq,cshoq,ajexq,mkvaltq,atq,actq,cheq,rectq,invtq,ppentq,ltq,lctq,dlcq,dlttq,apq,txdbq,seqq,ceqq,pstkq,saleq,cogsq,xsgaq,xrdq,dpq,xintq,oiadpq,piq,txtq,niq,ibq,epsfxq,oancfy,capxy,dvy,aqcy
str,date,str,str,str,str,str,i32,i16,date,str,str,str,str,str,"decimal[24,12]","decimal[18,4]","decimal[24,12]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]","decimal[18,4]"
"""177422""",2026-01-31,,"""FEX""","""33734K109""","""FIRST TR LRG CP CORE ALPHADX""",,2026,2,,"""INDL""","""STD""","""C""","""USD""","""A""",123.0934,11.45,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""179400""",2026-01-31,,"""PIN""","""46137R109""","""INVESCO INDIA ETF""",,2026,1,,"""INDL""","""STD""","""C""","""USD""","""A""",23.28,7.59,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""260849""",2026-01-31,,"""UTG""","""756158101""","""REAVES UTILITY INCOME FUND""","""0001263994""",2026,1,,"""INDL""","""STD""","""C""","""USD""","""A""",37.77,88.023,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""260850""",2026-01-31,,"""SCD""","""50208A102""","""LMP CAPITAL & INCOME FND INC""","""0001270131""",2026,1,,"""INDL""","""STD""","""C""","""USD""","""A""",15.59,22.889,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""264645""",2026-01-31,,"""CSQ""","""128125101""","""CALAMOS STRAT TOTAL RETRN FD""","""0001275214""",2026,1,,"""INDL""","""STD""","""C""","""USD""","""A""",19.31,160.37,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
