In [1]:
# Standard library imports
import logging
import warnings
from pathlib import Path

# Third-party imports
import pandas as pd
import wrds

# Suppress warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore")

from settings import config
from pull_compustat import *
from pull_crsp import *
from transform_crsp import *
from transform_compustat import *
from calc_Lewellen_2014 import *

In [2]:
# Notebook-wide pandas display settings: show up to 30 columns, allow cell
# text up to 200 characters, print floats with four decimals, and do not wrap
# wide frames across multiple lines.
pd.set_option("display.max_columns", 30)
pd.set_option("display.max_colwidth", 200)
pd.set_option("display.float_format", lambda value: "%.4f" % value)
pd.set_option("display.expand_frame_repr", False)

# Global variables
# Every value below is read through the project `config` helper. Both
# directories are created immediately (parents included) so that later cells
# can read from / write to them without checking for existence.
RAW_DATA_DIR = Path(config("RAW_DATA_DIR"))
RAW_DATA_DIR.mkdir(exist_ok=True, parents=True)

OUTPUT_DIR = Path(config("OUTPUT_DIR"))
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# WRDS login and the sample window passed to the pull_* calls.
WRDS_USERNAME = config("WRDS_USERNAME")
START_DATE, END_DATE = config("START_DATE"), config("END_DATE")


In [3]:
# CRSP stock file at freq="D" for the configured sample window.
# In the recorded run this was served from the parquet cache in RAW_DATA_DIR.
crsp_d = pull_CRSP_stock(
    wrds_username=WRDS_USERNAME,
    start_date=START_DATE,
    end_date=END_DATE,
    freq="D",
    data_dir=RAW_DATA_DIR,
    file_name="CRSP_stock_d.parquet",
)

Loading cached data from C:\Users\eduar\Documents\data\raw\CRSP_stock_d.parquet


In [25]:
# CRSP stock file at freq="M" for the same sample window.
# In the recorded run this was served from the parquet cache in RAW_DATA_DIR.
crsp_m = pull_CRSP_stock(
    wrds_username=WRDS_USERNAME,
    start_date=START_DATE,
    end_date=END_DATE,
    freq="M",
    data_dir=RAW_DATA_DIR,
    file_name="CRSP_stock_m.parquet",
)

Loading cached data from C:\Users\eduar\Documents\data\raw\CRSP_stock_m.parquet


In [170]:
# Compustat data for the sample window, stored as a parquet file in
# RAW_DATA_DIR. The recorded run saved a fresh copy rather than loading a
# cached one.
comp = pull_Compustat(
    wrds_username=WRDS_USERNAME,
    start_date=START_DATE,
    end_date=END_DATE,
    data_dir=RAW_DATA_DIR,
    file_name="Compustat_fund.parquet",
)

Loading library list...
Done
Saved data to C:\Users\eduar\Documents\data\raw\Compustat_fund.parquet


In [136]:
# CRSP/Compustat link table, stored as a parquet file in RAW_DATA_DIR; it is
# passed to merge_CRSP_and_Compustat further down.
ccm = pull_CRSP_Comp_link_table(
    wrds_username=WRDS_USERNAME,
    data_dir=RAW_DATA_DIR,
    file_name="CRSP_Comp_Link_Table.parquet",
)

Loading library list...
Done
Saved data to C:\Users\eduar\Documents\data\raw\CRSP_Comp_Link_Table.parquet


In [7]:
# CRSP index data at freq="D" for the sample window.
# NOTE(review): this is the only pull_* call that does not pass `data_dir`;
# it relies on the function's own default location (the recorded run loaded
# from the same raw-data folder as the other pulls). Consider passing
# data_dir=RAW_DATA_DIR for consistency - TODO confirm the parameter exists.
crsp_index_d = pull_CRSP_index(
    wrds_username=WRDS_USERNAME,
    start_date=START_DATE,
    end_date=END_DATE,
    freq="D",
    file_name="CRSP_index_d.parquet",
)

Loading cached data from C:\Users\eduar\Documents\data\raw\CRSP_index_d.parquet


In [171]:
# Build the merged monthly panel:
#   1. run calculate_market_equity on the monthly CRSP frame,
#   2. add a report date to the Compustat frame, then compute book equity,
#   3. merge both through the CCM link table.
# NOTE(review): `comp` is rebound to its transformed version, so re-running
# this cell feeds already-transformed data back into the helpers. Re-run the
# Compustat pull cell first - TODO confirm whether the helpers are safe to
# apply twice.
crsp = calculate_market_equity(crsp_m)
comp = calc_book_equity(add_report_date(comp))
crsp_comp = merge_CRSP_and_Compustat(crsp, comp, ccm)

# Preview the last rows of the merged frame.
crsp_comp.tail()

Unnamed: 0,permno,permco,mthcaldt,issuertype,securitytype,securitysubtype,sharetype,usincflg,primaryexch,conditionaltype,tradingstatusflg,totret,retx,prc,shrout,...,non_cash_current_assets,lct,total_debt,depreciation,dvpd,dvc,dvt,pstk,pstkl,pstkrv,txditc,seq,report_date,ps,be
230922,91405,58620,2009-06-30,CORP,EQTY,COM,NS,Y,Q,RW,A,-0.0989,-0.0989,8.11,13225,...,,,40.481,0.65,,0.0,0.0,0.0,0.0,0.0,0.0,110.502,2009-04-30,0.0,110.502
230923,91405,58620,2010-06-30,CORP,EQTY,COM,NS,Y,Q,RW,A,-0.0052,-0.0052,5.73,13225,...,,,35.328,0.762,,0.0,0.0,0.0,0.0,0.0,0.0,107.448,2010-04-30,0.0,107.448
230924,91405,58620,2011-06-30,CORP,EQTY,COM,NS,Y,Q,RW,A,0.0015,0.0015,6.78,12797,...,,,25.168,0.833,,0.0,0.0,0.0,0.0,0.0,0.0,108.139,2011-04-30,0.0,108.139
230925,91405,58620,2012-06-29,CORP,EQTY,COM,NS,Y,Q,RW,A,-0.0719,-0.0719,5.29,12645,...,,,20.0,0.712,,0.614,0.614,0.0,0.0,0.0,0.0,107.065,2012-04-30,0.0,107.065
230926,91405,58620,2013-06-28,CORP,EQTY,COM,NS,Y,Q,RW,A,0.0194,0.0194,6.32,12645,...,,,15.0,0.741,,0.669,0.669,0.0,0.0,0.0,0.0,103.849,2013-04-30,0.0,103.849


In [None]:
# Subsets are derived from the CRSP frame returned by
# calculate_market_equity (not from the merged panel).
subsets = get_subsets(crsp)

In [None]:
# Wide matrix of `retx`: one row per `mthcaldt`, one column per `permno`.
# NOTE(review): pivot_table averages duplicate (mthcaldt, permno) rows by
# default, and the frame also carries `totret` - confirm `retx` is the return
# series intended for a variable called `returns`.
returns = crsp.pivot_table(values="retx", index="mthcaldt", columns="permno")

# Characteristics computed from the merged CRSP/Compustat panel.
log_size = calc_log_size(crsp_comp)
log_bm = calc_log_bm(crsp_comp)
return_2_12 = calc_return_12_2(crsp_comp)

In [None]:
# Accruals characteristic, computed from the merged CRSP/Compustat panel.
accruals = calc_accruals(crsp_comp)

In [156]:
# `roa` characteristic, computed from the merged CRSP/Compustat panel.
roa = calc_roa(crsp_comp)

In [157]:
# Log asset-growth characteristic, computed from the merged panel.
log_assets_growth = calc_log_assets_growth(crsp_comp)

In [175]:
# `dy` characteristic (presumably dividend yield - confirm in calc_dy),
# computed from the merged panel.
dy = calc_dy(crsp_comp)

In [187]:
# Log-return characteristic labelled 13-36, computed from the merged panel.
log_return_13_36 = calc_log_return_13_36(crsp_comp)

In [177]:
# Log-issues characteristic labelled 12, computed from the merged panel.
log_issues_12 = calc_log_issues_12(crsp_comp)

In [188]:
# Log-issues characteristic labelled 36, computed from the merged panel.
log_issues_36 = calc_log_issues_36(crsp_comp)

In [None]:
# Rolling betas: built from the freq="D" stock frame together with the
# freq="D" index frame (not from the merged monthly panel).
betas = calculate_rolling_beta(crsp_d, crsp_index_d)

In [190]:
# `std_12` characteristic, computed from the freq="D" CRSP stock frame.
std_12 = calc_std_12(crsp_d)

In [197]:
# Debt-to-price characteristic, computed from the merged panel.
debt_price = calc_debt_price(crsp_comp)

In [198]:
# Sales-to-price characteristic, computed from the merged panel.
sales_price = calc_sales_price(crsp_comp)