In [None]:
# Standard library imports
import logging
import warnings
from pathlib import Path

# Third-party imports
import pandas as pd
import wrds

# Suppress warnings.
# NOTE: warning filters are consulted newest-first, so the blanket "ignore"
# registered last takes precedence and silences every warning category; the
# FutureWarning-only filter is therefore redundant as written.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore")

from settings import config
from pull_compustat import *
from pull_crsp import *
from transform_crsp import *
from transform_compustat import *

In [None]:
# Pandas display defaults for this notebook: show up to 30 columns and
# 200-character cells, and do not wrap wide frames across multiple lines.
pd.set_option("display.max_columns", 30)
pd.set_option("display.max_colwidth", 200)
pd.set_option("display.expand_frame_repr", False)
# Render floats with four decimal places.
pd.set_option("display.float_format", lambda value: '%.4f' % value)

# Global settings, all read through the project's `config` accessor.
RAW_DATA_DIR = Path(config("RAW_DATA_DIR"))
OUTPUT_DIR = Path(config("OUTPUT_DIR"))

# Make sure both directories exist before any cell reads from or writes to them.
for _directory in (RAW_DATA_DIR, OUTPUT_DIR):
    _directory.mkdir(parents=True, exist_ok=True)

# WRDS account and sample window passed to the pull_* calls below.
WRDS_USERNAME = config("WRDS_USERNAME")
START_DATE = config("START_DATE")
END_DATE = config("END_DATE")

In [None]:
# CRSP stock file at freq="D" for the configured sample window.
# RAW_DATA_DIR and the parquet file name are handed to the helper
# (presumably its on-disk cache location — confirm in pull_CRSP_stock).
crsp_d = pull_CRSP_stock(
    freq="D",
    start_date=START_DATE,
    end_date=END_DATE,
    wrds_username=WRDS_USERNAME,
    data_dir=RAW_DATA_DIR,
    file_name="CRSP_stock_d.parquet",
)

In [None]:
# CRSP stock file at freq="M" for the configured sample window.
# RAW_DATA_DIR and the parquet file name are handed to the helper
# (presumably its on-disk cache location — confirm in pull_CRSP_stock).
crsp_m = pull_CRSP_stock(
    freq="M",
    start_date=START_DATE,
    end_date=END_DATE,
    wrds_username=WRDS_USERNAME,
    data_dir=RAW_DATA_DIR,
    file_name="CRSP_stock_m.parquet",
)

In [None]:
# Compustat fundamentals for the configured sample window.
# RAW_DATA_DIR and the parquet file name are handed to the helper
# (presumably its on-disk cache location — confirm in pull_Compustat).
comp = pull_Compustat(
    wrds_username=WRDS_USERNAME,
    start_date=START_DATE,
    end_date=END_DATE,
    data_dir=RAW_DATA_DIR,
    file_name="Compustat_fund.parquet",
)

In [None]:
# CRSP/Compustat link table; it is passed to merge_CRSP_and_Compustat below
# together with the CRSP and Compustat frames.
ccm = pull_CRSP_Comp_link_table(
    wrds_username=WRDS_USERNAME,
    data_dir=RAW_DATA_DIR,
    file_name="CRSP_Comp_Link_Table.parquet",
)

In [None]:
# CRSP index series at freq="D" for the same sample window.
# NOTE(review): unlike the other pull_* calls in this notebook, no data_dir is
# passed here, so whatever default the helper uses (if any) applies — confirm
# it resolves to RAW_DATA_DIR.
crsp_index_d = pull_CRSP_index(
    freq="D",
    start_date=START_DATE,
    end_date=END_DATE,
    wrds_username=WRDS_USERNAME,
    file_name="CRSP_index_d.parquet",
)

In [None]:
# 1) Calculate market equity from the monthly CRSP pull.
crsp = calculate_market_equity(crsp_m)

# 2) Run the Compustat frame through its transforms, in this order:
#    add report date -> calculate book equity -> expand annual data to monthly.
# NOTE: `comp` is rebound at every step, so re-running this cell without first
# re-running the Compustat pull feeds already-transformed data back through.
for _transform in (
    add_report_date,
    calc_book_equity,
    expand_compustat_annual_to_monthly,
):
    comp = _transform(comp)


In [None]:
# 3) Merge crsp (monthly CRSP with market equity) + comp + ccm => crsp_comp.
# The result is the frame every calc_* cell below reads and rebinds.
crsp_comp = merge_CRSP_and_Compustat(crsp, comp, ccm)

In [None]:
from calc_Lewellen_2014 import *

In [None]:
# "Log Size (-1)": presumably adds the `log_size` column named in
# variables_dict — confirm in calc_log_size.
crsp_comp = calc_log_size(crsp_comp)

In [None]:
# "Log B/M (-1)": presumably adds the `log_bm` column named in
# variables_dict — confirm in calc_log_bm.
crsp_comp = calc_log_bm(crsp_comp)

In [None]:
# "Return (-2, -12)": presumably adds the `return_12_2` column named in
# variables_dict — confirm in calc_return_12_2.
crsp_comp = calc_return_12_2(crsp_comp)

In [None]:
# "Accruals (-1)": presumably adds the `accruals_final` column named in
# variables_dict — confirm in calc_accruals.
crsp_comp = calc_accruals(crsp_comp)

In [None]:
# "ROA (-1)": presumably adds the `roa` column named in
# variables_dict — confirm in calc_roa.
crsp_comp = calc_roa(crsp_comp)

In [None]:
# "Log Assets Growth (-1)": presumably adds the `log_assets_growth` column
# named in variables_dict — confirm in calc_log_assets_growth.
crsp_comp = calc_log_assets_growth(crsp_comp)

In [None]:
# "Dividend Yield (-1,-12)": presumably adds the `dy` column named in
# variables_dict — confirm in calc_dy.
crsp_comp = calc_dy(crsp_comp)

In [None]:
# "Log Return (-13,-36)": presumably adds the `log_return_13_36` column named
# in variables_dict — confirm in calc_log_return_13_36.
crsp_comp = calc_log_return_13_36(crsp_comp)

In [None]:
# "Log Issues (-1,-12)": presumably adds the `log_issues_12` column named in
# variables_dict — confirm in calc_log_issues_12.
crsp_comp = calc_log_issues_12(crsp_comp)

In [None]:
# "Log Issues (-1,-36)": presumably adds the `log_issues_36` column named in
# variables_dict — confirm in calc_log_issues_36.
crsp_comp = calc_log_issues_36(crsp_comp)

In [None]:
# "Debt/Price (-1)": presumably adds the `debt_price` column named in
# variables_dict — confirm in calc_debt_price.
crsp_comp = calc_debt_price(crsp_comp)

In [None]:
# "Sales/Price (-1)": presumably adds the `sales_price` column named in
# variables_dict — confirm in calc_sales_price.
crsp_comp = calc_sales_price(crsp_comp)

In [None]:
# "Std Dev (-1,-12)": takes the daily CRSP pull as well as the merged frame;
# presumably adds the `rolling_std_252` column named in variables_dict —
# confirm in calc_std_12.
crsp_comp = calc_std_12(crsp_d, crsp_comp)


In [None]:
# "Beta (-1,-36)": takes the daily CRSP stock and index pulls as well as the
# merged frame; presumably adds the `beta` column named in variables_dict —
# confirm in calculate_rolling_beta.
crsp_comp = calculate_rolling_beta(crsp_d, crsp_index_d, crsp_comp)

In [None]:
# Display label -> crsp_comp column name for every variable in the analysis.
# The column names are what gets winsorized; the full mapping is passed to the
# table builders further down.
# NOTE(review): in CRSP naming `retx` usually denotes returns excluding
# dividends — confirm it is the intended series for the "Return (%)" row.
variables_dict = {
    "Return (%)": "retx",
    "Log Size (-1)": "log_size",
    "Log B/M (-1)": "log_bm",
    "Return (-2, -12)": "return_12_2",
    "Log Issues (-1,-12)": "log_issues_12",
    "Accruals (-1)": "accruals_final",
    "ROA (-1)": "roa",
    "Log Assets Growth (-1)": "log_assets_growth",
    "Dividend Yield (-1,-12)": "dy",
    "Log Return (-13,-36)": "log_return_13_36",
    "Log Issues (-1,-36)": "log_issues_36",
    "Beta (-1,-36)": "beta",
    "Std Dev (-1,-12)": "rolling_std_252",
    "Debt/Price (-1)": "debt_price",
    "Sales/Price (-1)": "sales_price",
}
# Winsorize every column listed in variables_dict to limit the influence of
# outliers. The helper receives a dict values view, not a list.
crsp_comp = winsorize(crsp_comp, variables_dict.values())

In [None]:
# Split the winsorized panel into the subsets consumed by the table and figure
# builders below (subset definitions live in get_subsets).
subsets_comp_crsp = get_subsets(crsp_comp)

In [None]:
# Build Table 1 from the subsets, using variables_dict for the label -> column mapping.
table_1 = build_table_1(subsets_comp_crsp, variables_dict)

In [None]:
# Display Table 1 as the cell output.
table_1

In [None]:
# Build Table 2 from the same subsets and label -> column mapping.
table_2 = build_table_2(subsets_comp_crsp, variables_dict)

In [None]:
# Display Table 2 as the cell output.
table_2

In [None]:
# Build Figure 1 from the subsets; the returned object is kept so it can be saved below.
figure_1 = create_figure_1(subsets_comp_crsp)

In [None]:
# Persist both tables and the figure via the project's save_data helper
# (output location and format are decided inside save_data).
save_data(table_1, table_2, figure_1)

In [None]:
# Generate the LaTeX report, then compile it.
# The "_from_pkl" suffix suggests the document is assembled from pickled
# outputs (presumably those written by save_data) — confirm in the helper.
create_latex_document_from_pkl()
compile_latex_document()