In [None]:
# This cell sits before the notebook's import cell, so pandas is imported here
# to keep a fresh-kernel "Run All" from failing with a NameError on `pd`.
import pandas as pd

# Load the manually maintained ticker table.
ticks = pd.read_csv('../data/manual/ticks_v2.csv', sep=",")

# Missing identifiers are filled with 0 so both id columns can be cast to int.
ticks['gvkey'] = ticks['gvkey'].fillna(0.0).astype(int)
ticks['Permco'] = ticks['Permco'].fillna(0.0).astype(int)
ticks.head(10)

In [None]:
from datetime import datetime

import numpy as np
import pandas as pd
import wrds

import config

# Open the WRDS connection that later cells query through `db`;
# the username is read from config.WRDS_USERNAME.
db = wrds.Connection(wrds_username=config.WRDS_USERNAME)

In [None]:
# Map a date onto its calendar-quarter label 'YYYYQ#'
def date_to_quarter(date):
    """Return the 'YYYYQ#' label for an object exposing ``.year`` and ``.month``.

    Months 1-3 map to Q1, 4-6 to Q2, 7-9 to Q3 and 10-12 to Q4.
    """
    quarter_number = (date.month + 2) // 3
    return "{}Q{}".format(date.year, quarter_number)

# Map a quarter label 'YYYYQ#' onto the last calendar day of that quarter
def quarter_to_date(quarter):
    """Return the final day of the quarter named by a 'YYYYQ#' string.

    The first four characters are read as the year and the last character as
    the quarter number.
    """
    year_part, quarter_digit = int(quarter[:4]), int(quarter[-1])
    # First day of the quarter's closing month (March, June, September or December).
    closing_month_start = datetime(year_part, 3 * quarter_digit, 1)
    # Jump to the first day of the following month, then step back one day.
    return closing_month_start + pd.DateOffset(months=1) - pd.DateOffset(days=1)


In [None]:
# Tickers for which fetch_financial_data handed back a tuple instead of a frame.
empty_tickers = []
# Per-ticker frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies all accumulated rows on every pass.
fetched_frames = []

# Iterate over DataFrame rows and fetch data for each ticker
for index, row in ticks.iterrows():
    gvkey = row['gvkey']
    start_date = row['Start Date']
    end_date = row['End Date']

    # NOTE(review): fetch_financial_data is not defined in the visible cells;
    # a tuple return value is treated as "no data for this ticker".
    new_data = fetch_financial_data(gvkey, start_date, end_date)
    if isinstance(new_data, tuple):
        empty_tickers.append({row['Ticker']: gvkey})
    else:
        fetched_frames.append(new_data)

# Reverse so the most recently fetched ticker comes first, matching the row
# order produced by the previous prepend-style concatenation.
if fetched_frames:
    prim_dealers = pd.concat(fetched_frames[::-1], axis=0)
else:
    prim_dealers = pd.DataFrame()
print(empty_tickers)

In [None]:
# Drop exact duplicate rows, then turn each 'YYYYQ#' label in `datafqtr`
# into a date via quarter_to_date.
prim_dealers = (
    prim_dealers
    .drop_duplicates()
    .assign(datafqtr=lambda frame: frame['datafqtr'].apply(quarter_to_date))
)

In [None]:
# Sum the four balance-sheet / equity columns across all rows sharing a quarter.
quarterly_sum_pd = (
    prim_dealers
    .groupby('datafqtr')[['total_assets', 'book_debt', 'book_equity', 'market_equity']]
    .sum()
    .reset_index()
)

In [None]:
# Keep quarters dated 1970-01-01 through 2012-12-31 (both ends inclusive).
# The explicit .copy() makes the result an independent frame, so the column
# assignments in later cells do not raise SettingWithCopyWarning.
quarterly_sum_pd = quarterly_sum_pd[
    quarterly_sum_pd['datafqtr'].between('1970-01-01', '2012-12-31')
].copy()

In [None]:
# Capital ratios: equity / (book debt + equity), once with market equity and
# once with book equity, followed by z-scored copies used for plotting.
quarterly_sum_pd = quarterly_sum_pd.assign(
    market_cap_ratio=lambda f: f['market_equity'] / (f['book_debt'] + f['market_equity']),
    book_cap_ratio=lambda f: f['book_equity'] / (f['book_debt'] + f['book_equity']),
    market_cap_ratio_std=lambda f: (
        (f['market_cap_ratio'] - f['market_cap_ratio'].mean()) / f['market_cap_ratio'].std()
    ),
    book_cap_ratio_std=lambda f: (
        (f['book_cap_ratio'] - f['book_cap_ratio'].mean()) / f['book_cap_ratio'].std()
    ),
)

In [None]:
# Pull the FRED macro series through the project's load_fred helper and give
# the series codes readable column names.
from load_fred import load_fred_macro_data

macro_data = load_fred_macro_data().rename(columns={
    'UNRATE': 'unemp_rate',
    'NFCI': 'nfci',
    'GDP': 'nom_gdp',
    'GDPC1': 'real_gdp',
    'A191RL1Q225SBEA': 'real_gdp_growth',
    'BOGZ1FL664090005Q': 'bd_fin_assets',
    'BOGZ1FL664190005Q': 'bd_liabilities',
})

# Leverage = financial assets / (financial assets - liabilities); the ratio column is its reciprocal.
macro_data['aem_leverage'] = macro_data['bd_fin_assets'].div(
    macro_data['bd_fin_assets'] - macro_data['bd_liabilities']
)
macro_data['aem_leverage_ratio'] = 1 / macro_data['aem_leverage']

# Make sure the index holds datetimes so it can be matched against date columns later.
macro_data.index = pd.to_datetime(macro_data.index)

In [None]:
# import from previous fred data (1969-2012)
# Read a locally saved CSV; its first column becomes the index.
fred_bd_data = pd.read_csv('fred_bd_aem.csv', index_col=0)
fred_bd_data.index = fred_bd_data.index.astype(str)
# Rebuild every label as 'YYYYQ#': characters 0-3 are taken as the year and the
# character at position 5 as the quarter digit.
# NOTE(review): this assumes the raw labels carry the quarter digit at position 5 — confirm against the file.
fred_bd_data.index = fred_bd_data.index.str[:4] + 'Q' + fred_bd_data.index.str[5] 
# Label-based slice on the string index from '1969Q4' to '2012Q4'.
fred_bd_data = fred_bd_data.loc['1969Q4':'2012Q4']
# Replace the 'YYYYQ#' labels with the dates returned by quarter_to_date.
fred_bd_data.index = fred_bd_data.index.to_series().apply(quarter_to_date)

In [None]:
# Alternative AEM leverage built from the locally saved FRED file.
# Per the original author's note: it did not match the plot, but can be used for recent data.
macro_data_2 = pd.DataFrame({
    'bd_fin_assets': fred_bd_data['FL664090005.Q'],
    'bd_liabilities': fred_bd_data['FL664190005.Q'],
})
# Leverage = financial assets / (financial assets - liabilities).
macro_data_2['aem_leverage'] = macro_data_2['bd_fin_assets'].div(
    macro_data_2['bd_fin_assets'] - macro_data_2['bd_liabilities']
)

In [None]:
# Panel A inputs: the two capital ratios joined to AEM leverage on the quarter
# date (default inner join), then indexed by that date.
levels = (
    quarterly_sum_pd[['datafqtr', 'market_cap_ratio', 'book_cap_ratio']]
    .merge(macro_data[['aem_leverage']], left_on='datafqtr', right_index=True)
    .set_index('datafqtr')
)

In [None]:
# AR(1) on the market capital ratio; the factor is the AR(1) residual divided
# by the previous row's ratio.
# NOTE(review): AutoReg is not imported in the visible cells — presumably statsmodels' AutoReg; confirm.
cleaned_data = quarterly_sum_pd['market_cap_ratio'].dropna()
model = AutoReg(cleaned_data, lags=1)
model_fitted = model.fit()

# Align the residuals to the frame's index before scaling, exactly as a column
# assignment would.
quarterly_sum_pd['market_capital_factor'] = (
    model_fitted.resid.reindex(quarterly_sum_pd.index)
    / quarterly_sum_pd['market_cap_ratio'].shift(1)
)
# z-scored version of the factor
quarterly_sum_pd['market_capital_factor_std'] = (
    quarterly_sum_pd['market_capital_factor']
    .sub(quarterly_sum_pd['market_capital_factor'].mean())
    .div(quarterly_sum_pd['market_capital_factor'].std())
)

In [None]:
# AR(1) on the book capital ratio; the factor is the AR(1) residual divided by
# the previous row's ratio.
# NOTE(review): AutoReg is not imported in the visible cells — presumably statsmodels' AutoReg; confirm.
cleaned_data = quarterly_sum_pd['book_cap_ratio'].dropna()
model = AutoReg(cleaned_data, lags=1)
model_fitted = model.fit()

# The residual column stays on the frame (unlike the market-ratio cell, which drops its counterpart).
quarterly_sum_pd['innovations_book_cap'] = model_fitted.resid
quarterly_sum_pd['book_capital_factor'] = quarterly_sum_pd['innovations_book_cap'].div(
    quarterly_sum_pd['book_cap_ratio'].shift(1)
)
# z-scored version of the factor
quarterly_sum_pd['book_capital_factor_std'] = (
    quarterly_sum_pd['book_capital_factor']
    .sub(quarterly_sum_pd['book_capital_factor'].mean())
    .div(quarterly_sum_pd['book_capital_factor'].std())
)

In [None]:
# AEM leverage factor: period-over-period growth in broker-dealer leverage with
# the additive seasonal component removed.
# NOTE(review): seasonal_decompose is not imported in the visible cells — presumably statsmodels'; confirm.
# Missing growth values (including the first row) are set to 0 before decomposing.
macro_data['leverage_growth'] = macro_data['aem_leverage'].pct_change().fillna(0)

decomposition = seasonal_decompose(macro_data['leverage_growth'], model='additive')
macro_data['seasonal'] = decomposition.seasonal
macro_data['aem_leverage_factor'] = macro_data['leverage_growth'].sub(macro_data['seasonal'])

In [None]:
# Panel B inputs: the two capital factors joined to the AEM leverage factor on
# the quarter date (default inner join), then indexed by that date.
factors = (
    quarterly_sum_pd[['datafqtr', 'market_capital_factor', 'book_capital_factor']]
    .merge(macro_data[['aem_leverage_factor']], left_on='datafqtr', right_index=True)
    .set_index('datafqtr')
)

In [None]:
# shiller's e/p
# shiller_cape = load_shiller_pe()

# Read spreadsheet columns A and M, skipping the first 9 rows, and name them date / cape.
shiller_cape = pd.read_excel('Shiller_CAPE.xls', skiprows=9, usecols="A,M")
shiller_cape.columns = ['date', 'cape']

# The dates are parsed below as 'YYYY.MM'. If the column is read as floats,
# October (YYYY.10) stringifies as 'YYYY.1', which '%Y.%m' reads as January.
# Formatting to exactly two decimals first keeps the month unambiguous.
shiller_cape['date'] = pd.to_numeric(shiller_cape['date']).map('{:.2f}'.format)
# Parse and move each date to the end of its month.
shiller_cape['date'] = pd.to_datetime(shiller_cape['date'], format='%Y.%m') + pd.offsets.MonthEnd(0)

# Restrict to 1970-01-01 .. 2012-12-31 (inclusive) and index by date.
shiller_cape = shiller_cape[(shiller_cape['date'] >= '1970-01-01') & (shiller_cape['date'] <= '2012-12-31')]
shiller_cape = shiller_cape.set_index('date')

# Earnings/price is the reciprocal of CAPE.
shiller_cape['e/p'] = 1 / shiller_cape['cape']
shiller_cape

In [None]:
# CRSP value-weighted stock index (volatility)
def pull_CRSP_Value_Weighted_Index():
    """
    Query ``crsp.msi`` for the value-weighted return series.

    Uses the notebook-level ``db`` connection. Rows are restricted to dates
    from 1970-01-01 through 2012-12-31 (inclusive), and ``date`` is passed to
    ``raw_sql`` as a date column.

    Returns:
    - The result of ``db.raw_sql`` holding the columns 'date' and 'vwretd'
      (value-weighted return including dividends).
    """
    sql_query = """
        SELECT date, vwretd
        FROM crsp.msi as msi
        WHERE msi.date >= '1970-01-01' AND msi.date <= '2012-12-31'
        """
    return db.raw_sql(sql_query, date_cols=["date"])

In [None]:
# Quarterly market volatility from the CRSP value-weighted return series.
value_wtd_indx = pull_CRSP_Value_Weighted_Index()

value_wtd_indx['date'] = pd.to_datetime(value_wtd_indx['date'])
value_wtd_indx = value_wtd_indx.set_index('date')

# `vwretd` is already a return, so volatility is the standard deviation of the
# returns themselves within each calendar quarter. Taking pct_change() of a
# return series first (as before) measures the change in returns and blows up
# whenever a return is near zero.
volatility_by_quarter = value_wtd_indx['vwretd'].groupby(pd.Grouper(freq='Q')).std()
# NOTE(review): despite the variable name, no annualisation scaling is applied;
# the series is only renamed to 'mkt_vol'.
annualized_volatility = volatility_by_quarter.rename('mkt_vol')

In [None]:
# Fama-French 5-factor download window
startdate = '19700101'
enddate = '20121231'

# NOTE(review): `web` is not imported in the visible cells — presumably pandas_datareader.data; confirm.
rawdata = web.DataReader(
    'F-F_Research_Data_5_Factors_2x3',
    data_source='famafrench',
    start=startdate,
    end=enddate,
)
# Take entry 0 of the reader result and divide by 100 (presumably percent -> decimal; confirm).
# to_timestamp() turns the period index into timestamps; resample('Q').last()
# then keeps the last observation of each quarter.
# NOTE(review): .last() takes the final observation in the quarter, it does not compound over the quarter.
ff_facs = (
    (rawdata[0] / 100)
    .to_timestamp()
    .resample('Q')
    .last()
    .rename(columns={'Mkt-RF': 'MKT'})
    .rename(columns={'MKT': 'mkt_ret'})
)

In [None]:
# prepare the macro data: inner-join E/P, the FRED series, the market return and
# market volatility on their shared dates
macro_merged = shiller_cape[['e/p']].merge(macro_data[['unemp_rate', 'nfci', 'real_gdp']], left_index=True, right_index=True)
macro_merged = macro_merged.merge(ff_facs[['mkt_ret']], left_index=True, right_index=True)
macro_merged = macro_merged.merge(annualized_volatility, left_index=True, right_index=True)

# Log growth (for factor comparison) of every column except the market return.
# `mkt_ret` is already a return and is reported downstream as "Market excess
# return", so it stays in levels; log(r_t / r_{t-1}) is undefined whenever the
# two returns differ in sign. Column order is unchanged.
# NOTE(review): nfci can presumably change sign as well, which would yield NaN growth — confirm the intended transform.
growth_cols = macro_merged.columns.drop('mkt_ret')
macro_merged_2 = macro_merged.copy()
macro_merged_2[growth_cols] = np.log(macro_merged[growth_cols] / macro_merged[growth_cols].shift(1))

In [None]:
# Join the Panel A level series with the macro variables and apply display labels.
# Labels are mapped by source column name rather than by position, so a change
# in upstream column order cannot silently mislabel a series; errors='raise'
# fails loudly if an expected column is missing.
levels2 = levels.merge(macro_merged, left_index=True, right_index=True)
levels2 = levels2.rename(
    columns={
        'market_cap_ratio': 'Market capital',
        'book_cap_ratio': 'Book capital',
        'aem_leverage': 'AEM leverage',
        'e/p': 'E/P',
        'unemp_rate': 'Unemployment',
        'nfci': 'Financial conditions',
        'real_gdp': 'GDP',
        'mkt_ret': 'Market excess return',
        'mkt_vol': 'Market volatility',
    },
    errors='raise',
)

In [None]:
# Pairwise correlations among the three level series (the first three columns of levels2).
correlation_panelA = levels2[['Market capital', 'Book capital', 'AEM leverage']].corr()

In [None]:
# Correlate each of the three level series with each macro variable.
main_cols = levels2[['Market capital', 'Book capital', 'AEM leverage']]
other_cols = levels2[['E/P', 'Unemployment', 'GDP', 'Financial conditions', 'Market volatility']]

# One column per macro variable, one row per level series.
correlation_results_panelA = pd.DataFrame(
    {macro_name: main_cols.corrwith(macro_series) for macro_name, macro_series in other_cols.items()},
    index=main_cols.columns,
)

In [None]:
# Panel A table: the level-vs-level correlations stacked on top of the
# macro-variable correlations (transposed so macro variables are rows).
pd.concat([correlation_panelA, correlation_results_panelA.transpose()])

In [None]:
# Join the Panel B factor series with the transformed macro variables and apply display labels.
# Labels are mapped by source column name rather than by position, so a change
# in upstream column order cannot silently mislabel a series; errors='raise'
# fails loudly if an expected column is missing.
factors2 = factors.merge(macro_merged_2, left_index=True, right_index=True)
factors2 = factors2.rename(
    columns={
        'market_capital_factor': 'Market capital factor',
        'book_capital_factor': 'Book capital factor',
        'aem_leverage_factor': 'AEM leverage factor',
        'e/p': 'E/P growth',
        'unemp_rate': 'Unemployment growth',
        'nfci': 'Financial conditions growth',
        'real_gdp': 'GDP growth',
        'mkt_ret': 'Market excess return',
        'mkt_vol': 'Market volatility growth',
    },
    errors='raise',
)

In [None]:
# Pairwise correlations among the three factor series (the first three columns of factors2).
correlation_panelB = factors2[['Market capital factor', 'Book capital factor', 'AEM leverage factor']].corr()

In [None]:
# Correlate each of the three factor series with each macro variable.
main_cols = factors2[['Market capital factor', 'Book capital factor', 'AEM leverage factor']]
other_cols = factors2[['Market excess return', 'E/P growth', 'Unemployment growth', 'GDP growth', 'Financial conditions growth', 'Market volatility growth']]

# One column per macro variable, one row per factor series.
correlation_results_panelB = pd.DataFrame(
    {macro_name: main_cols.corrwith(macro_series) for macro_name, macro_series in other_cols.items()},
    index=main_cols.columns,
)

In [None]:
# Panel B table: the factor-vs-factor correlations stacked on top of the
# macro-variable correlations (transposed so macro variables are rows).
pd.concat([correlation_panelB, correlation_results_panelB.transpose()])
