In [11]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
from itertools import chain
import datetime as dt
import zipfile
import gzip
import warnings
warnings.filterwarnings("ignore")

import config
from pathlib import Path

# Project settings come from the local `config` module; directory settings are
# wrapped in Path so they can be joined with the `/` operator further down.
OUTPUT_DIR, DATA_DIR = Path(config.OUTPUT_DIR), Path(config.DATA_DIR)
WRDS_USERNAME = config.WRDS_USERNAME

In [40]:
# Load the gzip-compressed bid/ask price file and order it by bond and date.
all_illiqs = (
    pd.read_csv('..' / DATA_DIR / "pulled" / 'Illiq.csv.gzip', compression='gzip')
    # Rename BEFORE sorting: the sort key 'date' only exists once
    # 'trd_exctn_dt' has been renamed, otherwise a fresh-kernel run raises
    # KeyError. If the file already has a 'date' column the rename is a no-op.
    .rename(columns={'trd_exctn_dt': 'date'})
    .sort_values(by=['cusip_id', 'date'])
    .reset_index(drop=True)
)
all_illiqs

Unnamed: 0,cusip_id,date,prc_bid,prc_ask
0,000325AA8,2002-07-15,101.0000,100.5000
1,000325AA8,2002-07-24,100.9370,100.2685
2,000325AA8,2002-07-26,100.7500,100.5000
3,000325AA8,2002-08-07,96.0000,94.0000
4,000325AA8,2002-10-25,101.2500,101.0000
...,...,...,...,...
16987309,Y7542C122,2017-04-04,24.7505,24.6992
16987310,Y7542C122,2017-04-05,24.7936,24.7164
16987311,Y7542C122,2017-04-06,24.7318,24.6907
16987312,Y7542C122,2017-04-07,24.8126,24.6949


In [39]:
# Load the ratings file, keep the identifier/date/rating columns, order the
# rows by bond and rating date, and align the key names with `all_illiqs`
# ('cusip_id', 'date') so both tables can be stacked later.
rating = (
    pd.read_csv('..' / DATA_DIR / "pulled" / 'sp_ratings_with_CUSIP.csv')
    .loc[:, ['complete_cusip', 'rating_date', 'rating', 'category']]
    .sort_values(by=['complete_cusip', 'rating_date'])
    .reset_index(drop=True)
    .rename(columns={'complete_cusip': 'cusip_id', 'rating_date': 'date'})
)
rating

Unnamed: 0,cusip_id,date,rating,category
0,000305AA0,2004-11-09,B-,Junk
1,000305AA0,2006-06-05,CCC+,Junk
2,000305AA0,2009-09-04,CCC,Junk
3,000305AB8,2004-11-09,B-,Junk
4,000305AB8,2006-06-05,CCC+,Junk
...,...,...,...,...
636407,Y9695NAG8,2004-03-09,BBB-,Junk
636408,Y9695NAG8,2005-06-06,BBB,BBB
636409,Y9695NAG8,2005-09-27,BBB+,BBB
636410,ZR6553955,2019-09-16,A-,BBB


# 1. Merge bid/ask prices with ratings

In [47]:
# Tag every row with its origin so price rows can be told apart from rating
# rows once the two tables are stacked: 'A' = bid/ask row, 'R' = rating row.
all_illiqs['source'] = 'A'
rating['source'] = 'R'

# Stack both tables and order each bond's rows chronologically. On equal dates
# 'A' sorts before 'R', so a rating dated the same day as a price row is only
# carried onto later price rows. ignore_index gives a unique RangeIndex, which
# the index-aligned assignment below relies on (concat leaves duplicate labels).
all_df = pd.concat([all_illiqs, rating], axis=0)
all_df = all_df.sort_values(by=['cusip_id', 'date', 'source'], ignore_index=True)

# Forward-fill every non-key column within each bond. GroupBy.ffill is the
# vectorised equivalent of groupby(...).apply(lambda g: g.ffill()): it avoids a
# Python call per bond and does not depend on apply() operating on the grouping
# column (deprecated in recent pandas). It returns the filled columns without
# the key, so 'cusip_id' is re-attached and the original column order restored.
carried_cols = [col for col in all_df.columns if col != 'cusip_id']
all_df_filled = (
    all_df.groupby('cusip_id')[carried_cols]
    .ffill()
    .assign(cusip_id=all_df['cusip_id'])
    .loc[:, list(all_df.columns)]
)

# Keep only price rows that have picked up a rating category; price rows dated
# before a bond's first rating have nothing to inherit and are dropped.
is_rated_price_row = (all_df_filled['source'] == 'A') & all_df_filled['category'].notna()
all_df_filled = all_df_filled[is_rated_price_row].reset_index(drop=True)
all_df_filled

Unnamed: 0,cusip_id,date,prc_bid,prc_ask,source,rating,category
0,000325AA8,2002-07-15,101.0000,100.5000,A,B+,Junk
1,000325AA8,2002-07-24,100.9370,100.2685,A,B+,Junk
2,000325AA8,2002-07-26,100.7500,100.5000,A,B+,Junk
3,000325AA8,2002-08-07,96.0000,94.0000,A,B+,Junk
4,000325AA8,2002-10-25,101.2500,101.0000,A,B+,Junk
...,...,...,...,...,...,...,...
16316908,U70577AL2,2010-10-01,100.0113,100.0091,A,BBB,BBB
16316909,U70577AL2,2010-10-04,100.0000,100.0000,A,BBB,BBB
16316910,U70577AL2,2010-10-05,100.0018,100.0006,A,BBB,BBB
16316911,U70577AL2,2010-10-06,100.0000,100.0000,A,BBB,BBB


In [48]:
# Number of distinct bonds (cusip_id values) left after the merge.
all_df_filled['cusip_id'].nunique(dropna=False)

49941

# 2. Calculate spread and bias

In [56]:
# Mean bid and ask price for every (date, rating category) cell, computed in a
# single groupby pass. NOTE: despite the 'vw_' prefix, .mean() is an unweighted
# average across the bonds in each cell.
price_means = all_df_filled.groupby(['date', 'category'])[['prc_bid', 'prc_ask']].mean()
res_df = price_means.rename(
    columns={'prc_bid': 'vw_prc_bid_mean', 'prc_ask': 'vw_prc_ask_mean'}
)

mean_bid = res_df['vw_prc_bid_mean']
mean_ask = res_df['vw_prc_ask_mean']

# Spread: bid-minus-ask gap relative to the bid/ask midpoint, scaled by 10,000.
res_df['bid_ask_spread'] = 2 * (mean_bid - mean_ask) / (mean_ask + mean_bid) * 10000
# Bias: squared half-spread ((ask - bid) / (ask + bid))^2, scaled by 10,000.
res_df['bid_ask_bias'] = ((mean_ask - mean_bid) / (mean_ask + mean_bid)) ** 2 * 10000
res_df

Unnamed: 0_level_0,Unnamed: 1_level_0,vw_prc_bid_mean,vw_prc_ask_mean,bid_ask_spread,bid_ask_bias
date,category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002-07-01,A and above,102.067324,100.486404,156.098862,0.609171
2002-07-01,BBB,100.558523,99.211375,134.869993,0.454748
2002-07-01,Junk,77.324646,75.022403,302.236589,2.283674
2002-07-02,A and above,102.179769,100.857976,130.201789,0.423813
2002-07-02,BBB,100.178278,98.807156,137.811231,0.474798
...,...,...,...,...,...
2023-06-29,BBB,92.916897,92.813097,11.177563,0.003123
2023-06-29,Junk,90.999232,90.853390,16.039630,0.006432
2023-06-30,A and above,92.387767,92.305900,8.865254,0.001965
2023-06-30,BBB,92.584041,92.500223,9.057293,0.002051


In [57]:
df = res_df.copy().reset_index()
df['date'] = pd.to_datetime(df['date'])

# Subsample windows; both endpoints are inclusive.
subsamples = {
    'Full sample': ('2002-07-01', '2022-09-30'),
    'Pre-crisis': ('2002-07-01', '2007-06-30'),
    'Crisis': ('2007-07-01', '2009-04-30'),
    'Post-Crisis': ('2009-05-01', '2012-05-31'),
    'Basel II.5 & III': ('2012-06-01', '2014-03-31'),
    'Post-Volcker': ('2014-04-01', '2022-09-30'),
    'All': (df['date'].min(), df['date'].max())  # Entire dataset range
}

metric_columns = ['bid_ask_spread', 'bid_ask_bias']
category_labels = ['A and above', 'BBB', 'Junk']
rating_categories = ['Full'] + category_labels


def summarize_subsample(panel, start_date, end_date):
    """Average each metric over one date window.

    Parameters
    ----------
    panel : pandas.DataFrame
        Frame with a datetime 'date' column, a 'category' column and the
        columns listed in `metric_columns`.
    start_date, end_date : str or pandas.Timestamp
        Inclusive bounds of the window.

    Returns
    -------
    dict
        Maps '<metric>_mean' to a list ordered like `rating_categories`: the
        mean over all rows in the window, then one mean per rating category.
    """
    window = panel[(panel['date'] >= start_date) & (panel['date'] <= end_date)]
    # Reindex on the explicit labels so each value stays attached to its own
    # category: a category absent from the window becomes NaN instead of
    # shifting the remaining values under the wrong heading.
    by_category = window.groupby('category')[metric_columns].mean().reindex(category_labels)
    return {
        f'{column}_mean': [window[column].mean()] + by_category[column].tolist()
        for column in metric_columns
    }


# Mean values per subsample, keyed by subsample label.
mean_values = {
    label: summarize_subsample(df, start_date, end_date)
    for label, (start_date, end_date) in subsamples.items()
}

# One row per subsample, columns are (metric, rating category) pairs. Built in
# one shot; the previous loop variable named `rating` is gone because it
# overwrote the `rating` DataFrame loaded earlier in the notebook.
summary_columns = pd.MultiIndex.from_product(
    [[f'{column}_mean' for column in metric_columns], rating_categories]
)
df_final = pd.DataFrame(
    [
        [value for values in metrics.values() for value in values]
        for metrics in mean_values.values()
    ],
    index=list(mean_values),
    columns=summary_columns,
)

# Show the final DataFrame
df_final

Unnamed: 0_level_0,bid_ask_spread_mean,bid_ask_spread_mean,bid_ask_spread_mean,bid_ask_spread_mean,bid_ask_bias_mean,bid_ask_bias_mean,bid_ask_bias_mean,bid_ask_bias_mean
Unnamed: 0_level_1,Full,A and above,BBB,Junk,Full,A and above,BBB,Junk
Full sample,33.202399,29.682652,31.247823,38.676571,0.05475,0.034539,0.043355,0.086353
Pre-crisis,49.661822,45.47427,43.230618,60.2839,0.132916,0.066802,0.091443,0.240557
Crisis,54.116655,56.185112,51.739759,54.425093,0.097867,0.089029,0.092873,0.1117
Post-Crisis,39.722194,37.047896,39.95042,42.175132,0.046808,0.042703,0.045547,0.052185
Basel II.5 & III,27.140776,22.050811,26.66222,32.719278,0.021667,0.01366,0.019128,0.032226
Post-Volcker,18.365995,14.078439,17.973092,23.04203,0.010743,0.006139,0.009747,0.016338
All,32.424595,28.876916,30.499576,37.908296,0.053241,0.033296,0.041879,0.084611
