# Imports

In [1]:
import polars as pl

# Load Data from openbondassetpricing.com

For the feature definitions, refer to the PDF in the `docs` folder.

In [12]:
def load_bond_data(fetch_online: bool = False) -> pl.DataFrame:
    """
    Load the WRDS MMN-corrected bond dataset into a polars DataFrame.

    Parameters
    ----------
    fetch_online : bool, optional
        If True, read the CSV from openbondassetpricing.com; otherwise
        (the default) read the copy stored in the ``../data`` folder.

    Returns
    -------
    pl.DataFrame
        The bond data, read with ``try_parse_dates=True``.
    """
    remote_csv = "https://openbondassetpricing.com/wp-content/uploads/2024/04/WRDS_MMN_Corrected_Data_2024.csv"
    local_csv = "../data/WRDS_MMN_Corrected_Data_2024.csv"

    # Both sources go through the same reader call, so only the location differs.
    source = remote_csv if fetch_online else local_csv
    return pl.read_csv(source, try_parse_dates=True)

In [13]:
# Fetch the dataset from the website (fetch_online=True) instead of the local CSV.
data_obap = load_bond_data(fetch_online=True)

# Preview the first rows.
data_obap.head()

Unnamed: 0_level_0,date,cusip,exretn_t+1,exretnc_t+1,exretnc_dur_t+1,bond_ret_t+1,bond_ret,exretn,exretnc_dur,rating,cs,cs_6m_delta,bond_yield,bond_amount_out,offering_amt,bondprc,perc_par,tmt,duration,ind_num_17,sic_code,mom6_1,ltrev48_12,BOND_RET,ILLIQ,var95,n_trades_month,size_ig,size_jk,BOND_YIELD,CS,BONDPRC,PRFULL,DURATION,CONVEXITY,CS_6M_DELTA,bond_value,BOND_VALUE,coupon,bond_type,principal_amt,bondpar_mil
i64,date,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64
0,2002-08-31,"""000336AE7""",-0.056089,-0.087082,-0.081158,-0.054689,-0.008212,-0.009612,-0.023343,8.0,0.043019,,0.073689,100000.0,100000.0,97.693,0.97693,70.033333,4.585289,,6211,0.0,0.009058,0.023939,3.597388,,2.0,0.0,1.0,0.064316,0.032526,102.092,103.75346,4.63593,26.445978,,9769300.0,10209200.0,6.875,"""CDEB""",1000.0,100.0
5794,2002-09-30,"""000336AE7""",0.050663,0.057193,0.054383,0.052063,-0.054689,-0.056089,-0.081158,8.0,0.063356,,0.087596,100000.0,100000.0,91.691494,0.916915,69.033333,4.437498,,6211,-0.008212,-0.037939,-0.009819,34.628775,,2.0,0.0,1.0,0.075301,0.049044,97.0,99.138889,4.514543,25.309944,,9169149.0,9700000.0,6.875,"""CDEB""",1000.0,100.0
11572,2002-10-31,"""000336AE7""",0.07968,0.098591,0.095571,0.08088,0.052063,0.050663,0.054383,10.0,0.052394,,0.077672,100000.0,100000.0,96.0,0.96,68.0,4.404651,,6211,-0.062452,-0.027926,0.080031,,,1.0,0.0,1.0,0.074274,0.045408,97.5,100.288197,4.428541,24.496262,,9600000.0,9750000.0,6.875,"""CDEB""",1000.0,100.0
17371,2002-11-30,"""000336AE7""",,,,,0.08088,0.07968,0.095571,10.0,0.030568,,0.061289,100000.0,100000.0,103.431,1.03431,67.0,4.40485,,6211,-0.01364,-0.020827,0.029465,-33.329326,,3.0,0.0,1.0,0.068748,0.039786,100.0,103.361111,4.374959,23.981731,,10343100.0,10000000.0,6.875,"""CDEB""",1000.0,100.0
23064,2002-12-31,"""000336AE7""",,,,,,,,10.0,0.051327,,0.07661,100000.0,100000.0,,,65.966667,4.401693,,6211,0.066137,-0.032151,0.023574,,,1.0,0.0,1.0,0.107563,0.079508,84.25,84.440971,4.321904,22.989123,,,8425000.0,6.875,"""CDEB""",1000.0,100.0


Loading the data from the WRDS Query

In [10]:
# Read the gzip-compressed CSV exported from the WRDS query.
data_wrds = pl.read_csv("../data/wrds_bond_data.csv.gz")

# Preview the first rows.
data_wrds.head()

  data_wrds = pd.read_csv("../data/wrds_bond_data.csv.gz")


Unnamed: 0,DATE,ISSUE_ID,CUSIP,bond_sym_id,bsym,ISIN,company_symbol,BOND_TYPE,SECURITY_LEVEL,CONV,...,RET_LDM,RET_L5M,TMT,REMCOUPS,DURATION,DEFAULTED,DEFAULT_DATE,DEFAULT_TYPE,REINSTATED,REINSTATED_DATE
0,2002-07-31,2,000361AB1,AIR.GA,,US000361AB18,AIR,CDEB,SEN,0,...,,,1.23,3.0,1.13,N,,,,
1,2002-08-31,2,000361AB1,AIR.GA,,US000361AB18,AIR,CDEB,SEN,0,...,,,1.14,3.0,1.05,N,,,,
2,2002-09-30,2,000361AB1,AIR.GA,,US000361AB18,AIR,CDEB,SEN,0,...,,,1.06,3.0,0.97,N,,,,
3,2002-11-30,2,000361AB1,AIR.GA,,US000361AB18,AIR,CDEB,SEN,0,...,,,0.89,2.0,0.84,N,,,,
4,2002-12-31,2,000361AB1,AIR.GA,,US000361AB18,AIR,CDEB,SEN,0,...,,,0.8,2.0,0.75,N,,,,


In [16]:
data_wrds['DATE'] = pd.to_datetime(data_wrds['DATE'])

In [17]:
merged_bonds_data = pd.merge(data_obap, data_wrds, how='left', left_on=['cusip', 'date'], right_on=['CUSIP', 'DATE'])

In [19]:
merged_bonds_data.to_csv('../data/bond_data_final.csv')

In [18]:
merged_bonds_data.to_hdf('../data/bond_data_final.hdf5', key='bond_data', complevel=7, format='table')

KeyboardInterrupt: 

# Merging with Equity Data

Load final version of bond data

In [14]:
# NOTE(review): the export cell writes '../data/bond_data_final.csv' but this reads
# from '../../data/' — confirm which directory is correct for the notebook's location.
final_bond_data = pl.read_csv('../../data/bond_data_final.csv', try_parse_dates=True)

In [15]:
# Preview the first rows of the final bond data.
final_bond_data.head()

Unnamed: 0_level_0,date,cusip,exretn_t+1,exretnc_t+1,exretnc_dur_t+1,bond_ret_t+1,bond_ret,exretn,exretnc_dur,rating,cs,cs_6m_delta,bond_yield,bond_amount_out,offering_amt,bondprc,perc_par,tmt,duration,ind_num_17,sic_code,mom6_1,ltrev48_12,BOND_RET,ILLIQ,var95,n_trades_month,size_ig,size_jk,BOND_YIELD,CS,BONDPRC,PRFULL,DURATION_x,CONVEXITY,CS_6M_DELTA,…,NCOUPS,AMOUNT_OUTSTANDING,R_SP,R_MR,R_FR,N_SP,N_MR,N_FR,RATING_NUM,RATING_CAT,RATING_CLASS,T_DATE,T_Volume,T_DVolume,T_Spread,T_Yld_Pt,YIELD,PRICE_EOM,PRICE_LDM,PRICE_L5M,GAP,COUPMONTH,nextcoup,COUPAMT,COUPACC,MULTICOUPS,RET_EOM,RET_LDM,RET_L5M,TMT,REMCOUPS,DURATION_y,DEFAULTED,DEFAULT_DATE,DEFAULT_TYPE,REINSTATED,REINSTATED_DATE
i64,date,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,str,str,str,f64,f64,f64,f64,str,str,date,str,str,str,f64,str,f64,f64,f64,f64,f64,date,f64,f64,f64,str,str,str,f64,f64,f64,str,str,str,str,str
0,2002-08-31,"""000336AE7""",-0.056089,-0.087082,-0.081158,-0.054689,-0.008212,-0.009612,-0.023343,8.0,0.043019,,0.073689,100000.0,100000.0,97.693,0.97693,70.033333,4.585289,,6211,0.0,0.009058,0.023939,3.597388,,2.0,0.0,1.0,0.064316,0.032526,102.092,103.75346,4.63593,26.445978,,…,2.0,100000.0,"""BBB+""","""BAA2""","""BBB+""",8.0,9.0,8.0,8.0,"""BBB""","""0.IG""",2002-08-30,"""$119,000""","""$118,742""",,6.918,"""7.369%""",97.7,97.7,97.7,1.0,44.0,2002-12-01,0.0,1.71,1.0,"""-.8212%""",,"""-.8212%""",5.84,12.0,4.59,"""N""",,,,
1,2002-09-30,"""000336AE7""",0.050663,0.057193,0.054383,0.052063,-0.054689,-0.056089,-0.081158,8.0,0.063356,,0.087596,100000.0,100000.0,91.691494,0.916915,69.033333,4.437498,,6211,-0.008212,-0.037939,-0.009819,34.628775,,2.0,0.0,1.0,0.075301,0.049044,97.0,99.138889,4.514543,25.309944,,…,2.0,100000.0,"""BBB+""","""BAA2""","""BBB+""",8.0,9.0,8.0,8.0,"""BBB""","""0.IG""",2002-09-30,"""$618,000""","""$568,962""","""0.77%""",8.6558,"""8.760%""",91.7,91.7,91.7,1.0,45.0,2002-12-01,0.0,2.28,1.0,"""-5.469%""","""-5.469%""","""-5.469%""",5.75,12.0,4.44,"""N""",,,,
2,2002-10-31,"""000336AE7""",0.07968,0.098591,0.095571,0.08088,0.052063,0.050663,0.054383,10.0,0.052394,,0.077672,100000.0,100000.0,96.0,0.96,68.0,4.404651,,6211,-0.062452,-0.027926,0.080031,,,1.0,0.0,1.0,0.074274,0.045408,97.5,100.288197,4.428541,24.496262,,…,2.0,100000.0,"""BBB-""","""BAA2""","""BBB+""",10.0,9.0,8.0,10.0,"""BBB""","""0.IG""",2002-10-31,"""$11,510,000""","""$11,001,450""",,7.855,"""7.767%""",96.0,96.0,96.0,1.0,46.0,2002-12-01,0.0,2.86,1.0,"""5.206%""","""5.206%""","""5.206%""",5.67,12.0,4.4,"""N""",,,,
3,2002-11-30,"""000336AE7""",,,,,0.08088,0.07968,0.095571,10.0,0.030568,,0.061289,100000.0,100000.0,103.431,1.03431,67.0,4.40485,,6211,-0.01364,-0.020827,0.029465,-33.329326,,3.0,0.0,1.0,0.068748,0.039786,100.0,103.361111,4.374959,23.981731,,…,2.0,100000.0,"""BBB-""","""BAA2""","""BBB+""",10.0,9.0,8.0,10.0,"""BBB""","""0.IG""",2002-11-27,"""$38,526,000""","""$33,498,709""","""0.73%""",9.9963,"""6.129%""",103.4,,103.4,1.0,47.0,2002-12-01,0.0,3.43,1.0,"""8.088%""",,"""8.088%""",5.58,12.0,4.4,"""N""",,,,
4,2002-12-31,"""000336AE7""",,,,,,,,10.0,0.051327,,0.07661,100000.0,100000.0,,,65.966667,4.401693,,6211,0.066137,-0.032151,0.023574,,,1.0,0.0,1.0,0.107563,0.079508,84.25,84.440971,4.321904,22.989123,,…,2.0,100000.0,"""BBB-""","""BAA2""","""BBB+""",10.0,9.0,8.0,10.0,"""BBB""","""0.IG""",2002-12-13,"""$4,090,000""","""$3,435,014""","""1.66%""",10.8358,"""7.661%""",96.6,,,1.0,48.0,2003-06-01,3.44,0.57,1.0,"""-5.884%""",,,5.5,11.0,4.4,"""N""",,,,


Export the list of cusips

In [26]:
# Distinct, non-null cusips in sorted order, so the exported list is identical
# from run to run (unique() alone returns rows in no guaranteed order) and a
# missing cusip cannot break the string concatenation when writing.
cusips = final_bond_data.select('cusip').drop_nulls().unique().sort('cusip')

# One cusip per line.
with open('../../data/cusips_list.txt', 'w') as file:
    file.writelines(f'{cusip}\n' for cusip in cusips.get_column('cusip'))

Download the CSV linking CUSIP to PERMNO from WRDS, and import it.

In [27]:
# Load the CUSIP -> PERMNO/PERMCO link table downloaded from WRDS.
cusip_permno_mapping = pl.read_csv('../../data/cusip_to_permno.csv.gz')

# Preview; the *_startdt / *_enddt columns are read as plain strings here.
cusip_permno_mapping.head()

cusip,PERMNO,PERMCO,trace_startdt,trace_enddt,crsp_startdt,crsp_enddt,link_startdt,link_enddt
str,i64,i64,str,str,str,str,str,str
"""000336AE7""",75188,21651,"""2002-07-05""","""2008-05-19""","""2002-01-02""","""2007-09-28""","""2002-07-05""","""2007-09-28"""
"""000361AB1""",54594,20000,"""2002-07-05""","""2003-05-22""","""2002-01-02""","""2023-12-29""","""2002-07-05""","""2003-05-22"""
"""000361AC9""",54594,20000,"""2002-07-03""","""2007-06-14""","""2002-01-02""","""2023-12-29""","""2002-07-03""","""2007-06-14"""
"""000361AQ8""",54594,20000,"""2013-01-30""","""2015-04-23""","""2002-01-02""","""2023-12-29""","""2013-01-30""","""2015-04-23"""
"""00037BAA0""",88953,41444,"""2012-05-03""","""2012-05-04""","""2008-04-25""","""2016-08-31""","""2012-05-03""","""2012-05-04"""


In [30]:
# Attach the CRSP identifiers (PERMNO, PERMCO) to every bond row by cusip.
# NOTE(review): the link table can hold several rows per cusip (the check in the
# next cell finds cusips with 2-4 distinct PERMNOs), so this join repeats the bond
# rows of those cusips once per link. link_startdt / link_enddt are not used here —
# confirm whether links should be restricted to the ones valid on each row's date.
bond_data_with_permnos = final_bond_data.join(
    cusip_permno_mapping.select(['cusip', 'PERMNO', 'PERMCO']),
    on='cusip',
    how='left',
)

In [42]:
# How many distinct PERMNOs does each cusip map to? Tabulate that distribution.
# Only PERMNO is aggregated (instead of n_unique over every column), and the
# tabulation is sorted by count so the display is stable between runs.
(
    bond_data_with_permnos
    .group_by('cusip')
    .agg(pl.col('PERMNO').n_unique())
    .get_column('PERMNO')
    .value_counts(sort=True)
)

PERMNO,count
u32,u32
4,3
1,24353
2,1621
3,93


In [46]:
# (number of distinct PERMNO values, counting a null group if present; number of columns).
# Same tuple as the shape of the per-PERMNO n_unique table, without aggregating every column.
(
    bond_data_with_permnos.get_column('PERMNO').n_unique(),
    bond_data_with_permnos.width,
)

(2069, 103)

In [48]:
# Number of distinct PERMNOs on each date. group_by returns groups in no guaranteed
# order, so sort by date before dropping it: the counts then read chronologically.
(
    bond_data_with_permnos
    .group_by('date')
    .agg(pl.col('PERMNO').n_unique())
    .sort('date')
    .select('PERMNO')
)

PERMNO
u32
1036
1026
1050
1188
1164
…
1089
979
1139
1155
