In [1]:
!pip install -r ../requirements.txt

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
import pandas as pd
import QuantLib as ql
import numpy as np
import matplotlib.pyplot as plt 
from typing import Literal
import math
import warnings
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 50)

# Load Bond Returns data and Zero Rate Curve

## Bond Returns + Data Cleaning

For the definitions of the features, refer to the pdf in the `docs` folder

Variable Summary: 

* `cs`: credit spread computed as bond_yield in excess of duration-matched portfolio of US Treasuries yields
* `tmt`: time to maturity (in months, I guess)
* `ind_num_17`: Fama-French 17 Industry Classification (may be useful for value factor)
* `size_ig`, `size_jk`: dummy for (respectively) IG/HY bonds in the BAML/ICE index
* `bond_type`: US Corporate Convertible (CCOV), US Corporate Debentures (CDEB), US Corporate Medium Term Note (CMTN), US Corporate Medium Term Note Zero (CMTZ), or US Corporate Paper (CP)
* `R_FR`, `N_FR` and co.: rating as names and one-hot encoded, probably from different providers (SP may be S&P, FR Fitch Ratings, MR Moody's
* `INTEREST_FREQUENCY`: e.g. 13 for variable coupon, -1 for NA, 14 for bi-monthly, 15-16 undocumented
* `DATED_DATE`: date from which the bond interest accrues
* Additional Info on variables: FISD data dictionary 2012 document

Prices Variables

* `PRICE_EOM`: considers all trading days and takes the last
* `PRICE_LDM`: consider only last trading day of the month otherwise NaN
* `PRICE_L5M`: consider only last 5 trading days of the month otherwise NaN
* `T_SPREAD`: average trade-weighted bid-ask spread
* `T_YLD_PT`: average trade-weighted yield point
* `T_VOLUME`: volume traded during the month, par-value
* `T_DVOLUME`: volume traded during the month, dollar value
* `bondprc` is adjusted for MMN, `BONDPRC` is unadjusted

Other notes

* We may want to remove defaulted bonds (check if they were actually already removed)

In [3]:
date_cols = [
    'date', 
    'MATURITY',
    'OFFERING_DATE', 
    'FIRST_INTEREST_DATE', 
    'LAST_INTEREST_DATE',
    'nextcoup',
    'DEFAULT_DATE',
    'REINSTATED_DATE',
    'DATED_DATE'
]

bond_data = pd.read_csv('../data/bond_data_final.csv', index_col=0, parse_dates=date_cols)

bond_data.iloc[:2,:10]

  bond_data = pd.read_csv('../data/bond_data_final.csv', index_col=0, parse_dates=date_cols)


Unnamed: 0,date,cusip,exretn_t+1,exretnc_t+1,exretnc_dur_t+1,bond_ret_t+1,bond_ret,exretn,exretnc_dur,rating
0,2002-08-31,000336AE7,-0.056089,-0.087082,-0.081158,-0.054689,-0.008212,-0.009612,-0.023343,8.0
1,2002-09-30,000336AE7,0.050663,0.057193,0.054383,0.052063,-0.054689,-0.056089,-0.081158,8.0


In [4]:
# merge duration across the two dataframes
bond_data['duration'] = bond_data['DURATION_y'].fillna(bond_data['DURATION_x'])
bond_data.drop(['DURATION_x', 'DURATION_y'], axis=1, inplace=True)

In [5]:
useless_cols = [
    'company_symbol', # we do not need it
    'TREASURY_MATURITY', # we do not need it
    'CONV', # convertible bonds (we already removed all convertibles) 
    'sic_code', # SIC Industry Code (we don't need it) 
    'mom6_1', # 6m momentum (we don't need it) 
    'ltrev48_12', # sum of bond returns over 48months (momentum) 
    'TMT', # time to maturity in years (we have it in months in tmt) and TMT has NaNs while tmt does not
    'DATE', # has NaNs while date does not
    'CUSIP', # has Nans while cusip does not
    'COUPON', # repeated in coupon and COUPON has NaNs while coupon does not
]

bond_data.drop(useless_cols, axis=1, inplace=True, errors='ignore')

In [6]:
# fill all DATED_DATE, OFFERING_DATE, MATURITY, and DAY_COUNT_BASIS since they are all the same for each issue
cols_to_fill = ['DATED_DATE', 'OFFERING_DATE', 'DAY_COUNT_BASIS', 'MATURITY', 'NCOUPS']
grouped = bond_data.groupby('cusip')
def fill_dates(group): 
    group[cols_to_fill] = group[cols_to_fill].ffill().bfill()
    return group 

filled_data = grouped.apply(fill_dates, include_groups=False)
bond_data = filled_data.reset_index(0)

  group[cols_to_fill] = group[cols_to_fill].ffill().bfill()
  group[cols_to_fill] = group[cols_to_fill].ffill().bfill()
  group[cols_to_fill] = group[cols_to_fill].ffill().bfill()
  group[cols_to_fill] = group[cols_to_fill].ffill().bfill()
  group[cols_to_fill] = group[cols_to_fill].ffill().bfill()


In [7]:
# remove the CUSIPS for which there is no DATED_DATE, OFFERING_DATE and DAY COUNT BASIS? we should probably do that
new_bond_data = bond_data.dropna(subset=cols_to_fill)

removed_cusips = bond_data['cusip'].nunique() - new_bond_data['cusip'].nunique()
bond_data = new_bond_data

print(f'Removed {removed_cusips} CUSIPs from the DataFrame')

Removed 6 CUSIPs from the DataFrame


In [8]:
required_cols = ['coupon', 'date'] + cols_to_fill

bond_data[required_cols].isna().sum()

coupon             0
date               0
DATED_DATE         0
OFFERING_DATE      0
DAY_COUNT_BASIS    0
MATURITY           0
NCOUPS             0
dtype: int64

Great, now we have all variables needed for the analysis!

# Load the historical zero curve

In [9]:
yield_curve = pd.read_csv('../data/yield_panel_monthly_frequency_daily_maturity.csv', index_col=0)
yield_curve.index = pd.to_datetime(yield_curve.index)
yield_curve.drop('MAX_DATA_TTM', axis=1, inplace=True)  

In [10]:
yield_curve.dtypes

1        float64
2        float64
3        float64
4        float64
5        float64
          ...   
10946    float64
10947    float64
10948    float64
10949    float64
10950    float64
Length: 10950, dtype: object

In [11]:
yield_curve = yield_curve.resample('ME').last()


In [12]:
yield_curve.iloc[:10, :10]

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
1961-06-30,0.022809,0.02281,0.022811,0.022813,0.022814,0.022816,0.022817,0.022819,0.022821,0.022823
1961-07-31,0.020838,0.020839,0.02084,0.020841,0.020843,0.020845,0.020847,0.020849,0.020851,0.020854
1961-08-31,0.021072,0.021074,0.021075,0.021077,0.02108,0.021083,0.021086,0.021089,0.021093,0.021098
1961-09-30,0.021769,0.02177,0.021772,0.021774,0.021776,0.021779,0.021782,0.021785,0.021789,0.021793
1961-10-31,0.022248,0.022249,0.02225,0.022252,0.022254,0.022256,0.022259,0.022261,0.022265,0.022268
1961-11-30,0.024908,0.024909,0.02491,0.024912,0.024914,0.024915,0.024918,0.02492,0.024922,0.024925
1961-12-31,0.026685,0.026687,0.026688,0.02669,0.026692,0.026694,0.026696,0.026699,0.026701,0.026704
1962-01-31,0.026356,0.026357,0.026359,0.02636,0.026362,0.026364,0.026366,0.026369,0.026372,0.026375
1962-02-28,0.026842,0.026843,0.026845,0.026846,0.026848,0.02685,0.026852,0.026854,0.026857,0.026859
1962-03-31,0.027758,0.027759,0.02776,0.027762,0.027763,0.027764,0.027766,0.027767,0.027769,0.02777


# OAS Calculation with QuantLib

OAS is the spread that added to the zero rates in the pricing function returns the price of the bond. We use the Newton method to get a solution for the OAS. In our case, since we stripped bonds with optionality, the OAS is the Z-Spread

In [23]:
mask = (bond_data['tmt'] / 12) < 30

bond_data[mask].shape

(1363499, 88)

In [24]:
bond_data[mask].shape[0] / bond_data.shape[0]

0.9791129596305004

In [25]:
bond_data = bond_data[mask]

In [26]:
bond_data['bondprc'].isna().sum() / bond_data.shape[0]

0.14985049493985694

In [27]:
mask_bond = (bond_data['cusip'] == '00103YAE1') &(bond_data['date'] == '2002-08-31') 
example_bond = bond_data.loc[mask_bond].iloc[0] 

example_bond[:5]

cusip                        00103YAE1
date               2002-08-31 00:00:00
exretn_t+1                         NaN
exretnc_t+1                        NaN
exretnc_dur_t+1                    NaN
Name: 1930, dtype: object

In [28]:
bond_data[bond_data['bondprc'] < 2]

Unnamed: 0,cusip,date,exretn_t+1,exretnc_t+1,exretnc_dur_t+1,bond_ret_t+1,bond_ret,exretn,exretnc_dur,rating,cs,cs_6m_delta,bond_yield,bond_amount_out,offering_amt,bondprc,perc_par,tmt,duration,ind_num_17,BOND_RET,ILLIQ,var95,n_trades_month,size_ig,...,RATING_CLASS,T_DATE,T_Volume,T_DVolume,T_Spread,T_Yld_Pt,YIELD,PRICE_EOM,PRICE_LDM,PRICE_L5M,GAP,COUPMONTH,nextcoup,COUPAMT,COUPACC,MULTICOUPS,RET_EOM,RET_LDM,RET_L5M,REMCOUPS,DEFAULTED,DEFAULT_DATE,DEFAULT_TYPE,REINSTATED,REINSTATED_DATE
46877,014477AG8,2009-02-28,,,,,-0.17349,-0.17359,,16.0,,,,400000.0,400000.0,1.25,0.0125,94.9,0.13031,9.0,-0.810151,-138.167622,,17.0,1.0,...,1.HY,2009-02-27,"$41,021,000","$1,903,917",51.82%,162.3098,,1.3,1.3,1.3,1.0,14.0,2009-06-15,0.0,2.05,1.0,-17.06%,-17.06%,-17.06%,16.0,N,2009-02-12,B,Y,2010-06-01
104678,036778AB4,2003-03-31,,,,,,,,22.0,,,,155000.0,155000.0,1.0,0.01,65.0,,17.0,,,,,1.0,...,,2003-03-25,"$25,000",$250,,611.801,,1.0,,1.0,5.0,43.0,2003-08-01,0.0,1.65,1.0,,,,11.0,N,NaT,,,NaT
115186,03841XAB0,2012-07-31,,,,,,,,22.0,,,,3747.0,225000.0,0.01,0.0001,53.266667,,17.0,,,,,1.0,...,,2012-07-27,"$9,000",$1,,1300.7089,,0.0,,0.0,6.0,19.0,2012-12-15,0.0,1.4,1.0,,,,9.0,N,NaT,,,NaT
115735,039380AC4,2015-12-31,-0.17359,,,-0.17349,-0.304715,-0.304815,,22.0,,,,500000.0,500000.0,0.446429,0.004464,57.866667,0.11937,2.0,-0.235976,1355.56157,0.471067,19.0,1.0,...,,2015-12-31,"$101,644,205","$1,798,845",50.00%,327.1425,,0.4,0.4,0.4,1.0,56.0,2016-04-01,0.0,1.81,1.0,-30.47%,,-30.47%,10.0,N,2016-01-11,B,Y,2016-10-05
115736,039380AC4,2016-01-31,,,,,-0.17349,-0.17359,,22.0,,,,500000.0,500000.0,0.3185,0.003185,56.833333,0.111051,2.0,0.219874,2541.672253,0.471067,12.0,1.0,...,,2016-01-29,"$33,885,000","$258,730",16.83%,543.5048,,0.3,0.3,0.3,1.0,57.0,2016-04-01,0.0,2.42,1.0,-17.06%,-17.06%,-17.06%,10.0,N,2016-01-11,B,Y,2016-10-05
115779,039380AE0,2015-12-31,-0.17359,,,-0.17349,-0.220954,-0.221054,,22.0,,,,1000000.0,1000000.0,0.299,0.00299,42.066667,0.046286,2.0,-0.220472,1232.914356,0.382501,21.0,1.0,...,,2015-12-31,"$240,773,010","$4,701,888",25.51%,520.801,,0.3,0.3,0.3,1.0,42.0,2016-06-15,3.5,0.31,1.0,-22.10%,-22.10%,-22.10%,7.0,N,2016-01-11,B,Y,2016-10-05
115780,039380AE0,2016-01-31,,,,,-0.17349,-0.17359,,22.0,,,,1000000.0,1000000.0,0.7,0.007,41.033333,0.117401,2.0,0.034072,948.066705,0.382501,17.0,1.0,...,,2016-01-29,"$174,684,000","$1,325,051",38.58%,329.4148,,0.7,0.7,0.7,1.0,43.0,2016-06-15,0.0,0.9,1.0,-17.06%,-17.06%,-17.06%,7.0,N,2016-01-11,B,Y,2016-10-05
115823,039380AG5,2015-12-31,-0.17359,,,-0.17349,-0.076046,-0.076146,,22.0,,,,1000000.0,1000000.0,0.988323,0.009883,66.433333,0.141232,2.0,-0.059673,509.963136,0.385093,19.0,1.0,...,,2015-12-31,"$327,443,000","$4,709,075",45.23%,492.3164,,1.0,1.0,1.0,1.0,36.0,2016-06-15,3.63,0.32,1.0,-7.605%,-7.605%,-7.605%,11.0,N,2016-01-11,B,Y,2016-10-05
115824,039380AG5,2016-01-31,,,,,-0.17349,-0.17359,,22.0,,,,1000000.0,1000000.0,0.8175,0.008175,65.4,0.150638,2.0,0.563548,171.766889,0.385093,8.0,1.0,...,,2016-01-29,"$108,993,000","$776,162",16.86%,418.251,,0.8,0.8,0.8,1.0,37.0,2016-06-15,0.0,0.93,1.0,-17.06%,-17.06%,-17.06%,11.0,N,2016-01-11,B,Y,2016-10-05
115851,039380AJ9,2015-10-31,0.102482,,,0.102482,-0.582897,-0.582897,,20.0,,,,375000.0,375000.0,0.964059,0.009641,44.1,0.189648,2.0,-0.440004,506.153891,0.487523,18.0,1.0,...,1.HY,2015-10-30,"$122,803,000","$2,612,762",26.56%,288.6459,,1.0,1.0,1.0,1.0,22.0,2015-12-15,0.0,3.73,1.0,-58.29%,-58.29%,-58.29%,8.0,N,2016-01-11,B,Y,2016-10-05


In [64]:
mask = bond_data['NCOUPS'] > 0
bond_data[mask]['cusip'].nunique() / bond_data['cusip'].nunique()

bond_data = bond_data[mask]

In [29]:
example_bond.DAY_COUNT_BASIS

'30/360'

In [30]:
8.54 / 4 /100 * 100

2.135

In [31]:
bond_data.columns

Index(['cusip', 'date', 'exretn_t+1', 'exretnc_t+1', 'exretnc_dur_t+1',
       'bond_ret_t+1', 'bond_ret', 'exretn', 'exretnc_dur', 'rating', 'cs',
       'cs_6m_delta', 'bond_yield', 'bond_amount_out', 'offering_amt',
       'bondprc', 'perc_par', 'tmt', 'duration', 'ind_num_17', 'BOND_RET',
       'ILLIQ', 'var95', 'n_trades_month', 'size_ig', 'size_jk', 'BOND_YIELD',
       'CS', 'BONDPRC', 'PRFULL', 'CONVEXITY', 'CS_6M_DELTA', 'bond_value',
       'BOND_VALUE', 'coupon', 'bond_type', 'principal_amt', 'bondpar_mil',
       'ISSUE_ID', 'bond_sym_id', 'bsym', 'ISIN', 'BOND_TYPE',
       'SECURITY_LEVEL', 'OFFERING_DATE', 'OFFERING_AMT', 'OFFERING_PRICE',
       'PRINCIPAL_AMT', 'MATURITY', 'DAY_COUNT_BASIS', 'DATED_DATE',
       'FIRST_INTEREST_DATE', 'LAST_INTEREST_DATE', 'NCOUPS',
       'AMOUNT_OUTSTANDING', 'R_SP', 'R_MR', 'R_FR', 'N_SP', 'N_MR', 'N_FR',
       'RATING_NUM', 'RATING_CAT', 'RATING_CLASS', 'T_DATE', 'T_Volume',
       'T_DVolume', 'T_Spread', 'T_Yld_Pt', 'YIELD', 'P

In [69]:
bond_data['DAY_COUNT_BASIS'].unique()

array(['30/360', 'ACT/360', 'ACT/ACT'], dtype=object)

In [70]:
def decompose_date(date: pd.Timestamp):
    """
    Returns day, month, year given a `pd.Timestamp`
    Parameters
    ----------
    date: pd.Timestamp

    Returns
    -------
    tuple[int, int, int]: day, month, year

    """
    return date.day, date.month, date.year

def get_day_count(bond: pd.Series): 
    day_count_convention = bond.DAY_COUNT_BASIS
    
    if day_count_convention == '30/360': 
        return ql.Thirty360(ql.Thirty360.USA)
    elif day_count_convention == 'ACT/360': 
        return ql.Actual360()
    elif day_count_convention == 'ACT/ACT': 
        return ql.ActualActual(ql.ActualActual.Bond)
    
    raise Exception(f'we did not implement day count {day_count_convention}')
    
def get_coupon_freq(bond: pd.Series): 
    coupon_freq = bond.NCOUPS
    if coupon_freq == 1: 
        return ql.Period(ql.Annual)
    elif coupon_freq == 2: 
        return ql.Period(ql.Semiannual)
    elif coupon_freq == 4: 
        return ql.Period(ql.Quarterly)
    elif coupon_freq == 12: 
        return ql.Period(ql.Monthly)
    
    raise Exception(f'we did not implement coupon freq {coupon_freq}')

In [71]:

def get_zero_curve(date: ql.Date, calendar: ql.Calendar, maturity_freq: Literal['daily', 'monthly']) -> ql.ZeroCurve: 
    # get the zero rates for that specific date
    date_mask = pd.to_datetime(date.to_date())
    zero_rates = yield_curve.loc[date_mask]
    zero_rates: pd.Series
    
    # create the list of tenors based on the number of observations
    tenors = np.arange(0, len(zero_rates) + 1)
    
    # set the tenor unit and compounding frequency based on the type of data used
    if maturity_freq == 'daily':
        tenor_unit = ql.Days
        compounding_freq = ql.Daily
    elif maturity_freq == 'monthly': 
        tenor_unit = ql.Months
        compounding_freq = ql.Monthly
    else: 
        raise Exception(f'maturity not valid')
    
    # create the list of spot dates and rates
    #   (need to add a point for the evaluation date, hence the 0.)
    spot_dates = [date + ql.Period(tenor.item(), tenor_unit) for tenor in tenors] 
    spot_rates = [0.] + zero_rates.to_list()
    
    # set payment convention as specified in the paper (365 days)
    pmt_convention = ql.Actual365Fixed(ql.Actual365Fixed.Standard)
    
    # create the ZeroCurve and return it
    calendar = ql.UnitedStates(ql.UnitedStates.SOFR)
    spot_curve = ql.ZeroCurve(spot_dates, spot_rates, pmt_convention, calendar, ql.Linear(), ql.Compounded, ql.Continuous)
    
    return spot_curve

def debug_cashflows(bond: ql.FixedRateBond, bond_data: pd.Series, mkt_price: float, z_spread: float, impl_clean_price: float):
    """
    Debug cashflows given a bond and bond_data. 
    
    Function to debug the results of the OAS calcuations.
    
    Parameters
    ----------
    bond
    bond_data
    """
    cashflows = bond.cashflows()
    print('--- BOND SETUP & CALCS CHECKS ---')
    print(f'\tCalc Date = {bond_data.date}, \n\tOffering date = {bond_data.OFFERING_DATE}, Maturity = {bond_data.MATURITY}')
    
    # check for coupon_amt
    data_coupon_amt = bond_data.coupon * 100 / bond_data.NCOUPS / 100 # todo account for the coupon frequency
    bond_ql_coup_amt = np.round(cashflows[2].amount(), 2)
    print(f'\tCoupon Check: Data = {data_coupon_amt}, Model = {bond_ql_coup_amt}')
        
    # check that Accrued Interest
    data_accrued_interest = bond_data.COUPACC
    bond_ql_accr_interest = np.round(bond.dirtyPrice() - bond.cleanPrice(), 2)
    print(f'\tAccrued Interest Check: Data = {data_accrued_interest}, Model = {bond_ql_accr_interest}')
    
    print('\tCASHFLOWS SCHEDULE')
    for c in cashflows:
        print('\t%20s %12f' % (c.date(), c.amount()))
        
    coupons = [ql.as_coupon(c) for c in bond.cashflows()[:-1]]
    coupons_df = pd.DataFrame([(c.date().to_date(), c.rate(), c.accrualPeriod()) for c in coupons], columns=['date', 'rate', 'accrual_period'], index=range(1,len(coupons)+1))
    print(coupons_df)
    
    # checks for coupon dates
    bond_first_pmt_date = bond_data.FIRST_INTEREST_DATE.date()
    bond_last_pmt_date = bond_data.LAST_INTEREST_DATE.date()
    bond_ql_first_pmt_date = cashflows[0].date().to_date()
    bond_ql_last_pmt_date = cashflows[-3].date().to_date()
    
    first_delta = (bond_first_pmt_date - bond_ql_first_pmt_date).days
    last_delta = (bond_last_pmt_date - bond_ql_last_pmt_date).days
    
    print('\tChecks for Payment Dates')
    print(f'\t\tFirst pmt: Data = {bond_first_pmt_date}, Model = {bond_ql_first_pmt_date}, Delta = {first_delta}')
    print(f'\t\tLast pmt: Data = {bond_last_pmt_date}, Model = {bond_ql_last_pmt_date}, Delta = {last_delta}')
    
    
    delta_p = mkt_price - impl_clean_price
    delta_bps = delta_p / mkt_price * 100 * 100

    print(f'\tZ-SPREAD = {z_spread:.5f} ({z_spread * 100:.3f}%)')
    print(f'\tMkt Price = {mkt_price}, Implied Clean Price = {impl_clean_price:.5f}, Delta = {delta_p:.5f}, Delta (bps): {delta_bps:.2f}')
    
    print(f'DEBUG: {data_coupon_amt} {bond_ql_coup_amt}')
    assert math.isclose(data_coupon_amt, bond_ql_coup_amt, rel_tol=1e-2)
    if not math.isclose(data_accrued_interest, bond_ql_accr_interest): 
        warnings.warn('Accrued Interest is not correct')
    # assert math.isclose(data_accrued_interest, bond_ql_accr_interest) 
    assert bond_first_pmt_date == bond_ql_first_pmt_date # check the first payment date matches
    print('--- ALL CHECKS PASSED FOR BOND CALCULATIONS ---')
    print('--- CHECKS FOR Z-SPREAD CALCULATIONS ---')
    assert abs(first_delta) < 3
    assert abs(last_delta) < 3
    # delta p less than 1bp
    assert abs(delta_p) < 0.05
    print('--- ALL CHECKS PASSED FOR Z-SPREAD CALCULATION ---')
    
class ParameterNaNException(Exception):
    def __init__(self, varname: str):
        self.msg = f'Variable {varname} is NaN, and it is required.'
        super().__init__(self.msg)
        
def check_parameters(bond: pd.Series): 
    for varname in ['coupon', 'principal_amt']: 
        if np.isnan(bond[varname]): raise ParameterNaNException(varname)
    
    for varname in ['date', 'OFFERING_DATE', 'MATURITY', 'DATED_DATE']: 
        if pd.isnull(bond[varname]): raise ParameterNaNException(varname)
    
def compute_OAS(bond: pd.Series, debug: bool = False):
    # check that parameters are defined
    print(f'computing OAS for bond {bond.cusip} at {bond.date.date()}...', end='')
    if np.isnan(bond.bondprc): 
        print('No price data, skipping this row')
        return np.nan
    check_parameters(bond)
    
    calc_date = ql.Date(*decompose_date(bond.date))
    ql.Settings.instance().evaluationDate = calc_date
    
    # key data
    calendar = ql.UnitedStates(ql.UnitedStates.NYSE) # calendar to follow for calculations
    calendar = ql.NullCalendar()
    day_count_convention = get_day_count(bond) # the day count convention as specified in the bond
    day_count_convention = ql.ActualActual(ql.ActualActual.Bond) # the day count convention as specified in the bond
    
    # bond data
    issue_date = ql.Date(*decompose_date(bond.OFFERING_DATE))
    accruing_start_date = ql.Date(*decompose_date(bond.DATED_DATE)) # this is the date from which the bond starts accruing interest
    maturity_date = ql.Date(*decompose_date(bond.MATURITY))
    tenor = get_coupon_freq(bond)
    date_generation = ql.DateGeneration.Backward
    month_end = False
    face_value = bond.principal_amt
    face_value = 100
    coupon = bond.coupon / 100
    mkt_price = bond.bondprc
    first_pmt_date = ql.Date(*decompose_date(bond.FIRST_INTEREST_DATE))
    
    schedule = ql.Schedule(accruing_start_date, maturity_date, tenor, calendar, ql.Unadjusted, ql.Unadjusted, date_generation, month_end, first_pmt_date)
    
    settlement_days = 0
    
    # zero curve
    spot_curve = get_zero_curve(calc_date, calendar, 'daily')
    pricing_curve = ql.YieldTermStructureHandle(spot_curve)
    
    bond_ql = ql.FixedRateBond(
        settlement_days, 
        face_value, 
        schedule, 
        [coupon],
        day_count_convention
    )
    bond_ql.setPricingEngine(ql.DiscountingBondEngine(pricing_curve))
    
    # Z-spread calculation 
    z_spread = ql.BondFunctions.zSpread(
        bond_ql, 
        mkt_price,
        spot_curve,
        day_count_convention, 
        ql.Compounded,
        ql.Continuous, 
        calc_date,
        1.e-16,
        10_000_000,
        0.
    )
    
    def get_impl_clean_price(spread):
        spread1 = ql.SimpleQuote(spread)
        spread_handle1 = ql.QuoteHandle(spread1)
        ts_spreaded1 = ql.ZeroSpreadedTermStructure(pricing_curve,
                                                    spread_handle1,
                                                    ql.Compounded,
                                                    ql.Continuous)
        ts_spreaded_handle1 = ql.YieldTermStructureHandle(ts_spreaded1)
        fixed_rate_bond = ql.FixedRateBond(settlement_days,
                                        face_value,
                                        schedule,
                                        [coupon],
                                        day_count_convention)
        # Set Valuation engine
        bond_engine = ql.DiscountingBondEngine(ts_spreaded_handle1)
        fixed_rate_bond.setPricingEngine(bond_engine)
        value = fixed_rate_bond.cleanPrice()
        print(f'bond NPV: {fixed_rate_bond.NPV()}, clean: {fixed_rate_bond.cleanPrice()}')
        return value
    
    if debug: 
        impl_clean_price = get_impl_clean_price(z_spread)
        debug_cashflows(bond_ql, bond, mkt_price, z_spread, impl_clean_price)
        
    print(f' ...Z-spread is {z_spread}')

    return z_spread

In [56]:
spot_crv = compute_OAS(example_bond)

computing OAS for bond 00103YAE1 at 2002-08-31... ...Z-spread is 0.07041684706306062


In [43]:
bond_data.sort_values(['date', 'cusip'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bond_data.sort_values(['date', 'cusip'], inplace=True)


In [44]:
bond_data[bond_data['date'] == '2002-8-31']

Unnamed: 0,cusip,date,exretn_t+1,exretnc_t+1,exretnc_dur_t+1,bond_ret_t+1,bond_ret,exretn,exretnc_dur,rating,cs,cs_6m_delta,bond_yield,bond_amount_out,offering_amt,bondprc,perc_par,tmt,duration,ind_num_17,BOND_RET,ILLIQ,var95,n_trades_month,size_ig,...,RATING_CLASS,T_DATE,T_Volume,T_DVolume,T_Spread,T_Yld_Pt,YIELD,PRICE_EOM,PRICE_LDM,PRICE_L5M,GAP,COUPMONTH,nextcoup,COUPAMT,COUPACC,MULTICOUPS,RET_EOM,RET_LDM,RET_L5M,REMCOUPS,DEFAULTED,DEFAULT_DATE,DEFAULT_TYPE,REINSTATED,REINSTATED_DATE
0,000336AE7,2002-08-31,-0.056089,-0.087082,-0.081158,-0.054689,-0.008212,-0.009612,-0.023343,8.0,0.043019,,0.073689,100000.0,100000.0,97.693000,0.976930,70.033333,4.59,,0.023939,3.597388,,2.0,0.0,...,0.IG,2002-08-30,"$119,000","$118,742",,6.9180,7.369%,97.7,97.7,97.7,1.0,44.0,2002-12-01,0.0,1.71,1.0,-.8212%,,-.8212%,12.0,N,NaT,,,NaT
56,000361AB1,2002-08-31,,,,,,,,9.0,0.026231,,0.043858,50000.0,50000.0,103.089000,1.030890,13.666667,1.05,,,,,,0.0,...,0.IG,2002-08-30,"$35,000","$35,309",,6.3985,4.386%,103.1,103.1,103.1,1.0,100.0,2002-10-15,0.0,2.74,1.0,.8709%,,,3.0,N,NaT,,,NaT
58,000361AC9,2002-08-31,,,,,,,,9.0,0.041406,,0.070995,60000.0,60000.0,,,64.400000,4.30,,,,,,0.0,...,0.IG,2002-08-23,"$25,000","$24,750",,7.1010,7.100%,99.0,,,,50.0,2002-12-15,0.0,1.45,1.0,,,,11.0,N,NaT,,,NaT
518,00077QAA8,2002-08-31,0.008205,-0.005359,-0.003284,0.009605,0.010179,0.008779,0.003689,4.0,0.012687,-0.211716,0.035228,1000000.0,1000000.0,109.647872,1.096479,33.466667,2.47,16.0,0.003821,1.278353,0.00725,21.0,1.0,...,0.IG,2002-08-30,"$156,716,000","$171,212,727",0.68%,3.7306,3.523%,109.6,109.6,109.6,1.0,81.0,2002-11-30,0.0,1.85,1.0,1.018%,1.018%,1.018%,6.0,N,NaT,,,NaT
540,00077QAB6,2002-08-31,0.034165,0.003664,0.008736,0.035565,0.038498,0.037098,0.023558,4.0,0.017466,,0.047955,250000.0,250000.0,110.674333,1.106743,68.000000,4.54,16.0,0.032905,1.288907,,6.0,1.0,...,0.IG,2002-08-30,"$1,266,000","$1,400,995",0.24%,4.8036,4.795%,110.7,110.7,110.7,1.0,70.0,2002-10-01,0.0,2.92,1.0,3.850%,,3.850%,12.0,N,NaT,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1388148,987434AQ0,2002-08-31,-0.006066,-0.038650,-0.032247,-0.004666,0.008311,0.006911,-0.007284,15.0,0.056607,,0.087713,250000.0,250000.0,100.468750,1.004687,76.600000,4.70,17.0,0.010786,0.113572,,13.0,1.0,...,1.HY,2002-08-28,"$55,525,000","$55,352,125",0.40%,8.5288,8.771%,100.5,,100.5,1.0,,2002-12-15,0.0,0.00,1.0,.8311%,,.8311%,13.0,N,NaT,,,NaT
1388235,988498AA9,2002-08-31,0.021898,-0.023209,-0.011851,0.023298,0.015064,0.013664,-0.006355,12.0,0.039229,,0.075869,400000.0,400000.0,102.013889,1.020139,119.733333,6.75,15.0,0.021559,1.443438,,19.0,1.0,...,1.HY,2002-08-30,"$70,495,000","$71,088,826",2.41%,7.5748,7.587%,102.0,102.0,102.0,1.0,,2003-01-01,0.0,0.00,1.0,1.506%,1.506%,1.506%,20.0,N,NaT,,,NaT
1389402,988858AB2,2002-08-31,,,,,,,,10.0,0.056812,0.167800,0.084940,100000.0,100000.0,,,61.900000,3.92,,,,,1.0,0.0,...,0.IG,2002-08-12,"$3,300,000","$3,315,000",0.50%,8.3260,8.494%,100.0,,,1.0,52.0,2002-10-01,0.0,3.54,1.0,-.0271%,,,11.0,N,NaT,,,NaT
1389689,98934KAB6,2002-08-31,0.095739,0.055447,0.051070,0.097139,0.035613,0.034213,0.003681,2.0,0.021661,,0.063312,300000.0,300000.0,107.704500,1.077045,258.200000,11.14,6.0,0.040409,7.658524,0.05115,6.0,1.0,...,0.IG,2002-08-29,"$51,557,000","$55,593,550",3.57%,6.3240,6.331%,107.7,,107.7,1.0,99.0,2002-11-15,0.0,2.07,1.0,3.561%,,3.561%,43.0,N,NaT,,,NaT


In [None]:
OAS = bond_data.apply(compute_OAS, axis=1)

computing OAS for bond 000336AE7 at 2002-08-31... ...Z-spread is 0.03961535328525767
computing OAS for bond 000361AB1 at 2002-08-31... ...Z-spread is 0.026596315988716323
computing OAS for bond 000361AC9 at 2002-08-31...No price data, skipping this row
computing OAS for bond 00077QAA8 at 2002-08-31... ...Z-spread is 0.011422603036837663
computing OAS for bond 00077QAB6 at 2002-08-31... ...Z-spread is 0.014246243728831003
computing OAS for bond 00077QAC4 at 2002-08-31...No price data, skipping this row
computing OAS for bond 00077QAD2 at 2002-08-31... ...Z-spread is 0.01457912613922593
computing OAS for bond 00077QAG5 at 2002-08-31... ...Z-spread is 0.013942460448036393
computing OAS for bond 001031AC7 at 2002-08-31...No price data, skipping this row
computing OAS for bond 00103XAC7 at 2002-08-31...No price data, skipping this row
computing OAS for bond 00103YAE1 at 2002-08-31... ...Z-spread is 0.07041684706306062
computing OAS for bond 00104CAA6 at 2002-08-31... ...Z-spread is 0.184893

In [None]:
OAS.head()

In [None]:
def plot_yc(date, curve: ql.ZeroCurve = None): 
    date_mask = date
    zero_rates = yield_curve.loc[date_mask]
    
    fig, ax = plt.subplots()
    
    dates, rates = zip(*curve.nodes())
    dates = [d.to_date() for d in dates]
    
    print(dates[:3])
    print(rates[:3])
    
    ax.plot(dates[1:], zero_rates, lw='5')
    ax.plot(dates, rates, c='red')
    
plot_yc('2012-04-30', spot_crv)


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=809bdc60-6cc4-4659-aae8-7be15b203bd4' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>