# Install Packages and Imports

In [None]:
# IPython shell escape: install the packages listed in ../../requirements.txt (path is relative to this notebook).
!pip install -r ../../requirements.txt

In [None]:
import polars as pl
import QuantLib as ql
import numpy as np
import matplotlib.pyplot as plt 
from typing import Literal
import math
import warnings
import datetime

# Load Bond Returns data and Zero Rate Curve

## Bond Returns + Data Cleaning

For the definitions of the features, refer to the pdf in the `docs` folder

Variable Summary: 

* `cs`: credit spread computed as bond_yield in excess of duration-matched portfolio of US Treasuries yields
* `tmt`: time to maturity (presumably in months; the unit is not confirmed)
* `ind_num_17`: Fama-French 17 Industry Classification (may be useful for value factor)
* `size_ig`, `size_jk`: dummy for (respectively) IG/HY bonds in the BAML/ICE index
* `bond_type`: US Corporate Convertible (CCOV), US Corporate Debentures (CDEB), US Corporate Medium Term Note (CMTN), US Corporate Medium Term Note Zero (CMTZ), or US Corporate Paper (CP)
* `R_FR`, `N_FR` and co.: rating as names and one-hot encoded, probably from different providers (SP may be S&P, FR Fitch Ratings, MR Moody's)
* `INTEREST_FREQUENCY`: e.g. 13 for variable coupon, -1 for NA, 14 for bi-monthly, 15-16 undocumented
* `DATED_DATE`: date from which the bond interest accrues
* Additional Info on variables: FISD data dictionary 2012 document

Prices Variables

* `PRICE_EOM`: considers all trading days and takes the last
* `PRICE_LDM`: considers only the last trading day of the month, otherwise NaN
* `PRICE_L5M`: considers only the last 5 trading days of the month, otherwise NaN
* `T_SPREAD`: average trade-weighted bid-ask spread
* `T_YLD_PT`: average trade-weighted yield point
* `T_VOLUME`: volume traded during the month, par-value
* `T_DVOLUME`: volume traded during the month, dollar value
* `bondprc` is adjusted for MMN, `BONDPRC` is unadjusted

Ratings
* ['R_SP', 'R_MR', 'R_FR', 'RATING_NUM', 'RATING_CAT', 'RATING_CLASS'] are useless
* 'rating' column is the useful one and contains the S&P rating code.   
    * A-rated (1-7): AAA (1), AA+ (2), AA (3), AA- (4), A+ (5), A(6), A- (7)
    * B-rated (8-16): BBB+ (8), BBB (9), BBB- (10), BB+ (11), BB (12), BB- (13), B+ (14), B (15), B- (16)
    * C-rated (17-21): CCC+ (17), CCC (18), CCC- (19), CC (20), C (21)
    * D: 22
    * NR (not rated): null

In [None]:
# Load the bond panel from parquet (the path is relative to this notebook's folder).
bond_data = pl.read_parquet('../../data/final_data/bond_data_final.pq')

# Display the first row.
bond_data.head(1)

# Load the historical zero curve

In [None]:
# Read the yield panel CSV; try_parse_dates asks polars to parse date-like columns.
yield_curve = pl.read_csv('../../data/yield_curve/yield_panel_monthly_frequency_daily_maturity.csv', try_parse_dates=True)
# Remove the MAX_DATA_TTM column (it is not used anywhere below).
yield_curve = yield_curve.drop('MAX_DATA_TTM')

In [None]:
# The column whose header is the empty string is copied to 'date', then the original is dropped.
yield_curve = yield_curve.with_columns(pl.col('').alias('date')).drop('')
# Cast every column except 'date' to float.
yield_curve = yield_curve.with_columns(pl.all().exclude('date').cast(float))
# Show the first three rows of the last three columns.
yield_curve[:3, -3:]

In [None]:
# Sort by date, upsample to a 1-day grid, then forward-fill every column
# so the rows added by the upsample carry the previous values.
yield_curve = yield_curve.sort('date').upsample(
    time_column='date',
    every='1d', 
).select(pl.all().forward_fill())

In [None]:
# Group rows by the month-end of their date, keep the last value of each column per group, and sort by date.
yield_curve = yield_curve.group_by(pl.col('date').dt.month_end()).agg(pl.all().last()).sort('date')

In [None]:
# Preview the first 10 rows and first 10 columns.
yield_curve[:10, :10]

# OAS Calculation with QuantLib

The OAS is the spread that, when added to the zero rates in the pricing function, returns the market price of the bond. We solve for it numerically with QuantLib's root-finding routine (`BondFunctions.zSpread`). In our case, since we stripped out bonds with optionality, the OAS coincides with the Z-spread.

In [None]:
def decompose_date(date: datetime.date):
    """
    Split a calendar date into its day, month and year components.

    The components come back in (day, month, year) order, which is the order
    the notebook unpacks into `ql.Date(...)`.

    Parameters
    ----------
    date: datetime.date
        Any object exposing `day`, `month` and `year` attributes.

    Returns
    -------
    tuple[int, int, int]: day, month, year
    """
    day, month, year = date.day, date.month, date.year
    return day, month, year

def get_day_count(bond: dict):
    """
    Map the bond's `DAY_COUNT_BASIS` label to a QuantLib day counter.

    Parameters
    ----------
    bond: dict
        Bond record; only the `DAY_COUNT_BASIS` key is read.

    Returns
    -------
    `ql.Thirty360` (USA), `ql.Actual360` or `ql.ActualActual` (Bond) instance,
    for the labels '30/360', 'ACT/360' and 'ACT/ACT' respectively.

    Raises
    ------
    Exception
        If the label is none of the three supported ones.
    """
    label = bond['DAY_COUNT_BASIS']

    # builders are wrapped in lambdas so a day counter is only created for the matching label
    builders = (
        ('30/360', lambda: ql.Thirty360(ql.Thirty360.USA)),
        ('ACT/360', lambda: ql.Actual360()),
        ('ACT/ACT', lambda: ql.ActualActual(ql.ActualActual.Bond)),
    )
    for known_label, build in builders:
        if label == known_label:
            return build()

    raise Exception(f'we did not implement day count {label}')
    
def get_coupon_freq(bond: dict):
    """
    Map the bond's `NCOUPS` value to the matching QuantLib coupon period.

    Parameters
    ----------
    bond: dict
        Bond record; only the `NCOUPS` key is read.

    Returns
    -------
    `ql.Period` built from `ql.Annual`, `ql.Semiannual`, `ql.Quarterly` or
    `ql.Monthly` for `NCOUPS` equal to 1, 2, 4 or 12 respectively.

    Raises
    ------
    Exception
        If `NCOUPS` is none of the supported values.
    """
    per_year = bond['NCOUPS']

    # (NCOUPS value, name of the QuantLib frequency constant)
    supported = (
        (1, 'Annual'),
        (2, 'Semiannual'),
        (4, 'Quarterly'),
        (12, 'Monthly'),
    )
    for count, frequency_name in supported:
        if per_year == count:
            return ql.Period(getattr(ql, frequency_name))

    raise Exception(f'we did not implement coupon freq {per_year}')

In [None]:
def get_zero_curve(date: ql.Date | datetime.date) -> ql.ZeroCurve:
    """
    Build the zero curve for `date` from the global `yield_curve` frame.

    The row of `yield_curve` whose 'date' equals the requested date is looked
    up and handed to `map_zero_curves`, so curves built here and curves built
    for the cache share one construction path.

    Parameters
    ----------
    date: ql.Date | datetime.date
        Curve date. A `ql.Date` is converted with `to_date()` for the lookup;
        a `datetime.date` is used as is.

    Returns
    -------
    ql.ZeroCurve: the curve built by `map_zero_curves` for the matching row.

    Raises
    ------
    TypeError
        If `date` is neither a `ql.Date` nor a `datetime.date`.
    Exception
        Propagated from `map_zero_curves` when the first element of the row is
        not a date. The row lookup itself is expected to raise when the
        predicate does not select exactly one row (see polars `DataFrame.row`).
    """
    # the frame is filtered with a Python date, whatever type the caller passed
    if isinstance(date, ql.Date):
        date_polar = date.to_date()
    elif isinstance(date, datetime.date):
        date_polar = date
    else:
        raise TypeError(f'unknown type for date, {type(date)}')

    # row layout is (date, rate_1, rate_2, ...): the same layout map_zero_curves consumes
    row = yield_curve.row(by_predicate=(pl.col('date') == date_polar))

    # map_zero_curves converts the row's date to ql.Date before adding tenors;
    # adding a ql.Period to a datetime.date (as was done here before) fails
    _, spot_curve = map_zero_curves(row)
    return spot_curve

def map_zero_curves(zero_curve: pl.Series):
    """
    Turn one row of the yield panel into a `(date, ql.ZeroCurve)` pair.

    Parameters
    ----------
    zero_curve: pl.Series
        Sequence whose first element is the curve date and whose remaining
        elements are the zero rates, one per day of maturity (1 day, 2 days, ...).
        The notebook passes the plain row tuples produced by `iter_rows()`.

    Returns
    -------
    tuple: the curve date (unchanged) and the `ql.ZeroCurve` built from the rates.

    Raises
    ------
    Exception
        If the first element is not a `datetime.date`, i.e. the date is not
        where it is expected and a yield would be consumed in its place.
    """
    date = zero_curve[0]
    if not isinstance(date, datetime.date):
        raise Exception('we are cutting yields from the calculation')

    # the curve needs a pillar on the evaluation date itself, hence the leading 0.
    spot_rates = [0.] + list(zero_curve[1:])

    # pillar k sits k days after the curve date (k = 0 is the curve date)
    anchor = ql.Date(date.day, date.month, date.year)
    spot_dates = [anchor + ql.Period(offset, ql.Days) for offset in range(len(spot_rates))]

    # day count as specified in the paper (365 days)
    day_counter = ql.Actual365Fixed(ql.Actual365Fixed.Standard)
    us_calendar = ql.UnitedStates(ql.UnitedStates.SOFR)

    # NOTE(review): the last two arguments are (ql.Compounded, ql.Continuous);
    # ql.Continuous looks like a compounding rule sitting in the frequency
    # slot - confirm the intended compounding/frequency pair.
    spot_curve = ql.ZeroCurve(
        spot_dates,
        spot_rates,
        day_counter,
        us_calendar,
        ql.Linear(),
        ql.Compounded,
        ql.Continuous,
    )
    return date, spot_curve
    


## Cache the Zero Curves for each EoM in a dictionary so as not to recompute them every time

In [None]:
# get zero curves for all days
# Build one zero curve per row of the month-end yield panel, restricted to
# dates from 2000-01-01 onwards, and store it under the row's date.
zero_curves = dict()
for data in yield_curve.filter(pl.col('date') >= datetime.date(2000,1,1)).iter_rows(): 
    date, curve = map_zero_curves(data)
    zero_curves[date] = curve   

In [None]:
# Number of cached curves.
len(zero_curves)

## Final Function for OAS Calculation

In [None]:
class ParameterNaNException(Exception):
    """Raised when a bond field required for pricing is missing (None or NaN)."""

    def __init__(self, varname: str):
        # keep the message on the instance as well, for callers that read `.msg`
        self.msg = f'Variable {varname} is NaN, and it is required.'
        super().__init__(self.msg)


def check_parameters(bond: dict):
    """
    Check that the fields required for pricing are present in `bond`.

    Parameters
    ----------
    bond: dict
        Bond record; the `coupon` and `principal_amt` keys are read.

    Raises
    ------
    ParameterNaNException
        If either field is None or NaN.
    """
    for varname in ('coupon', 'principal_amt'):
        value = bond[varname]
        # missing values can arrive as None (compute_OAS guards `bondprc` the
        # same way); np.isnan(None) would raise TypeError instead of the
        # intended exception, so None is tested first
        if value is None or np.isnan(value):
            raise ParameterNaNException(varname)
    
def compute_OAS(bond: dict, debug: bool = False):
    """
    Solve for the Z-spread of one bond observation over the cached zero curve.

    The bond is modelled as a fixed-rate bond with face value 100 whose
    schedule runs from `DATED_DATE` to `MATURITY`; the spread is the one that
    makes the discounted cash flows match the price in `bondprc`.

    Side effects: prints progress lines and sets QuantLib's global evaluation
    date to the observation date.

    Parameters
    ----------
    bond: dict
        Bond record. Keys read: `cusip`, `date`, `bondprc`, `coupon`,
        `principal_amt` (validated only), `DATED_DATE`, `MATURITY`,
        `FIRST_INTEREST_DATE`, `NCOUPS`.
    debug: bool
        When True, re-price the bond on the curve shifted by the solved spread
        and print the resulting NPV and clean price.

    Returns
    -------
    float: the Z-spread, or `np.nan` when the row has no price (None or NaN).

    Raises
    ------
    ParameterNaNException
        If `coupon` or `principal_amt` is missing (raised by `check_parameters`).
    Exception
        If `NCOUPS` is not supported (raised by `get_coupon_freq`).
    KeyError
        If no zero curve is cached for the observation date.
    """
    # double quotes inside the f-string keep this line valid before Python 3.12
    print(f'computing OAS for bond {bond["cusip"]} at {bond["date"]}...', end='')

    mkt_price = bond['bondprc']
    # a NaN price is as unusable as a missing one, so both skip the row
    if mkt_price is None or (isinstance(mkt_price, float) and math.isnan(mkt_price)):
        print('No price data, skipping this row')
        return np.nan
    check_parameters(bond)

    calc_date = ql.Date(*decompose_date(bond['date']))
    ql.Settings.instance().evaluationDate = calc_date

    # conventions
    calendar = ql.UnitedStates(ql.UnitedStates.NYSE)
    # Act/Act (Bond) is applied to every bond; the bond's own DAY_COUNT_BASIS
    # (see get_day_count) is not used here
    day_count_convention = ql.ActualActual(ql.ActualActual.Bond)

    # bond data
    accruing_start_date = ql.Date(*decompose_date(bond['DATED_DATE']))  # date from which interest accrues
    maturity_date = ql.Date(*decompose_date(bond['MATURITY']))
    tenor = get_coupon_freq(bond)
    # the face value is fixed at 100; `principal_amt` is validated but not used for pricing
    face_value = 100
    coupon = bond['coupon'] / 100
    first_pmt_date = ql.Date(*decompose_date(bond['FIRST_INTEREST_DATE']))

    schedule = ql.Schedule(
        accruing_start_date,
        maturity_date,
        tenor,
        calendar,
        ql.Unadjusted,
        ql.Unadjusted,
        ql.DateGeneration.Backward,
        False,  # end-of-month flag
        first_pmt_date,
    )

    settlement_days = 0

    # zero curve cached for the observation date
    spot_curve = zero_curves[bond['date']]
    pricing_curve = ql.YieldTermStructureHandle(spot_curve)

    bond_ql = ql.FixedRateBond(
        settlement_days,
        face_value,
        schedule,
        [coupon],
        day_count_convention,
    )
    bond_ql.setPricingEngine(ql.DiscountingBondEngine(pricing_curve))

    # Z-spread calculation: the trailing arguments are the solver accuracy,
    # the maximum number of iterations and the initial guess
    z_spread = ql.BondFunctions.zSpread(
        bond_ql,
        mkt_price,
        spot_curve,
        day_count_convention,
        ql.Compounded,
        ql.Continuous,
        calc_date,
        1.e-16,
        10_000_000,
        0.,
    )

    if debug:
        # sanity check: price an identical bond on the curve shifted by the solved spread
        spread_handle = ql.QuoteHandle(ql.SimpleQuote(z_spread))
        shifted_curve = ql.ZeroSpreadedTermStructure(
            pricing_curve,
            spread_handle,
            ql.Compounded,
            ql.Continuous,
        )
        check_bond = ql.FixedRateBond(
            settlement_days,
            face_value,
            schedule,
            [coupon],
            day_count_convention,
        )
        check_bond.setPricingEngine(
            ql.DiscountingBondEngine(ql.YieldTermStructureHandle(shifted_curve))
        )
        print(f'bond NPV: {check_bond.NPV()}, clean: {check_bond.cleanPrice()}')

    print(f' ...Z-spread is {z_spread}')

    return z_spread

## Compute OAS for an Example Bond

In [None]:
# Take row 15 as a dict keyed by column name and compute its spread.
example_bond = bond_data.row(15, named=True)
# NOTE: despite its name, spot_crv holds the value returned by compute_OAS (a spread or NaN), not a curve.
spot_crv = compute_OAS(example_bond)

## Run the function on the whole DataFrame

In [None]:
# Order the panel by date, then by cusip.
bond_data = bond_data.sort(['date', 'cusip'])

In [None]:
# Call compute_OAS once per row (each row passed as a dict) and attach the results as the 'oas' column.
# Rows are processed one at a time in Python, and every call prints its own progress line.
new_bond_data = bond_data.with_columns(oas=pl.Series(compute_OAS(bond) for bond in bond_data.iter_rows(named=True)))

In [None]:
# Rebind bond_data to the frame that carries the 'oas' column.
bond_data = new_bond_data

# Export the DataFrame 

In [None]:
# Sort by date then cusip, and subtract 1 from equity_month_return (the column keeps its name).
# NOTE(review): presumably this turns a gross return into a net return - confirm against the data docs.
data = bond_data.sort(['date', 'cusip']).with_columns(
    pl.col('equity_month_return').sub(1)
)

In [None]:
# Display the last rows of the frame about to be exported.
data.tail()

In [None]:
# Write the result to parquet with zstd compression at level 10.
data.write_parquet('../../data/final_data/bond_data_oas.pq', compression='zstd', compression_level=10)