In [None]:
import numpy as np
import pandas as pd
import sys
import helperfunctions

Read `combined` from `combined.csv`, which stores the options data for every year from 2002 to 2021.

In [None]:
# Load the combined options table, then parse the trade dates in a single
# vectorized call (instead of one pd.Timestamp call per row) so that
# `date_traded` is a datetime column for the `.dt` accessors used later.
combined = pd.read_csv('/content/combined/MyDrive/combined.csv')
combined['date_traded'] = pd.to_datetime(combined['date_traded'])

**Get interest rate info from `zerocd` file.**

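We also extract rates whose maturities fall outside our (2, 365)-day range (the query covers 1 to 373 days). The extra points make the cubic-spline interpolation more accurate and allow more dates to have enough known rates to be interpolated.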

In [None]:
# Distinct trade dates present in `combined`, as ISO strings for the SQL IN-list.
# NaT entries are dropped so they cannot leak into the query text.
trade_dates = combined.date_traded.dropna().dt.strftime('%Y-%m-%d').unique()
if len(trade_dates) == 0:
    raise ValueError("combined contains no trade dates; cannot build the zerocd query")

# Build the IN-list explicitly. Interpolating a Python tuple into the SQL text
# breaks when there is exactly one date, because its repr carries a trailing
# comma: ('2002-01-02',).
date_list = ", ".join(f"'{d}'" for d in trade_dates)

interest_statement = f"""SELECT date AS date_traded, days AS days_to_maturity, rate
                         FROM optionm.zerocd
                         WHERE days BETWEEN 1 AND 373
                            AND date in ({date_list})
                         GROUP BY date, days, rate
                         ORDER BY date, days"""
# NOTE(review): `get_data` is not defined in the visible cells (only
# `import helperfunctions` is) -- confirm where it comes from.
interests = get_data(interest_statement,'date_traded')

**Prepare DataFrame for splicing.**


*   `repeated_dates`: pd.DataFrame, one column named `date_traded`; each unique date in `interests.date_traded` is repeated 373 times.

*   `repeated_maturities`: pd.DataFrame, one column named `days_to_maturity`; the maturities 1 to 373 are repeated `len(interests.date_traded.unique())` times.
*   `dated_maturities`: pd.DataFrame, two columns, created by concatenating `repeated_dates` and `repeated_maturities`.
*   `all_rates`: pd.DataFrame, three columns; one row per date and maturity, holding the interest rate where one is known and NaN otherwise.
*   `all_rates_purged`: pd.DataFrame, three columns; keeps only the dates with enough known rates (at least 4) to be interpolated.


In [None]:
# Build the full (date, maturity) grid: every distinct trade date in
# `interests` paired with each maturity from 1 to 373 days.
unique_dates = interests.date_traded.unique()
maturity_grid = np.linspace(1, 373, 373)

repeated_dates = pd.DataFrame({'date_traded': np.repeat(unique_dates, 373)})
repeated_maturities = pd.DataFrame(
    {'days_to_maturity': np.tile(maturity_grid, len(unique_dates))}
)
dated_maturities = pd.concat([repeated_dates, repeated_maturities], axis=1)

In [None]:
# Right join onto the full grid: every (date, maturity) pair of
# `dated_maturities` is kept, and `rate` is NaN wherever `interests` has no
# matching row.
all_rates = interests.merge(
    dated_maturities,
    how='right',
    on=['date_traded', 'days_to_maturity'],
)

In [None]:
# Remove every date with fewer than 4 known interest rates (too few points for a
# cubic spline). `count()` ignores NaN, so this is the number of known rates per
# date; the flag is True when that number is greater than 3.
non_nan_counts = all_rates.groupby('date_traded')['rate'].count() > 3
# NOTE(review): `toosmall` is not defined in the visible cells -- presumably a
# helper that reports the rejected dates; confirm.
toosmall(non_nan_counts)
# Select rows by date label rather than by repeating the flags 373 times
# positionally; the positional mask silently assumed that every date has exactly
# 373 rows and that the rows are stored in groupby-sorted date order.
dates_with_enough_rates = non_nan_counts[non_nan_counts].index
all_rates_purged = all_rates[all_rates['date_traded'].isin(dates_with_enough_rates)]

**Cubic-spline interpolation.**



*   `inter_rates`: pd.DataFrame, the rates after cubic-spline interpolation.
*   `req_rates_dated`: pd.DataFrame, `inter_rates` with the date and maturity columns added; rows whose maturity does not correspond to any option price are then discarded, and dates whose rate points led to extreme rate values from inter/extrapolation are removed.


In [None]:
def splicer(vec):
  """Fill the NaN entries of `vec` by cubic-spline interpolation.

  Delegates to `vec.interpolate` with an order-3 spline and smoothing factor
  `s = 0`; `limit_direction='both'` lets leading and trailing NaNs be filled
  as well as interior ones.

  Parameters
  ----------
  vec : pd.Series
      Values to interpolate; needs enough non-NaN points for a cubic spline.

  Returns
  -------
  pd.Series
      Same index as `vec`, with the NaN entries filled.
  """
  spline_options = {'method': 'spline', 'order': 3, 's': 0.}
  return vec.interpolate(limit_direction='both', **spline_options)

In [None]:
# Interpolate each date's rate curve. `transform` always returns a result aligned
# to the original row index; `groupby(...).apply` prepends the group key to the
# index on pandas >= 2.0, which breaks the index-aligned combination below.
inter_rates = all_rates_purged.groupby('date_traded')['rate'].transform(splicer).to_frame()

# Attach date and maturity (aligned on the row index). The columns are taken from
# `all_rates_purged`, so dates that were purged for having too few known rates do
# not reappear as rows with a NaN rate.
req_rates_dated = all_rates_purged[['date_traded', 'days_to_maturity']].join(inter_rates)

# Keep only the maturities that actually occur in the options data.
required_maturities = combined.days_to_maturity.unique()
req_rates_dated = req_rates_dated[
    req_rates_dated['days_to_maturity'].isin(required_maturities)
].reset_index(drop=True)

In [None]:
# Remove any date whose inter/extrapolated rates leave the range of rates
# observed in `interests`.
# Series.min()/max() skip NaN, unlike the builtin min()/max().
lim_min = interests.rate.min()
lim_max = interests.rate.max()

# Per-date extremes of the interpolated curve. A date whose rates are all NaN
# yields NaN here, fails both comparisons, and is therefore dropped.
curve_bounds = req_rates_dated.groupby('date_traded')['rate'].agg(['min', 'max'])
# Inclusive bounds: the dates that hold the observed minimum / maximum rate are
# themselves valid and must not be rejected.
within_limits = (curve_bounds['min'] >= lim_min) & (curve_bounds['max'] <= lim_max)

# Same layout as before: one row per date, with the boolean flag in `rate`.
well_spliced_dates = within_limits.rename('rate').reset_index()

# Filter on the flag itself. Matching against every date listed in
# `well_spliced_dates` (flagged or not) would keep all rows.
good_dates = well_spliced_dates.loc[well_spliced_dates['rate'], 'date_traded']
req_rates_dated = req_rates_dated[req_rates_dated.date_traded.isin(good_dates)]

In [None]:
# Rates were reported as percentages; convert them to decimal fractions.
# `assign` builds a new frame instead of writing a column into a frame obtained
# by boolean filtering, which avoids pandas' chained-assignment warning.
# NOTE: not idempotent -- re-running this cell divides by 100 again.
req_rates_dated = req_rates_dated.assign(rate=req_rates_dated['rate'] / 100)