In [None]:
import os
import sys
import warnings
import pandas as pd
import numpy as np
import CONFIG

In [None]:
from itertools import product
from tqdm import tqdm

In [None]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.metrics import mean_absolute_error

In [None]:
from datapreparation.preparation_functions import processing_data, read_download_preprocessed_data
from datapreparation.adaptive_sampling import creating_sample
from optimizator.simultaneous_min import iter_minimizer
from optimizator.fixed_minimization import fixed_tau_minimizer
from optimizator.gridsearch import grid_search
from dataextraction import *
from estimation_ytm.estimation_ytm import new_ytm, newton_estimation, filtering_ytm
from error_measures import area_boot, MAE_YTM, mean_absolute_error
from weight_scheme import weight
from Loss import yield_Loss, price_Loss, naive_yield_Loss
from payments_calendar import download_calendar
from ns_func import Z, D, F, par_yield
from stability_assessment import stability_assession

In [None]:
# Load IPython's autoreload extension so edits to imported modules can be picked up.
%load_ext autoreload

In [None]:
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2

In [None]:
# Show up to 50 columns when a DataFrame is displayed.
pd.options.display.max_columns = 50

In [None]:
# Ignore every warning raised from here on.
# NOTE(review): this also hides deprecation and numerical warnings from
# pandas/numpy and the optimizers -- confirm that a blanket filter is intended.
warnings.simplefilter('ignore')

In [None]:
# Short alias for pandas' IndexSlice helper (used for multi-level .loc slicing).
idx = pd.IndexSlice

In [None]:
# Output folder for extracted results, plus the locations of the raw and cached datasets.
PATH = 'extracted_data'
DATASETS_DIR = 'datasets'
calendar_data_path, original_data_path, clean_data_path = (
    os.path.join(DATASETS_DIR, file_name)
    for file_name in ('coupons_data.hdf', 'bonds.xls', 'clean_data.hdf')
)

In [None]:
# Loss function handle used by the notebook (passed to the minimizer and used
# to name the output file further down).
loss = yield_Loss

### Initialization

In [None]:
# Read the raw export (skipping its first two rows) and rename the columns
# according to CONFIG.NAME_MASK.
df = (
    pd.read_excel(original_data_path, skiprows=2)
      .rename(columns=CONFIG.NAME_MASK)
)

In [None]:
# True: rebuild the cleaned data from the raw frame in the load cell;
# False: read the previously prepared data from clean_data_path.
save_data = False

### Load data

In [None]:
%%time
### data mungling
if save_data:
    clean_data = processing_data(df, 
                  mask_face_value=CONFIG.MASK_FACE_VALUE, mask_base_time=CONFIG.MASK_BASE_TIME,
                  needed_bonds=CONFIG.INSTRUMENTS, use_otc=CONFIG.USE_OTC, deal_market=CONFIG.DEAL_MARKET,
                  notes_in_otc=CONFIG.NOTES_IN_OTC, maturity_filter=CONFIG.MATURITY_FILTER, 
                  specific_deals=CONFIG.SPECIFIC_DEALS)
    
    #calendar payments data: saving and loading
    coupons_cf, streak_data = download_calendar(clean_data, hdf_coupons_path=calendar_data_path)
    #Estimating correct yield for data
    clean_data = (clean_data.pipe(new_ytm, coupons_cf, streak_data)
                            .pipe(filtering_ytm, max_yield=CONFIG.MAX_YIELD, 
                                  min_yield=CONFIG.MIN_YIELD))
    clean_data['bond_symb'] = clean_data.index.get_level_values(1).str.extract(r'([A-Z]+)')[0]
    clean_data = read_download_preprocessed_data(save_data, clean_data=clean_data,
                                                 clean_data_path=clean_data_path)
else:
    clean_data = read_download_preprocessed_data(save_data, clean_data_path=clean_data_path)
    #Coupon Data: saving and loading
    coupons_cf, streak_data = download_calendar(clean_data, hdf_coupons_path=calendar_data_path)

### GET TO WORK!!!

In [None]:
%%time
filtered_data = creating_sample(CONFIG.SETTLE_DATE, clean_data, min_n_deal=CONFIG.MIN_N_DEAL, 
                                time_window=CONFIG.TIME_WINDOW, fix_first_cut=False)

In [None]:
# Largest reverse_span within each bond maturity type.
filtered_data.groupby('bond_maturity_type')['reverse_span'].max()

In [None]:
# Number of deals per bond maturity type, ordered by type.
filtered_data['bond_maturity_type'].value_counts().sort_index()

In [None]:
# (rows, columns) of the working sample.
filtered_data.shape

In [None]:
# Yield to maturity against span for every deal in the sample.
scatter_opts = {'x': 'span', 'y': 'ytm', 'figsize': (15, 10)}
filtered_data.plot.scatter(**scatter_opts)
plt.show()

Setting Loss arguments and optimization parameters

In [None]:
#Initial guess vector (for optimization): four parameters
x0 = [0.09, -0.01, 0, 1.5]
#Largest yield observed in the sample
ytm_max = filtered_data['ytm'].max() 
#Parameter constraint on x[0] + x[1], declared with type 'ineq'
#(presumably the scipy.optimize convention, i.e. the sum must be non-negative -- confirm)
constr = ({'type':'ineq',
           'fun': lambda x: np.array(x[0] + x[1])})
#Upper bound of the fourth parameter (last entry of `bounds` below)
teta_cap = 6
#Longest deal span in the data: span / 365, rounded (presumably days converted to years)
max_deal_span = (filtered_data.span / 365).round().max()
#Parameter bounds for constrained optimization, one (low, high) pair per entry of x0
bounds = ((0, 1), (None, None), (None, None), (1 / 12, teta_cap))
#Maturity limit for Zero-curve plot: the longest deal span, but at least 30
longest_maturity_year = max([max_deal_span, 30])
#Grid of 10000 maturities from 0.001 up to that limit
theor_maturities = np.linspace(0.001, longest_maturity_year, 10000)
options = {'maxiter': 150, 'disp': True}
#Tuple of arguments for loss function
loss_args = (filtered_data, coupons_cf, streak_data, CONFIG.RHO, CONFIG.WEIGHT_SCHEME)

Age of deals

In [None]:
# Map every maturity type (as a string, in order of first appearance) to a
# matplotlib colour. Colours are reused cyclically, so a sample with more
# maturity types than colours still has an entry for every type; a plain
# zip() would silently drop the extra types and the plotting cell would then
# fail with a KeyError. With up to four types the mapping is the same as before.
type_colors = ['r', 'b', 'g', 'k']
maturity_labels = filtered_data['bond_maturity_type'].astype('str').unique()
labels = {mtype: type_colors[pos % len(type_colors)]
          for pos, mtype in enumerate(maturity_labels)}
              

In [None]:
# One scatter series per maturity type: span on x, reverse_span on y,
# coloured through the `labels` mapping.
fig, ax = plt.subplots(figsize=(15, 10))
maturity_types = filtered_data['bond_maturity_type'].sort_values().unique()
for mtype in maturity_types:
    of_this_type = filtered_data['bond_maturity_type'] == mtype
    subsample = filtered_data.loc[of_this_type]
    ax.scatter(
        subsample['span'],
        subsample['reverse_span'],
        s=1e2,
        label=str(mtype),
        c=labels[str(mtype)],
    )

plt.legend()


## <center>Optimization</center>

###  Fixed Tau

In [None]:
# Display the weighting scheme configured for this run.
CONFIG.WEIGHT_SCHEME

In [None]:
### Setting loss arguments and optimization parameters (fixed-tau run)
#Initial guess vector (for optimization): three parameters; tau comes from CONFIG.TAU
x0 = [0.09, -0.01, 0]

#Parameter constraint on x[0] + x[1], declared with type 'ineq'
#(presumably the scipy.optimize convention, i.e. the sum must be non-negative -- confirm)
constr = ({'type':'ineq',
           'fun': lambda x: np.array(x[0] + x[1])})

#Longest deal span in the data: span / 365, rounded (presumably days converted to years)
max_deal_span = (filtered_data.span / 365).round().max()

#Parameter bounds for constrained optimization, one (low, high) pair per entry of x0
bounds = ((0, 1), (None, None), (None, None))

#Maturity limit for Zero-curve plot: the longest deal span, but at least 20
longest_maturity_year = max([max_deal_span, 20])
#Grid of 10000 maturities from 0.001 up to that limit
theor_maturities = np.linspace(0.001, longest_maturity_year, 10000)
options = {'maxiter': 150, 'eps': 9e-5, 'disp': True}
#NOTE(review): this 'test'-scheme weight column is overwritten by the last
#statement of this cell -- confirm the call is still needed.
filtered_data['weight'] = weight([1, 1, 1, 1], filtered_data, 'test')

#Tuple of arguments for loss function (CONFIG.TAU is passed as the last element)
loss_args = (filtered_data, coupons_cf, streak_data, CONFIG.RHO, CONFIG.WEIGHT_SCHEME, CONFIG.TAU)

#Defining loss -- crucial: this is the function handed to the minimizer
loss = yield_Loss
#Final weight column, computed with the configured weighting scheme
filtered_data['weight'] = weight([1, 1, 1, 1], filtered_data, CONFIG.WEIGHT_SCHEME)

In [None]:
# Optimization with the fixed-tau minimizer (SLSQP, bounded).
# NOTE: `constr` is not passed to the minimizer in this call.
res_ = fixed_tau_minimizer(
    Loss=loss,
    beta_init=x0,
    loss_args=loss_args,
    method='SLSQP',
    bounds=bounds,
    max_deal_span=max_deal_span,
    options=options,
)

In [None]:
# Full parameter vector: the optimizer's solution followed by the configured tau.
beta_best = np.append(res_.x, CONFIG.TAU)


In [None]:
# Curve Z evaluated on the maturity grid for the fitted parameters.
fig, ax = plt.subplots(figsize=(15, 10))
zero_curve = Z(theor_maturities, beta_best)
ax.plot(theor_maturities, zero_curve)
ax.set_title(f'Curve for {CONFIG.SETTLE_DATE}')
ax.set_ylim(0, 0.12)
ax.set_xlim(0, 15)

In [None]:
# Curve D evaluated on the same maturity grid, shown up to x = 20.
fig = plt.figure(figsize=(15, 10))
discount_curve = D(theor_maturities, beta_best)
plt.plot(theor_maturities, discount_curve)
plt.title(f'Discount curve for {CONFIG.SETTLE_DATE}')
plt.xlim(0, 20)

### 4 variable minimization

In [None]:
# Four-entry initial guess (the fixed-tau section used three entries).
# NOTE(review): the minimizer call in the next cell passes its own literal
# start vector rather than this x0 -- confirm which one is meant to be used.
x0 = [0.09, -0.01, 0, 1.5]
# Same loss arguments as in the fixed-tau run, but with None in place of CONFIG.TAU.
loss_args = (filtered_data, coupons_cf, streak_data, CONFIG.RHO, CONFIG.WEIGHT_SCHEME, None)


In [None]:
%%time
res_ = iter_minimizer(Loss=yield_Loss, 
                      beta_init=[0.09, -0.01, 0, 1.5],
                      loss_args=loss_args, method='SLSQP', 
                      bounds=((0,1), (None, None), (None, None), (None, None)),
                      max_deal_span=max_deal_span, options=options)
                      

In [None]:
# Take the optimizer's solution as the parameter vector and display it.
beta_best = res_.x
beta_best

In [None]:
# Curve Z on the maturity grid for the four-parameter solution.
fig, ax = plt.subplots(figsize=(15, 10))
zero_curve = Z(theor_maturities, beta_best)
ax.plot(theor_maturities, zero_curve)

ax.set_title(f'Curve for {CONFIG.SETTLE_DATE}')
ax.set_ylim(0, 0.12)
ax.set_xlim(0, 15)

## Extracting data

In [None]:
# Maturities at which the curves are exported: the first six are given as
# day counts over 365, the rest directly in years.
day_counts = (7, 14, 30, 90, 180, 270)
year_counts = (1, 2, 3, 5, 7, 10, 15, 20)
maturities = np.array([days / 365 for days in day_counts] + list(year_counts))

In [None]:
# Export the curves for the selected maturities to an Excel file under PATH
# and keep the returned data for display.
curves_path = f'{PATH}/curves_beta_{CONFIG.SETTLE_DATE}.xlsx'
curves_data = curves_to_excel(
    curves_path,
    beta_best,
    settle_date=CONFIG.SETTLE_DATE,
    maturities=maturities,
    shift=True,
)

In [None]:
# Display the settle date the curves were built for.
CONFIG.SETTLE_DATE

In [None]:
# Display the value returned by curves_to_excel.
curves_data

In [None]:
# Export the payment calendar data to Excel.
# NOTE(review): the file name 'coupons.xlsx' is relative to the working
# directory, not to PATH like the curves export -- confirm this is intended.
payment_calendar_to_excel('coupons.xlsx', coupons_cf, streak_data)

In [None]:
# PDF path named after the loss function and the settle date.
# NOTE(review): path_curve is only computed here; no statement in this cell
# saves a figure to it -- confirm whether a save step is missing.
path_curve = os.path.join(PATH, f'{loss.__name__}_{CONFIG.SETTLE_DATE}.pdf')

# Draw the fitted curve together with the deal points.
draw(
    beta_best,
    filtered_data,
    theor_maturities,
    title_date=CONFIG.SETTLE_DATE,
    longest_maturity_year=longest_maturity_year,
    draw_points=True,
    weight_scheme='even',
    ls='--',
    linewidth=3,
    shift=True,
)

plt.title(f'Curves for {CONFIG.SETTLE_DATE}')
plt.xlim(0, longest_maturity_year);