In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from sklearn.metrics import max_error, mean_absolute_error, mean_squared_error, r2_score 

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR

from sklearn.preprocessing import StandardScaler, QuantileTransformer, PolynomialFeatures

from sklearn.inspection import permutation_importance

from sklearn.model_selection import GridSearchCV

from scipy.stats import kurtosis, skew

In [None]:
# Load the bond price panel; the cells below filter it and add feature columns to it.
df = pd.read_csv('CORP_prices_6_years_to_predict.csv')

## Default filtering and date to dist 

In [None]:
# Remove defaulted bonds.
default_cutoff = pd.Timestamp('2017-01-01')

defaults = pd.read_csv('default_data.csv')
# '-' marks a missing value in the source file (np.nan: the np.NaN alias was removed in NumPy 2.0).
defaults = defaults.replace('-', np.nan)
defaults['Def_date'] = pd.to_datetime(defaults['Def_date'], format='%d.%m.%Y')
defaults['Date_repay'] = pd.to_datetime(defaults['Date_repay'], format='%d.%m.%Y')
# Days between the default and its repayment; NaN when either date is missing.
defaults['Repay_lag'] = (defaults['Date_repay'] - defaults['Def_date']).dt.days

# Keep only rows that are neither 'No Default' nor a technical default.
real_defaults = defaults[(defaults['Obl_type'] != 'No Default') & (defaults['Def_type'] != 'технический дефолт')]
df['TRADEDATE'] = pd.to_datetime(df['TRADEDATE'], format='%Y-%m-%d')

defaulted_before_cutoff = real_defaults['Def_date'] < default_cutoff

# Drop every bond that defaulted before 2017 and never repaid the obligation.
full_drop = real_defaults.loc[defaulted_before_cutoff & real_defaults['Date_repay'].isna(), 'SECID'].unique()
df = df[~df['SECID'].isin(full_drop)]

# Drop every bond that defaulted before 2017 and took more than 100 days to repay.
full_drop_1 = real_defaults.loc[defaulted_before_cutoff & (real_defaults['Repay_lag'] > 100), 'SECID'].unique()
df = df[~df['SECID'].isin(full_drop_1)]

# For defaults from 2017 onwards, drop the bond's observations starting one week
# before its first default. `>=` (rather than `>`) keeps a default dated exactly on
# the cutoff from falling between the two branches.
sorted_defaults = real_defaults[real_defaults['Def_date'] >= default_cutoff].\
        sort_values(by='Def_date').groupby('SECID').first()
sorted_defaults['remove_date'] = sorted_defaults['Def_date'] - pd.Timedelta(7, 'd')

# One vectorised pass instead of re-filtering the whole frame once per SECID.
# Bonds without a default map to NaT, and `TRADEDATE > NaT` is False, so they are kept.
remove_from = pd.to_datetime(df['SECID'].map(sorted_defaults['remove_date']))
df = df[~(df['TRADEDATE'] > remove_from)]

In [None]:
# Convert DATE_i to DIST_i: the number of days between DATE_i and the
# observation date TRADEDATE. The DATE_i columns are dropped afterwards.
date_columns = [f'DATE_{lag}' for lag in range(1, 6)]

for date_col in date_columns:
    df[date_col] = pd.to_datetime(df[date_col], format='%Y-%m-%d')

for lag, date_col in enumerate(date_columns, start=1):
    gap = df['TRADEDATE'] - df[date_col]
    df[f'DIST_{lag}'] = gap.apply(lambda delta: delta.days)

df.drop(date_columns, axis=1, inplace=True)

## Main feature generation

In [None]:
# Row-level features: log issue size, time to maturity in years, and the
# simple return of CLOSE relative to CLOSE_1.
df = df.assign(
    TRADEDATE=pd.to_datetime(df['TRADEDATE']),
    LN_SIZE=np.log(df['ISSUESIZE']),
    TOMAT_YEARS=df['TOMAT'] / 365,
    RETURN=(df['CLOSE'] - df['CLOSE_1']) / df['CLOSE_1'],
)

In [None]:
# VAR5, VAR10, ES5, ES10 - computed from the last 40 observations of RETURN.
# Each statistic works on the sorted window: VAR picks a single order statistic,
# ES averages the smallest values. The joined Series is named RETURN, so the
# suffix turns it into RETURN_VAR5, RETURN_VAR10, RETURN_ES5 and RETURN_ES10.
tail_statistics = (
    ('_VAR5', lambda window: np.sort(window)[1]),
    ('_VAR10', lambda window: np.sort(window)[3]),
    ('_ES5', lambda window: np.sort(window)[:2].mean()),
    ('_ES10', lambda window: np.sort(window)[:4].mean()),
)

for column_suffix, statistic in tail_statistics:
    rolled = df.groupby('SECID').apply(
        lambda bond, statistic=statistic: bond.set_index('TRADEDATE')['RETURN'].rolling(40).apply(
            func=statistic, raw=True))
    df = df.join(rolled, on=['SECID', 'TRADEDATE'], rsuffix=column_suffix)

In [None]:
# ILLIQ - negated covariance between the current and the previous daily
# log-price change, taken over each bond's trailing 30-day window.
df['delta_now'] = np.log(df['CLOSE']) - np.log(df['CLOSE_1'])
df['delta_prev'] = np.log(df['CLOSE_1']) - np.log(df['CLOSE_2'])

# Pack both changes into one object column so a single rolling window carries the pair.
df['deltas'] = [[i,j] for i,j in zip(df['delta_now'], df['delta_prev'])]

# One Rolling object per SECID; list() materialises every 30-day window of that bond.
out = [list(window) for window in
       df.groupby('SECID').apply(lambda x: x.set_index('TRADEDATE')['deltas'].rolling('30d'))]

# Frame that receives the per-window results, one row per (SECID, TRADEDATE).
# NOTE(review): the list assignment below is positional, so it assumes df is ordered by
# TRADEDATE within each SECID and has no duplicate (SECID, TRADEDATE) pairs - confirm.
df_gr = df.groupby(['SECID', 'TRADEDATE']).last().reset_index()

# Flatten the per-bond lists into one list of windows.
out_unpacked = [i for j in out for i in j]

# For windows with more than one row: the off-diagonal entry of the covariance matrix of the pairs.
# NOTE(review): one-row windows take the .item() branch; confirm what np.cov returns for a
# single [now, prev] pair - it does not look like a covariance across days.
covs = [np.cov([i for i in j], rowvar=False)[0][1] if len(j)>1 else np.cov([i for i in j], rowvar=False).item()
 for j in tqdm(out_unpacked)]

df_gr['covs'] = covs

df_gr['ILLIQ'] = -df_gr['covs']

# Bring ILLIQ back onto the main frame by (TRADEDATE, SECID).
df = df.join(df_gr.set_index(['TRADEDATE', 'SECID'])['ILLIQ'], on=['TRADEDATE', 'SECID'])

In [None]:
# Roll measure of illiquidity: square root of the negated covariance between the
# current and the previous simple return over a trailing 30-day window, and zero
# when that covariance is not negative.
df['RETURN_prev'] = (df['CLOSE_1'] - df['CLOSE_2'])/df['CLOSE_2']


# Pair current and previous return per row (missing returns become 0) for the rolling window.
df['returns'] =  [[i,j] for i,j in zip(df['RETURN'].fillna(0), df['RETURN_prev'].fillna(0))]
out = [list(window) for window in
       df.groupby('SECID').apply(lambda x: x.set_index('TRADEDATE')['returns'].rolling('30d'))]

# Flatten the per-bond lists into one list of windows.
out_unpacked = [i for j in out for i in j]

# NOTE(review): one-row windows take the .item() branch; confirm the value np.cov yields there.
covs_return = [np.cov([i for i in j], rowvar=False)[0][1] if len(j)>1 else np.cov([i for i in j], rowvar=False).item()
 for j in tqdm(out_unpacked)]

# df_gr is not rebuilt in this cell: it is the frame created in the ILLIQ cell, so this
# positional assignment relies on that cell having run on the same rows of df.
df_gr['covs_return'] = covs_return

# The (cov < 0) mask zeroes out non-negative covariances before the square root.
df_gr['ROLL'] = ((df_gr['covs_return'] < 0).astype(int) * -df_gr['covs_return'])**0.5

df = df.join(df_gr.reset_index().set_index(['TRADEDATE', 'SECID'])['ROLL'], on=['TRADEDATE', 'SECID'])

In [None]:
# P_HighLow: Corwin-Schultz (2012) high-low spread estimator built from two
# consecutive trading days, then lagged by one observation per bond.
#   beta  = ln(H_t/L_t)^2 + ln(H_{t-1}/L_{t-1})^2
#   gamma = ln(max(H_t, H_{t-1}) / min(L_t, L_{t-1}))^2
#   alpha = (sqrt(2*beta) - sqrt(beta)) / (3 - 2*sqrt(2)) - sqrt(gamma / (3 - 2*sqrt(2)))
#   S     = 2 * (exp(alpha) - 1) / (1 + exp(alpha))
df_2 = df  # alias: the helper columns below are added to df as well

df_2['day_of_week'] = df_2['TRADEDATE'].dt.dayofweek

df_2['day_of_week_prev'] = df_2.groupby(['SECID'])['day_of_week'].shift()

df_2['prev_high'] = df_2.groupby(['SECID'])['HIGH'].shift()

# Previous day's LOW (this was shifted from HIGH before, making prev_low equal to prev_high).
df_2['prev_low'] = df_2.groupby(['SECID'])['LOW'].shift()

# A pair of days counts as consecutive when it is Friday -> Monday or exactly one day apart.
df_2['consecutive'] = (((df_2['day_of_week'] == 0) & (df_2['day_of_week_prev'] == 4))| (df_2['DIST_1'] == 1)).astype(int)

# Work on a copy of the usable rows so the estimator columns do not leak into df.
df_2 = df_2[(df_2['consecutive'] == 1) & df_2['prev_high'].notna()].copy()

log_range_today = np.log(df_2['HIGH'] / df_2['LOW'])
log_range_prev = np.log(df_2['prev_high'] / df_2['prev_low'])
two_day_high = df_2[['HIGH', 'prev_high']].max(axis=1)
two_day_low = df_2[['LOW', 'prev_low']].min(axis=1)
cs_denominator = 3 - 2 * 2 ** 0.5

df_2['gamma'] = np.log(two_day_high / two_day_low) ** 2
# Sum of the squared one-day log ranges (not the square of their sum).
df_2['beta'] = log_range_today ** 2 + log_range_prev ** 2
# sqrt(2*beta) needs the parentheses: `2 * beta ** 0.5` is 2*sqrt(beta).
df_2['alpha'] = ((2 * df_2['beta']) ** 0.5 - df_2['beta'] ** 0.5) / cs_denominator \
    - np.sqrt(df_2['gamma'] / cs_denominator)
df_2['P_HIGHLOW'] = 2 * (np.exp(df_2['alpha']) - 1)/(1 + np.exp(df_2['alpha']))

df = df.join(df_2.set_index(['TRADEDATE', 'SECID'])['P_HIGHLOW'], on=['TRADEDATE', 'SECID'])
# Shift by one observation per bond, so each row carries the previous row's estimate.
df['P_HIGHLOW'] = df.groupby('SECID')['P_HIGHLOW'].shift()

In [None]:
# P_ZEROS: (number of zero-volume rows + number of zero-return rows) within each
# bond's trailing 30-day window, divided by 20.
# NOTE(review): the original header said "df_1 = CORP_prices_full", but this cell only touches df.

# Per bond: count of rows with VALUE == 0 inside the trailing 30-day window.
days_no_trades = df.groupby('SECID').apply(lambda x: x.set_index('TRADEDATE')['VALUE'].rolling('30d').apply(raw=True, 
                                                                                   func=lambda y:(y == 0).sum()))

# The count Series is also named VALUE, so it arrives as VALUE_r next to the original VALUE column.
df_3 = df.join(days_no_trades.reset_index().set_index(['SECID', 'TRADEDATE'])['VALUE'], on=['SECID', 'TRADEDATE'], 
               rsuffix='_r')

# Keep only the first row for every repeated index label of the joined frame.
df_3 = df_3[~df_3.index.duplicated()]

df_3 = df_3.rename(columns={'VALUE_r':'days_no_trade'})

df = df.join(df_3.set_index(['SECID','TRADEDATE'])['days_no_trade'], on=['SECID','TRADEDATE'])

# Per bond: count of rows with RETURN == 0 inside the trailing 30-day window (column RETURN_no_count).
df = df.join(df.groupby('SECID').apply(lambda x: x.set_index('TRADEDATE')['RETURN'].rolling('30d').apply(
                raw=True, func=lambda y: (y == 0).sum())), on=['SECID', 'TRADEDATE'], rsuffix='_no_count')

# NOTE(review): 20 presumably approximates the number of trading days in a 30-day window - confirm.
df['P_ZEROS'] = (df['RETURN_no_count'] + df['days_no_trade'])/20

In [None]:
# AMIHUD and STD_AMIHUD: mean and standard deviation of |RETURN| / VALUE over
# each bond's trailing 30-day windows.
out_returns_volumes = [
    list(bond_windows)
    for bond_windows in df.groupby('SECID').apply(
        lambda bond: bond.set_index('TRADEDATE')[['RETURN', 'VALUE']].rolling('30d'))
]

# Walk the windows bond by bond, in the same order as the rows of df_gr below.
ahimuds = []
std_ahimuds = []
for bond_windows in tqdm(out_returns_volumes):
    for window in bond_windows:
        price_impact = np.abs(window['RETURN']) / window['VALUE']
        ahimuds.append(price_impact.mean())
        std_ahimuds.append(price_impact.std())

df_gr = df.groupby(['SECID', 'TRADEDATE']).last()
df_gr['AMIHUD'] = ahimuds
df_gr['STD_AMIHUD'] = std_ahimuds

df = df.join(df_gr[['AMIHUD', 'STD_AMIHUD']], on=['SECID', 'TRADEDATE'])

# A missing standard deviation is stored as 0.
df['STD_AMIHUD'] = df['STD_AMIHUD'].fillna(0)

In [None]:
# VOL, KURT, SKEW: variance, kurtosis and skewness of CLOSE over each bond's
# trailing 60-day window. The joined Series is named CLOSE, so the suffix turns
# it into CLOSE_VOL, CLOSE_KURT and CLOSE_SKEW.
close_moments = (
    ('_VOL', lambda prices: np.var(prices)),
    ('_KURT', lambda prices: kurtosis(prices, axis=None)),
    ('_SKEW', lambda prices: skew(prices, axis=None)),
)

for column_suffix, moment in close_moments:
    rolled = df.groupby('SECID').apply(
        lambda bond, moment=moment: bond.set_index('TRADEDATE').rolling('60d')['CLOSE'].apply(
            func=moment, raw=True))
    df = df.join(rolled, on=['SECID', 'TRADEDATE'], rsuffix=column_suffix)

In [None]:
# add Government bonds index data
df_ofz = pd.read_csv('OFZ_index.csv')

df_ofz['TRADEDATE'] = pd.to_datetime(df_ofz['TRADEDATE'])


def _ofz_for_maturity(frame, suffix=''):
    """Pick the OFZ index matching each row's TOMAT_YEARS bucket.

    Buckets: < 1 year -> RUGBITR1Y, [1, 3] -> RUGBITR3Y, (3, 5) -> RUGBITR5Y,
    >= 5 -> RUGBITR5+. `suffix` selects a suffixed copy of those columns
    (e.g. '_PREV'). The open (3, 5) interval is written with explicit
    comparisons because `between(..., inclusive=False)` is rejected by pandas 2.x.
    """
    years = frame['TOMAT_YEARS']
    return (years < 1) * frame[f'RUGBITR1Y{suffix}'] \
        + years.between(1, 3) * frame[f'RUGBITR3Y{suffix}'] \
        + ((years > 3) & (years < 5)) * frame[f'RUGBITR5Y{suffix}'] \
        + (years >= 5) * frame[f'RUGBITR5+{suffix}']


df = df.join(df_ofz.set_index('TRADEDATE'), on='TRADEDATE')

df['OFZ_index'] = _ofz_for_maturity(df)

# Date of the bond's previous observation (TRADEDATE minus DIST_1 days).
df['PREV_DATE'] = df['TRADEDATE'] - df['DIST_1'] * pd.Timedelta('1d')

# shift() moves every index value down one row, so each TRADEDATE sees the previous row's level.
df = df.join(df_ofz.set_index('TRADEDATE').shift(), on='TRADEDATE', rsuffix='_PREV')

df['OFZ_index_prev'] = _ofz_for_maturity(df, suffix='_PREV')

# Relative change of the matched index versus its previous level.
df['OFZ_DELTA'] = (df['OFZ_index'] - df['OFZ_index_prev']) / df['OFZ_index_prev']

df = df.drop(columns=['RUGBITR1Y', 'RUGBITR3Y', 'RUGBITR5Y', 'RUGBITR5+', 'RUGBITR1Y_PREV', 'RUGBITR3Y_PREV',
                      'RUGBITR5Y_PREV', 'RUGBITR5+_PREV'])

# add CB rate data
cbr = pd.read_excel('cbrate.xlsx')

cbr['TRADEDATE'] = pd.to_datetime(cbr['TRADEDATE'])
# Outer-merge the observed trade dates with the rate table, then carry the last known
# rate forward. The key column must stay named TRADEDATE because it is used as the
# index below (joining an unnamed frame with `on=0` left it named 0).
trade_dates = pd.DataFrame({'TRADEDATE': df['TRADEDATE'].unique()})
cbr_new = trade_dates.merge(cbr, on='TRADEDATE', how='outer').sort_values('TRADEDATE').ffill()

rate_by_date = cbr_new.set_index('TRADEDATE')

df = df.join(rate_by_date, on='TRADEDATE')

# Same table looked up at the previous observation date gives CBRATE_PREV.
df = df.join(rate_by_date, on='PREV_DATE', rsuffix='_PREV')

df['DELTA_CBRATE'] = df['CBRATE'] - df['CBRATE_PREV']

In [None]:
# vwap and twap over lags 1..5:
#   VWAP weights CLOSE_k by VALUE_k, TWAP weights CLOSE_k by 1 / DIST_k.
lags = range(1, 6)

value_weighted_closes = [df[f'VALUE_{k}'] * df[f'CLOSE_{k}'] for k in lags]
total_value = df[[f'VALUE_{k}' for k in lags]].sum(axis=1)
df['VWAP'] = sum(value_weighted_closes[1:], value_weighted_closes[0]) / total_value

time_weighted_closes = [df[f'CLOSE_{k}'] / df[f'DIST_{k}'] for k in lags]
time_weights = [1 / df[f'DIST_{k}'] for k in lags]
df['TWAP'] = sum(time_weighted_closes[1:], time_weighted_closes[0]) / sum(time_weights[1:], time_weights[0])

In [None]:
# Rows without a high-low spread estimate get 0.
df = df.fillna({'P_HIGHLOW': 0})

In [None]:
# Rows without an ILLIQ value get 0.
df = df.fillna({'ILLIQ': 0})

## Calculate Betas (market, default, etc.)

In [None]:
# Second price panel, loaded separately from df; the beta cells below operate on df_1.
df_1 = pd.read_csv('CORP_prices_6_years.csv')

In [None]:
#Prepare index and gcurve data
corp_index = pd.read_csv('CORP_index.csv')
g_curve = pd.read_csv('g_curves.csv')
ofz_index = pd.read_csv('OFZ_index.csv')

corp_index['TRADEDATE'] = pd.to_datetime(corp_index['TRADEDATE'])
ofz_index['TRADEDATE'] = pd.to_datetime(ofz_index['TRADEDATE'])
g_curve['DATE'] = pd.to_datetime(g_curve['DATE'])

# Keep index history up to 2021-01-01; .copy() so the columns added below go to an independent frame.
corp_index = corp_index.drop_duplicates()
corp_index = corp_index[corp_index['TRADEDATE'] <= '2021-01-01'].copy()
ofz_index = ofz_index[ofz_index['TRADEDATE'] <= '2021-01-01'].copy()

# Row-over-row simple return of each corporate index. Every return is divided by
# the previous level of its OWN index (three of them used RUCBTR3Y's level before).
index_level_by_return = {
    'RETURN_3Y': 'RUCBTR3Y',
    'RETURN_1Y': 'RUCBITR1Y',
    'RETURN_5Y': 'RUCBTR5Y',
    'RETURN_3+': 'RUCBITR3+',
}
for return_col, level_col in index_level_by_return.items():
    previous_level = corp_index[level_col].shift()
    corp_index[return_col] = (corp_index[level_col] - previous_level) / previous_level

# Convert each curve point from a percentage to a per-day compounded rate (365-day year).
# NOTE(review): presumably the inputs are annual yields in percent - confirm against g_curves.csv.
for tenor in ('0.25', '5', '10'):
    g_curve[tenor] = (1 + g_curve[tenor]/100) ** (1/365) - 1

In [None]:
#join index levels and g_curve to df_1
df_1['years_tomat'] = df_1['TOMAT']/365

df_1['TRADEDATE'] = pd.to_datetime(df_1['TRADEDATE'])

index_levels = ['RUCBITR1Y', 'RUCBTR3Y', 'RUCBTR5Y', 'RUCBITR3+']
df_1 = df_1.join(corp_index.set_index('TRADEDATE')[index_levels], on='TRADEDATE')

# Pick the corporate index level matching the bond's maturity bucket:
# < 1y -> RUCBITR1Y, [1, 3] -> RUCBTR3Y, (3, 5) -> RUCBTR5Y, >= 5 -> RUCBITR3+.
# The open (3, 5) interval uses explicit comparisons because
# `between(..., inclusive=False)` is rejected by pandas 2.x.
years_tomat = df_1['years_tomat']
df_1['corp_index'] = (years_tomat < 1) * df_1['RUCBITR1Y'] \
+ years_tomat.between(1, 3) * df_1['RUCBTR3Y']\
+ ((years_tomat > 3) & (years_tomat < 5)) * df_1['RUCBTR5Y']\
+ (years_tomat >= 5)  * df_1['RUCBITR3+']

# The raw level columns are only needed to build corp_index.
df_1 = df_1.drop(columns=index_levels)

df_1 = df_1.join(g_curve.set_index('DATE')[['0.25', '5']], on='TRADEDATE')

In [None]:
#calculate daily return for corp index and bond
# Both returns span DIST_1 days; raising the gross return to 1/DIST_1 converts
# each of them to a per-day rate.
per_day_exponent = 1 / df_1['DIST_1']

bond_period_return = (df_1['CLOSE'] - df_1['CLOSE_1']) / df_1['CLOSE_1']
df_1['return_daily'] = (bond_period_return + 1) ** per_day_exponent - 1

# Index level on the bond's previous row.
previous_index_level = df_1.groupby('SECID')['corp_index'].shift()
index_period_return = (df_1['corp_index'] - previous_index_level) / previous_index_level
df_1['return_corp_index'] = (index_period_return + 1) ** per_day_exponent - 1

In [None]:
# One LinearRegression instance; the beta helpers below receive it and call fit() on it per window.
lr = LinearRegression()

# df_1 collapsed to one row per (SECID, TRADEDATE).
df_gr = df_1.groupby(['SECID', 'TRADEDATE']).last()

In [None]:
#crop return_daily to stay in normal range
# Winsorise at the 5th and 95th percentiles. clip() leaves NaN untouched and maps
# +/-inf to the bound; the previous mask-multiplication form turned an infinite
# return into NaN (0 * inf) and recomputed the quantiles several times.
lower_bound, upper_bound = df_1['return_daily'].quantile([0.05, 0.95])
df_1['return_daily'] = df_1['return_daily'].clip(lower=lower_bound, upper=upper_bound)

In [None]:
#ordinary beta
def calculate_beta_ord(x, lr):
    """Return the single-factor slope of one window.

    Regresses the bond's excess return (`return_daily` minus the `0.25` column)
    on the index's excess return (`return_corp_index` minus `0.25`). Missing
    returns are treated as 0 before the rate is subtracted.

    x  -- DataFrame window with columns 'return_daily', 'return_corp_index', '0.25'.
    lr -- estimator exposing fit(X, y) and coef_; it is re-fitted in place.
    """
    rate = x['0.25']
    market_excess = x['return_corp_index'].fillna(0).values - rate.values
    bond_excess = x['return_daily'].fillna(0) - rate
    fitted = lr.fit(market_excess.reshape(-1, 1), bond_excess)
    return fitted.coef_.item()

#calculate ordinary beta for bonds
beta_ord_columns = ['return_daily', 'return_corp_index', '0.25']
# One Rolling(40) object per SECID, over the date-indexed regression inputs.
rolling_by_bond = df_1.groupby('SECID').apply(
    lambda bond: bond.set_index('TRADEDATE')[beta_ord_columns].rolling(40))

# Fit one regression per window, bond by bond, keeping the window order.
for_beta_ord = []
for bond_windows in tqdm(rolling_by_bond):
    for window in list(bond_windows):
        for_beta_ord.append(calculate_beta_ord(window, lr))

# Results are assigned positionally to the (SECID, TRADEDATE) frame, then joined back.
df_gr = df_1.groupby(['SECID', 'TRADEDATE']).last()
df_gr['BETA_ORD'] = for_beta_ord
df_1 = df_1.join(df_gr['BETA_ORD'], on=['SECID', 'TRADEDATE'])

In [None]:
#beta default factor

#calculate default factor
# RETURN_3+ is computed on corp_index; the earlier join onto df_1 carries only the
# index levels, so attach the return here when df_1 does not have it yet.
if 'RETURN_3+' not in df_1.columns:
    df_1 = df_1.join(corp_index.set_index('TRADEDATE')['RETURN_3+'], on='TRADEDATE')

# DEF = return of the 3+ corporate index minus the '5' g-curve column.
df_1['DEF'] = df_1['RETURN_3+'] - df_1['5']

def calculate_beta_def(x, lr):
    """Return the loading on DEF from a two-factor regression on one window.

    Regresses the bond's excess return (`return_daily` minus `0.25`) on
    [market excess return, DEF] and returns the second coefficient. Missing
    returns are treated as 0 before the rate is subtracted.

    x  -- DataFrame window with columns 'return_daily', 'return_corp_index', 'DEF', '0.25'.
    lr -- estimator exposing fit(X, y) and coef_; it is re-fitted in place.
    """
    market_excess = (x['return_corp_index'].fillna(0) - x['0.25']).values
    # column_stack keeps every observation on its own row. Stacking the two vectors
    # into shape (2, n) and calling reshape(-1, 2) mixed values from different rows.
    factors = np.column_stack([market_excess, x['DEF'].values])
    bond_excess = x['return_daily'].fillna(0) - x['0.25']
    return lr.fit(factors, bond_excess).coef_[1]

beta_def_columns = ['return_daily', 'return_corp_index', 'DEF', '0.25']
# One Rolling(40) object per SECID, over the date-indexed regression inputs.
rolling_by_bond = df_1.groupby('SECID').apply(
    lambda bond: bond.set_index('TRADEDATE')[beta_def_columns].rolling(40))

# Fit one regression per window, bond by bond, keeping the window order.
for_beta_def = []
for bond_windows in tqdm(rolling_by_bond):
    for window in list(bond_windows):
        for_beta_def.append(calculate_beta_def(window, lr))

# Positional assignment onto the existing (SECID, TRADEDATE) frame, then join back.
df_gr['BETA_DEF'] = for_beta_def
df_1 = df_1.join(df_gr['BETA_DEF'], on=['SECID', 'TRADEDATE'])

In [None]:
#beta term
def calculate_beta_term(x, lr):
    """Return the loading on TERM from a two-factor regression on one window.

    Regresses the bond's excess return (`return_daily` minus `0.25`, missing
    returns treated as 0) on the columns ['mkt', 'TERM'] and returns the
    second coefficient.

    x  -- DataFrame window with columns 'return_daily', 'mkt', 'TERM', '0.25'.
    lr -- estimator exposing fit(X, y) and coef_; it is re-fitted in place.
    """
    factors = x[['mkt', 'TERM']]
    bond_excess = x['return_daily'].fillna(0) - x['0.25']
    fitted = lr.fit(factors, bond_excess)
    return fitted.coef_[1]

# Market factor: index return (missing treated as 0) in excess of the '0.25' rate.
df_1['mkt'] = df_1['return_corp_index'].fillna(0) - df_1['0.25']

# The earlier g_curve join brings only '0.25' and '5' onto df_1, so attach the
# '10' point here when it is not present yet.
if '10' not in df_1.columns:
    df_1 = df_1.join(g_curve.set_index('DATE')['10'], on='TRADEDATE')

# Term factor: '10' curve point minus the '0.25' point.
df_1['TERM'] = df_1['10'] - df_1['0.25']

# One regression per rolling(40) window, bond by bond, keeping the window order.
for_beta_term = [calculate_beta_term(window, lr) for windows in tqdm(df_1.groupby('SECID').apply(
lambda x:x.set_index('TRADEDATE')[['return_daily', 'mkt', 'TERM', '0.25']].rolling(40)))
                for window in list(windows)]

# Positional assignment onto the existing (SECID, TRADEDATE) frame, then join back.
df_gr['BETA_TERM'] = for_beta_term
df_1 = df_1.join(df_gr['BETA_TERM'], on=['SECID', 'TRADEDATE'])

In [None]:
#beta drf
def calculate_drf(x):
    """Return the value-weighted return spread between two RETURN_VAR10 buckets.

    Rows at or below the 20% quantile of RETURN_VAR10 form one bucket, rows at
    or above the 80% quantile form the other. Each bucket's return is the
    VALUE_1-weighted average of return_daily; the result is the low-quantile
    bucket's return minus the high-quantile bucket's return.

    x -- DataFrame with columns 'RETURN_VAR10', 'return_daily', 'VALUE_1'.
    """
    var10 = x['RETURN_VAR10']
    upper_bucket = x[var10 >= var10.quantile(0.8)]
    lower_bucket = x[var10 <= var10.quantile(0.2)]

    def value_weighted_return(bucket):
        weights = bucket['VALUE_1']
        return (bucket['return_daily'] * weights).sum() / weights.sum()

    return value_weighted_return(lower_bucket) - value_weighted_return(upper_bucket)

# One DRF value per TRADEDATE; dates where the spread is undefined (NaN) are set to 0.
drf = df_1.groupby('TRADEDATE').apply(calculate_drf).fillna(0)
# NOTE(review): calculate_drf returns a scalar, so `drf` is presumably a Series here; in that
# case this line only sets a plain attribute and the naming is done by `.name` below - confirm.
drf.columns = ['DRF']
drf.name = 'DRF'
# Broadcast the daily factor to every bond row of that date.
df_1 = df_1.join(drf, on='TRADEDATE')

def calculate_beta_drf(x, lr):
    """Return the loading on DRF from a two-factor regression on one window.

    Regresses the bond's excess return (`return_daily` minus `0.25`, missing
    returns treated as 0) on the columns ['mkt', 'DRF'] and returns the
    second coefficient.

    x  -- DataFrame window with columns 'return_daily', 'mkt', 'DRF', '0.25'.
    lr -- estimator exposing fit(X, y) and coef_; it is re-fitted in place.
    """
    factors = x[['mkt', 'DRF']]
    bond_excess = x['return_daily'].fillna(0) - x['0.25']
    fitted = lr.fit(factors, bond_excess)
    return fitted.coef_[1]

beta_drf_columns = ['return_daily', 'mkt', 'DRF', '0.25']
# One Rolling(40) object per SECID, over the date-indexed regression inputs.
rolling_by_bond = df_1.groupby('SECID').apply(
    lambda bond: bond.set_index('TRADEDATE')[beta_drf_columns].rolling(40))

# Fit one regression per window, bond by bond, keeping the window order.
for_beta_drf = []
for bond_windows in tqdm(rolling_by_bond):
    for window in list(bond_windows):
        for_beta_drf.append(calculate_beta_drf(window, lr))

# Positional assignment onto the existing (SECID, TRADEDATE) frame, then join back.
df_gr['BETA_DRF'] = for_beta_drf
df_1 = df_1.join(df_gr['BETA_DRF'], on=['SECID', 'TRADEDATE'])

In [None]:
#add info on trades up to 10
# Each lag is taken as a row shift within the bond: close, traded value and the
# day gap to the row `lag` positions earlier.
# NOTE(review): the range starts at 5, so the existing CLOSE_5 / VALUE_5 / DIST_5
# columns are overwritten by their row-shift versions here - confirm this is intended.
for lag in range(5, 11):
    lagged_close = df.groupby('SECID')['CLOSE'].shift(lag)
    df[f'CLOSE_{lag}'] = lagged_close

    lagged_value = df.groupby('SECID')['VALUE'].shift(lag)
    df[f'VALUE_{lag}'] = lagged_value

    lagged_date = df.groupby('SECID')['TRADEDATE'].shift(lag)
    df[f'DIST_{lag}'] = (df['TRADEDATE'] - lagged_date).apply(lambda gap: gap.days)

In [None]:
# Bound betas between 1st and 99th percentile
# clip() replaces three copies of the mask-multiplication winsoriser: values outside
# the band are set to the nearest bound, values inside it (and NaN) are left as they
# are, and the quantiles are computed once per column from the unclipped values.
for beta_col in ('BETA_ORD', 'BETA_DEF', 'BETA_TERM'):
    lower_bound, upper_bound = df_1[beta_col].quantile([0.01, 0.99])
    df_1[beta_col] = df_1[beta_col].clip(lower=lower_bound, upper=upper_bound)