In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import time
import os

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Base directory from which the notebook builds its input and output paths.
# NOTE(review): absolute, machine-specific location — adjust before running elsewhere.
path = r'D:\РЭШ\Research\PostThesis\data'

## 1. Read RUONIA data

In [4]:
# Directory for the RUONIA / key-rate workbooks that this notebook writes and later reads back.
path_ruo = path + r'\ruonia_key'

In [5]:
# Load the RUONIA workbook, order the rows by the 'DT' column and use that column as the index.
df = pd.read_excel(path+r'\external_data\RC_F01_01_2017_T02_05_2024.xlsx').sort_values(by=['DT']).set_index('DT')

In [6]:
df

Unnamed: 0_level_0,ruo,vol,T,C,MinRate,Percentile25,Percentile75,MaxRate,StatusXML,DateUpdate
DT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-01-09,10.13,242.45,,,,,,,,NaT
2017-01-10,9.93,288.00,,,,,,,,NaT
2017-01-11,9.97,165.50,,,,,,,,NaT
2017-01-12,9.93,129.57,,,,,,,,NaT
2017-01-13,10.06,114.31,,,,,,,,NaT
...,...,...,...,...,...,...,...,...,...,...
2024-04-24,15.60,512.48,55.0,18.0,15.10,15.51,15.60,16.35,0.0,2024-04-25 14:11:56.410
2024-04-25,15.59,433.50,50.0,20.0,15.10,15.55,15.60,15.95,0.0,2024-04-26 14:06:40.640
2024-04-26,15.70,348.94,41.0,18.0,15.10,15.60,15.75,16.00,0.0,2024-04-27 14:05:07.210
2024-04-27,15.86,347.81,40.0,18.0,15.15,15.70,16.00,16.75,0.0,2024-05-02 14:07:39.440


In [7]:
def quarter_to_date(quarter):
    """Return the first day of a quarter written as YYYYqN (e.g. '2003q1' -> 2003-01-01)."""
    # The last character is the quarter number; it selects the quarter's opening month.
    opening_month = {'1': '01', '2': '04', '3': '07', '4': '10'}
    year_text = quarter[:4]
    iso_text = '-'.join([year_text, opening_month[quarter[-1]], '01'])
    return datetime.strptime(iso_text, '%Y-%m-%d')

In [8]:
def date_to_quarter(date):
    """Format a date as YYYYqN, e.g. 2003-09-01 becomes '2003q3'."""
    # Month number -> quarter suffix; a month outside 1..12 raises KeyError.
    suffix_by_month = {month: 'q' + str((month - 1) // 3 + 1) for month in range(1, 13)}
    year_text = str(date.year)
    return year_text + suffix_by_month[date.month]

In [9]:
def dates_to_quarters(dates):
    """Convert an iterable of dates into a list of YYYYqN labels (2003-09-01 -> '2003q3')."""
    return [date_to_quarter(single_date) for single_date in dates]

#### Aggregate RUONIA data to quarter

In [10]:
def quarterly_interest_aggregation(df, column):
    """Aggregate a per-date column into quarterly geometric means.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds the observation dates (anything
        ``dates_to_quarters`` can label). The frame is not modified.
    column : str
        Name of the column to aggregate, e.g. a gross return ``1 + rate/100``.

    Returns
    -------
    pandas.DataFrame
        One row per quarter, in order of first appearance, with the columns
        ``'quarter'`` (YYYYqN label), ``column`` (geometric mean of the rows in
        that quarter) and ``'obs'`` (number of rows in that quarter). A quarter
        that contains a missing value gets NaN as its mean.
    """
    # Group on labels derived from the index instead of writing helper columns
    # into the caller's frame; this also counts every row once even when the
    # index contains duplicate dates.
    labels = np.asarray(dates_to_quarters(df.index), dtype=object)
    grouped = df[column].groupby(labels, sort=False)
    obs = grouped.size()
    # GroupBy.prod skips NaN, so blank out groups with missing values to keep
    # the NaN propagation of a plain running product.
    product = grouped.prod().where(grouped.count() == obs)
    geometric_mean = product ** (1 / obs)
    return pd.DataFrame({'quarter': obs.index.to_numpy(),
                         column: geometric_mean.to_numpy(),
                         'obs': obs.to_numpy()})

In [11]:
# Turn the percent rate into a gross return factor (10.13 -> 1.1013) so it can be averaged geometrically.
df['ruo_return'] = 1+df['ruo']/100
df_output = quarterly_interest_aggregation(df, 'ruo_return')
# Convert the averaged factor back into a percent rate.
df_output['ruo'] = (df_output['ruo_return']-1)*100

In [12]:
df_output

Unnamed: 0,quarter,ruo_return,obs,ruo
0,2017q1,1.100449,57,10.044911
1,2017q2,1.092559,61,9.255887
2,2017q3,1.085853,65,8.585349
3,2017q4,1.079385,63,7.93847
4,2018q1,1.071343,56,7.134349
5,2018q2,1.069848,61,6.984829
6,2018q3,1.070793,65,7.079323
7,2018q4,1.073212,64,7.321224
8,2019q1,1.075731,57,7.573094
9,2019q2,1.076182,59,7.61817


In [13]:
# Save the quarterly RUONIA table; section 3 reads this workbook back when extending the var dataset.
df_output.to_excel(path_ruo + r'\ruonia_quarter.xlsx', sheet_name='quarter')
# Optional CSV export, currently disabled:
#df_output.to_csv(path_ruo + r'\ruonia_quarter.csv', encoding='utf-8')

#### Aggregate RUONIA data to year

In [14]:
def dates_to_years(dates):
    """Return the calendar year of every date in an iterable (2003-09-01 -> 2003)."""
    return [single_date.year for single_date in dates]

In [15]:
def yearly_interest_aggregation(df, column):
    """Aggregate a per-date column into yearly geometric means.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds the observation dates (anything
        ``dates_to_years`` can read a year from). The frame is not modified.
    column : str
        Name of the column to aggregate, e.g. a gross return ``1 + rate/100``.

    Returns
    -------
    pandas.DataFrame
        One row per year, in order of first appearance, with the columns
        ``'year'``, ``column`` (geometric mean of the rows in that year) and
        ``'obs'`` (number of rows in that year). A year that contains a
        missing value gets NaN as its mean.
    """
    # Group on labels derived from the index instead of writing helper columns
    # into the caller's frame; this also counts every row once even when the
    # index contains duplicate dates.
    labels = np.asarray(dates_to_years(df.index))
    grouped = df[column].groupby(labels, sort=False)
    obs = grouped.size()
    # GroupBy.prod skips NaN, so blank out groups with missing values to keep
    # the NaN propagation of a plain running product.
    product = grouped.prod().where(grouped.count() == obs)
    geometric_mean = product ** (1 / obs)
    return pd.DataFrame({'year': obs.index.to_numpy(),
                         column: geometric_mean.to_numpy(),
                         'obs': obs.to_numpy()})

In [16]:
# Same aggregation at yearly frequency, followed by conversion of the averaged factor back to a percent rate.
df_output = yearly_interest_aggregation(df, 'ruo_return')
df_output['ruo'] = (df_output['ruo_return']-1)*100
df_output

Unnamed: 0,year,ruo_return,obs,ruo
0,2017,1.089214,246,8.921407
1,2018,1.071313,246,7.13128
2,2019,1.071565,246,7.156506
3,2020,1.048358,240,4.835757
4,2021,1.056293,246,5.629321
5,2022,1.103443,246,10.344277
6,2023,1.09746,246,9.745966
7,2024,1.157051,79,15.705136


In [17]:
# Save the yearly RUONIA table; section 3 reads this workbook back when extending the var dataset.
df_output.to_excel(path_ruo + r'\ruonia_year.xlsx', sheet_name='year')
# Optional CSV export, currently disabled:
#df_output.to_csv(path_ruo + r'\ruonia_year.csv', encoding='utf-8')

## 2. Read Key rate data

In [18]:
def monthyear_to_date(monthyear):
    """Convert a month-year value of the form MM.YYYY to the first day of that month.

    Example: '03.2003' -> 2003-03-01.

    The value is stringified first, so numbers are accepted as well. A number
    such as 10.2020 prints as '10.202' because trailing zeros of the year are
    lost; the year part is therefore right-padded with zeros to four digits
    ('10.202' -> 2020, '1.2' -> 2000).

    Parameters
    ----------
    monthyear : str or number
        Month and year separated by a dot, month first.

    Returns
    -------
    datetime.datetime
        Midnight on the first day of the given month.

    Raises
    ------
    ValueError
        If the value contains no '.' separator or the parts do not form a
        valid year and month.
    """
    month_part, separator, year_part = str(monthyear).partition('.')
    if not separator:
        raise ValueError(f"expected a value of the form MM.YYYY, got {monthyear!r}")
    # Restore every trailing zero that numeric formatting stripped from the year.
    year_part = year_part.ljust(4, '0')
    return datetime.strptime(year_part + '-' + month_part + '-01', '%Y-%m-%d')

In [19]:
def monthyears_to_dates(monthyears):
    """Convert an iterable of MM.YYYY values into a list of dates."""
    return [monthyear_to_date(single_value) for single_value in monthyears]

### Key rate from monthly data

In [20]:
# Load the monthly workbook (file name: "Inflation and key rate of the Bank of Russia").
df = pd.read_excel(path+r'\external_data\Инфляция и ключевая ставка Банка России_F01_01_2017_T03_05_2024.xlsx')

In [21]:
# Parse the 'Дата' (date) column, given as MM.YYYY, into first-of-month dates.
df['date'] = monthyears_to_dates(df['Дата'])
# Copy the key rate (column header: "Key rate, % per annum") under a short name.
df['key_rate'] = df['Ключевая ставка, % годовых']
df = df.sort_values(by=['date']).set_index('date')
# Gross return factor used for geometric averaging.
df['key_return'] = 1+df['key_rate']/100

In [22]:
# Quarterly aggregation of the monthly key rate, converted back to percent.
# The workbook is an intermediate file: the "Combined" section reads it and then deletes it.
df_output = quarterly_interest_aggregation(df, 'key_return')
df_output['key_rate'] = (df_output['key_return']-1)*100
df_output.to_excel(path_ruo + r'\key_quarter_from_monthly.xlsx')

In [23]:
# Yearly aggregation of the monthly key rate, converted back to percent.
# The workbook is an intermediate file: the "Combined" section reads it and then deletes it.
df_output = yearly_interest_aggregation(df, 'key_return')
df_output['key_rate'] = (df_output['key_return']-1)*100
df_output.to_excel(path_ruo + r'\key_year_from_monthly.xlsx')

### Key rate from daily data

In [24]:
# Load the daily key-rate workbook and rename 'Дата' (date) / 'Ставка' (rate) to short names;
# the date column becomes the index.
# NOTE(review): unlike the other inputs, the rows are not sorted by date here — confirm the file is already ordered.
df = pd.read_excel(path+r'\external_data\key_rate_data.xlsx')
df = df.rename(columns={'Дата': 'date', 'Ставка': 'key_rate'}).set_index('date')
# Gross return factor used for geometric averaging.
df['key_return'] = 1+df['key_rate']/100

In [25]:
# Quarterly aggregation of the daily key rate, converted back to percent.
# The workbook is an intermediate file: the "Combined" section reads it and then deletes it.
df_output = quarterly_interest_aggregation(df, 'key_return')
df_output['key_rate'] = (df_output['key_return']-1)*100
df_output.to_excel(path_ruo + r'\key_quarter_from_daily.xlsx')

In [26]:
# Yearly aggregation of the daily key rate, converted back to percent.
# The workbook is an intermediate file: the "Combined" section reads it and then deletes it.
df_output = yearly_interest_aggregation(df, 'key_return')
df_output['key_rate'] = (df_output['key_return']-1)*100
df_output.to_excel(path_ruo + r'\key_year_from_daily.xlsx')

### Combined

Quarterly

In [27]:
# Read both quarterly key-rate tables back; the monthly-based rate is renamed to 'key_rate_m'.
df_m = pd.read_excel(path_ruo+r'\key_quarter_from_monthly.xlsx')[['quarter','key_rate']].rename(columns={'key_rate': 'key_rate_m'})
df_d = pd.read_excel(path_ruo+r'\key_quarter_from_daily.xlsx')[['quarter','key_rate']]
# Attach the daily-based rate to the monthly-based table, matching rows on 'quarter'.
df = df_m.join(df_d.set_index('quarter'), on='quarter')
# Delete the intermediate workbooks now that their contents are loaded.
os.remove(path_ruo+r'\key_quarter_from_monthly.xlsx')
os.remove(path_ruo+r'\key_quarter_from_daily.xlsx')
# Print the correlation coefficient between the two quarterly series.
print(f"{np.corrcoef(np.array(df['key_rate_m']), np.array(df['key_rate']))[0,1]:.3f}")

0.976


In [28]:
# Save the combined quarterly key-rate table ('key_rate_m' and 'key_rate' per quarter).
df.to_excel(path_ruo+r'\key_quarter.xlsx')

Yearly

In [29]:
# Read both yearly key-rate tables back; the monthly-based rate is renamed to 'key_rate_m'.
df_m = pd.read_excel(path_ruo+r'\key_year_from_monthly.xlsx')[['year','key_rate']].rename(columns={'key_rate': 'key_rate_m'})
df_d = pd.read_excel(path_ruo+r'\key_year_from_daily.xlsx')[['year','key_rate']]
# Attach the daily-based rate to the monthly-based table, matching rows on 'year'.
df = df_m.join(df_d.set_index('year'), on='year')
# Delete the intermediate workbooks now that their contents are loaded.
os.remove(path_ruo+r'\key_year_from_monthly.xlsx')
os.remove(path_ruo+r'\key_year_from_daily.xlsx')
# Print the correlation coefficient between the two yearly series.
print(f"{np.corrcoef(np.array(df['key_rate_m']), np.array(df['key_rate']))[0,1]:.3f}")

0.998


In [30]:
# Save the combined yearly key-rate table ('key_rate_m' and 'key_rate' per year).
df.to_excel(path_ruo+r'\key_year.xlsx')

## 3. Add RUONIA and Key rate to the var dataset

Look up the directory with all var files prepared in `02_Processing.ipynb`. Take only the `_var_real.xlsx` files.

In [31]:
# Directory with the var workbooks, and the names of everything currently in it.
path_var = path + r'\var'
dir_list = os.listdir(path_var)

In [32]:
dir_list

['bankdata_2017-2020_y_var.xlsx',
 'bankdata_2017-2020_y_var_real.xlsx',
 'bankdata_201709-202109_q_var.xlsx',
 'bankdata_201709-202109_q_var_real.xlsx',
 'bankdata_201903-202009_nodeacc.xlsx']

In [33]:
# Keep only the workbooks whose name ends in '_var_real.xlsx'.
# A suffix test is used so that names merely containing that text
# (for example a '.bak' copy) are not picked up.
files = [file for file in dir_list if file.endswith('_var_real.xlsx')]
files

['bankdata_2017-2020_y_var_real.xlsx',
 'bankdata_201709-202109_q_var_real.xlsx']

### Quarterly data

In [34]:
# Load the quarterly RUONIA and key-rate tables saved earlier in this notebook.
df_ruo = pd.read_excel(path_ruo + r'\ruonia_quarter.xlsx', sheet_name='quarter')
df_key = pd.read_excel(path_ruo + r'\key_quarter.xlsx')

In [35]:
# Keep only the quarterly workbooks, i.e. names ending in '_q_var_real.xlsx'.
# A suffix test is used so that names merely containing that text
# (for example a '.bak' copy) are not picked up.
files = [file for file in dir_list if file.endswith('_q_var_real.xlsx')]
files

['bankdata_201709-202109_q_var_real.xlsx']

In [36]:
# For every quarterly var workbook: attach the quarterly RUONIA ('ruo') and
# key rate ('key_rate') columns, matching rows on 'quarter', and save the
# result in the same directory with '_ruo' appended to the file stem.
for file in files:
    df = pd.read_excel(os.path.join(path_var, file))
    df_output = df.join(df_ruo[['quarter','ruo']].set_index('quarter'), on='quarter')
    df_output = df_output.join(df_key[['quarter','key_rate']].set_index('quarter'), on='quarter')
    # splitext removes only the final extension, so a stem containing dots is kept whole.
    stem = os.path.splitext(file)[0]
    df_output.to_excel(os.path.join(path_var, stem + '_ruo.xlsx'))
    # Optional CSV export, currently disabled:
    #df_output.to_csv(os.path.join(path_var, stem + '_ruo.csv'), encoding='utf-8')

### Yearly data

In [37]:
# Load the yearly RUONIA and key-rate tables saved earlier in this notebook.
df_ruo = pd.read_excel(path_ruo + r'\ruonia_year.xlsx', sheet_name='year')
# Duplicate 'year' as 'dt_year', the column name used as the join key in the merge loop of this section.
df_ruo['dt_year'] = df_ruo['year']
df_key = pd.read_excel(path_ruo + r'\key_year.xlsx')
df_key['dt_year'] = df_key['year']
#df_ruo

In [38]:
# Keep only the yearly workbooks, i.e. names ending in '_y_var_real.xlsx'.
# A suffix test is used so that names merely containing that text
# (for example a '.bak' copy) are not picked up.
files = [file for file in dir_list if file.endswith('_y_var_real.xlsx')]
files

['bankdata_2017-2020_y_var_real.xlsx']

In [39]:
# For every yearly var workbook: attach the yearly RUONIA ('ruo') and key
# rate ('key_rate') columns, matching rows on 'dt_year', and save the result
# in the same directory with '_ruo' appended to the file stem.
for file in files:
    df = pd.read_excel(os.path.join(path_var, file))
    df_output = df.join(df_ruo[['dt_year','ruo']].set_index('dt_year'), on='dt_year')
    df_output = df_output.join(df_key[['dt_year','key_rate']].set_index('dt_year'), on='dt_year')
    # splitext removes only the final extension, so a stem containing dots is kept whole.
    stem = os.path.splitext(file)[0]
    df_output.to_excel(os.path.join(path_var, stem + '_ruo.xlsx'))
    # Optional CSV export, currently disabled:
    #df_output.to_csv(os.path.join(path_var, stem + '_ruo.csv'), encoding='utf-8')