In [1]:
import pandas as pd
import json

In [2]:
from auxiliary import calc_log_returns

In [3]:
# Load the notebook configuration (folder locations etc.) from config.json.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's locale default (e.g. a legacy code page on Windows).
with open('config.json', encoding='utf-8') as file:
    config = json.load(file)

In [4]:
# Resolve the input and output directories from the loaded configuration.
files_folders = config['files_folders']
SOURCE_FOLDER = files_folders['source']      # raw source files are read from here
OUTPUT_FOLDER = files_folders['processed']   # processed results are written here

# RTSI history

In [52]:
# Read the comma-separated RTSI history export from the source folder.
rtsi_csv_path = f'{SOURCE_FOLDER}/RTSI_hist_2022-10-04.csv'
rtsi = pd.read_csv(rtsi_csv_path, sep=',')

In [53]:
# Sanity check: every row of the export must belong to the RTSI ticker.
if not rtsi['<TICKER>'].eq('RTSI').all():
    raise Exception('There are extra tickers')

In [54]:
# Sanity check: every row must carry the '00:00:00' time stamp, so the
# <TIME> column holds no extra information and can be discarded later.
if not rtsi['<TIME>'].eq('00:00:00').all():
    raise Exception('Not all times are 00:00:00')

In [55]:
# Remove the service columns in place. <TICKER> and <TIME> were verified to be
# constant by the checks above; <PER> is presumably constant too — TODO confirm.
rtsi.drop(['<PER>', '<TIME>', '<TICKER>'], axis='columns', inplace=True)

In [56]:
# Parse the YYYYMMDD date stamps and use them as a sorted index.
rtsi_dates = pd.to_datetime(rtsi['<DATE>'], format='%Y%m%d')
rtsi['<DATE>'] = rtsi_dates
# verify_integrity=True makes set_index raise if any date occurs more than once.
rtsi.set_index(keys='<DATE>', verify_integrity=True, inplace=True)
rtsi.sort_index(axis=0, ascending=True, inplace=True)

In [57]:
# Simple and logarithmic returns, both derived from the <CLOSE> column.
rtsi_close = rtsi['<CLOSE>']
rtsi['return'] = rtsi_close.pct_change(periods=1)
# calc_log_returns comes from the project-local `auxiliary` module; presumably it
# yields log(close_t / close_{t-1}) per row — TODO confirm against its source.
rtsi['log return'] = calc_log_returns(rtsi_close)

# The first row has no previous close, so its simple return is NaN: drop it.
# NOTE: re-running this cell removes one more row each time.
rtsi = rtsi.iloc[1:].copy()

In [58]:
# Per-column flag: True if the column still contains at least one missing value.
rtsi.isna().any(axis=0)

<OPEN>        False
<HIGH>        False
<LOW>         False
<CLOSE>       False
<VOL>         False
return        False
log return    False
dtype: bool

In [59]:
# Display the processed RTSI frame (pandas truncates the rendering) for a visual check.
rtsi

Unnamed: 0_level_0,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>,return,log return
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1995-09-04,99.80,99.80,99.80,99.80,1000395,-0.002000,-0.002002
1995-09-05,100.42,100.42,100.42,100.42,1669902,0.006212,0.006193
1995-09-06,99.74,99.74,99.74,99.74,927829,-0.006772,-0.006795
1995-09-07,101.30,101.30,101.30,101.30,2153945,0.015641,0.015520
1995-09-08,101.30,101.30,101.30,101.30,1318431,0.000000,0.000000
...,...,...,...,...,...,...,...
2022-09-28,1055.17,1082.49,1050.69,1074.57,860170151,0.020039,0.019841
2022-09-29,1089.57,1092.62,1054.68,1072.26,746668464,-0.002150,-0.002152
2022-09-30,1094.77,1172.16,1037.78,1055.72,1940913502,-0.015425,-0.015546
2022-10-03,1080.33,1116.09,1070.10,1091.87,765414097,0.034242,0.033669


## dumping

In [60]:
# Persist the processed RTSI frame as a pickle in the output folder.
rtsi_pickle_path = f'{OUTPUT_FOLDER}/rtsi.pkl'
rtsi.to_pickle(rtsi_pickle_path)

# IMOEX history

In [43]:
# Read the comma-separated IMOEX history export from the source folder.
imoex_csv_path = f'{SOURCE_FOLDER}/IMOEX_hist_2022-10-04.csv'
imoex = pd.read_csv(imoex_csv_path, sep=',')

In [44]:
# Sanity check: every row of the export must belong to the IMOEX ticker.
if not imoex['<TICKER>'].eq('IMOEX').all():
    raise Exception('There are extra tickers')

# Sanity check: every row must carry the '00:00:00' time stamp, so the
# <TIME> column holds no extra information and can be discarded later.
if not imoex['<TIME>'].eq('00:00:00').all():
    raise Exception('Not all times are 00:00:00')

In [45]:
# Remove the service columns in place. <TICKER> and <TIME> were verified to be
# constant by the checks above; <PER> is presumably constant too — TODO confirm.
imoex.drop(['<PER>', '<TIME>', '<TICKER>'], axis='columns', inplace=True)

In [46]:
# Parse the YYYYMMDD date stamps and use them as a sorted index.
imoex_dates = pd.to_datetime(imoex['<DATE>'], format='%Y%m%d')
imoex['<DATE>'] = imoex_dates
# verify_integrity=True makes set_index raise if any date occurs more than once.
imoex.set_index(keys='<DATE>', verify_integrity=True, inplace=True)
imoex.sort_index(axis=0, ascending=True, inplace=True)

In [47]:
# Simple and logarithmic returns, both derived from the <CLOSE> column.
imoex_close = imoex['<CLOSE>']
imoex['return'] = imoex_close.pct_change(periods=1)
# calc_log_returns comes from the project-local `auxiliary` module; presumably it
# yields log(close_t / close_{t-1}) per row — TODO confirm against its source.
imoex['log return'] = calc_log_returns(imoex_close)

# The first row has no previous close, so its simple return is NaN: drop it.
# NOTE: re-running this cell removes one more row each time.
imoex = imoex.iloc[1:].copy()

In [48]:
# Per-column flag: True if the column still contains at least one missing value.
imoex.isna().any(axis=0)

<OPEN>        False
<HIGH>        False
<LOW>         False
<CLOSE>       False
<VOL>         False
return        False
log return    False
dtype: bool

In [49]:
# Display the processed IMOEX frame (pandas truncates the rendering) for a visual check.
imoex

Unnamed: 0_level_0,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>,return,log return
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1997-09-23,100.67,100.67,100.67,100.67,0,0.006700,0.006678
1997-09-24,99.94,99.94,99.94,99.94,0,-0.007251,-0.007278
1997-09-25,99.46,99.46,99.46,99.46,0,-0.004803,-0.004814
1997-09-26,98.87,98.87,98.87,98.87,0,-0.005932,-0.005950
1997-09-29,99.14,99.14,99.14,99.14,0,0.002731,0.002727
...,...,...,...,...,...,...,...
2022-09-28,1971.63,2018.94,1948.28,1963.86,49087660321,0.005324,0.005310
2022-09-29,2000.41,2005.74,1917.71,1953.77,42213898795,-0.005138,-0.005151
2022-09-30,1983.74,2004.89,1872.95,1957.31,99965222809,0.001812,0.001810
2022-10-03,1991.88,2047.54,1972.77,2041.96,44244631719,0.043248,0.042339


## dumping

In [50]:
# Persist the processed IMOEX frame as a pickle in the output folder.
imoex_pickle_path = f'{OUTPUT_FOLDER}/imoex.pkl'
imoex.to_pickle(imoex_pickle_path)