In [1]:
# Imports / Configs / Global vars

# Import of native python tools
import os

# Visualization libraries
# import plotly.express as px

# Logging configuraiton
import logging
logging.basicConfig(format='[ %(asctime)s ][ %(levelname)s ]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
logger = logging.getLogger()
logger.setLevel(logging.INFO)


# Ipython configs
from IPython.core.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell
display(HTML("<style>.container { width:90% !important; }</style>"))
InteractiveShell.ast_node_interactivity = 'all'

# Pandas configs
import pandas as pd
pd.options.display.max_rows = 350
pd.options.display.max_columns = 250

# Jupyter configs
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

  from IPython.core.display import display, HTML


# Data

### Kase: reading & preparation

In [2]:
# Reading KASE
KASE_PATH = '../data/01_raw/Kase'


fpaths = (KASE_PATH + '/' + pd.Series(os.listdir(KASE_PATH)))
fpaths = fpaths[fpaths.str.contains('.xlsx')].to_list()


def filter_completely_missing_fields(df: pd.DataFrame) -> pd.DataFrame:
    return df[~df.T.isna().all()].reset_index(drop=True)


dataframes = []


for fpath in fpaths:
    logging.info(f'Start processing: {fpath}...')
    date = pd.read_excel(fpath, dtype=str, nrows=1, engine='openpyxl').columns[1]
    df = pd.read_excel(fpath, skiprows=2, dtype=str, engine='openpyxl')
    df = filter_completely_missing_fields(df)
    df['date'] = date
    dataframes.append(df)
    logging.info(f'Finish processing: {fpath}...')

df = pd.concat(dataframes).reset_index(drop=True)
df = df.rename(columns={
    '№ п/п': '#',
    'Торговый код': 'ticker',
    'Вид ценной бумаги': 'type',
    'Краткое наименование эмитента': 'emmitent',
    'Расчетная цена': 'calculated_cost',
    'Расчетная грязная': 'gross_cost',
    'Доходность до погашения, % годовых': 'relative_revenue',
    'Дней до погашения': 'days_to_repay',
    'Единица измерения цены': 'price_unit',
    'Тип цены': 'cost_type',
})
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%Y')
del dataframes

[ 05/06/2023 06:29:05 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  27_03_2023.xlsx...
[ 05/06/2023 06:29:05 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  27_03_2023.xlsx...
[ 05/06/2023 06:29:05 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  20_04_2022.xlsx...
[ 05/06/2023 06:29:05 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  20_04_2022.xlsx...
[ 05/06/2023 06:29:05 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  21_02_2022.xlsx...
[ 05/06/2023 06:29:05 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  21_02_2022.xlsx...
[ 05/06/2023 06:29:05 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  20_03_2023.xlsx...
[ 05/06/2023 06:29:05 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  20_03_2023.xlsx...
[ 05/06/2023 06:29:05 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  

[ 05/06/2023 06:29:06 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  10_02_2022.xlsx...
[ 05/06/2023 06:29:06 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  10_02_2022.xlsx...
[ 05/06/2023 06:29:06 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  14_01_2022.xlsx...
[ 05/06/2023 06:29:07 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  14_01_2022.xlsx...
[ 05/06/2023 06:29:07 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  17_02_2022.xlsx...
[ 05/06/2023 06:29:07 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  17_02_2022.xlsx...
[ 05/06/2023 06:29:07 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  15_07_2022.xlsx...
[ 05/06/2023 06:29:07 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  15_07_2022.xlsx...
[ 05/06/2023 06:29:07 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/e

[ 05/06/2023 06:29:08 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  05_04_2022.xlsx...
[ 05/06/2023 06:29:08 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  04_02_2022.xlsx...
  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")
[ 05/06/2023 06:29:08 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  04_02_2022.xlsx...
[ 05/06/2023 06:29:08 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  29_12_2022.xlsx...
[ 05/06/2023 06:29:08 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  29_12_2022.xlsx...
[ 05/06/2023 06:29:08 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  03_05_2023.xlsx...
[ 05/06/2023 06:29:08 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  03_05_2023.xlsx...
[ 05/06/2023 06:29:08 PM ][ INFO ]: Start processing: ../da

  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")
[ 05/06/2023 06:29:09 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  04_11_2022.xlsx...
[ 05/06/2023 06:29:09 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  08_09_2022.xlsx...
[ 05/06/2023 06:29:09 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  08_09_2022.xlsx...
[ 05/06/2023 06:29:09 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  07_12_2022.xlsx...
[ 05/06/2023 06:29:09 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  07_12_2022.xlsx...
[ 05/06/2023 06:29:09 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  28_07_2022.xlsx...
[ 05/06/2023 06:29:09 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  28_07_2022.xlsx...
[ 05/06/2023 06:29:09 PM ][ INFO ]: Start process

[ 05/06/2023 06:29:10 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  30_12_2022.xlsx...
[ 05/06/2023 06:29:10 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  30_12_2022.xlsx...
[ 05/06/2023 06:29:10 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  18_07_2022.xlsx...
[ 05/06/2023 06:29:10 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  18_07_2022.xlsx...
[ 05/06/2023 06:29:10 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  19_01_2022.xlsx...
[ 05/06/2023 06:29:10 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  19_01_2022.xlsx...
[ 05/06/2023 06:29:10 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  17_08_2022.xlsx...
[ 05/06/2023 06:29:10 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  17_08_2022.xlsx...
[ 05/06/2023 06:29:10 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл

[ 05/06/2023 06:29:11 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  17_02_2023.xlsx...
  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")
[ 05/06/2023 06:29:11 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  17_02_2023.xlsx...
[ 05/06/2023 06:29:11 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  01_12_2022.xlsx...
[ 05/06/2023 06:29:11 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  01_12_2022.xlsx...
[ 05/06/2023 06:29:11 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  02_11_2022.xlsx...
[ 05/06/2023 06:29:11 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  02_11_2022.xlsx...
[ 05/06/2023 06:29:11 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  06_12_2022.xlsx...
[ 05/06/2023 06:29:11 PM ][ INFO ]: Finish processing: ../da

[ 05/06/2023 06:29:12 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  25_04_2023.xlsx...
[ 05/06/2023 06:29:12 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  27_01_2023.xlsx...
[ 05/06/2023 06:29:12 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  27_01_2023.xlsx...
[ 05/06/2023 06:29:12 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  20_06_2022.xlsx...
[ 05/06/2023 06:29:12 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  20_06_2022.xlsx...
[ 05/06/2023 06:29:12 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  24_05_2022.xlsx...
[ 05/06/2023 06:29:12 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  24_05_2022.xlsx...
[ 05/06/2023 06:29:12 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  23_02_2023.xlsx...
[ 05/06/2023 06:29:12 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  23_0

[ 05/06/2023 06:29:13 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  23_08_2022.xlsx...
[ 05/06/2023 06:29:13 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  07_02_2023.xlsx...
  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")
[ 05/06/2023 06:29:13 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  07_02_2023.xlsx...
[ 05/06/2023 06:29:13 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  28_10_2022.xlsx...
  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")
[ 05/06/2023 06:29:13 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  28_10_2022.xlsx...
[ 05/06/2023 06:29:13 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  05_01_202.xlsx...
[ 05/06/2023 06:29:13 PM ][ INFO ]: Fin

[ 05/06/2023 06:29:14 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  25_08_2022.xlsx...
[ 05/06/2023 06:29:14 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  25_08_2022.xlsx...
[ 05/06/2023 06:29:14 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  01_02_2023.xlsx...
[ 05/06/2023 06:29:14 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  01_02_2023.xlsx...
[ 05/06/2023 06:29:14 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  06_02_2023.xlsx...
  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")
[ 05/06/2023 06:29:14 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  06_02_2023.xlsx...
[ 05/06/2023 06:29:14 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  22_08_2022.xlsx...
[ 05/06/2023 06:29:14 PM ][ INFO ]: Finish processing: ../data/01_raw/

[ 05/06/2023 06:29:15 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  18_04_2023.xlsx...
[ 05/06/2023 06:29:15 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  12_09_2022.xlsx...
[ 05/06/2023 06:29:15 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  12_09_2022.xlsx...
[ 05/06/2023 06:29:15 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  31_03_2023.xlsx...
[ 05/06/2023 06:29:15 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  31_03_2023.xlsx...
[ 05/06/2023 06:29:15 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  15_09_2022.xlsx...
[ 05/06/2023 06:29:15 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  15_09_2022.xlsx...
[ 05/06/2023 06:29:15 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  08_06_2022.xlsx...
[ 05/06/2023 06:29:15 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­

[ 05/06/2023 06:29:17 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  09_01_2023.xlsx...
[ 05/06/2023 06:29:17 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  09_01_2023.xlsx...
[ 05/06/2023 06:29:17 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  09_06_2022.xlsx...
[ 05/06/2023 06:29:17 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  09_06_2022.xlsx...
[ 05/06/2023 06:29:17 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  26_07_2022.xlsx...
[ 05/06/2023 06:29:17 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  26_07_2022.xlsx...
[ 05/06/2023 06:29:17 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  24_02_2022.xlsx...
[ 05/06/2023 06:29:17 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  24_02_2022.xlsx...
[ 05/06/2023 06:29:17 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ 

[ 05/06/2023 06:29:18 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  13_03_2023.xlsx...
[ 05/06/2023 06:29:18 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­л ­  13_03_2023.xlsx...
[ 05/06/2023 06:29:18 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  13_04_2022.xlsx...
[ 05/06/2023 06:29:18 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  13_04_2022.xlsx...
[ 05/06/2023 06:29:18 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  11_01_2022.xlsx...
[ 05/06/2023 06:29:18 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  11_01_2022.xlsx...
[ 05/06/2023 06:29:18 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  30_09_2022.xlsx...
[ 05/06/2023 06:29:18 PM ][ INFO ]: Finish processing: ../data/01_raw/Kase/ė­®ē­ė„ ę„­ė ­  30_09_2022.xlsx...
[ 05/06/2023 06:29:18 PM ][ INFO ]: Start processing: ../data/01_raw/Kase/ђл­®з­лҐ жҐ­

### Currency: reading & preparation

In [3]:
# Reading currencies
from bs4 import BeautifulSoup


def apply_aggregate_daily(currencies: pd.DataFrame) -> pd.DataFrame:
    transform_months = {
        ' января ': '.01.',
        ' февраля ': '.02.',
        ' марта ': '.03.',
        ' апреля ': '.04.',
        ' мая ': '.05.',
        ' июня ': '.06.',
        ' июля ': '.07.',
        ' августа ': '.08.',
        ' сентября ': '.09.',
        ' октября ': '.10.',
        ' ноября ': '.11.',
        ' декабря ': '.12.',
    }

    for month_str, month_num in transform_months.items():
        currencies['date'] = currencies['date'].str.replace(month_str, month_num)
    currencies['date'] = currencies['date'].str.split(' ').str[0]
    currencies.iloc[:, 1:] = currencies.iloc[:, 1:].astype(float)
    return currencies \
        .groupby('date') \
        .agg('mean') \
        .reset_index()


def prepare_headers(fpath: str) -> list[str]:
    with open(fpath) as f:
        html_file = f.read()
    bs = BeautifulSoup(html_file)
    table = bs.findAll(name='table', attrs={'class': 'table table-hover'})
    headers = list(map(
        lambda x: x.contents[1].attrs['data-base'],
        table[0].findAll('th', attrs={'colspan': ' 1 '})
    ))
    headers = ['date'] + headers
    return headers


def collect_daily_currnecy_aggregate(fpaths: list[str], headers: list[str]) -> pd.DataFrame:
    monthly_currency_report = []
    for path in fpaths:
        with open(path) as f:
            html_file = f.read()
        logging.info(f'Start processing file: {path}...')
        bs = BeautifulSoup(html_file)
        table = bs.findAll(name='table', attrs={'class': 'table table-hover'})
        rows = table[0].findAll('tbody')[0].findAll('tr')
        matrix = []
        for row in rows:
            rec_row = []
            for currency_value in row.findAll('td'):
                rec_row.append(currency_value.contents[0])
            matrix.append(rec_row)
        currencies = pd.DataFrame(matrix, columns=headers)
        currencies = apply_aggregate_daily(currencies.copy())
        monthly_currency_report.append(currencies)
        logging.info(f'Finish processing file: {path}...')
    return pd.concat(monthly_currency_report).reset_index(drop=True)


CURRENCIES_PATH = '../data/01_raw/Currencies'


fpaths = (CURRENCIES_PATH + '/' + pd.Series(os.listdir(CURRENCIES_PATH)))
fpaths = fpaths[fpaths.str.contains('.html')].reset_index(drop=True).to_list()
headers = prepare_headers(fpaths[0])
daily_curr_agg = collect_daily_currnecy_aggregate(fpaths, headers)
daily_curr_agg.iloc[:, 1:] = daily_curr_agg.iloc[:, 1:].astype(float)
daily_curr_agg['date'] = pd.to_datetime(daily_curr_agg['date'], format='%d.%m.%Y')

[ 05/06/2023 06:29:19 PM ][ INFO ]: Start processing file: ../data/01_raw/Currencies/9.html...
[ 05/06/2023 06:29:19 PM ][ INFO ]: Finish processing file: ../data/01_raw/Currencies/9.html...
[ 05/06/2023 06:29:19 PM ][ INFO ]: Start processing file: ../data/01_raw/Currencies/19.html...
[ 05/06/2023 06:29:19 PM ][ INFO ]: Finish processing file: ../data/01_raw/Currencies/19.html...
[ 05/06/2023 06:29:19 PM ][ INFO ]: Start processing file: ../data/01_raw/Currencies/5.html...
[ 05/06/2023 06:29:19 PM ][ INFO ]: Finish processing file: ../data/01_raw/Currencies/5.html...
[ 05/06/2023 06:29:19 PM ][ INFO ]: Start processing file: ../data/01_raw/Currencies/15.html...
[ 05/06/2023 06:29:19 PM ][ INFO ]: Finish processing file: ../data/01_raw/Currencies/15.html...
[ 05/06/2023 06:29:19 PM ][ INFO ]: Start processing file: ../data/01_raw/Currencies/14.html...
[ 05/06/2023 06:29:20 PM ][ INFO ]: Finish processing file: ../data/01_raw/Currencies/14.html...
[ 05/06/2023 06:29:20 PM ][ INFO ]: Sta

### Brent futures: reading & preparation

In [4]:
oil_price = pd.read_csv('../data/01_raw/Futures/Фьючерс на нефть Brent (1).csv.csv').rename(
    columns={
        'Дата': 'date',
        'Цена': 'price_per_barrel',
        'Откр.': 'price_per_barrel_open',
        'Макс.': 'price_per_barrel_max',
        'Мин.': 'price_per_barrel_min',
        'Объём': 'price_per_barrel_volume',
        'Изм. %': 'relative_diff_lag_1'
    }
)
oil_price['date'] = pd.to_datetime(oil_price['date'], format='%d.%m.%Y')
oil_price = oil_price.replace(',', '.', regex=True).replace('K|%', '', regex=True)
oil_price.iloc[:, 1:] = oil_price.iloc[:, 1:].astype(float)

In [5]:
oil_price.to_csv('../data/02_intermediate/oil_price.csv', index=False)
df.to_csv('../data/02_intermediate/securities.csv', index=False)
daily_curr_agg.to_csv('../data/02_intermediate/currencies.csv', index=False)