In [None]:
import datetime as dt
import json
import time
from pathlib import Path

import pandas as pd
import requests
from lxml import etree

In [None]:
# The three fund attributes are read from the same 'param_list' markup; only the
# label text inside the <span> differs, so a single template produces all of them.
_PARAM_VALUE_XPATH = (
    "//ul[contains(@class, 'param_list')]"
    "/li[span[text() = '{label}']]"
    "/div[contains(@class, 'value')]/text()"
)

INVESTMENT_OBJECT_TYPE_XPATH = _PARAM_VALUE_XPATH.format(label='Объект инвестирования')
GEOGRAPHY_XPATH = _PARAM_VALUE_XPATH.format(label='География инвестирования')
CURRENCY_XPATH = _PARAM_VALUE_XPATH.format(label='Валюта фонда')

In [None]:
# Base address of the ETF section; used both for the ticker search and as the
# prefix of every per-fund URL.
URL = 'https://investfunds.ru/etf'

# Tickers to process, in the order they are looked up and exported.
FINEX_RUB_TICKERS = """
    FXRL FXTP FXIP FXMM FXTB FXCN FXDM FXKZ FXRD FXDE
    FXWO FXRW FXGD FXRB FXRU FXIT FXIM FXUS FXES
""".split()

In [None]:
# Resolve every ticker to the numeric id that is later used to build fund URLs.
SEARCH_TIMEOUT_SECONDS = 30  # without a timeout, requests may block indefinitely

finex_tickers_investfunds_id = {}
for ticker in FINEX_RUB_TICKERS:
    http_response = requests.get(
        URL, params={'searchString': ticker}, timeout=SEARCH_TIMEOUT_SECONDS
    )
    # Fail on HTTP errors here instead of on a confusing JSON decode error below.
    http_response.raise_for_status()
    response = http_response.json()
    matches = response.get('currentResults') or []
    if not matches:
        raise LookupError(f"investfunds search returned no results for ticker {ticker!r}")
    # The first hit is taken as the fund.
    # NOTE(review): assumes the search ranks the exact ticker match first — confirm.
    finex_tickers_investfunds_id[ticker] = matches[0]['id.numeric']

In [None]:
# Query parameters sent with every chart-data request: the 'close' series from
# 1 January 2018 up to today, with dates rendered as DD.MM.YYYY.
_DATE_FORMAT = '%d.%m.%Y'
date_from = dt.date(2018, 1, 1).strftime(_DATE_FORMAT)
date_to = dt.date.today().strftime(_DATE_FORMAT)
payload_default = dict(
    action='chartData',
    data_key='close',
    date_from=date_from,
    date_to=date_to,
    needVolume=1,
)

In [None]:
def parse_etf_information(url, investfunds_id, timeout=30):
    """Scrape the descriptive attributes of one ETF from its fund page.

    Args:
        url: Base URL of the ETF section; the page fetched is
            ``{url}/{investfunds_id}``.
        investfunds_id: Identifier of the fund on the site.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict with the keys ``investemnt_object_type``, ``geography`` and
        ``currency``, each holding the first text node matched by the
        corresponding XPath constant.

    Raises:
        requests.HTTPError: If the fund page answers with an error status.
        IndexError: If one of the attributes cannot be found on the page.
    """
    # Build the address from the argument; previously the parameter was
    # overwritten with the global URL and therefore silently ignored.
    fund_url = f"{url}/{investfunds_id}"
    page_content = requests.get(fund_url, timeout=timeout)
    page_content.raise_for_status()
    doc = etree.HTML(page_content.content)

    # The misspelled 'investemnt_object_type' key is kept on purpose: the caller
    # spreads these keys into DataFrame columns, so renaming it would change
    # the saved output.
    xpath_by_field = {
        'investemnt_object_type': INVESTMENT_OBJECT_TYPE_XPATH,
        'geography': GEOGRAPHY_XPATH,
        'currency': CURRENCY_XPATH,
    }

    information = {}
    for field, xpath in xpath_by_field.items():
        matches = doc.xpath(xpath)
        if not matches:
            # Same exception type as the bare [0] lookup, but naming the culprit.
            raise IndexError(f"attribute '{field}' not found on {fund_url}")
        information[field] = matches[0]
    return information


# Download the price history and fund attributes of every resolved ticker and
# store them as one CSV file per ticker.
OUTPUT_DIR = Path('data/investfunds')
# to_csv does not create missing directories, so make sure the target exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
CHART_TIMEOUT_SECONDS = 30  # without a timeout, requests may block indefinitely

for ticker, investfunds_id in finex_tickers_investfunds_id.items():
    url_etf = f"{URL}/{investfunds_id}/1"
    chart_response = requests.get(
        url_etf, params=payload_default, timeout=CHART_TIMEOUT_SECONDS
    )
    # Fail on HTTP errors here instead of on a confusing JSON decode error below.
    chart_response.raise_for_status()
    chart_data = chart_response.json()[0]['data']
    information = parse_etf_information(URL, investfunds_id)
    # save to pandas dataframe
    df = pd.DataFrame(chart_data, columns=['dt', 'close_price'])
    # Timestamps are integer-divided by 1000 before conversion (presumably they
    # arrive in milliseconds). NOTE(review): fromtimestamp() returns naive local
    # time, so the stored values depend on the timezone of the machine.
    df['dt'] = df['dt'].apply(lambda x: dt.datetime.fromtimestamp(x // 1000))
    # Attach the fund attributes as constant columns on every row.
    df = df.assign(**information)
    # save to csv
    df.to_csv(OUTPUT_DIR / f'{ticker}.csv', header=True, index=False, sep=',')
    # Pause between funds, presumably to avoid hammering the site.
    time.sleep(1)