In [None]:
# Disabled snippet kept as a bare string literal: nothing inside it is executed.
# It shows how to list the TQBR board securities (SECID, REGNUMBER, LOTSIZE, SHORTNAME)
# through apimoex.ISSClient and load them into a DataFrame indexed by SECID.
"""request_url = ('https://iss.moex.com/iss/engines/stock/'
               'markets/shares/boards/TQBR/securities.json')
arguments = {'securities.columns': ('SECID,'
                                    'REGNUMBER,'
                                    'LOTSIZE,'
                                    'SHORTNAME')}
with requests.Session() as session:
    iss = apimoex.ISSClient(session, request_url, arguments)
    data = iss.get()
    df = pd.DataFrame(data['securities'])
    df.set_index('SECID', inplace=True)
    print(df)"""

In [None]:
# Uncomment to install the third-party dependencies into the kernel's environment:
# %pip install apimoex
# %pip install lxml

In [None]:
import datetime
import os
import pathlib
import time
import warnings
import xml.etree.ElementTree as ET
from datetime import datetime as extra_datetime
from os import path

import apimoex
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm

# Silence every warning raised by the libraries used in this notebook.
warnings.filterwarnings('ignore')
# Root directory that receives all CSV files written by the parsers below.
# Set the PROJECT_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original location is used.
parent_dir = os.environ.get('PROJECT_DATA_DIR', r"/Users/andrewnizov/Desktop/АнДан/Проект")

## Stock quotes parser
API MOEX

In [None]:
# Company list read from the working directory. The cells below rely on the
# columns `sector`, `country` and `ticker`:
# |sector|country|ticker|...|
companies = pd.read_csv('companies_list.csv')

In [None]:
# Keep only the rows whose country is 'RU' and show how many there are.
is_russian = companies['country'] == 'RU'
companies_ru = companies[is_russian]
companies_ru.shape

In [None]:
# Maps a company's sector (as spelled in the `sector` column) to the sub-folder
# of `parent_dir` that receives its quote files.
_SECTOR_FOLDERS = {
    'Renewable Energy': 'renewable_energy',
    'Healthcare': 'healthcare_services',
    'Financial Services': 'fintech',
    'Industrials': 'industrial_goods',
}


def _save_quotes(df, sector, ticker, suffix):
    '''Write `df` to <parent_dir>/<sector folder>/<ticker>_<suffix>.csv; other sectors are skipped.'''
    folder = _SECTOR_FOLDERS.get(sector)
    if folder is None:
        return
    df.to_csv(path.join(parent_dir, folder, f'{ticker}_{suffix}.csv'))


def _fetch_fmp_history(ticker, today, fmp_api_keys):
    '''Return the FMP 4-hour history of `ticker` as a DataFrame, trying each API key in turn.

    Raises ValueError when none of the keys yields a list of records.
    '''
    link = f"https://financialmodelingprep.com/api/v3/historical-chart/4hour/{ticker}"
    for key_number, key in enumerate(fmp_api_keys, start=1):
        try:
            response = requests.get(link, params={'to': today, 'apikey': key}, timeout=30)
            # Without this call requests never raises HTTPError, so a rejected key went unnoticed.
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as error:
            # The key itself is deliberately not printed: notebook output gets shared.
            print(f"Request for {ticker} failed with API key #{key_number} ({error}), switching keys.")
            continue

        if isinstance(payload, list):
            return pd.DataFrame(payload)

        # NOTE(review): presumably FMP reports errors (e.g. an exhausted quota) as a JSON
        # object instead of a list of candles - confirm against the FMP documentation.
        print(f"API key #{key_number} returned no data for {ticker}, switching keys.")

    raise ValueError(f"None of the FMP API keys returned data for {ticker}")


def parse_tickers(companies, fmp_api_keys):
    '''
    Download quote history for every listed company and store one CSV per ticker.

    Rows with country 'RU' are fetched from MOEX through apimoex and saved as
    <ticker>_RU.csv; rows with country 'USA' are fetched from the FMP API and saved
    as <ticker>_USA.csv. Files go to the sector sub-folder of `parent_dir`; rows
    whose sector has no sub-folder are not saved.

    Parameters:
        companies: DataFrame with the columns `sector`, `country` and `ticker`.
        fmp_api_keys: iterable of FMP API keys, tried in order for every ticker.

    Returns:
        None. Processing of USA tickers stops at the first ticker that no key can fetch.
    '''
    # Sets the trading board mode as 'TQBR' which is intended for highly liquid and capitalized shares.
    board = 'TQBR'
    today = str(datetime.date.today())

    # Russian companies are filtered from the given dataset and fetched through one shared session.
    companies_ru = companies[companies.country == 'RU']
    with requests.Session() as session:
        for _, row in tqdm(companies_ru.iterrows(), desc='Processing russian stock', total=companies_ru.shape[0]):
            # Trading history of the security on the chosen board; the result is a list of
            # dictionaries that can be converted directly into a pandas.DataFrame.
            data = apimoex.get_board_history(session, row.ticker, board=board)

            if not data:
                # Nothing to store: skip instead of leaving an empty CSV behind.
                print('Empty set')
                continue

            _save_quotes(pd.DataFrame(data), row.sector, row.ticker, 'RU')

    # American companies are fetched from FMP, rotating through the supplied API keys.
    companies_usa = companies[companies.country == 'USA']
    for _, row in tqdm(companies_usa.iterrows(), desc='Processing USA stock', total=companies_usa.shape[0]):
        try:
            df = _fetch_fmp_history(row.ticker, today, fmp_api_keys)
        except ValueError as e:
            # Every key failed, so further requests would fail the same way.
            print(e)
            break

        if df.empty:
            print('Empty set')
            continue

        _save_quotes(df, row.sector, row.ticker, 'USA')
            


In [None]:
# NOTE(review): FMP_API_KEY_1 / FMP_API_KEY_2 are not defined anywhere in the visible
# notebook, so this cell presumably depends on leftover kernel state - confirm, and load
# the keys from configuration instead of pasting the secrets into a cell.
FMP_API_KEYS = [FMP_API_KEY_1, FMP_API_KEY_2]
# Downloads the RU and USA quotes and writes one CSV per ticker under parent_dir.
parse_tickers(companies, FMP_API_KEYS)

## USD/RUB parser
CBR XML

In [None]:
def parse_usdrub():
    '''
    Download the USD/RUB exchange rate from the CBR XML service and save it as CSV.

    The series covers 01.01.2010 up to today. The result is a one-column frame
    ('usdrub') indexed by date in ascending order, written to
    <parent_dir>/usdrub_rates/data_usdrub.csv.

    Returns:
        None.

    Raises:
        requests.HTTPError: if the CBR service answers with an error status.
    '''
    # Dates in DD/MM/YYYY format
    start_date = datetime.date(2010, 1, 1).strftime('%d/%m/%Y')
    end_date = datetime.date.today().strftime('%d/%m/%Y')

    # URL creating for the further request
    url = f'https://www.cbr.ru/scripts/XML_dynamic.asp?date_req1={start_date}&date_req2={end_date}&VAL_NM_RQ=R01235'
    response_usd = requests.get(url, timeout=30)
    # Fail loudly instead of parsing an error page into an empty table.
    response_usd.raise_for_status()

    tree_usd_rate = BeautifulSoup(response_usd.content, 'html.parser')

    dates = []
    usd_rates = []

    # Forms two lists with dates and exchange rates, that will be used for the final dataframe
    for line in tree_usd_rate.find_all('record'):
        # Keep real datetime objects: handing 'DD.MM.YYYY' strings to pd.to_datetime
        # without a format lets pandas guess the day/month order.
        dates.append(extra_datetime.strptime(line.get('date'), '%d.%m.%Y'))
        # The rate is read with a decimal comma.
        usd_rates.append(float(line.value.text.replace(',', '.')))

    usdrub = pd.DataFrame(data=usd_rates, index=pd.to_datetime(dates), columns=['usdrub'])
    usdrub_final = usdrub.sort_index()

    # Code saves the result to CSV file
    usdrub_final.to_csv(path.join(parent_dir, 'usdrub_rates', 'data_usdrub.csv'))

In [None]:
# Download the USD/RUB history and write it to usdrub_rates/data_usdrub.csv under parent_dir.
parse_usdrub()

## Precious metals prices parser
CBR XML

* 1 - gold
* 2 - silver
* 3 - platinum
* 4 - palladium

In [None]:
def parse_precious_metals_rates():
    '''
    Download the CBR buy prices of four precious metals and return them as a DataFrame.

    The series covers 01.01.2010 up to today. Each record is assigned to its metal by
    the record's `code` attribute (1 - gold, 2 - silver, 3 - platinum, 4 - palladium),
    so a date on which one metal is missing yields NaN for that metal only.

    Returns:
        DataFrame indexed by date (ascending) with the columns 'gold_rates',
        'silver_rates', 'platinum_rates' and 'palladium_rates'.

    Raises:
        requests.HTTPError: if the CBR service answers with an error status.
    '''
    column_by_code = {
        '1': 'gold_rates',
        '2': 'silver_rates',
        '3': 'platinum_rates',
        '4': 'palladium_rates',
    }

    # Dates in DD/MM/YYYY format
    start_date = datetime.date(2010, 1, 1).strftime('%d/%m/%Y')
    end_date = datetime.date.today().strftime('%d/%m/%Y')

    # URL creating for the further request
    url_1 = f'https://www.cbr.ru/scripts/xml_metall.asp?date_req1={start_date}&date_req2={end_date}'
    response_precious = requests.get(url_1, timeout=30)
    # Fail loudly instead of parsing an error page into an empty table.
    response_precious.raise_for_status()

    tree_precious_rate = BeautifulSoup(response_precious.content, 'html.parser')

    # date -> {column name: buy price}; insertion order follows the document.
    prices_by_date = {}
    for stat in tree_precious_rate.find_all('record'):
        column = column_by_code.get(stat.get('code'))
        if column is None:
            # Not one of the four metals tracked here.
            continue
        # Real datetime keys avoid any day/month guessing later on.
        day = extra_datetime.strptime(stat.get('date'), '%d.%m.%Y')
        # The price is read with a decimal comma.
        prices_by_date.setdefault(day, {})[column] = float(stat.buy.text.replace(',', '.'))

    precious_metals = pd.DataFrame(
        list(prices_by_date.values()),
        index=pd.to_datetime(list(prices_by_date.keys())),
        columns=list(column_by_code.values()),
    )
    final_precious_metals = precious_metals.sort_index()

    # Saving to <parent_dir>/precious_metals/data_precious_metals.csv is intentionally
    # disabled; callers receive the frame instead.
    return final_precious_metals

In [None]:
# Fetch and display the precious-metal quotes (the function returns the frame, it does not save it).
parse_precious_metals_rates()

In [None]:
# Download the quotes once and slice all four series from the same frame, so they
# share one index and the CBR service is not queried four times.
precious_metals = parse_precious_metals_rates()
gold_price = precious_metals['gold_rates']
silver_price = precious_metals['silver_rates']
platinum_price = precious_metals['platinum_rates']
palladium_price = precious_metals['palladium_rates']
# Common date axis for the plots below.
x = gold_price.index

In [None]:
# Draw on the axes created here instead of the implicit pyplot state, and label the
# figure so it can be read on its own.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(x, palladium_price)
ax.set_title('Palladium buy price (CBR)')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
plt.show()

## Парсинг календаря расчетных дней 
для бинарного признака (есть торги/нет)

## Interbank market rates parser
CBR XML

**Code = 3: Weighted average actual rates on ruble loans provided by Moscow banks.**

In [None]:
def parse_interbank_market_rates():
    '''
    Download the interbank credit market rates from the CBR XML service and save them as CSV.

    Only records with code '3' are kept and the value of their `c1` element is used.
    The series covers 01.01.2010 up to today and is written, indexed by date in
    ascending order, to <parent_dir>/interbank_credit_market_rates/data_interbank_rates.csv.
    A missing value (marked '-' by the service) is stored as NaN so the column stays numeric.

    Returns:
        None.

    Raises:
        requests.HTTPError: if the CBR service answers with an error status.
    '''
    # Dates in DD/MM/YYYY format
    start_date = datetime.date(2010, 1, 1).strftime('%d/%m/%Y')
    end_date = datetime.date.today().strftime('%d/%m/%Y')

    # URL creating for the further request
    url_2 = f'https://www.cbr.ru/scripts/xml_mkr.asp?date_req1={start_date}&date_req2={end_date}'
    response_credit_rate = requests.get(url_2, timeout=30)
    # Fail loudly instead of parsing an error page into an empty table.
    response_credit_rate.raise_for_status()

    tree_credit_rate = BeautifulSoup(response_credit_rate.content, 'html.parser')

    dates = []
    interbank_credit_market_rates = []

    # Forms two lists with dates and rates, that will be used for the final dataframe
    for line in tree_credit_rate.find_all('record'):
        if line.get('code') != '3':
            continue

        # NOTE(review): other CBR endpoints in this notebook use 'DD.MM.YYYY'; accept both
        # '/' and '.' separators here - confirm which one xml_mkr.asp really returns.
        raw_date = line.get('date').replace('/', '.')
        # Real datetime objects avoid any day/month guessing by pd.to_datetime.
        dates.append(extra_datetime.strptime(raw_date, '%d.%m.%Y'))

        cell = line.c1
        raw_rate = cell.text.strip() if cell is not None else '-'
        if raw_rate in ('', '-'):
            interbank_credit_market_rates.append(float('nan'))
        else:
            interbank_credit_market_rates.append(float(raw_rate.replace(',', '.')))

    interbank_rates = pd.DataFrame(data=interbank_credit_market_rates, index=pd.to_datetime(dates), columns=['Interbank credit market rates'])
    interbank_rates_final = interbank_rates.sort_index()

    # Code saves the result to CSV file
    interbank_rates_final.to_csv(path.join(parent_dir, 'interbank_credit_market_rates', 'data_interbank_rates.csv'))

In [None]:
# Download the code-3 interbank rates and write them to
# interbank_credit_market_rates/data_interbank_rates.csv under parent_dir.
parse_interbank_market_rates()

## Парсер FMP

In [None]:
# Single probe request against the FMP 4-hour history endpoint for one ticker.
ticker = 'IMPP'
date = str(datetime.date.today())
# The key is read from the FMP_API_KEY environment variable instead of being stored
# in the notebook. NOTE(review): the key that used to be hard-coded here has been
# exposed and should be revoked.
APIKEY = os.environ['FMP_API_KEY']

link = f"https://financialmodelingprep.com/api/v3/historical-chart/4hour/{ticker}?to={date}&apikey={APIKEY}"
response = requests.get(link, timeout=30)

In [None]:
# A list of records becomes one row per record; any other JSON value (presumably an
# error object - confirm against the FMP documentation) is shown as a single row.
# Forcing index=[0] raised ValueError as soon as the list held more than one record.
fmp_payload = response.json()
pd.DataFrame(fmp_payload if isinstance(fmp_payload, list) else [fmp_payload]).head()

In [None]:
# Disabled earlier draft of the USA download loop, kept as a bare string literal:
# nothing inside it is executed. It retried with the second API key when building
# the DataFrame from the first response raised ValueError.
'''    for indx, row in tqdm(companies_usa.iterrows(), desc = 'Processing USA stock', total = companies_usa.shape[0]):
        try:
            link = f"https://financialmodelingprep.com/api/v3/historical-chart/4hour/{row.ticker}?to={today}&apikey={fmp_api_keys[0]}"
            df = pd.DataFrame(requests.get(link).json())
        except ValueError:
            link = f"https://financialmodelingprep.com/api/v3/historical-chart/4hour/{row.ticker}?to={today}&apikey={fmp_api_keys[1]}"
            df = pd.DataFrame(requests.get(link).json())'''