<h3><b>Parsing of candle data for stocks and indexes</b></h3>

In [5]:
!pip install requests
!pip install apimoex
!pip install pandas



In [6]:
import requests
import apimoex
from datetime import datetime
import pandas as pd
import pathlib

# Reading the ticker lists (one ticker per line) for stocks and for indexes.
# Blank lines are skipped so that no request is sent for an empty ticker.
with open("TICK_DIV.txt", "r") as TICKs_file:
    TICKs = [line.rstrip() for line in TICKs_file if line.strip()]

with open("TICK_IND.txt", "r") as TICK2s_file:
    TICK2s = [line.rstrip() for line in TICK2s_file if line.strip()]

# output directory for the JSON files written at the end of this cell
pathlib.Path("Database").mkdir(parents=True, exist_ok=True)

# additional info for parsing: the candle window runs from today's month/day
# in 2020 up to today's month/day in 2025
# NOTE(review): the years are fixed, so the window does not move with the
# current year -- confirm whether a rolling five-year window was intended.
end_date = datetime.now()
day = end_date.day
month = end_date.month


def _window_date(year):
    """Return today's month and day placed in `year`, formatted as YYYY-MM-DD.

    When today is Feb 29 and `year` is not a leap year the date does not
    exist; Feb 28 of that year is used instead of building an invalid string.
    """
    try:
        shifted = end_date.replace(year=year)
    except ValueError:
        # only Feb 29 can fail here, so falling back to day 28 is always valid
        shifted = end_date.replace(year=year, day=28)
    return shifted.strftime("%Y-%m-%d")


candles_start = _window_date(2020)
candles_end = _window_date(2025)


def _collect_candles(session, tickers, tick_column, progress_label, **candle_kwargs):
    """Download candles for every ticker and return one DataFrame per ticker.

    Parameters:
        session: the requests session passed on to apimoex.
        tickers: list of ticker names to request.
        tick_column: name of the column that stores the ticker in each frame.
        progress_label: word inserted into the printed progress message.
        **candle_kwargs: extra keyword arguments forwarded unchanged to
            apimoex.get_market_candles (e.g. market="index").

    Returns:
        A list of DataFrames; tickers for which the API returned nothing
        are left out.
    """
    frames = []
    for done, ticker in enumerate(tickers, start=1):
        print(f"Parsing {progress_label} data for the necessary tickers {(done / len(tickers)) * 100} %")

        candles = apimoex.get_market_candles(
            session, ticker, start=candles_start, end=candles_end, **candle_kwargs
        )
        # skipping if there is no data for THIS ticker (each result is checked
        # on its own, never a result left over from another request)
        if not candles:
            continue

        # creating a DataFrame from the candles data and tagging it with its ticker
        frame = pd.DataFrame(candles)
        frame[tick_column] = ticker
        frames.append(frame)
    return frames


# parsing process of candles info; one HTTP session is shared by all requests
with requests.Session() as session:
    # stocks (apimoex default market)
    all_data_frames = _collect_candles(session, TICKs, 'STOCK_TICK', "candle")
    # indexes
    all_data_frames_index = _collect_candles(session, TICK2s, 'INDEX_TICK', "index", market="index")

# concatenating all the collected DataFrames into one (for stocks info)
combined_data = pd.concat(all_data_frames, ignore_index=True)
# saving the combined DataFrame to a single JSON Lines file (for stocks info)
combined_data.to_json("Database/combined_data_stock.json", orient='records', lines=True)

# concatenating all the collected DataFrames into one (for indexes info)
combined_data_index = pd.concat(all_data_frames_index, ignore_index=True)
# saving the combined DataFrame to a single JSON Lines file (for indexes info)
combined_data_index.to_json("Database/combined_data_index.json", orient='records', lines=True)

Parsing candle data for the necessary tickers 2.631578947368421 %
Parsing candle data for the necessary tickers 5.263157894736842 %
Parsing candle data for the necessary tickers 7.894736842105263 %
Parsing candle data for the necessary tickers 10.526315789473683 %
Parsing candle data for the necessary tickers 13.157894736842104 %
Parsing candle data for the necessary tickers 15.789473684210526 %
Parsing candle data for the necessary tickers 18.421052631578945 %
Parsing candle data for the necessary tickers 21.052631578947366 %
Parsing candle data for the necessary tickers 23.684210526315788 %
Parsing candle data for the necessary tickers 26.31578947368421 %
Parsing candle data for the necessary tickers 28.947368421052634 %
Parsing candle data for the necessary tickers 31.57894736842105 %
Parsing candle data for the necessary tickers 34.21052631578947 %
Parsing candle data for the necessary tickers 36.84210526315789 %
Parsing candle data for the necessary tickers 39.473684210526315 %
Pa

<h3><b>Parsing of dividends data for stocks</b></h3>

In [7]:
import xml.etree.ElementTree as ET

# the URLs for the API endpoint; a request URL is url1 + <ticker> + url2
url1 = "https://iss.moex.com/iss/securities/"
url2 = "/dividends.xml"

# rows whose registry close date lies before this year are dropped
MIN_REGISTRY_YEAR = 2019
# seconds to wait for the server before giving up on a single ticker
REQUEST_TIMEOUT = 30

# opening the txt file with the ticker names of necessary companies
# (blank lines are skipped so that no request is sent for an empty ticker)
with open("TICK_DIV.txt", "r") as TICK3s_file:
    TICK3s = [line.rstrip() for line in TICK3s_file if line.strip()]

# the output directory must exist before the JSON file is written below
pathlib.Path("Database").mkdir(parents=True, exist_ok=True)

# empty list to collect the dividend rows (one dict of XML attributes per row)
div_data = []
process3 = 0

# parsing process of dividends data; one HTTP session is shared by all requests
with requests.Session() as session:
    for ticker in TICK3s:
        process3 += 1
        print(f"Parsing dividends data for the necessary tickers {(process3 / len(TICK3s)) * 100} %")

        try:
            response = session.get(url1 + ticker + url2, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as error:
            # a connection problem or timeout for one ticker must not abort the run
            print(f"Failed to retrieve data for {ticker}: {error}")
            continue

        # in case of fail
        if response.status_code != 200:
            print(f"Failed to retrieve data, status code: {response.status_code}")
            continue

        # parsing in case of success; the raw bytes are handed to the XML parser
        root = ET.fromstring(response.content)

        for row in root.findall('.//row'):
            registry_closed_date = row.attrib.get('registryclosedate')
            # keeping only rows that have a registry close date whose first
            # four characters (the year) are at least MIN_REGISTRY_YEAR
            if registry_closed_date and int(registry_closed_date[0:4]) >= MIN_REGISTRY_YEAR:
                # storing a copy of the row's attributes as one record
                div_data.append(dict(row.attrib))

# converting to DataFrame and renaming the ticker column
df = pd.DataFrame(div_data).rename(columns={'secid': 'DIV_TICK'})

# writing the DataFrame to a JSON Lines file (one record per line)
df.to_json("Database/combined_data_dividends.json", orient='records', lines=True)

Parsing dividends data for the necessary tickers 2.631578947368421 %
Parsing dividends data for the necessary tickers 5.263157894736842 %
Parsing dividends data for the necessary tickers 7.894736842105263 %
Parsing dividends data for the necessary tickers 10.526315789473683 %
Parsing dividends data for the necessary tickers 13.157894736842104 %
Parsing dividends data for the necessary tickers 15.789473684210526 %
Parsing dividends data for the necessary tickers 18.421052631578945 %
Parsing dividends data for the necessary tickers 21.052631578947366 %
Parsing dividends data for the necessary tickers 23.684210526315788 %
Parsing dividends data for the necessary tickers 26.31578947368421 %
Parsing dividends data for the necessary tickers 28.947368421052634 %
Parsing dividends data for the necessary tickers 31.57894736842105 %
Parsing dividends data for the necessary tickers 34.21052631578947 %
Parsing dividends data for the necessary tickers 36.84210526315789 %
Parsing dividends data for 