In [59]:
from bs4 import BeautifulSoup, Tag
import pandas as pd
from typing import TypedDict, Optional, List

### Data types of retrieved information

In [None]:
class IndexRecord(TypedDict):
    """
    A single record (one trading day) of an index.

    All keys are required; a value of ``None`` marks a missing entry.

    Numeric fields are annotated as plain ``float``. ``pd.Float32Dtype`` is a
    pandas *column dtype* descriptor, not the type of an individual value, and
    32-bit floats cannot hold trade volumes / capitalizations of the order of
    1e12 without losing digits.
    """

    # Trading day kept as text, e.g. '27.02.2025'
    date: Optional[str]
    # Price at the opening of the session
    price_at_opening: Optional[float]
    # Highest price of the session
    max_price: Optional[float]
    # Lowest price of the session
    min_price: Optional[float]
    # Price at the close of the session
    price_at_closure: Optional[float]
    # Traded volume (presumably in RUB, as in the scraped table header - confirm)
    volume_of_trade: Optional[float]
    # Market capitalization (presumably in RUB, as in the scraped table header - confirm)
    capitalization: Optional[float]

### Scraping

In [60]:
def convert_to_df(soup_table: Tag, expected_columns: Optional[int] = 7) -> pd.DataFrame:
    """
    Converts an html table into a pandas dataframe with string values

    Column names are taken from the ``<th>`` cells of ``<thead>``; every
    ``<tr>`` of ``<tbody>`` becomes one row. Cell texts are stripped of
    surrounding whitespace and otherwise left untouched (no numeric parsing).

    Arguments:
        soup_table (Tag): table in the html format
        expected_columns (Optional[int]): number of header cells the table
            must have; pass None to accept any positive number of columns

    Returns:
        pd.DataFrame: dataframe with retrieved information

    Raises:
        ValueError: if the table has no ``<thead>`` or ``<tbody>``, has an
            unexpected number of header cells, has no rows, or contains a row
            whose number of ``<td>`` cells differs from the header
    """
    # Look the header up first: calling find_all on a missing <thead> would
    # fail with an unhelpful AttributeError on None.
    thead = soup_table.find('thead')
    if thead is None:
        raise ValueError('table has no <thead> section')

    columns = [header.text.strip() for header in thead.find_all('th')]
    if not columns:
        raise ValueError('table header contains no <th> cells')
    if expected_columns is not None and len(columns) != expected_columns:
        raise ValueError(
            f'expected {expected_columns} header cells, found {len(columns)}'
        )

    tbody = soup_table.find('tbody')
    if tbody is None:
        raise ValueError('table has no <tbody> section')

    data: List[List[str]] = []
    for row_number, row in enumerate(tbody.find_all('tr')):
        cells = [element.text.strip() for element in row.find_all('td')]
        # Each row must line up with the header, otherwise the DataFrame
        # constructor would fail or silently misalign the values.
        if len(cells) != len(columns):
            raise ValueError(
                f'row {row_number} has {len(cells)} cells, expected {len(columns)}'
            )
        data.append(cells)

    if not data:
        raise ValueError('table body contains no <tr> rows')

    return pd.DataFrame(data, columns=columns)
        

In [None]:
# Read the locally saved HTML page into memory, decoding it as UTF-8
with open('pages/page.html', encoding='UTF-8') as f:
    html = f.read()

In [32]:
# Parse the page and locate the quotes table inside its container div.
soup = BeautifulSoup(html, 'html.parser')

# Check the container before descending into it: find() returns None when
# nothing matches, and chaining .find('table') on None would raise an
# AttributeError before the table assertion could ever report the problem.
container = soup.find('div', {'class': 'ui-table__container'})
assert container is not None, "div with class 'ui-table__container' not found in the page"

table = container.find('table')
assert table is not None, "no <table> found inside the 'ui-table__container' div"

In [56]:
# Turn the located html table into a DataFrame; every cell is kept as the stripped page text (str)
df = convert_to_df(table)

In [57]:
# Preview the first rows. The numbers are still strings as they appear on the page
# (space-separated thousands, comma as decimal mark), not numeric dtypes.
df.head()

Unnamed: 0,Дата,Открытие,Максимум,Минимум,Закрытие,"Объем, RUB","Капитализация, RUB"
0,27.02.2025,"3 249,63","3 261,13","3 190,72","3 232,65","101 069 017 703,52","6 991 269 568 114,285"
1,26.02.2025,"3 338,4","3 339,41","3 233,39",3 276,"134 684 772 736,37","7 085 025 987 193,121"
2,25.02.2025,"3 362,15","3 371,06","3 320,24","3 326,14","139 815 174 833,9","7 193 456 708 246,5"
3,24.02.2025,"3 310,75","3 316,42","3 272,99","3 303,63","87 195 616 629,4","7 144 764 358 692,558"
4,21.02.2025,"3 310,03","3 320,05","3 258,82","3 283,29","94 601 047 430,17","7 100 792 330 158,207"
