In [None]:
import warnings

import financedatabase as fd
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import yfinance as yf
from tqdm import tqdm

In [None]:
# Date bounds for quote collection, as ISO-formatted strings.
# Unpacked positionally into get_quote(start_date, end_date, ...).
DATES = [
    '2023-10-25',
    '2024-10-25',
]

In [None]:
def get_currency_names(url):
    """Build a space-separated string of USD currency-pair symbols.

    Reads the HTML tables found at ``url``, takes the second one, and turns
    every distinct value of its ISO-code column into ``USD<code>=x``.

    Args:
        url: Location passed to ``pd.read_html``. The second table on the page
            must have a column whose header starts with ``"ISO code"``.

    Returns:
        str: The symbols joined by single spaces, in order of first
        appearance, e.g. ``"USDEUR=x USDJPY=x"``. Missing codes and the
        placeholder ``"(none)"`` are left out.

    Raises:
        KeyError: If no column header of the second table starts with
            ``"ISO code"``.
        Exception: Anything raised by ``pd.read_html`` (or by indexing its
            result when the page has fewer than two tables) propagates
            unchanged.
    """
    table = pd.read_html(url)[1]

    # The header carries a footnote marker (e.g. "ISO code[3]") whose number
    # changes whenever the page's footnotes are renumbered, so match on the
    # stable prefix instead of the exact label.
    iso_column = next(
        (col for col in table.columns if str(col).startswith('ISO code')),
        None,
    )
    if iso_column is None:
        raise KeyError(
            f"no column starting with 'ISO code' in table 1 of {url!r}; "
            f"found columns: {list(table.columns)}"
        )

    # Drop empty cells first: a NaN would pass the '(none)' filter below and
    # then break the string formatting.
    codes = table[iso_column].dropna().unique()
    return ' '.join(f"USD{code}=x" for code in codes if code != '(none)')

In [None]:
def get_quote(start_date, end_date, symbols, candle_data=None):
    """Download price history for many symbols into one wide DataFrame.

    Args:
        start_date: Passed as ``start`` to ``Ticker.history``.
        end_date: Passed as ``end`` to ``Ticker.history``.
        symbols: Symbols in any form accepted by ``yf.Tickers``; the callers
            in this notebook pass a list and a space-separated string.
        candle_data: Names of the history columns to keep. Defaults to
            ``['Close']``.

    Returns:
        pandas.DataFrame: One column per symbol (named ``<symbol>``) when a
        single field is requested, otherwise one column per symbol and field
        (named ``<symbol>_<field>``). Columns that contain only NaN are
        dropped. An empty DataFrame is returned when nothing could be fetched.

    Notes:
        Collection is best-effort: a symbol whose history cannot be fetched,
        lacks a requested field, or cannot be aligned with the others is
        skipped, and a single summary warning is emitted at the end.
    """
    fields = ['Close'] if candle_data is None else list(candle_data)

    tickers = yf.Tickers(symbols)
    frames = {}   # symbol -> renamed history frame, in download order
    failed = []   # symbols that were skipped

    # yf.Tickers already holds a Ticker object per symbol; reuse it.
    for ticker_symbol, ticker in tqdm(tickers.tickers.items()):
        try:
            history = ticker.history(start=start_date, end=end_date)[fields]
        except Exception:
            # Deliberately broad (but not bare, so Ctrl-C still works):
            # one bad symbol must not abort a long download run.
            failed.append(ticker_symbol)
            continue

        if len(fields) != 1:
            history.columns = [f"{ticker_symbol}_{field}" for field in fields]
        else:
            history.columns = [ticker_symbol]
        frames[ticker_symbol] = history

    if not frames:
        quote_data = pd.DataFrame()
    else:
        try:
            # Single concat instead of growing the frame inside the loop.
            quote_data = pd.concat(list(frames.values()), axis=1)
        except Exception:
            # Some frame cannot be aligned with the rest; merge one by one so
            # only the offending symbols are dropped.
            quote_data = pd.DataFrame()
            for ticker_symbol, history in frames.items():
                try:
                    quote_data = pd.concat([quote_data, history], axis=1)
                except Exception:
                    failed.append(ticker_symbol)

    if failed:
        preview = ', '.join(str(name) for name in failed[:10])
        suffix = ', ...' if len(failed) > 10 else ''
        warnings.warn(
            f"get_quote: skipped {len(failed)} symbol(s) that could not be "
            f"downloaded or aligned: {preview}{suffix}",
            stacklevel=2,
        )

    # Remove columns for which no quote came back at all.
    return quote_data.dropna(axis=1, how='all')

In [None]:
# Take the list of equities from financedatabase and use it to collect quotes via yfinance.
equities = fd.Equities()
# select() is called without filters; the index of the returned frame is used as the ticker symbols.
df = equities.select()
symbols_stocks = df.index.dropna().tolist()
# DATES unpacks into (start_date, end_date); candle_data is left at its default.
stock_data = get_quote(*DATES,symbols_stocks)
# Reindex to calendar-day frequency; dates that had no row become all-NaN rows.
stock_data = stock_data.asfreq('D')
# Persist the frame as a Parquet file in the working directory.
stock_table = pa.Table.from_pandas(stock_data)
pq.write_table(stock_table, 'stock_raw.parquet')

In [None]:
# Parse currency codes from Wikipedia and use them to collect quotes via yfinance.
# get_currency_names returns one space-separated string of 'USD<code>=x' symbols.
symbols_currencies = get_currency_names('https://en.wikipedia.org/wiki/List_of_circulating_currencies')
# Request four fields per symbol, so result columns are named '<symbol>_<field>'.
candle_data = ['Open', 'High', 'Low', 'Close']
currency_data = get_quote(*DATES, symbols_currencies, candle_data)
# Reindex to calendar-day frequency; dates that had no row become all-NaN rows.
currency_data = currency_data.asfreq('D')
# Persist the frame as a Parquet file in the working directory.
currency_table = pa.Table.from_pandas(currency_data)
pq.write_table(currency_table, 'currency_raw.parquet')

In [None]:
# Replace the trailing zeros of every column with NaN.
# A zero is "trailing" when no real quote (non-zero, non-NaN) occurs at or after
# it in the same column. The whole run is cleared even when NaN gaps interrupt
# it; a column that contains no real quote at all is cleared entirely.
is_real_quote = currency_data.notna() & currency_data.ne(0)
# Reverse cumulative count: positive while a real quote exists at or below the row.
real_quote_at_or_after = is_real_quote.astype(int).iloc[::-1].cumsum().iloc[::-1].gt(0)
trailing_zeros = currency_data.eq(0) & ~real_quote_at_or_after

# mask() returns a new frame with NaN where the condition holds; then drop rows
# in which every value is NaN (this includes rows emptied by the step above).
currency_data = currency_data.mask(trailing_zeros).dropna(how='all')

# NOTE(review): this writes the cleaned frame to 'currency_raw.parquet', the same
# path an earlier cell uses for the unprocessed download, so that file is overwritten.
currency_table = pa.Table.from_pandas(currency_data)
pq.write_table(currency_table, 'currency_raw.parquet')