In [None]:
from multiprocessing import cpu_count
from concurrent.futures import ThreadPoolExecutor
import time
import ast

import requests
import pandas as pd

def get_etf_list(timeout=10):
    """Fetch the ETF item list from the Naver Finance JSON endpoint.

    Args:
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The value stored under ``result -> etfItemList`` in the JSON
        response (presumably a list of per-ETF records; verify against the
        live payload).

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    URL = 'https://finance.naver.com/api/sise/etfItemList.nhn'
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(URL, timeout=timeout)
    # Surface HTTP errors here instead of failing later while parsing the body.
    response.raise_for_status()
    etf_item_list = response.json().get('result').get('etfItemList')
    print(f'etf_item_list : {len(etf_item_list)}')
    return etf_item_list

def filter_etf_item(df, kwds):
    """Select ETF rows that pass the tab, liquidity, size and name filters.

    A row is kept when all of the following hold:
      * ``etfTabCode`` is not 1,
      * ``quant`` is above the median ``quant`` of ``df``,
      * ``marketSum`` is above twice the median ``marketSum`` of ``df``,
      * ``itemname`` matches none of ``kwds``,
      * ``itemname`` does not contain '합성', unless it also contains '인버스'.

    Args:
        df: DataFrame with ``etfTabCode``, ``quant``, ``marketSum`` and
            ``itemname`` columns.
        kwds: Iterable of strings to exclude. They are joined with ``|`` and
            used as a regular expression, so regex metacharacters keep their
            special meaning. An empty iterable disables the exclusion.

    Returns:
        A new DataFrame holding the columns at positions 0 and 2 of ``df``
        for the surviving rows, with a fresh 0..n-1 index.
    """
    names = df.itemname
    keep = (
        (df.etfTabCode != 1)
        & (df.quant > df.quant.median())
        & (df.marketSum > df.marketSum.median() * 2)
        & (~names.str.contains('합성') | names.str.contains('인버스'))
    )

    # An empty pattern matches every string, which would negate to "drop
    # everything"; only apply the keyword exclusion when there is one.
    pattern = '|'.join(kwds)
    if pattern:
        keep = keep & ~names.str.contains(pattern)

    return df.loc[keep].iloc[:, [0, 2]].reset_index(drop=True)

def get_unique_etf_item(df):
    """Keep one ETF per derived "sector" name.

    The sector is ``itemname`` with the leading non-digit run up to a space
    removed (in practice the brand prefix, e.g. ``KODEX ``), along with a
    fixed set of decorations: ``(H)``, ``Fn``, ``iSelect``, ``SOLACTIVE``,
    ``INDXX``, ``선물``, ``플러스``, ``Plus``, ``PLUS`` and any whitespace.
    Rows sharing a sector are collapsed to the first one in ``df`` order.

    Args:
        df: DataFrame with ``itemcode`` and ``itemname`` columns. It is not
            modified.

    Returns:
        DataFrame indexed by ``itemcode`` (sorted ascending) with a single
        ``itemname`` column.
    """
    # Raw string: the pattern has regex escapes (\D, \(, \s) that are invalid
    # string escapes and trigger a SyntaxWarning in a normal literal.
    sector_pattern = (
        r'(^\D+ )|(\(H\)|Fn|iSelect|SOLACTIVE|INDXX|선물|\s|플러스|Plus|PLUS)'
    )
    # assign() returns a new frame, so the caller's DataFrame stays untouched.
    with_sector = df.assign(
        sector=df.itemname.replace(sector_pattern, '', regex=True))
    first_per_sector = with_sector.groupby('sector')\
        .first()[['itemcode', 'itemname']]
    return first_per_sector.set_index('itemcode').sort_index()

def get_price(symbol, *, start_time='20230101', end_time='20991231',
              timeout=10):
    """Download daily price rows for one symbol from Naver Finance.

    Args:
        symbol: Item code sent as the ``symbol`` query parameter.
        start_time: First day of the requested range, sent as ``startTime``
            (the default is in ``YYYYMMDD`` form).
        end_time: Last day of the requested range, sent as ``endTime``.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``(symbol, DataFrame)`` tuple. The frame is built from the response
        rows, using the first row as the header, with the '시가', '거래량'
        and '외국인소진율' columns removed.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError, SyntaxError: If the body is not a valid Python literal.
        KeyError: If any of the dropped columns is missing from the header.
    """
    URL = 'https://api.finance.naver.com/siseJson.naver'
    params = dict(symbol=symbol,
                  requestType=1,
                  startTime=start_time,
                  endTime=end_time,
                  timeframe='day')
    # This runs inside a thread pool; without a timeout one stalled
    # connection would block the whole batch indefinitely.
    response = requests.get(URL, params=params, timeout=timeout)
    # Fail with a clear HTTP error rather than a confusing parse error below.
    response.raise_for_status()
    # The body is parsed as a Python literal (presumably because it is not
    # strict JSON -- confirm); literal_eval only evaluates literals, never code.
    literal = response.text.replace('\n', '').replace('\t', '')
    data = ast.literal_eval(literal)
    df = pd.DataFrame(data[1:], columns=data[0])
    return (symbol, df.drop(columns=['시가', '거래량', '외국인소진율']))

def get_prices(symbols):
    """Fetch prices for many symbols concurrently with a thread pool.

    ``get_price`` is called once per symbol on a pool of ``cpu_count() * 2``
    worker threads. The worker count, the elapsed wall-clock time and the
    resulting keys are printed along the way.

    Args:
        symbols: Iterable of item codes; each is passed to ``get_price``.

    Returns:
        Dict mapping each symbol to the value ``get_price`` returned for it,
        in the order of ``symbols``.
    """
    cpu_num = cpu_count()
    worker_num = cpu_num * 2
    print(f'cpu_num : {cpu_num} → worker_num : {worker_num}')

    started = time.time()
    # Leaving the with-block waits for every submitted task to finish, so the
    # elapsed time below covers the whole batch.
    with ThreadPoolExecutor(max_workers=worker_num) as pool:
        fetched = pool.map(get_price, symbols)
    print(f'process time : {time.time() - started:.3f}s')

    # get_price yields (symbol, frame) pairs, which dict() consumes directly.
    prices = dict(fetched)
    print(prices.keys())
    return prices

# Driver: download the ETF list, filter it, keep one ETF per sector and
# fetch the daily prices of the survivors.
etf_item_df = pd.DataFrame(get_etf_list())
# Name fragments to exclude, used as regex alternatives by filter_etf_item:
# active, TR, dividend, interest rate, short-term, mixed, MSCI, India.
kwds = ['액티브','TR','배당','금리','단기','혼합','MSCI','인도']
etf_item_filterd = filter_etf_item(etf_item_df, kwds)
etf_item_unique = get_unique_etf_item(etf_item_filterd)
# get_unique_etf_item indexes its result by itemcode, so the index holds the
# symbols to download.
prices = get_prices(etf_item_unique.index)
# Bare expression: displays the dict as the notebook cell's output.
prices

etf_item_list : 805
cpu_num : 2 → worker_num : 4
process time : 19.346s
dict_keys(['091160', '091170', '091180', '102780', '102970', '108450', '114260', '114800', '122630', '132030', '133690', '138540', '139230', '139250', '139260', '139280', '143860', '144600', '148070', '150460', '157490', '192090', '203780', '204450', '213610', '227540', '228790', '228800', '228810', '233740', '241180', '244580', '245340', '245360', '251340', '252670', '261220', '261240', '261260', '261270', '267770', '271050', '290130', '292150', '292560', '298770', '300640', '302190', '304660', '305080', '305540', '305720', '309230', '314250', '325010', '326240', '329200', '360750', '364960', '364970', '364980', '365000', '367760', '367770', '371160', '371450', '371460', '371470', '381170', '381180', '385560', '390390', '394660', '395160', '395170', '395270', '395290', '396500', '400570', '401170', '411060', '412570', '441540', '446770', '449450', '455850', '455860', '461950', '462010', '462330', '464930', '465350

{'091160':            날짜     고가     저가     종가
 0    20230102  22208  21583  21703
 1    20230103  21911  21186  21871
 2    20230104  23022  21588  22988
 3    20230105  23131  22650  22824
 4    20230106  23573  22630  23474
 ..        ...    ...    ...    ...
 232  20231211  34335  33860  34160
 233  20231212  34850  34420  34720
 234  20231213  34945  34645  34720
 235  20231214  35405  34910  35315
 236  20231215  35600  35030  35115
 
 [237 rows x 4 columns],
 '091170':            날짜    고가    저가    종가
 0    20230102  5929  5774  5774
 1    20230103  5892  5708  5883
 2    20230104  6085  5826  6071
 3    20230105  6467  6085  6458
 4    20230106  6679  6415  6604
 ..        ...   ...   ...   ...
 232  20231211  6470  6415  6460
 233  20231212  6525  6470  6510
 234  20231213  6520  6455  6455
 235  20231214  6565  6520  6550
 236  20231215  6640  6570  6620
 
 [237 rows x 4 columns],
 '091180':            날짜     고가     저가     종가
 0    20230102  15463  15081  15180
 1    20230103  