# Download historical equity data for NASDAQ stocks from yahoo finance

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from time import time
from tqdm import tqdm
from pathlib import Path
import pandas as pd
import FinanceDataReader as fdr
import yfinance as yf

In [6]:
idx = pd.IndexSlice

In [7]:
results_path = Path('KR2_results', 'asset_pricing')
if not results_path.exists():
    results_path.mkdir(parents=True)

In [8]:
def chunks(l, n): 
    for i in range(0, len(l), n):  
        yield l[i:i + n] 

In [9]:
def format_time(t):
    """Return a formatted time string 'HH:MM:SS
    based on a numeric time() value"""
    m, s = divmod(t, 60)
    h, m = divmod(m, 60)
    return f'{h:0>2.0f}:{m:0>2.0f}:{s:0>2.0f}'

## Get KRX symbols

In [11]:
krx = fdr.StockListing('KRX')
krx = krx[krx["Market"] != "KONEX"]
krx = krx[krx["Market"] != "KOSDAQ"]
krx['Code'] = krx['Code'] + '.' + krx['Market'].apply(lambda x: 'KS')
krx = krx["Code"].to_list()
n = len(krx)
print(f'# Symbols: {n:,.0f}')

# Symbols: 998


## Download metadata from yahoo finance

### NASDAQ symbols

In [13]:
yf_codes = yf.Tickers(krx)

In [14]:
meta_data = []
start = time()
for code in tqdm(krx):
    try:
        yf_object = yf.Ticker(code)
        s = pd.Series(yf_object.get_info())
        meta_data.append(s.to_frame(code))
    except Exception as e:
        print(code, e)

print(f'Success: {len(meta_data):5,.0f} / {len(krx):5,.0f}')

  2%|▏         | 15/998 [00:08<07:43,  2.12it/s]

247540.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/247540.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


  3%|▎         | 28/998 [00:15<07:43,  2.09it/s]

091990.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/091990.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


  4%|▍         | 42/998 [00:23<07:32,  2.11it/s]

066970.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/066970.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 10%|▉         | 95/998 [00:52<07:05,  2.12it/s]

293490.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/293490.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 11%|█         | 107/998 [00:58<07:07,  2.08it/s]

263750.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/263750.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 13%|█▎        | 126/998 [01:09<07:12,  2.02it/s]

196170.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/196170.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 14%|█▍        | 143/998 [01:18<07:14,  1.97it/s]

058470.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/058470.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 15%|█▍        | 146/998 [01:20<06:31,  2.18it/s]

022100.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/022100.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 15%|█▍        | 149/998 [01:21<06:20,  2.23it/s]

278280.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/278280.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 16%|█▌        | 160/998 [01:27<06:38,  2.10it/s]

035760.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/035760.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 17%|█▋        | 170/998 [01:33<06:34,  2.10it/s]

214150.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/214150.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 17%|█▋        | 171/998 [01:33<05:40,  2.43it/s]

240810.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/240810.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 17%|█▋        | 172/998 [01:33<05:02,  2.73it/s]

056190.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/056190.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 18%|█▊        | 175/998 [01:35<05:41,  2.41it/s]

145020.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/145020.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 19%|█▉        | 192/998 [01:44<06:19,  2.12it/s]

137400.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/137400.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 20%|██        | 203/998 [01:50<06:24,  2.07it/s]

039030.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/039030.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 21%|██        | 209/998 [01:53<06:07,  2.14it/s]

213420.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/213420.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 21%|██▏       | 213/998 [01:55<05:49,  2.24it/s]

214450.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/214450.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 21%|██▏       | 214/998 [01:55<05:06,  2.56it/s]

141080.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/141080.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 22%|██▏       | 215/998 [01:55<04:34,  2.85it/s]

003380.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/003380.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 23%|██▎       | 229/998 [02:04<06:30,  1.97it/s]

195940.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/195940.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 23%|██▎       | 233/998 [02:06<05:57,  2.14it/s]

098460.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/098460.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 25%|██▍       | 248/998 [02:14<05:59,  2.09it/s]

166090.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/166090.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 26%|██▋       | 264/998 [02:23<06:01,  2.03it/s]

215000.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/215000.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 27%|██▋       | 268/998 [02:25<05:33,  2.19it/s]

215200.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/215200.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 27%|██▋       | 274/998 [02:28<05:37,  2.15it/s]

086450.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/086450.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 28%|██▊       | 280/998 [02:31<05:39,  2.12it/s]

084370.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/084370.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 29%|██▊       | 286/998 [02:34<05:37,  2.11it/s]

074600.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/074600.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 29%|██▉       | 290/998 [02:36<05:14,  2.25it/s]

348210.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/348210.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 29%|██▉       | 292/998 [02:37<04:52,  2.42it/s]

046890.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/046890.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 30%|███       | 304/998 [02:43<05:35,  2.07it/s]

084850.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/084850.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 31%|███       | 305/998 [02:44<04:48,  2.40it/s]

036830.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/036830.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 31%|███       | 306/998 [02:44<04:21,  2.65it/s]

319660.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/319660.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 35%|███▍      | 347/998 [03:07<05:16,  2.05it/s]

091700.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/091700.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 36%|███▌      | 359/998 [03:13<05:03,  2.10it/s]

183300.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/183300.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 37%|███▋      | 367/998 [03:17<04:44,  2.22it/s]

095610.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/095610.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 37%|███▋      | 371/998 [03:19<04:43,  2.21it/s]

131290.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/131290.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


 38%|███▊      | 380/998 [03:24<04:53,  2.11it/s]

243070.KS 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/243070.KS?modules=summaryProfile%2CfinancialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true


100%|██████████| 998/998 [09:33<00:00,  1.74it/s]

Success:   960 /   998





meta_data

In [15]:
df = pd.concat(meta_data, axis=1).dropna(how='all').T
df = df.apply(pd.to_numeric, errors='ignore')
df.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Index: 960 entries, 005930.KS to 000547.KS
Columns: 117 entries, address1 to fullTimeEmployees
dtypes: float64(89), int64(1), object(27)
memory usage: 885.0+ KB


In [16]:
results_path

WindowsPath('KR2_results/asset_pricing')

In [17]:
df.to_hdf(results_path / 'data.h5', 'stocks/info')

## Download adjusted price data using yfinance

In [18]:
prices_adj = []
start = time()
for i, chunk in enumerate(chunks(krx, 100), 1):
    prices_adj.append(yf.download(chunk, period='max', auto_adjust=True).stack(-1))

    per_ticker = (time()-start) / (i * 100)
    to_do = n - (i * 100)
    to_go = to_do * per_ticker    
    print(f'Success: {len(prices_adj):5,.0f}/{i:5,.0f} | To go: {format_time(to_go)} ({to_do:5,.0f})')

[*********************100%***********************]  100 of 100 completed

6 Failed downloads:
- 247540.KS: No timezone found, symbol may be delisted
- 015760.KS: No data found for this date range, symbol may be delisted
- 035900.KS: No timezone found, symbol may be delisted
- 293490.KS: No timezone found, symbol may be delisted
- 066970.KS: No timezone found, symbol may be delisted
- 091990.KS: No timezone found, symbol may be delisted
Success:     1/    1 | To go: 00:01:09 (  898)
[*********************100%***********************]  100 of 100 completed

12 Failed downloads:
- 263750.KS: No timezone found, symbol may be delisted
- 240810.KS: No timezone found, symbol may be delisted
- 278280.KS: No timezone found, symbol may be delisted
- 056190.KS: No timezone found, symbol may be delisted
- 137400.KS: No timezone found, symbol may be delisted
- 022100.KS: No timezone found, symbol may be delisted
- 237690.KS: No timezone found, symbol may be delisted
- 196170.KS: No timezone found, s

In [19]:
prices_adj = (pd.concat(prices_adj)
              .dropna(how='all', axis=1)
              .rename(columns=str.lower)
              .swaplevel())

In [20]:
prices_adj.index.names = ['ticker', 'date']

In [21]:
len(prices_adj.index.unique('ticker'))

951

### Remove outliers

In [22]:
df = prices_adj.close.unstack('ticker')
pmax = df.pct_change().max()
pmin = df.pct_change().min()
to_drop = pmax[pmax > 1].index.union(pmin[pmin<-1].index)
len(to_drop)

69

In [23]:
prices_adj = prices_adj.drop(to_drop, level='ticker')

In [24]:
len(prices_adj.index.unique('ticker'))

882

In [25]:
prices_adj

Unnamed: 0_level_0,Unnamed: 1_level_0,close,high,low,open,volume
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000100.KS,2000-01-04,3359.394287,3469.774594,3081.044475,3081.044475,728643.0
000270.KS,2000-01-04,5074.385254,5074.385254,4832.747722,4901.787017,636300.0
000720.KS,2000-01-04,283844.906250,283844.906250,258453.027565,258453.027565,70240.0
000810.KS,2000-01-04,21973.476562,22570.286194,19640.491416,19640.491416,271254.0
001450.KS,2000-01-04,791.158936,811.978908,705.797050,705.797050,1399000.0
...,...,...,...,...,...,...
35320K.KS,2023-05-08,10150.000000,10200.000000,10100.000000,10160.000000,8258.0
36328K.KS,2023-05-08,10520.000000,10590.000000,10450.000000,10500.000000,9188.0
38380K.KS,2023-05-08,11570.000000,11740.000000,11390.000000,11740.000000,11604.0
45014K.KS,2023-05-08,9700.000000,9980.000000,9700.000000,9980.000000,10562.0


In [26]:
prices_adj.sort_index().loc[idx[:, '2000': '2023'], :].to_hdf(results_path / 'data.h5', 
                                                              'stocks/prices/adjusted')

In [27]:
prices_adj

Unnamed: 0_level_0,Unnamed: 1_level_0,close,high,low,open,volume
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000100.KS,2000-01-04,3359.394287,3469.774594,3081.044475,3081.044475,728643.0
000270.KS,2000-01-04,5074.385254,5074.385254,4832.747722,4901.787017,636300.0
000720.KS,2000-01-04,283844.906250,283844.906250,258453.027565,258453.027565,70240.0
000810.KS,2000-01-04,21973.476562,22570.286194,19640.491416,19640.491416,271254.0
001450.KS,2000-01-04,791.158936,811.978908,705.797050,705.797050,1399000.0
...,...,...,...,...,...,...
35320K.KS,2023-05-08,10150.000000,10200.000000,10100.000000,10160.000000,8258.0
36328K.KS,2023-05-08,10520.000000,10590.000000,10450.000000,10500.000000,9188.0
38380K.KS,2023-05-08,11570.000000,11740.000000,11390.000000,11740.000000,11604.0
45014K.KS,2023-05-08,9700.000000,9980.000000,9700.000000,9980.000000,10562.0
