# Get Stock Data From Yahoo Finance

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import yfinance as yf
import os
# Progress bar
from tqdm.notebook import tqdm
# Stack trace
import traceback

# Stock data instructions
instructions_dir = "stock_data"
stock_dir = "stock_data_trunc"
ins_path = os.path.join(instructions_dir, "stock_list.csv")
instructions = pd.read_csv(
    ins_path,
    index_col="ticker",
)
instructions

Unnamed: 0_level_0,start,end
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,2020-03-09,2023-12-16
ACGLO,2023-11-28,2023-12-16
AMD,2016-12-12,2024-01-09
BABA,2014-11-07,2023-12-16
BHFAL,2023-11-30,2023-12-16
BPYPO,2023-11-30,2023-12-16
BROGW,2023-11-30,2023-12-16
CLSN,2009-09-28,2023-12-16
DIS,2019-06-13,2023-12-16
GILD,2009-09-11,2023-12-16


In [2]:
failed_stocks = set()

# Get and store data from Yahoo! Finance
for ticker in tqdm(instructions.index, desc="Tickers Completed"):
    try:
        # Get price data
        data = yf.download(
            ticker,
            start=instructions.loc[ticker, "start"],
            end=instructions.loc[ticker, "end"],
            interval="1d",
            actions=True,
            repair=True
        )

        # Format data
        data.columns = data.columns.get_level_values("Price")
        data.columns.name = None
        data.columns = data.columns.to_list()
        data.drop(columns=["Repaired?"], inplace=True)
        data.rename(columns={label : label.lower() for label in data.columns})
        
        # Add to stocks directory
        path = os.path.join(stock_dir, f"{ticker}.csv")
        data.to_csv(path)
    except Exception:
        failed_stocks.add(ticker)
        print(f"On stock {ticker}:")

print(failed_stocks)

Tickers Completed:   0%|          | 0/22 [00:00<?, ?it/s]

  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BROGW']: YFPricesMissingError('possibly delisted; no price data found  (1d 2023-11-30 -> 2023-12-16)')
Traceback (most recent call last):
  File "C:\Users\karth\AppData\Local\Temp\ipykernel_19248\1612454548.py", line 20, in <module>
    data.drop(columns=["Repaired?"], inplace=True)
  File "C:\Users\karth\anaconda3\envs\finbert-env\lib\sit

On stock BROGW:


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['CLSN']: YFTzMissingError('possibly delisted; no timezone found')
Traceback (most recent call last):
  File "C:\Users\karth\AppData\Local\Temp\ipykernel_19248\1612454548.py", line 20, in <module>
    data.drop(columns=["Repaired?"], inplace=True)
  File "C:\Users\karth\anaconda3\envs\finbert-env\lib\site-packages\pandas\core\frame.py", line 5581, in drop
    return super().drop(
  File "C:\Users\karth\anaconda3\envs\finbert-env\lib\site-packages\pandas\core\generic.py", line 4788, in drop
    obj = obj._drop_axis(labels, axis, level=level, errors=errors)
  File "C:\Users\karth\anaconda3\envs\finbert-env\lib\site-packages\pandas\core\generic.py", line 4830, in _drop_axis
    new_axis = axis.drop(labels, errors=errors)
  File "C:\Users\karth\anaconda3\envs\finbert-env\lib\site-packages\pandas\core\indexes\base.py", line 7070, in drop
    raise KeyError(f"{labels[mask].tolist()} not found in axis")
K

On stock CLSN:


[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(

['BROGW', 'CLSN']





In [9]:
import pickle

# Useful ticker filter
useful = instructions.query("ticker not in @failed_stocks")

# Keep track of working stocks
with open("stocks.pkl", "wb") as downloaded:
    pickle.dump(useful, downloaded)

print(useful.shape[0])
useful

20


Unnamed: 0_level_0,start,end
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,2020-03-09,2023-12-16
ACGLO,2023-11-28,2023-12-16
AMD,2016-12-12,2024-01-09
BABA,2014-11-07,2023-12-16
BHFAL,2023-11-30,2023-12-16
BPYPO,2023-11-30,2023-12-16
DIS,2019-06-13,2023-12-16
GILD,2009-09-11,2023-12-16
GOOG,2018-11-13,2023-12-16
INTC,2017-12-26,2023-12-16
