# Yahoo Finance API
Below are some utilities for downloading stock data in bulk via the yfinance library.

## NOTE
This is not an ideal solution, as the Yahoo Finance API is no longer maintained and many users run into rate-limit errors. The ideal solution would be to use proxies, but for the sake of simplicity we will use another API provider.

Check notebook ```stocks_notebooks.ipynb``` 

In [1]:
import pandas
import yfinance as yf
import os
from pathlib import Path
from typing import List, Dict, Union, Optional

In [None]:
FORMATS = ["csv", "json", "parquet", "feather", "arrow", "pickle"]


class YFDownloader:
    """
    This class is used to download data from Yahoo Finance API.
    ARGS:
        interval: str
            The interval of the data to download. Default is "1d".
        threads: int
            The number of threads to use for downloading. Default is 2.
        progress: bool
            Whether to show the progress of the download. Default is True.
        fpath: str
            The path to save the downloaded data. Default is './data/stocks'.
            The directory is created on construction if it does not exist.
        override: bool
            Whether to overwrite files that already exist in ``fpath``.
            Default is False (existing files are skipped).
    """

    # Maps each supported format to the DataFrame writer method that produces it.
    # pandas has no ``DataFrame.to_arrow``; "arrow" is written with ``to_feather``
    # because Feather V2 is the Arrow IPC file format.
    _WRITERS = {
        "csv": "to_csv",
        "json": "to_json",
        "parquet": "to_parquet",
        "feather": "to_feather",
        "arrow": "to_feather",
        "pickle": "to_pickle",
    }

    def __init__(self, interval: str = "1d", threads: int = 2, progress: bool = True, fpath: str = "./data/stocks", override: bool = False):
        self.interval = interval
        self.threads = threads
        self.progress = progress
        self.fpath = Path(fpath)
        self.override = override
        # Tickers of the most recent request, normalized by _format_tickers.
        self.tickers: List[str] = []
        self._create_dir(self.fpath)

    def _create_dir(self, path: Union[str, Path]) -> None:
        """Create ``path`` (and any missing parents) if it does not exist."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        Path(path).mkdir(parents=True, exist_ok=True)

    def _format_tickers(self, tickers: Union[str, List[str]]) -> Union[str, List[str]]:
        """
        Normalize ``tickers`` and store the result in ``self.tickers``.
        ARGS:
            tickers: str or list of str
                Either a string of symbols separated by commas and/or
                whitespace (including newlines), or a list/tuple/set of symbols.
        RETURNS:
            A space-separated string for string input, otherwise the
            normalized list of symbols.
        RAISES:
            TypeError: if ``tickers`` is neither a string nor a list/tuple/set.
        """
        if isinstance(tickers, str):
            # Commas are separators, not noise: "AAPL,MSFT" is two symbols.
            # split() without arguments also collapses repeated whitespace,
            # so no empty symbols are produced.
            self.tickers = tickers.replace(",", " ").split()
            return " ".join(self.tickers)
        if isinstance(tickers, (list, tuple, set)):
            cleaned = [str(symbol).strip() for symbol in tickers]
            self.tickers = [symbol for symbol in cleaned if symbol]
            return self.tickers
        # Fail loudly instead of silently reusing the tickers of a previous call.
        raise TypeError(f"tickers must be a str or a list of str, got {type(tickers).__name__}")

    @staticmethod
    def _check_format(format: str) -> None:
        """Raise ValueError if ``format`` is not one of FORMATS."""
        if format not in FORMATS:
            raise ValueError(f"Format {format} not supported, please use one of {FORMATS}")

    @staticmethod
    def _frame_for(data: pandas.DataFrame, ticker: str) -> pandas.DataFrame:
        """
        Return the part of a downloaded frame that belongs to ``ticker``.

        When the columns are not a MultiIndex the frame is returned unchanged;
        this is treated as the single-ticker case.
        """
        if not isinstance(data.columns, pandas.MultiIndex):
            return data
        top_level = data.columns.get_level_values(0)
        # NOTE(review): yfinance appears to upper-case symbols in its result
        # columns; fall back to the upper-cased key when the exact one is absent.
        if ticker not in top_level and ticker.upper() in top_level:
            return data[ticker.upper()]
        return data[ticker]

    def _save(self, data: pandas.DataFrame, ticker: str, format: str) -> None:
        """
        Write ``data`` to ``<fpath>/<ticker>.<format>``.

        An existing file is left untouched (and a message is printed) unless
        ``self.override`` is True.
        RAISES:
            ValueError: if ``format`` is not one of FORMATS.
        """
        self._check_format(format)
        fpath = Path(self.fpath) / f"{ticker}.{format}"
        if fpath.exists() and not self.override:
            print(f"ticker {ticker} already exists, skipping...")
            return
        writer = getattr(data, self._WRITERS[format])
        writer(fpath)

    def get(self, tickers: Union[str, List[str]], period: str = "max") -> pandas.DataFrame:
        """
        Download price history for ``tickers`` and return it as one DataFrame.
        ARGS:
            tickers: str or list of str
                Symbols to download; see ``_format_tickers`` for accepted forms.
            period: str
                The period passed through to ``yf.download``. Default is "max".
        RETURNS:
            The DataFrame returned by ``yf.download`` (called with
            ``group_by="ticker"``).
        RAISES:
            ValueError: if no tickers or more than 2000 tickers are given.
            TypeError: if ``tickers`` has an unsupported type.
        """
        self._format_tickers(tickers)
        if not self.tickers:
            raise ValueError("No tickers were provided")
        if len(self.tickers) > 2000:
            raise ValueError("Yahoo finance can only process 2000 tickers per hour. Too many tickers have been added, please use less than 2000 tickers")
        return yf.download(
            tickers=self.tickers,
            period=period,
            interval=self.interval,
            group_by="ticker",
            threads=self.threads,
            progress=self.progress,
        )

    def download(self, tickers: Union[str, List[str]], period: str = "max", format: str = "csv") -> None:
        """
        Download ``tickers`` and save one file per ticker in ``self.fpath``.

        This is best-effort: any error is printed rather than raised.
        ARGS:
            tickers: str or list of str
                Symbols to download; see ``_format_tickers`` for accepted forms.
            period: str
                The period passed through to ``yf.download``. Default is "max".
            format: str
                Output file format, one of FORMATS. Default is "csv".
        """
        try:
            # Validate first so a bad format does not cost a full download.
            self._check_format(format)
            data = self.get(tickers=tickers, period=period)
            for ticker in self.tickers:
                self._save(self._frame_for(data, ticker), ticker, format)
        except Exception as e:
            print(f"Error downloading data: {e}")