In [53]:
import os
import requests
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
import datetime as dt
import yfinance as yf

path = Path("data/raw")
path.mkdir(parents=True,exist_ok=True)

load_dotenv()
try:
    api_key = os.getenv("API_KEY")
except Exception as e:
    print("Error occur",e)

In [48]:
#useable function
def validate(df:pd.DataFrame,required_columns):
    missing = [c for c in required_columns if c not in df.columns ]
    return {"missing":missing,"shape":df.shape,"none_count":df.isna().sum().sum}
def time_stamp():
    return dt.datetime.now().strftime("%Y%m%d-%H%M%S")
def save_csv(df:pd.DataFrame,prefix:str,**meta):
    mid = '_'.join([f"{k}-{v}" for k,v in meta.items()])
    save_path = path / f"{prefix}_{mid}_{time_stamp()}.csv"
    df.to_csv(save_path,index=False)
    print(f"saved path:{save_path}")
    return save_path

In [49]:
#request stock data from public API alpha vantage
symbol = "NVDA"
if api_key:
    url = "https://www.alphavantage.co/query"
    params = {
        "function":"TIME_SERIES_DAILY",
        "symbol":symbol,
        "outsize":"compact",
        "data_type":"json",
        "apikey":api_key
    }
    r = requests.get(url=url,params=params)
    js = r.json()
    js.keys()
    series = js["Time Series (Daily)"]
    df_api = pd.DataFrame(series).T.rename_axis("date").reset_index()
    df_api["date"] = pd.to_datetime(df_api["date"])
    df_api.rename(columns={"1. open":"open","2. high":"high","3. low":"low","4. close":"close","5. volume":"volume"},inplace=True)
    for column in df_api.columns[1:]:
        df_api[column] = pd.to_numeric(df_api[column])
else:
    df_api = yf.download(tickers=symbol,period="6mo",interval="1d",auto_adjust=False)
    df_api = df_api.reset_index()
    df_api.rename(columns={"Open":"open","High":"high","Low":"low","Close":"close","Volume":"volume"},inplace=True)
df_api["date"].sort_values().reset_index(drop=True)

0    2025-03-25
1    2025-03-26
2    2025-03-27
3    2025-03-28
4    2025-03-31
        ...    
95   2025-08-11
96   2025-08-12
97   2025-08-13
98   2025-08-14
99   2025-08-15
Name: date, Length: 100, dtype: datetime64[ns]

In [None]:
#validate dataframe for required column 
required = ["open","high","low","close","volume","adj close"]
validate(df_api,required_columns=required)

{'missing': ['adj close'],
 'shape': (100, 6),
 'none_count': <bound method Series.sum of date      0
 open      0
 high      0
 low       0
 close     0
 volume    0
 dtype: int64>}

In [None]:
#save csv file to path
_ = save_csv(df_api, prefix='api', source='alpha' if api_key else 'yfinance', symbol=symbol)

saved path:data/raw/api_source-alpha_symbol-NVDA_20250817-212610.csv
