## yfinance

In [15]:
import pandas as pd
import yfinance as yf
from pathlib import Path

In [16]:
TICKERS = ['AAPL', 'MSFT', 'TSLA', 'AMZN']
START_DATE = '2015-01-01'
END_DATE = '2025-11-01'

DATA_DIR = Path('data/stock_prices')
DATA_DIR.mkdir(parents=True, exist_ok=True)

In [25]:
def download_stock_data(ticker):
    data = yf.download(ticker, start=START_DATE, end=END_DATE)
    data.reset_index(inplace=True)

    file_path = DATA_DIR / f'{ticker}.csv'
    data.to_csv(file_path, index=False)
    print(f'Saved: {file_path}')

if __name__ == '__main__':
    for ticker in TICKERS:
        download_stock_data(ticker)
    print("✅ All data downloaded successfully.")


  data = yf.download(ticker, start=START_DATE, end=END_DATE)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=START_DATE, end=END_DATE)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=START_DATE, end=END_DATE)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=START_DATE, end=END_DATE)
[*********************100%***********************]  1 of 1 completed

Saved: data/stock_prices/AAPL.csv
Saved: data/stock_prices/MSFT.csv
Saved: data/stock_prices/TSLA.csv
Saved: data/stock_prices/AMZN.csv
✅ All data downloaded successfully.





In [26]:
import os

for file in os.listdir(DATA_DIR):
    df = pd.read_csv(DATA_DIR / file)
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    print(f"{file}: {len(df)} rows, {df['Date'].min()} → {df['Date'].max()}")

AMZN.csv: 2726 rows, 2015-01-02 00:00:00 → 2025-10-31 00:00:00
MSFT.csv: 2726 rows, 2015-01-02 00:00:00 → 2025-10-31 00:00:00
TSLA.csv: 2726 rows, 2015-01-02 00:00:00 → 2025-10-31 00:00:00
AAPL.csv: 2726 rows, 2015-01-02 00:00:00 → 2025-10-31 00:00:00


In [28]:
df = pd.read_csv(DATA_DIR / "AAPL.csv")
df.head()

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,,AAPL,AAPL,AAPL,AAPL,AAPL
1,2015-01-02,24.26105308532715,24.729276225914997,23.821677198130857,24.718180216834167,212818400
2,2015-01-05,23.577573776245117,24.110149898969926,23.391173064448488,24.030263395910563,257142000
3,2015-01-06,23.57979393005371,23.83942422525078,23.218085117057225,23.641927502270494,263188400
4,2015-01-07,23.910432815551758,24.01029009442088,23.677430240840117,23.78838352536704,160423600


## Alpha Vantage API

In [35]:
from alpha_vantage.timeseries import TimeSeries
import pandas as pd
from pathlib import Path

In [36]:
API_KEY = '0T0XEMBD6N69XGTV'
ts = TimeSeries(API_KEY, output_format='pandas')

DATA_DIR = Path("data/alpha_vantage")
DATA_DIR.mkdir(parents=True, exist_ok=True)

In [37]:
def download_stock(symbol):
    try:
        data, meta = ts.get_daily(symbol=symbol, outputsize='full')
        data.rename(columns={
            '1. open': 'Open',
            '2. high': 'High',
            '3. low': 'Low',
            '4. close': 'Close',
            '5. volume': 'Volume'
        }, inplace=True)
        data.reset_index(inplace=True)
        data.rename(columns={'date': 'Date'}, inplace=True)
        data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

        file_path = DATA_DIR / f'{symbol}.csv'
        data.to_csv(file_path, index=False)
        print(f"Saved {symbol} to {file_path}")
    except Exception as e:
        print(f'Error downloading {symbol}: {e}')

In [38]:
symbols = ["AAPL", "MSFT", "GOOGL", "TSLA", "AMZN"]
for s in symbols:
    download_stock(s)

Saved AAPL to data/alpha_vantage/AAPL.csv
Saved MSFT to data/alpha_vantage/MSFT.csv
Saved GOOGL to data/alpha_vantage/GOOGL.csv
Saved TSLA to data/alpha_vantage/TSLA.csv
Saved AMZN to data/alpha_vantage/AMZN.csv


In [40]:
import os

for file in os.listdir(DATA_DIR):
    df = pd.read_csv(DATA_DIR / file)
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    print(f"{file}: {len(df)} rows, {df['Date'].min().date()} → {df['Date'].max().date()}")

AMZN.csv: 6544 rows, 1999-11-01 → 2025-11-05
MSFT.csv: 6544 rows, 1999-11-01 → 2025-11-05
TSLA.csv: 3864 rows, 2010-06-29 → 2025-11-05
GOOGL.csv: 5339 rows, 2004-08-19 → 2025-11-05
AAPL.csv: 6544 rows, 1999-11-01 → 2025-11-05


In [41]:
df = pd.read_csv(DATA_DIR / "AAPL.csv")
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2025-11-05,268.61,271.7,266.93,270.14,42586288.0
1,2025-11-04,268.325,271.486,267.615,270.04,49274846.0
2,2025-11-03,270.42,270.85,266.25,269.05,50194583.0
3,2025-10-31,276.99,277.32,269.16,270.37,86167123.0
4,2025-10-30,271.99,274.14,268.48,271.4,69886534.0


### URL method (Alpha Vantage API)

In [42]:
import requests
import pandas as pd
from io import StringIO

API_KEY = "0T0XEMBD6N69XGTV"
symbol = "AAPL"

url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&outputsize=full&apikey={API_KEY}&datatype=csv"

response = requests.get(url)

data = pd.read_csv(StringIO(response.text))

data.to_csv(f"{symbol}.csv", index=False)

print("✅ Data downloaded and saved successfully!")
print(data.head())

✅ Data downloaded and saved successfully!
    timestamp     open     high      low   close    volume
0  2025-11-05  268.610  271.700  266.930  270.14  42586288
1  2025-11-04  268.325  271.486  267.615  270.04  49274846
2  2025-11-03  270.420  270.850  266.250  269.05  50194583
3  2025-10-31  276.990  277.320  269.160  270.37  86167123
4  2025-10-30  271.990  274.140  268.480  271.40  69886534
