In [46]:
import yfinance as yf
import pandas as pd
from typing import Dict, List, Tuple
import numpy as np
def _unique_tickers(pairs: List[List[str]]) -> List[str]:
    """Return every ticker found in *pairs* exactly once, in first-seen order."""
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # ticker list is identical on every run (a set has no stable order).
    return list(dict.fromkeys(ticker for pair in pairs for ticker in pair))


def _flatten_columns(columns: pd.Index) -> List[str]:
    """Collapse (possibly multi-level) column labels into flat strings.

    Tuple labels such as ``('Close', 'AAPL')`` become ``'Close__AAPL'``;
    empty or ``None`` levels are skipped, so ``('Date', '')`` becomes
    ``'Date'``.
    """
    flat: List[str] = []
    for col in columns:
        # A single-level label is a plain value, not a tuple; wrap it so a
        # string label is not iterated character by character.
        levels = col if isinstance(col, tuple) else (col,)
        flat.append('__'.join(str(level) for level in levels if level not in (None, '')))
    return flat


def load_and_save() -> None:
    """Download daily price data for a fixed set of ticker pairs and save it.

    Requests auto-adjusted data from yfinance for 2020-01-01 to 2025-01-01,
    flattens the resulting columns to ``<Field>__<Ticker>`` names next to a
    ``Date`` column, and writes the table to ``stock_data.parquet`` and
    ``stock_data.csv`` in the current working directory.

    Side effects:
        Changes the global pandas display options ``display.max_rows`` and
        ``display.expand_frame_repr``; prints progress messages and the final
        column index; writes two files.

    Raises:
        RuntimeError: If the download returns no data, so that existing
            output files are not overwritten with empty ones.
    """
    # Set pandas display options to prevent column concatenation in print
    pd.set_option('display.max_rows', None)
    pd.set_option('display.expand_frame_repr', False)

    pairs: List[List[str]] = [
        ['NVDA', 'AMD'],   # NVIDIA / Advanced Micro Devices
        ['MSFT', 'GOOGL'], # Microsoft / Alphabet (Google)
        ['AAPL', 'MSFT'],  # Apple / Microsoft
        ['V', 'MA'],       # Visa / Mastercard
        ['CRM', 'ADBE'],   # Salesforce / Adobe
        ['INTC', 'QCOM'],  # Intel / Qualcomm
        ['CSCO', 'ANET'],  # Cisco / Arista Networks
        ['ORCL', 'SAP'],   # Oracle / SAP
        ['UBER', 'LYFT'],  # Uber / Lyft
        ['META', 'SNAP'],  # Meta Platforms / Snap Inc.
    ]
    # A ticker can appear in more than one pair (e.g. MSFT); request it once.
    tickers: List[str] = _unique_tickers(pairs)

    print("loading data...")
    df: pd.DataFrame = yf.download(
        tickers,
        start='2020-01-01',
        end='2025-01-01',
        auto_adjust=True,
        threads=True,
    )
    if df is None or df.empty:
        raise RuntimeError("yfinance returned no data; nothing was saved.")

    # Turn the date index into an ordinary column. Naming the index first
    # guarantees the column is called 'Date' even if the index is unnamed.
    # With multi-level columns the new label is ('Date', ''), which the
    # flattening step below reduces to plain 'Date'.
    df = df.rename_axis('Date', axis='index').reset_index()
    df.columns = _flatten_columns(df.columns)

    # Normalise the date column: drop unparseable dates, order rows
    # chronologically and keep one row per date.
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df = df.dropna(subset=['Date']).sort_values('Date').drop_duplicates(subset=['Date'])

    print(df.columns)
    df.to_parquet('stock_data.parquet', engine='fastparquet', index=False)
    df.to_csv('stock_data.csv', index=False)
    print("Data saved to 'stock_data.parquet' and 'stock_data.csv'.")
# Run the download-and-save routine as soon as this code is executed.
load_and_save()


[**********************79%*************          ]  15 of 19 completed

loading data...


[*********************100%***********************]  19 of 19 completed


Index(['Date', 'Close__AAPL', 'Close__ADBE', 'Close__AMD', 'Close__ANET',
       'Close__CRM', 'Close__CSCO', 'Close__GOOGL', 'Close__INTC',
       'Close__LYFT', 'Close__MA', 'Close__META', 'Close__MSFT', 'Close__NVDA',
       'Close__ORCL', 'Close__QCOM', 'Close__SAP', 'Close__SNAP',
       'Close__UBER', 'Close__V', 'High__AAPL', 'High__ADBE', 'High__AMD',
       'High__ANET', 'High__CRM', 'High__CSCO', 'High__GOOGL', 'High__INTC',
       'High__LYFT', 'High__MA', 'High__META', 'High__MSFT', 'High__NVDA',
       'High__ORCL', 'High__QCOM', 'High__SAP', 'High__SNAP', 'High__UBER',
       'High__V', 'Low__AAPL', 'Low__ADBE', 'Low__AMD', 'Low__ANET',
       'Low__CRM', 'Low__CSCO', 'Low__GOOGL', 'Low__INTC', 'Low__LYFT',
       'Low__MA', 'Low__META', 'Low__MSFT', 'Low__NVDA', 'Low__ORCL',
       'Low__QCOM', 'Low__SAP', 'Low__SNAP', 'Low__UBER', 'Low__V',
       'Open__AAPL', 'Open__ADBE', 'Open__AMD', 'Open__ANET', 'Open__CRM',
       'Open__CSCO', 'Open__GOOGL', 'Open__INTC', 'Open_