In [13]:
import yfinance as yf
import pandas as pd
from typing import Dict, List, Tuple
import numpy as np
def load_and_save() -> None:
    """Download market data for a fixed list of ticker pairs and save it.

    The tickers of all pairs are de-duplicated and fetched with a single
    ``yf.download`` call. Columns containing missing values are reported on
    stdout (the report is informational only; nothing is dropped or filled).
    The columns are then flattened to single-level names such as
    ``Close__AAPL`` and the table is written to ``stock_data.parquet`` and
    ``stock_data.csv`` (relative paths, index not written).

    Side effects:
        Sets the global pandas options ``display.max_rows`` and
        ``display.expand_frame_repr``; they stay changed after the call.
        Prints progress and diagnostics to stdout.

    Raises:
        ValueError: If the download returns no rows. Raised before any file
            is written so existing output files are not replaced by an
            empty table.
    """
    # Set pandas display options to prevent column concatenation in print
    pd.set_option('display.max_rows', None)
    pd.set_option('display.expand_frame_repr', False)

    pairs: List[List[str]] = [
        ['NVDA', 'AMD'],    # NVIDIA / Advanced Micro Devices
        ['MSFT', 'GOOGL'],  # Microsoft / Alphabet (Google)
        ['AAPL', 'MSFT'],   # Apple / Microsoft
        ['V', 'MA'],        # Visa / Mastercard
        ['CRM', 'ADBE'],    # Salesforce / Adobe
        ['INTC', 'QCOM'],   # Intel / Qualcomm
        ['CSCO', 'ANET'],   # Cisco / Arista Networks
        ['ORCL', 'SAP'],    # Oracle / SAP
        ['UBER', 'LYFT'],   # Uber / Lyft
        ['META', 'SNAP'],   # Meta Platforms / Snap Inc.
        ['SNOW', 'DDOG'],   # Snowflake / Datadog
        ['MDB', 'ESTC'],    # MongoDB / Elastic
    ]

    # A ticker may appear in several pairs (MSFT does), so de-duplicate.
    # Sorting makes the request order reproducible; a bare set() would give
    # a hash-dependent order that can differ between runs.
    tickers: List[str] = sorted({ticker for pair in pairs for ticker in pair})

    print("loading data...")
    df: pd.DataFrame = yf.download(
        tickers,
        start='2020-10-01',
        end='2025-09-01',
        auto_adjust=True,
        threads=True,
    )

    # Fail fast instead of silently writing empty output files.
    if df.empty:
        raise ValueError("yf.download returned no data; nothing was saved.")

    # Report (but do not alter) columns that contain at least one NaN.
    print("Columns with NaN values:")
    nan_columns = df.columns[df.isnull().any()]
    print(nan_columns.tolist())
    print(f"Total columns with NaNs: {len(nan_columns)}")

    # Move the index into a regular column; an unnamed index comes out as
    # "index" and is renamed to "Date".
    df = df.reset_index().rename(columns={"index": "Date"})

    # Flatten multi-level columns into "<level0>__<level1>" names, skipping
    # empty levels so that ('Date', '') becomes plain 'Date'.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [
            '__'.join(str(level) for level in col if level not in (None, ''))
            for col in df.columns
        ]
    else:
        # Single-level labels are strings; joining over them would split
        # each name into characters ('Date' -> 'D__a__t__e').
        df.columns = [str(col) for col in df.columns]

    # Unparseable dates become NaT and are dropped; the remaining rows are
    # ordered by date with one row kept per date.
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df = (
        df.dropna(subset=['Date'])
        .sort_values('Date')
        .drop_duplicates(subset=['Date'])
    )
    print(df.columns)

    df.to_parquet('stock_data.parquet', engine='fastparquet', index=False)
    df.to_csv('stock_data.csv', index=False)
    print("Data saved to 'stock_data.parquet' and 'stock_data.csv'.")
# Run the download-and-save step as soon as this cell/script is executed.
load_and_save()


[********************* 43%                       ]  10 of 23 completed

loading data...


[*********************100%***********************]  23 of 23 completed


Columns with NaN values:
[]
Total columns with NaNs: 0
Index(['Date', 'Close__AAPL', 'Close__ADBE', 'Close__AMD', 'Close__ANET',
       'Close__CRM', 'Close__CSCO', 'Close__DDOG', 'Close__ESTC',
       'Close__GOOGL',
       ...
       'Volume__META', 'Volume__MSFT', 'Volume__NVDA', 'Volume__ORCL',
       'Volume__QCOM', 'Volume__SAP', 'Volume__SNAP', 'Volume__SNOW',
       'Volume__UBER', 'Volume__V'],
      dtype='object', length=116)
Data saved to 'stock_data.parquet' and 'stock_data.csv'.
