In [None]:
# 1. Imports
import os
import yfinance as yf
import pandas as pd
from scripts.eda_analysis import InteractiveDataAnalyzer
from scripts.time_series_analysis import TimeSeriesAnalyzer

In [None]:
# 2. Download daily price history for each ticker.
# Result: `data` maps ticker symbol -> DataFrame as returned by yf.download,
# with columns flattened to a single level so the CSV written later has one header row.
tickers = ["AAPL", "AMZN", "GOOG", "META", "MSFT", "NVDA", "TSLA"]
data = {}
for ticker in tickers:
    df = yf.download(tickers=ticker, start="2000-01-01", end="2024-12-31", interval="1d")
    # yfinance reports most failures (bad symbol, network error) by returning an
    # empty frame instead of raising; skip those so nothing header-only gets saved.
    if df is None or df.empty:
        print(f"❌ {ticker}: no data returned, skipping.")
        continue
    # Recent yfinance versions return (Price, Ticker) MultiIndex columns even for a
    # single symbol. Drop the ticker level so columns are plain 'Open', 'Close', ...
    if isinstance(df.columns, pd.MultiIndex):
        ticker_level = "Ticker" if "Ticker" in df.columns.names else -1
        df = df.droplevel(ticker_level, axis=1)
    data[ticker] = df
    print(f"{ticker} Head:\n", df.head(5), "\n")

In [None]:
# 3. Write every downloaded frame to "<save_dir>/<TICKER>_yfdata_download.csv"
save_dir = "../data/raw/downloaded"
os.makedirs(save_dir, exist_ok=True)  # exist_ok: re-running the cell must not fail
for ticker, df in data.items():
    filepath = os.path.join(save_dir, f"{ticker}_yfdata_download.csv")
    df.to_csv(filepath)

In [None]:
# 4. Load saved CSVs for EDA and time series analysis
import glob


def _tidy_price_frame(raw):
    """Turn a frame read with ``index_col=0`` into a clean frame with a 'Date' column.

    Steps:
      * move the index into a column and make sure that column is called 'Date';
      * parse 'Date' as datetime and drop rows that do not parse (this also removes
        the extra header rows left behind when a CSV was written from a frame with
        multi-level columns);
      * convert non-numeric columns back to numbers when they contain numeric text,
        since such leftover header rows force the whole column to be read as text.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame whose index holds the date values (typically as strings).

    Returns
    -------
    pandas.DataFrame
        New frame with a datetime 'Date' column and a fresh 0..n-1 index.
        May be empty if no row had a parseable date.
    """
    frame = raw.reset_index()
    # Ensure the date column is named 'Date' (unnamed index -> 'index' after reset)
    if 'index' in frame.columns:
        frame = frame.rename(columns={'index': 'Date'})
    elif 'Date' not in frame.columns:
        frame = frame.rename(columns={frame.columns[0]: 'Date'})

    # Single parse + single filter; unparseable values become NaT and are dropped
    frame['Date'] = pd.to_datetime(frame['Date'], errors='coerce')
    frame = frame.dropna(subset=['Date']).reset_index(drop=True)

    for col in frame.columns:
        if col == 'Date' or pd.api.types.is_numeric_dtype(frame[col]):
            continue
        as_number = pd.to_numeric(frame[col], errors='coerce')
        # Only replace the column if it actually held numbers; leave real text alone
        if as_number.notna().any():
            frame[col] = as_number
    return frame


csv_suffix = "_yfdata_download.csv"
# sorted(): glob order depends on the filesystem; sort for a reproducible ticker order
csv_files = sorted(glob.glob(os.path.join(save_dir, "*" + csv_suffix)))
downloaded_data = {}
for file in csv_files:
    # Strip the known suffix instead of splitting on "_" so tickers containing
    # an underscore are not truncated.
    ticker = os.path.basename(file)[:-len(csv_suffix)]
    try:
        # Always load with index_col=0 (yfinance saves date as index)
        df = pd.read_csv(file, index_col=0)
        if df.empty:
            print(f"❌ {ticker}: File is empty, skipping.")
            continue
        df = _tidy_price_frame(df)
        if df.empty:
            print(f"❌ {ticker}: No rows with a valid date, skipping.")
            continue
        downloaded_data[ticker] = df
    except Exception as e:
        # Best-effort load: report the failure and continue with the other files
        print(f"❌ Failed to load {ticker}: {e}")


In [None]:
# 5. Exploratory summary per ticker.
# Each ticker is handled independently: a failure is reported and the loop moves on.
for ticker, df in downloaded_data.items():
    print(f"\n--- EDA for {ticker} ---")
    try:
        eda = InteractiveDataAnalyzer(df)
        eda.interactive_summary(
            save_pdf=True,
            pdf_path=f"eda_{ticker}_yf.pdf",  # relative path -> current working directory
        )
    except Exception as err:
        print(f"❌ EDA failed for {ticker}: {err}")

In [None]:
# 6. Time series workflow per ticker: plots, stationarity tests, indicators,
#    ARIMA fit, forecast, and a PDF summary. Any failure skips to the next ticker.

# Column-name mapping passed to TimeSeriesAnalyzer; the same for every ticker.
column_roles = {
    'date_col': 'Date',
    'value_col': 'Close',
    'open_col': 'Open',
    'high_col': 'High',
    'low_col': 'Low',
    'volume_col': 'Volume',
}

for ticker, df in downloaded_data.items():
    print(f"\n--- Time Series Analysis for {ticker} ---")
    try:
        tsa = TimeSeriesAnalyzer(data=df, **column_roles)

        # Visual overview
        tsa.plot_series(title=f"{ticker} Close Price Time Series")
        tsa.plot_decomposition(freq=252)  # presumably ~trading days per year; confirm

        # Stationarity diagnostics
        stationarity = tsa.test_stationarity()
        print("ADF Test:", stationarity['ADF'])
        print("KPSS Test:", stationarity['KPSS'])
        tsa.plot_acf_pacf(lags=40)

        # Technical indicators
        indicators = tsa.calculate_technical_indicators()
        print("✅ Technical indicators calculated")
        display(indicators.tail())

        # Model, forecast, report (fit is called before forecast, as in the original order)
        model_fit = tsa.fit_arima(order=(1, 1, 1))
        tsa.forecast(steps=30)
        tsa.save_summary_pdf(pdf_path=f"tsa_summary_{ticker}_yf.pdf")
    except Exception as err:
        print(f"❌ Time series analysis failed for {ticker}: {err}")