## Exploratory Data Analysis (EDA)

In [7]:
# 1: Setup
import sys
import os

# Make the project's code importable from this notebook.
# The import below uses the package-qualified form `scripts.eda_analysis`,
# which is resolved relative to the *parent* of the scripts directory, so the
# project root must be on sys.path. The scripts directory itself is still added
# for code that imports its modules directly.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
scripts_path = os.path.join(project_root, 'scripts')
for extra_path in (project_root, scripts_path):
    # Append (rather than insert) so installed packages keep priority, and
    # skip duplicates so re-running the cell does not grow sys.path.
    if extra_path not in sys.path:
        sys.path.append(extra_path)

import pandas as pd
from scripts.eda_analysis import InteractiveDataAnalyzer

In [8]:
# 2: Load Multiple DataFrames
import glob

# Historical CSVs live under ../data/raw/historical (relative to the notebook).
data_dir = os.path.abspath(os.path.join('..', 'data', 'raw', 'historical'))
# glob.glob returns matches in arbitrary, filesystem-dependent order; sort so
# the load order and the key order of `dataframes` are reproducible.
csv_files = sorted(glob.glob(os.path.join(data_dir, '*.csv')))

# Also include the news / analyst ratings file if present
extra_files = [os.path.abspath(os.path.join('..', 'data', 'raw', 'raw_analyst_ratings.csv'))]
for ef in extra_files:
    if os.path.exists(ef):
        csv_files.append(ef)

# Maps each file's base name (without extension) to its loaded DataFrame.
dataframes = {}
for file in csv_files:
    name = os.path.splitext(os.path.basename(file))[0]
    try:
        df = pd.read_csv(file)
    except Exception as e:
        # Best-effort loading: report the failure and continue with the
        # remaining files instead of aborting the whole cell.
        print(f"❌ Failed to load {name}: {e}")
    else:
        dataframes[name] = df
        print(f"✅ Loaded {name}: {df.shape}")
        

✅ Loaded AAPL_historical_data: (10998, 9)
✅ Loaded AMZN_historical_data: (6846, 9)
✅ Loaded GOOG_historical_data: (5020, 9)
✅ Loaded META_historical_data: (2926, 9)
✅ Loaded MSFT_historical_data: (9672, 9)
✅ Loaded NVDA_historical_data: (6421, 9)
✅ Loaded TSLA_historical_data: (3545, 9)
✅ Loaded raw_analyst_ratings: (1407328, 6)


In [9]:
# 3: Interactive EDA for Any Dataset (Stock or News)
from ipywidgets import Dropdown, interact

if not dataframes:
    print("No dataframes loaded.")
else:
    # One dropdown entry per loaded dataset, keyed by its name in `dataframes`.
    df_selector = Dropdown(options=list(dataframes), description='Dataset:')

    def run_eda(selected_name):
        """Run the interactive summary for the chosen dataset and save a PDF.

        Any exception raised while analyzing is printed rather than
        propagated, so a failure on one dataset does not break the widget.
        """
        print(f"\n--- EDA for {selected_name} ---")
        report_path = f"eda_{selected_name}.pdf"
        try:
            InteractiveDataAnalyzer(dataframes[selected_name]).interactive_summary(
                save_pdf=True,
                pdf_path=report_path,
            )
        except Exception as err:
            print(f"❌ Error analyzing {selected_name}: {err}")

    # Re-runs run_eda whenever the dropdown selection changes.
    interact(run_eda, selected_name=df_selector)

interactive(children=(Dropdown(description='Dataset:', options=('AAPL_historical_data', 'AMZN_historical_data'…

In [None]:
# 4: (Optional) Direct EDA for News Dataset
# This cell is useful if you want to always run EDA on news data and save a PDF, regardless of interactive selection.

if 'raw_analyst_ratings' in dataframes:
    print("\n=== News Data EDA (raw_analyst_ratings) ===")
    news_pdf_path = "../reports/pdfs/eda_raw_analyst_ratings.pdf"
    # Create the report folder up front so saving the PDF does not depend on
    # the directory already existing (exist_ok makes this a no-op if it does).
    os.makedirs(os.path.dirname(news_pdf_path), exist_ok=True)
    news_analyzer = InteractiveDataAnalyzer(dataframes['raw_analyst_ratings'])
    news_analyzer.interactive_summary(
        save_pdf=True,
        pdf_path=news_pdf_path
    )
else:
    print("No news data loaded (raw_analyst_ratings.csv not found in dataframes).")