# Test Notebook - Stock Analysis Functions

This notebook tests all functions in `functions.py` with real data to verify functionality.

**Purpose:** Automated testing and validation of all analysis functions

**Last Updated:** 2026-01-05

## Setup

In [None]:
# Import all functions
# The wildcard import is what puts the functions exercised by the cells below
# (get_sp500_symbols, fetch_one_ticker, generate_performance_summary, ...)
# into the notebook namespace.
# NOTE(review): later cells also use `pd` and `plt` without importing them in
# this notebook; presumably they arrive through this wildcard import as well —
# confirm against functions.py.
from functions import *
from datetime import datetime

# Record when the run started, followed by an 80-character rule.
print(f"Test started at: {datetime.now()}")
print("="*80)

## Test 1: get_sp500_symbols()

Tests fetching the S&P 500 company list from Wikipedia.

In [None]:
# Banner: a blank line, then the test title framed by two 80-character rules.
print("\n" + "=" * 80, "TEST 1: get_sp500_symbols()", "=" * 80, sep="\n")

try:
    sp500 = get_sp500_symbols()

    # Validation: a non-empty result that exposes the columns used below.
    assert sp500 is not None, "Failed: Function returned None"
    assert len(sp500) > 0, "Failed: DataFrame is empty"
    for _required in ('Symbol', 'GICS Sector'):
        assert _required in sp500.columns, f"Failed: Missing '{_required}' column"

    # Statistics
    print(f"✅ SUCCESS: Fetched {len(sp500)} S&P 500 companies")
    print(f"\nColumns: {list(sp500.columns)}")
    print("\nSector breakdown:")
    print(sp500['GICS Sector'].value_counts())

    print("\nSample of companies:")
    print(sp500.head(10))

except Exception as err:
    # Any error, including a failed assert, is reported rather than re-raised.
    print(f"❌ FAILED: {err}")

## Test 2: fetch_one_ticker()

Tests comprehensive single stock analysis with visualizations.

In [None]:
# Banner: a blank line, then the test title framed by two 80-character rules.
print("\n" + "=" * 80, "TEST 2: fetch_one_ticker()", "=" * 80, sep="\n")

# Inputs for this test.
test_ticker, test_period = 'AAPL', '2y'

try:
    result = fetch_one_ticker(test_ticker, period=test_period)

    # Validation: a non-empty result carrying every column checked here.
    assert result is not None, "Failed: Function returned None"
    assert len(result) > 0, "Failed: DataFrame is empty"
    for _required in ('Adj_Close', 'Market_Cap', 'P/E_Ratio', 'Dividend_Yield'):
        assert _required in result.columns, f"Failed: Missing '{_required}' column"

    # Statistics
    print(f"\n✅ SUCCESS: Fetched {len(result)} data points for {test_ticker}")
    print(f"\nData range: {result.index[0].date()} to {result.index[-1].date()}")
    print("\nSummary statistics:")
    print(result.describe())

    print("\nLatest values:")
    print(result.tail(1))

except Exception as err:
    # Any error, including a failed assert, is reported rather than re-raised.
    print(f"❌ FAILED: {err}")

## Test 3: download_and_plot_stock_data()

Tests normalized price comparison for multiple stocks.

In [None]:
# Test 3 checks download_and_plot_stock_data(): the result must be non-empty,
# have at least one column per requested ticker, include the SPY and RSP
# benchmarks, and start every column at roughly 1.0.
print("\n" + "="*80)
print("TEST 3: download_and_plot_stock_data()")
print("="*80)

test_tickers = ['AAPL', 'MSFT', 'GOOGL']
test_period = '1y'

try:
    result = download_and_plot_stock_data(test_tickers, period=test_period)

    # Validation
    assert result is not None, "Failed: Function returned None"
    assert len(result) > 0, "Failed: DataFrame is empty"
    assert len(result.columns) >= len(test_tickers), "Failed: Missing ticker columns"
    # This assert must sit inside the try body: at column 0 it ended the
    # try suite without an except clause and the cell could not be parsed.
    assert 'SPY' in result.columns, "Failed: SPY benchmark not auto-included"
    assert 'RSP' in result.columns, "Failed: RSP benchmark not auto-included"

    # Check normalization (first value should be ~1.0)
    first_values = result.iloc[0]
    assert all(abs(v - 1.0) < 0.01 for v in first_values), "Failed: Data not properly normalized"

    # Statistics
    print(f"\n✅ SUCCESS: Downloaded and normalized {len(result.columns)} tickers")
    print(f"\nTickers analyzed: {list(result.columns)}")
    print(f"\nData points: {len(result)}")
    print(f"\nFinal normalized values (starting = 1.0):")
    final_values = result.iloc[-1].sort_values(ascending=False)
    print(final_values)

    # A final value of 1.25 means +25% relative to the first row.
    print(f"\nPerformance ranking (best to worst):")
    for i, (ticker, value) in enumerate(final_values.items(), 1):
        pct_change = (value - 1.0) * 100
        print(f"  {i}. {ticker}: {pct_change:+.2f}%")

except Exception as e:
    # Any error, including a failed assert, is reported rather than re-raised.
    print(f"❌ FAILED: {e}")

## Test 4: download_and_plot_daily_pct_change()

Tests daily percentage change visualization.

In [None]:
# Banner: a blank line, then the test title framed by two 80-character rules.
print("\n" + "=" * 80, "TEST 4: download_and_plot_daily_pct_change()", "=" * 80, sep="\n")

# Inputs for this test.
test_tickers = ['AAPL', 'TSLA']
test_period = '6mo'

try:
    result = download_and_plot_daily_pct_change(test_tickers, period=test_period)

    # Validation: non-empty, one column per requested ticker, both benchmarks present.
    assert result is not None, "Failed: Function returned None"
    assert len(result) > 0, "Failed: DataFrame is empty"
    assert len(result.columns) >= len(test_tickers), "Failed: Missing ticker columns"
    for _benchmark in ('SPY', 'RSP'):
        assert _benchmark in result.columns, f"Failed: {_benchmark} benchmark not auto-included"

    # Statistics
    print(f"\n✅ SUCCESS: Downloaded daily % changes for {len(result.columns)} tickers")
    print(f"\nTickers analyzed: {list(result.columns)}")
    print(f"\nData points: {len(result)}")

    # Per-column standard deviation, largest first.
    print("\nVolatility analysis (standard deviation of daily returns):")
    volatility = result.std().sort_values(ascending=False)
    for ticker, vol in volatility.items():
        print(f"  {ticker}: {vol:.2f}%")

    # Per-column mean, largest first.
    print("\nAverage daily return:")
    avg_return = result.mean().sort_values(ascending=False)
    for ticker, ret in avg_return.items():
        print(f"  {ticker}: {ret:+.3f}%")

    # Largest value in each column and the index label where it occurs.
    print("\nBest day for each ticker:")
    for ticker in result.columns:
        daily = result[ticker]
        best_day, best_date = daily.max(), daily.idxmax()
        print(f"  {ticker}: {best_day:+.2f}% on {best_date.date()}")

    # Smallest value in each column and the index label where it occurs.
    print("\nWorst day for each ticker:")
    for ticker in result.columns:
        daily = result[ticker]
        worst_day, worst_date = daily.min(), daily.idxmin()
        print(f"  {ticker}: {worst_day:+.2f}% on {worst_date.date()}")

except Exception as err:
    # Any error, including a failed assert, is reported rather than re-raised.
    print(f"❌ FAILED: {err}")

## Test 5: generate_performance_summary()

Tests comprehensive performance metrics calculation.

In [None]:
# Banner: a blank line, then the test title framed by two 80-character rules.
print("\n" + "=" * 80, "TEST 5: generate_performance_summary()", "=" * 80, sep="\n")

# Inputs for this test.
test_tickers = ['AAPL', 'MSFT', 'NVDA', 'GOOGL', 'TSLA']
test_period = '1y'
test_benchmark = 'SPY'

try:
    result = generate_performance_summary(test_tickers, period=test_period, benchmark=test_benchmark)

    # Validation: non-empty, benchmark present as a row, all metric columns present.
    assert result is not None, "Failed: Function returned None"
    assert len(result) > 0, "Failed: DataFrame is empty"
    assert test_benchmark in result.index, f"Failed: Benchmark {test_benchmark} not in results"

    required_columns = ['Total Return (%)', 'Annualized Return (%)', 'Volatility (%)',
                        'Sharpe Ratio', 'Max Drawdown (%)', 'Current Price']
    _missing = [name for name in required_columns if name not in result.columns]
    # Report the first missing column, in the order listed above.
    assert not _missing, f"Failed: Missing column '{_missing[0]}'"

    # Statistics
    print(f"\n✅ SUCCESS: Generated performance summary for {len(result)} tickers")

    print(f"\n{'=' * 80}")
    print("ADDITIONAL ANALYSIS")
    print(f"{'=' * 80}")

    # Each entry: heading, column to inspect, Series method that picks the row,
    # and the unit suffix printed after the value.
    _highlights = (
        ("Best total return", 'Total Return (%)', 'idxmax', "%"),
        ("Worst total return", 'Total Return (%)', 'idxmin', "%"),
        ("Best Sharpe Ratio (risk-adjusted return)", 'Sharpe Ratio', 'idxmax', ""),
        ("Lowest volatility", 'Volatility (%)', 'idxmin', "%"),
        ("Highest volatility", 'Volatility (%)', 'idxmax', "%"),
    )
    for _title, _column, _picker, _unit in _highlights:
        print(f"\n{_title}:")
        _label = getattr(result[_column], _picker)()
        print(f"  {_label}: {result.loc[_label, _column]:.2f}{_unit}")

    print(f"\nStocks that beat {test_benchmark}:")
    if 'vs Benchmark (%)' in result.columns:
        _beats = result['vs Benchmark (%)'] > 0
        outperformers = result[_beats].sort_values('vs Benchmark (%)', ascending=False)
        if len(outperformers) == 0:
            print(f"  None - all underperformed {test_benchmark}")
        # With no outperformers this loop simply does not execute.
        for ticker in outperformers.index:
            print(f"  {ticker}: +{outperformers.loc[ticker, 'vs Benchmark (%)']:.2f}% vs {test_benchmark}")

except Exception as err:
    # Any error, including a failed assert, is reported rather than re-raised.
    print(f"❌ FAILED: {err}")

## Test 6: fetch_historical_stock_data()

Tests monthly historical data with weighted portfolio metrics.

In [None]:
# Banner: a blank line, then the test title framed by two 80-character rules.
print("\n" + "=" * 80, "TEST 6: fetch_historical_stock_data()", "=" * 80, sep="\n")

# Inputs for this test.
test_tickers = ['AAPL', 'MSFT', 'GOOGL']
test_period = '2y'

try:
    result = fetch_historical_stock_data(test_tickers, period=test_period, verbose=True)

    # Validation: a non-empty dict with a non-empty entry for every requested ticker.
    assert result is not None, "Failed: Function returned None"
    assert len(result) > 0, "Failed: Dictionary is empty"
    assert isinstance(result, dict), "Failed: Result is not a dictionary"

    for ticker in test_tickers:
        assert ticker in result, f"Failed: Missing ticker {ticker} in results"
        assert len(result[ticker]) > 0, f"Failed: No data for {ticker}"

    # Statistics
    print(f"\n✅ SUCCESS: Fetched historical data for {len(result)} tickers")

    print(f"\n{'=' * 80}")
    print("DATA SUMMARY BY TICKER")
    print(f"{'=' * 80}")

    for ticker, df in result.items():
        print(f"\n{ticker}:")
        print(f"  Data points: {len(df)}")
        print(f"  Columns: {list(df.columns)}")
        print(f"  Date range: {df.index[0].date()} to {df.index[-1].date()}")

        # Price figures are optional: shown only when a 'Close' column exists.
        if 'Close' in df.columns:
            closes = df['Close']
            print(f"  Current price: ${closes.iloc[-1]:.2f}")
            # Percentage move from the first row to the last row.
            price_change = ((closes.iloc[-1] / closes.iloc[0]) - 1) * 100
            print(f"  Price change: {price_change:+.2f}%")

        # P/E figures are optional: shown only when a 'P/E_Ratio' column exists.
        if 'P/E_Ratio' in df.columns:
            pe_values = df['P/E_Ratio']
            current_pe, avg_pe = pe_values.iloc[-1], pe_values.mean()
            print(f"  Current P/E: {current_pe:.2f}")
            print(f"  Average P/E: {avg_pe:.2f}")

except Exception as err:
    # Any error, including a failed assert, is reported rather than re-raised.
    print(f"❌ FAILED: {err}")

## Test 7: get_etfdb_pe_ratio()

Tests web scraping for ETF P/E ratios (may be slow due to web requests).

In [None]:
# Banner: a blank line, then the test title framed by two 80-character rules.
print("\n" + "=" * 80, "TEST 7: get_etfdb_pe_ratio()", "=" * 80, sep="\n")

test_etfs = ['VOO', 'SPY']

# Maps each ETF symbol to the fetched P/E ratio, or None when unavailable.
results = {}

for etf in test_etfs:
    try:
        print(f"\nFetching P/E ratio for {etf}...")
        pe_ratio = get_etfdb_pe_ratio(etf)
        results[etf] = pe_ratio

        # A None return is treated as a warning, not a failure.
        if pe_ratio is None:
            print(f"⚠️  WARNING: Could not fetch P/E ratio for {etf}")
        else:
            print(f"✅ SUCCESS: {etf} P/E ratio = {pe_ratio}")

    except Exception as err:
        # One failing ETF is recorded as None and the loop moves on to the next.
        print(f"❌ FAILED for {etf}: {err}")
        results[etf] = None

print(f"\n{'=' * 80}", "SUMMARY", f"{'=' * 80}", "\nETF P/E Ratios:", sep="\n")
for etf, pe in results.items():
    print(f"  {etf}: {pe}" if pe is not None else f"  {etf}: Not available")

## Test 8: Multi-Period Comparison

Tests performance across different time periods.

In [None]:
# Banner: a blank line, then the test title framed by two 80-character rules.
print("\n" + "=" * 80, "TEST 8: Multi-Period Performance Comparison", "=" * 80, sep="\n")

# Inputs for this test.
test_ticker = 'NVDA'
periods = ['1mo', '3mo', '6mo', '1y']

# NOTE(review): `pd` and `plt` are not imported in this cell; presumably they
# come from the notebook's `from functions import *` — confirm.
try:
    # Maps each period string to a dict of metrics for test_ticker.
    performance_by_period = {}

    for period in periods:
        print(f"\nAnalyzing {test_ticker} for {period}...")
        result = generate_performance_summary([test_ticker], period=period, benchmark='SPY')

        # Copy the four core metrics from the ticker's row of the summary.
        performance_by_period[period] = {
            metric: result.loc[test_ticker, metric]
            for metric in ('Total Return (%)', 'Volatility (%)',
                           'Sharpe Ratio', 'Max Drawdown (%)')
        }

        # The benchmark-relative figure is added only when the summary provides it.
        if 'vs Benchmark (%)' in result.columns:
            performance_by_period[period]['vs SPY (%)'] = result.loc[test_ticker, 'vs Benchmark (%)']

    # Create summary DataFrame: transposed so each period is a row.
    summary_df = pd.DataFrame(performance_by_period).T

    print("\n✅ SUCCESS: Multi-period analysis complete")
    print(f"\n{test_ticker} Performance Across Time Periods:")
    print("=" * 80)
    print(summary_df.to_string())

    # Plot returns across periods as a bar chart, one bar per period.
    plt.figure(figsize=(10, 5))
    plt.bar(summary_df.index, summary_df['Total Return (%)'])
    plt.title(f'{test_ticker} - Total Returns Across Different Periods')
    plt.xlabel('Period')
    plt.ylabel('Total Return (%)')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

except Exception as err:
    # Any error is reported rather than re-raised.
    print(f"❌ FAILED: {err}")

## Test Summary

In [None]:
# Closing banner: a blank line, then the title framed by two 80-character rules.
print("\n" + "=" * 80, "TEST SUITE COMPLETED", "=" * 80, sep="\n")
# Record when the run ended.
print(f"\nTest finished at: {datetime.now()}")
# Final reminder followed by a blank line and a closing rule.
print(
    "\nAll functions have been tested with real data.",
    "Review the output above for any failures or warnings.",
    "\n" + "=" * 80,
    sep="\n",
)