# Setup & Test Notebook

**Purpose**: Verify your environment is set up correctly for Hassett forecasting.

Run this notebook first to ensure everything works!

## 1. Import Required Packages

In [None]:
# Core data science
import numpy as np
import pandas as pd
import sqlite3
from pathlib import Path

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Time series
from statsmodels.tsa.seasonal import STL

# Machine learning
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Display options: show every column, cap printed frames at 100 rows, and
# render floats with two decimal places.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', '{:.2f}'.format)

# Plotting style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'.
# Try the new name first and fall back to the pre-3.6 name so that this
# environment check does not crash on an older Matplotlib install.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
else:
    print("⚠️  Matplotlib 'seaborn darkgrid' style not available; using the default style.")
sns.set_palette('husl')

# Reaching this point means every import in this cell succeeded.
print("✅ All packages imported successfully!")
print("\nVersions:")
print(f"  pandas: {pd.__version__}")
print(f"  numpy: {np.__version__}")

## 2. Set Up Project Paths

In [None]:
# Get project root directory
import sys
from pathlib import Path

# The project root is taken to be the parent of the kernel's working
# directory, i.e. this assumes the notebook is run from a direct subfolder
# of the project -- TODO confirm for your launch setup.
project_root = Path.cwd().parent
src_path = project_root / 'src'

# Add src to path for imports. Guarded so that re-running this cell does not
# keep prepending duplicate entries to sys.path.
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

# Define data paths
data_dir = project_root / 'data'
models_dir = project_root / 'models'
docs_dir = project_root / 'docs'

print("📁 Project Structure:")
print(f"  Root: {project_root}")
print(f"  Data: {data_dir}")
print(f"  Source: {src_path}")
print(f"  Models: {models_dir}")
print("\n✅ Paths configured!")

## 3. Test Database Connection

In [None]:
# Check if database exists
db_path = data_dir / 'hassett.db'

if db_path.exists():
    print(f"✅ Database found: {db_path}")

    # Connect and check. The try/finally guarantees the connection is closed
    # even if one of the queries below raises (e.g. a missing table).
    conn = sqlite3.connect(db_path)
    try:
        # Get table info
        tables = pd.read_sql("SELECT name FROM sqlite_master WHERE type='table'", conn)
        print(f"\n📊 Tables: {', '.join(tables['name'].tolist())}")

        # Quick sample: first five MAX/EXP rows
        sample = pd.read_sql("""
            SELECT * FROM hassett_report
            WHERE ProductType IN ('MAX', 'EXP')
            LIMIT 5
        """, conn)

        print("\n📋 Sample Data:")
        display(sample)

        # Get record count and the DATE_SHIP range for MAX/EXP rows
        count = pd.read_sql("""
            SELECT COUNT(*) as total,
                   MIN(DATE_SHIP) as min_date,
                   MAX(DATE_SHIP) as max_date
            FROM hassett_report
            WHERE ProductType IN ('MAX', 'EXP')
        """, conn)

        print("\n📈 Database Stats:")
        print(f"  Total Records: {count['total'].iloc[0]:,}")
        print(f"  Date Range: {count['min_date'].iloc[0]} to {count['max_date'].iloc[0]}")
    finally:
        conn.close()
else:
    print(f"❌ Database not found at: {db_path}")
    print("\n⚠️  Please copy hassett.db to the data/ folder:")
    print(f"   cp /path/to/hassett.db {data_dir}/")

## 4. Test Tier Mapping Data

In [None]:
# Check tier mapping
tier_path = data_dir / 'odc_tier_mapping.csv'

if tier_path.exists():
    tiers = pd.read_csv(tier_path)
    print("✅ Tier mapping loaded!\n")
    print("📊 ODC Tiers:")
    display(tiers)

    # Per-tier summary: number of ODC entries and the summed 'total_2024'
    # column. Named aggregation yields the final column names directly, and
    # display() renders the frame the same way as the table above.
    tier_summary = tiers.groupby('tier').agg(
        count=('ODC', 'count'),
        total_2024=('total_2024', 'sum'),
    )
    print("\n📈 Tier Summary:")
    display(tier_summary)
else:
    print(f"⚠️  Tier mapping not found at: {tier_path}")

## 5. Quick Forecasting Test

In [None]:
# Simple 2024 baseline forecast test
if db_path.exists():
    # Get Week 50 from 2024 as baseline: total PIECES per (ODC, ProductType),
    # ten largest combinations. '%W' is SQLite's week-of-year (00-53).
    query = """
    SELECT
        ODC,
        ProductType,
        SUM(PIECES) as total_pieces
    FROM hassett_report
    WHERE ProductType IN ('MAX', 'EXP')
        AND strftime('%Y', DATE_SHIP) = '2024'
        AND strftime('%W', DATE_SHIP) = '50'
    GROUP BY ODC, ProductType
    ORDER BY total_pieces DESC
    LIMIT 10
    """

    # Close the connection even when the query fails.
    conn = sqlite3.connect(db_path)
    try:
        baseline = pd.read_sql(query, conn)
    finally:
        conn.close()

    if baseline.empty:
        # Pivoting and bar-plotting an empty frame raises
        # "no numeric data to plot", so report the situation instead.
        print("⚠️  No 2024 Week 50 records found for MAX/EXP; skipping the chart.")
    else:
        print("📊 Top 10 ODC-Product Combinations (2024 Week 50 Baseline):\n")
        display(baseline)

        # Visualize: one bar group per ODC, one bar per product type.
        # (ODC, ProductType) pairs are unique because of the GROUP BY above,
        # so pivot() cannot hit duplicate index/column entries.
        fig, ax = plt.subplots(figsize=(10, 6))
        baseline_pivot = baseline.pivot(index='ODC', columns='ProductType', values='total_pieces')
        baseline_pivot.plot(kind='bar', ax=ax, width=0.8)
        ax.set_ylabel('Pieces (Week 50, 2024)')
        ax.set_title('Top ODCs by Product Type - Week 50 Baseline')
        ax.legend(title='Product')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

        print("\n✅ Forecasting test complete!")
else:
    print(f"⚠️  Skipping forecasting test: database not found at: {db_path}")

## 6. Environment Summary

In [None]:
print("="*60)
print("ENVIRONMENT SUMMARY")
print("="*60)

# Evaluate each file check once so the status list and the remediation hints
# below are guaranteed to agree.
db_found = db_path.exists()
tier_found = tier_path.exists()

# The first two entries are hard-coded: they are not re-verified here.
# "Database connection" only reflects that the database file exists.
checks = [
    ("Python packages", True),
    ("Project paths", True),
    ("Database connection", db_found),
    ("Tier mapping", tier_found),
]

print("\n✅ Status Check:")
for check, status in checks:
    symbol = "✅" if status else "❌"
    print(f"  {symbol} {check}")

all_good = all(status for _, status in checks)

if all_good:
    print("\n" + "="*60)
    print("🎉 ALL CHECKS PASSED! You're ready to start forecasting!")
    print("="*60)
    print("\nNext steps:")
    print("  1. Open 01_quick_forecast.ipynb for a forecasting demo")
    print("  2. Open 02_data_exploration.ipynb to explore the data")
    print("  3. Review docs/META_ANALYSIS_100_EXPERIMENTS.md")
else:
    print("\n" + "="*60)
    print("⚠️  SOME CHECKS FAILED")
    print("="*60)
    print("\nPlease:")
    if not db_found:
        print(f"  - Copy hassett.db to {data_dir}/")
    if not tier_found:
        print(f"  - Copy odc_tier_mapping.csv to {data_dir}/")