# BMW Sales Analysis


1. Data Loading, Preprocessing & Exploratory Data Analysis
2. Time Series Aggregation
3. Static Visualizations
4. Reporting
5. Interactive Dashboards
6. Aggregator & Final Summary


In [None]:
# Core imports and note: modules are embedded in subsequent cells
import sys
import os
import numpy as np
from datetime import datetime


In [None]:
# --- config.py (embedded) ---
from pathlib import Path
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import warnings

# The project root is whatever the current working directory is when this cell runs.
PROJECT_ROOT = Path().resolve()
# Every generated artifact is written below this folder (see out_path).
OUTPUT_DIR = PROJECT_ROOT / 'outputs'
# Created eagerly so that later writers can assume the folder exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

def out_path(name: str) -> str:
    """Return the location of ``name`` inside ``OUTPUT_DIR`` as a plain string."""
    target = OUTPUT_DIR.joinpath(name)
    return str(target)

# Silence every warning category for the whole session.
warnings.filterwarnings('ignore')
# Select the Agg backend; the plotting functions in this file save figures to
# disk with savefig rather than displaying them.
matplotlib.use('Agg')
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Show all columns (and up to 100 rows) when DataFrames are printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Remote source of the dataset and the local file name it is saved under.
DATA_CSV_URL = 'https://raw.githubusercontent.com/StephenEastham/bmw-sales-forecast/refs/heads/main/v251125/BMW-sales-data-2010-2024.csv'
DATA_CSV_FILE = 'BMW-sales-data-2010-2024.csv'


# Feature Flags
# Each flag gates one stage in the driver cells at the end of the file.
# ENABLE_EXPLORATORY_ANALYSIS is only consulted inside the data-processing
# stage, so it has no effect when ENABLE_DATA_PROCESSING is False.
ENABLE_DATA_PROCESSING = True
ENABLE_EXPLORATORY_ANALYSIS = True
ENABLE_TIME_SERIES = True
ENABLE_STATIC_PLOTS = True
ENABLE_REPORTING = True
ENABLE_DASHBOARDS = True
ENABLE_AGGREGATOR = True

In [None]:
# --- utils.py (embedded) ---
import logging
import shutil
import os
from zipfile import ZipFile, ZIP_DEFLATED

def clean_outputs():
    """Empty ``OUTPUT_DIR``, creating it first if it does not exist.

    Regular files are unlinked and sub-directories are removed recursively.
    A failure on one entry is reported on stdout and does not stop the
    remaining entries from being processed.
    """
    print(f"Cleaning output directory: {OUTPUT_DIR}")

    if not OUTPUT_DIR.exists():
        # Nothing to clean; just make sure the folder is there for later writers.
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        return

    for entry in OUTPUT_DIR.iterdir():
        try:
            if entry.is_file():
                entry.unlink()
            elif entry.is_dir():
                shutil.rmtree(entry)
        except Exception as err:
            print(f"Failed to delete {entry}: {err}")

def setup_logger(log_file='sales_alerts.log'):
    """Configure root logging to write to a file in the output folder and to the console.

    ``log_file`` is resolved through ``out_path``. Any handlers already
    attached to the root logger are replaced (``force=True``). Returns the
    logger named after this module.
    """
    file_handler = logging.FileHandler(out_path(log_file))
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
        force=True,
    )
    return logging.getLogger(__name__)

def print_section(title):
    """Print ``title`` framed by two 80-character ``=`` rules, preceded by a blank line."""
    rule = "=" * 80
    print(f"\n{rule}")
    print(title)
    print(rule)

def zip_all_outputs(zip_filename=None, patterns=('*.png','*.html','*.csv','*.txt')):
    """Create a zip archive of generated outputs in `OUTPUT_DIR`.

    Args:
        zip_filename: Archive name or path. ``None`` means
            ``OUTPUT_DIR / 'all_outputs.zip'``; a relative value is placed
            inside ``OUTPUT_DIR``; an absolute value is used as given.
        patterns: Glob pattern(s) matched against the top level of
            ``OUTPUT_DIR``. A single string is accepted as one pattern.

    Returns:
        The ``Path`` of the archive that was written.

    Raises:
        Exception: Any error raised while writing the archive is reported on
            stdout and then re-raised.
    """
    if zip_filename is None:
        zip_path = OUTPUT_DIR / 'all_outputs.zip'
    else:
        zip_path = Path(zip_filename)
        if not zip_path.is_absolute():
            zip_path = OUTPUT_DIR / zip_path

    # A bare string would otherwise be iterated character by character.
    if isinstance(patterns, str):
        patterns = (patterns,)

    # Ensure OUTPUT_DIR exists, and also the archive's own folder, which may
    # differ from OUTPUT_DIR for absolute or nested targets.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    zip_path.parent.mkdir(parents=True, exist_ok=True)

    archive_resolved = zip_path.resolve()
    seen = set()
    added = 0
    try:
        with ZipFile(zip_path, 'w', ZIP_DEFLATED) as zf:
            for pat in patterns:
                for p in OUTPUT_DIR.glob(pat):
                    if not p.is_file():
                        continue
                    # Skip files already added through an overlapping pattern
                    # and never add the archive to itself.
                    if p in seen or p.resolve() == archive_resolved:
                        continue
                    seen.add(p)
                    zf.write(p, arcname=p.name)
                    added += 1
        print(f"‚úÖ Created zip: {zip_path.resolve()} ({added} files)")
        return zip_path
    except Exception as e:
        print(f"‚ö†Ô∏è Error while creating zip: {e}")
        raise
# --- end utils ---

In [None]:
# --- data.py (embedded) ---
import os
import requests
import pandas as pd
# uses DATA_CSV_FILE and DATA_CSV_URL from config

def download_data_file(file_name, data_url, timeout=30):
    """Download ``data_url`` to ``file_name`` unless the file already exists.

    Args:
        file_name: Local path to write. An existing file is left untouched.
        data_url: URL to fetch.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection fails instead of blocking forever.

    Request failures (including timeouts and HTTP error statuses) are
    reported on stdout and not raised; in that case no file is written.
    """
    if os.path.exists(file_name):
        print(f"‚úÖ {file_name} already exists.")
        return

    try:
        print(f"Attempting to download {file_name} from {data_url}...")
        response = requests.get(data_url, timeout=timeout)
        response.raise_for_status()
        with open(file_name, 'wb') as f:
            f.write(response.content)
        print(f"‚úÖ {file_name} downloaded successfully!")
    except requests.exceptions.RequestException as e:
        print(f"‚ùå Failed to download {file_name}. Please ensure the URL is correct and accessible.\nError: {e}")

def download_required_files():
    """Fetch the remote inputs the notebook needs (currently only the sales CSV)."""
    download_data_file(file_name=DATA_CSV_FILE, data_url=DATA_CSV_URL)

def load_and_explore_data(csv_path):
    """Read ``csv_path`` into a DataFrame, print a quick overview and return the frame.

    The overview consists of the shape, the first ten rows, the column dtypes
    and the ``describe()`` summary.
    """
    print_section("üìä DATASET OVERVIEW")
    frame = pd.read_csv(csv_path)
    print("\n‚úÖ Data loaded successfully!")
    print(f"Shape: {frame.shape}")

    # Each view is produced lazily, right after its heading is printed.
    views = (
        ("\nFirst few rows:", lambda: frame.head(10)),
        ("\nColumn names and types:", lambda: frame.dtypes),
        ("\nData summary:", frame.describe),
    )
    for heading, produce in views:
        print(heading)
        print(produce())
    return frame

def preprocess_data(df):
    """Return a copy of ``df`` with whitespace-stripped column names and print diagnostics.

    Prints every column with its dtype, the per-column missing-value counts,
    and a warning listing columns that hold no usable value at all (every
    entry is NaN or a blank string). The input frame is not modified and no
    rows or columns are dropped.

    Args:
        df: Raw DataFrame as loaded from the CSV.

    Returns:
        The copied DataFrame with stripped column labels.
    """
    df_clean = df.copy()
    print_section("üìã COLUMN ANALYSIS")
    print('\nColumn names:')
    # Positional access keeps this working even if two columns share a label.
    for i, col in enumerate(df_clean.columns, 1):
        print(f"  {i}. '{col}' ({df_clean.iloc[:, i - 1].dtype})")
    print(f"\nüîç Missing values:")
    print(df_clean.isnull().sum())

    # Only string labels can be stripped; other label types are kept as they are.
    df_clean.columns = [
        col.strip() if isinstance(col, str) else col for col in df_clean.columns
    ]

    empty_columns = []
    for pos, col in enumerate(df_clean.columns):
        values = df_clean.iloc[:, pos]
        # A cell is blank when it is NaN or, once rendered as text, only whitespace.
        is_blank = values.isna() | (values.astype(str).str.strip() == '')
        if is_blank.all():
            empty_columns.append(col)

    if empty_columns:
        print("\n‚ö†Ô∏è Warning: The following columns contain no values at all:")
        for c in empty_columns:
            print(f"  - {c}")
        print("Consider dropping or filling these columns before further processing.")
    else:
        print("\n‚úÖ No empty columns found. All columns contain at least one non-empty value.")
    print(f"\n‚úÖ Data preprocessing complete. Shape: {df_clean.shape}")
    print(f"\nüìä Cleaned columns:")
    print(df_clean.columns.tolist())
    return df_clean
# --- end data ---

In [None]:
# --- analysis.py (embedded) ---
import numpy as np

def aggregate_time_series(df_clean):
    """Aggregate ``Sales_Volume`` by year and print a summary of the yearly series.

    Returns a 5-tuple:
        * yearly totals frame with columns ``Year``, ``Total_Sales`` and ``YoY_Growth`` (percent),
        * the yearly totals as an array,
        * the matching years as an array,
        * totals grouped by ``Year`` and ``Model``,
        * totals grouped by ``Year`` and ``Region``.
    """
    print_section("üìà TIME SERIES AGGREGATION")

    yearly = (
        df_clean.groupby('Year')['Sales_Volume']
        .sum()
        .reset_index()
        .sort_values('Year')
    )
    yearly.columns = ['Year', 'Total_Sales']
    print("\n‚úÖ Yearly Sales Aggregation:")
    print(yearly)

    sales = yearly['Total_Sales'].values
    years = yearly['Year'].values

    print("\nüìä Time Series Summary:")
    print(f"   Total years: {len(years)}")
    first_year, last_year = years[0], years[-1]
    print(f"   Date range: {first_year:.0f} - {last_year:.0f}")
    print(f"   Average annual sales: {sales.mean():,.0f}")
    peak_year = years[np.argmax(sales)]
    print(f"   Peak sales: {sales.max():,.0f} (Year {peak_year:.0f})")
    low_year = years[np.argmin(sales)]
    print(f"   Lowest sales: {sales.min():,.0f} (Year {low_year:.0f})")

    # Growth is added after the first printout so that table shows totals only.
    yearly['YoY_Growth'] = yearly['Total_Sales'].pct_change() * 100
    print("\nüìä Year-over-Year Growth:")
    print(yearly[['Year', 'Total_Sales', 'YoY_Growth']].to_string(index=False))

    by_model = df_clean.groupby(['Year', 'Model'])['Sales_Volume'].sum().reset_index()
    by_region = df_clean.groupby(['Year', 'Region'])['Sales_Volume'].sum().reset_index()
    print("\n‚úÖ Model and Region time series aggregations complete")
    return yearly, sales, years, by_model, by_region
# --- end analysis ---

In [None]:
# --- exploratory_analysis.py (embedded) ---
def exploratory_data_analysis(df_clean):
    """Print sales totals by model, region and year, plus sales and price statistics.

    Reads the ``Model``, ``Region``, ``Year``, ``Sales_Volume`` and
    ``Price_USD`` columns of ``df_clean``. Nothing is returned.
    """
    def _total_sales_by(column):
        return df_clean.groupby(column)['Sales_Volume'].sum()

    print_section("üìä EXPLORATORY DATA ANALYSIS")

    print("\nüèéÔ∏è Sales by Model (Top 10):")
    print(_total_sales_by('Model').sort_values(ascending=False).head(10))

    print("\nüåç Sales by Region:")
    print(_total_sales_by('Region').sort_values(ascending=False))

    print("\nüìÖ Sales by Year:")
    print(_total_sales_by('Year').sort_values())

    describe_blocks = (
        ("\nüìà Sales Volume Statistics:", 'Sales_Volume'),
        ("\nüí∞ Price Statistics:", 'Price_USD'),
    )
    for heading, column in describe_blocks:
        print(heading)
        print(df_clean[column].describe())
# --- end exploratory_analysis ---

In [None]:
# --- viz_static.py (embedded) ---
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def create_overview_visualizations(df_yearly, df_clean):
    """Draw the four-panel static overview and save it as ``01_sales_overview.png``.

    Panels: yearly total sales (line), year-over-year growth (bars), the ten
    best-selling models (horizontal bars) and the regional share (pie).
    ``df_yearly`` must provide ``Year``, ``Total_Sales`` and ``YoY_Growth``;
    ``df_clean`` must provide ``Model``, ``Region`` and ``Sales_Volume``.
    """
    label_font = dict(fontsize=11, fontweight='bold')
    title_font = dict(fontsize=12, fontweight='bold')

    fig, axes = plt.subplots(2, 2, figsize=(16, 10))
    fig.suptitle('BMW Sales Overview (2010-2024)', fontsize=16, fontweight='bold')

    # Top-left: total sales per year.
    trend_ax = axes[0, 0]
    trend_ax.plot(
        df_yearly['Year'],
        df_yearly['Total_Sales'],
        marker='o',
        linewidth=2.5,
        markersize=8,
        color='#1f77b4',
        label='Total Sales',
    )
    trend_ax.set_xlabel('Year', **label_font)
    trend_ax.set_ylabel('Sales', **label_font)
    trend_ax.set_title('Total Sales Trend', **title_font)
    trend_ax.grid(True, alpha=0.3)
    trend_ax.legend()

    # Top-right: growth bars; the first year has no predecessor and is skipped.
    growth_ax = axes[0, 1]
    growth = df_yearly['YoY_Growth']
    bar_colors = ['green' if value > 0 else 'red' for value in growth.fillna(0)]
    growth_ax.bar(df_yearly['Year'][1:], growth[1:], color=bar_colors[1:], alpha=0.7)
    growth_ax.set_xlabel('Year', **label_font)
    growth_ax.set_ylabel('Growth Rate (%)', **label_font)
    growth_ax.set_title('Year-over-Year Growth Rate', **title_font)
    growth_ax.axhline(y=0, color='black', linestyle='-', linewidth=0.8)
    growth_ax.grid(True, alpha=0.3, axis='y')

    # Bottom-left: ascending order puts the best seller at the top of the barh chart.
    models_ax = axes[1, 0]
    sales_per_model = df_clean.groupby('Model')['Sales_Volume'].sum()
    top_models = sales_per_model.sort_values(ascending=True).tail(10)
    top_models.plot(kind='barh', ax=models_ax, color='#ff7f0e', alpha=0.8)
    models_ax.set_xlabel('Total Sales', **label_font)
    models_ax.set_title('Top 10 Models by Sales', **title_font)
    models_ax.grid(True, alpha=0.3, axis='x')

    # Bottom-right: share of total sales per region.
    regions_ax = axes[1, 1]
    sales_per_region = df_clean.groupby('Region')['Sales_Volume'].sum()
    region_share = sales_per_region.sort_values(ascending=False)
    wedge_colors = plt.cm.Set3(np.linspace(0, 1, len(region_share)))
    regions_ax.pie(
        region_share,
        labels=region_share.index,
        autopct='%1.1f%%',
        colors=wedge_colors,
        startangle=90,
    )
    regions_ax.set_title('Sales Distribution by Region', **title_font)

    plt.tight_layout()
    target = out_path('01_sales_overview.png')
    plt.savefig(target, dpi=300, bbox_inches='tight')
    print(f"‚úÖ Saved: {target}")
    plt.close()

def create_heatmap(df_clean):
    """Save a model-by-region sales heatmap as ``02_model_region_heatmap.png``.

    Sales are summed per (model, region) pair; only the fifteen models with
    the highest overall totals are shown, best seller first.
    """
    sales_matrix = df_clean.pivot_table(
        index='Model',
        columns='Region',
        values='Sales_Volume',
        aggfunc='sum',
        fill_value=0,
    )
    best_models = sales_matrix.sum(axis=1).nlargest(15).index
    sales_matrix = sales_matrix.loc[best_models]

    bold = {'fontweight': 'bold'}
    plt.figure(figsize=(12, 10))
    sns.heatmap(
        sales_matrix,
        annot=True,
        fmt='.0f',
        cmap='YlOrRd',
        cbar_kws={'label': 'Sales'},
    )
    plt.title('Sales Heatmap: Model vs Region (Top 15 Models)', fontsize=14, pad=20, **bold)
    plt.xlabel('Region', fontsize=12, **bold)
    plt.ylabel('Model', fontsize=12, **bold)
    plt.tight_layout()

    target = out_path('02_model_region_heatmap.png')
    plt.savefig(target, dpi=300, bbox_inches='tight')
    print(f"‚úÖ Saved: {target}")
    plt.close()
# --- end viz_static ---

In [None]:
# --- viz_interactive.py (embedded) ---
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def create_interactive_dashboard(ts_years, ts_data, df_yearly, df_clean):
    """Build the 2x2 Plotly dashboard and write it to ``05_interactive_dashboard.html``.

    Panels: yearly sales line, year-over-year growth bars, the five
    best-selling models and the regional sales pie.
    """
    print_section("üìä CREATING INTERACTIVE DASHBOARD")

    panel_titles = (
        'Total Sales Trend',
        'Year-over-Year Growth',
        'Model Performance (Top 5)',
        'Regional Distribution',
    )
    panel_specs = [
        [{'type': 'scatter'}, {'type': 'bar'}],
        [{'type': 'bar'}, {'type': 'pie'}],
    ]
    dashboard = make_subplots(rows=2, cols=2, subplot_titles=panel_titles, specs=panel_specs)

    # Row 1, col 1: historical yearly totals.
    sales_line = go.Scatter(
        x=ts_years,
        y=ts_data,
        mode='lines+markers',
        name='Historical Sales',
        line=dict(color='#1f77b4', width=2),
        marker=dict(size=8),
    )
    dashboard.add_trace(sales_line, row=1, col=1)

    # Row 1, col 2: growth bars coloured by their own value; the first year is skipped.
    growth_years = df_yearly['Year'][1:]
    growth_values = df_yearly['YoY_Growth'][1:]
    growth_bars = go.Bar(
        x=growth_years,
        y=growth_values,
        name='Growth Rate',
        marker=dict(color=df_yearly['YoY_Growth'][1:], colorscale='RdYlGn', showscale=False),
    )
    dashboard.add_trace(growth_bars, row=1, col=2)

    # Row 2, col 1: top five models, sorted ascending for the horizontal bars.
    best_five = df_clean.groupby('Model')['Sales_Volume'].sum().nlargest(5).sort_values()
    model_bars = go.Bar(
        y=best_five.index,
        x=best_five.values,
        orientation='h',
        name='Model Sales',
        marker=dict(color='#ff7f0e'),
    )
    dashboard.add_trace(model_bars, row=2, col=1)

    # Row 2, col 2: total sales per region.
    sales_by_region = df_clean.groupby('Region')['Sales_Volume'].sum()
    region_pie = go.Pie(
        labels=sales_by_region.index,
        values=sales_by_region.values,
        name='Regions',
    )
    dashboard.add_trace(region_pie, row=2, col=2)

    # (x title, y title, row, col) for the three cartesian panels.
    axis_titles = (
        ("Year", "Sales", 1, 1),
        ("Year", "Growth %", 1, 2),
        ("Sales", "Model", 2, 1),
    )
    for x_title, y_title, row, col in axis_titles:
        dashboard.update_xaxes(title_text=x_title, row=row, col=col)
        dashboard.update_yaxes(title_text=y_title, row=row, col=col)

    dashboard.update_layout(
        title_text="BMW Sales Analytics Dashboard",
        showlegend=True,
        height=900,
        width=1400,
    )

    target = out_path('05_interactive_dashboard.html')
    dashboard.write_html(target)
    print(f"\n‚úÖ Saved: {target}")

def create_heatmap_interactive(df_model_yearly):
    """Write a model-by-year sales heatmap to ``06_model_heatmap_interactive.html``.

    Args:
        df_model_yearly: Frame with ``Year``, ``Model`` and ``Sales_Volume``
            columns.

    Only the ten models with the highest overall totals are shown.
    """
    heatmap_data_pivot = df_model_yearly.pivot_table(
        values='Sales_Volume',
        index='Model',
        columns='Year',
        # Sum explicitly: the pivot_table default is the mean, which would
        # silently average the values if a (Model, Year) pair appeared more
        # than once. This also matches the static heatmap, which sums.
        aggfunc='sum',
        fill_value=0
    )
    heatmap_data_pivot = heatmap_data_pivot.loc[heatmap_data_pivot.sum(axis=1).nlargest(10).index]
    fig_heatmap = go.Figure(data=go.Heatmap(
        z=heatmap_data_pivot.values,
        x=heatmap_data_pivot.columns,
        y=heatmap_data_pivot.index,
        colorscale='YlOrRd',
        colorbar=dict(title='Sales')
    ))
    fig_heatmap.update_layout(
        title='BMW Model Sales Trends Over Years (Top 10 Models)',
        xaxis_title='Year',
        yaxis_title='Model',
        height=600,
        width=1200
    )
    p = out_path('06_model_heatmap_interactive.html')
    fig_heatmap.write_html(p)
    print(f"‚úÖ Saved: {p}")
# --- end viz_interactive ---

In [None]:
# --- reporting.py (embedded) ---
import pandas as pd
from datetime import datetime

def generate_monthly_report(df_clean, average_sales):
    """Build the plain-text monthly report and return it as a single string.

    Args:
        df_clean: Frame with ``Model``, ``Region`` and ``Sales_Volume`` columns.
        average_sales: Historical average of yearly sales, shown under key metrics.

    Returns:
        The report text, terminated by a newline. Sections are numbered
        consecutively from 1 to 6.
    """
    timestamp = datetime.now()
    banner = '=' * 80
    rule = '-' * 80

    lines = [
        banner,
        'BMW SALES ANALYTICS - MONTHLY REPORT',
        f'Generated: {timestamp.strftime("%Y-%m-%d %H:%M:%S")}',
        banner,
        '',
        '1. EXECUTIVE SUMMARY',
        rule,
        f'   ‚Ä¢ Report Period: {timestamp.strftime("%B %Y")}',
        '   ‚Ä¢ Number of Active Alerts: 0 (alerting disabled)',
        '',
        '2. KEY METRICS',
        rule,
        f'   ‚Ä¢ Historical Average Sales: {average_sales:,.0f}',
        '   ‚Ä¢ Year-over-Year Change: N/A',
        '',
        '3. ALERTS & ACTION ITEMS',
        rule,
        '   No alerts configured for this simplified run.',
        '',
        '4. MODEL PERFORMANCE (Top 5)',
        rule,
    ]

    top_performers = df_clean.groupby('Model')['Sales_Volume'].sum().nlargest(5)
    lines.extend(
        f'   {rank}. {model}: {sales:,.0f}'
        for rank, (model, sales) in enumerate(top_performers.items(), 1)
    )

    lines += ['', '5. REGIONAL PERFORMANCE', rule]
    by_region = df_clean.groupby('Region')['Sales_Volume'].sum().sort_values(ascending=False)
    # The grand total is loop-invariant; a zero total would otherwise give NaN shares.
    total_sales = by_region.sum()
    for region, sales in by_region.items():
        pct = (sales / total_sales * 100) if total_sales else 0.0
        lines.append(f'   ‚Ä¢ {region}: {sales:,.0f} ({pct:.1f}%)')

    lines += [
        '',
        '6. RECOMMENDATIONS',
        rule,
        '   ‚Ä¢ Monitor underperforming models closely',
        '   ‚Ä¢ Invest in high-growth regions',
        '   ‚Ä¢ Adjust inventory based on demand signals',
        '   ‚Ä¢ Review market conditions quarterly',
        '',
        banner,
        'END OF REPORT',
        banner,
    ]
    return '\n'.join(lines) + '\n'

def generate_final_summary(df_clean, average_sales, ts_years, ts_data):
    """Print the end-of-run summary and save it as ``ANALYSIS_SUMMARY.txt``.

    Every statistic falls back to a placeholder (``'N/A'`` or ``0``) when the
    data it needs is missing, so the summary can still be produced after a
    partial run.

    Args:
        df_clean: Cleaned sales frame, or ``None``.
        average_sales: Average yearly sales (numeric).
        ts_years: Sequence of years matching ``ts_data``, or ``None``.
        ts_data: Sequence of yearly sales totals, or ``None``.
    """
    import numpy as np

    def _has(column):
        return df_clean is not None and column in df_clean.columns

    def _top_by_sales(column):
        # Label of the group with the highest summed sales, or 'N/A'.
        if not (_has(column) and _has('Sales_Volume')) or df_clean.empty:
            return 'N/A'
        return df_clean.groupby(column)['Sales_Volume'].sum().idxmax()

    def _thousands(value):
        # The 'N/A' placeholder is a string and rejects the ',' format spec.
        return value if isinstance(value, str) else f'{value:,}'

    total_records = len(df_clean) if df_clean is not None else 0
    if _has('Year') and df_clean['Year'].notna().any():
        year_min = int(df_clean['Year'].min())
        year_max = int(df_clean['Year'].max())
    else:
        year_min = year_max = 'N/A'
    models_tracked = df_clean['Model'].nunique() if _has('Model') else 0
    regions_tracked = df_clean['Region'].nunique() if _has('Region') else 0
    top_model = _top_by_sales('Model')
    top_region = _top_by_sales('Region')
    avg_sales = average_sales

    peak_year = peak_value = low_year = low_value = trend = 'N/A'
    years = np.asarray(ts_years) if ts_years is not None else np.array([])
    values = np.asarray(ts_data) if ts_data is not None else np.array([])
    if years.size > 0 and years.size == values.size:
        try:
            peak_idx = int(np.argmax(values))
            low_idx = int(np.argmin(values))
            first, last = values[0], values[-1]
            if last > first:
                direction = 'GROWING'
            elif last < first:
                direction = 'DECLINING'
            else:
                direction = 'FLAT'
            stats = (
                int(years[peak_idx]), int(values[peak_idx]),
                int(years[low_idx]), int(values[low_idx]),
                direction,
            )
        except (TypeError, ValueError):
            # Non-numeric or NaN series: keep every placeholder rather than
            # reporting a half-filled set of statistics.
            stats = None
        if stats is not None:
            peak_year, peak_value, low_year, low_value, trend = stats

    summary = ('='*80) + '\n' + 'BMW SALES ANALYTICS - ANALYSIS COMPLETE' + '\n' + ('='*80) + '\n\n'
    summary += (
        'ANALYSIS COMPLETED:\n\n1. Data Overview:\n'
        f'    ‚Ä¢ Total records analyzed: {total_records:,}\n'
        f'    ‚Ä¢ Time period: {year_min} - {year_max}\n'
        f'    ‚Ä¢ Models tracked: {models_tracked}\n'
        f'    ‚Ä¢ Regions tracked: {regions_tracked}\n\n'
    )
    summary += (
        '2. Historical Performance:\n'
        f'    ‚Ä¢ Average annual sales: {avg_sales:,.0f}\n'
        f'    ‚Ä¢ Peak sales year: {peak_year} ({_thousands(peak_value)})\n'
        f'    ‚Ä¢ Lowest sales year: {low_year} ({_thousands(low_value)})\n'
        f'    ‚Ä¢ Trend: {trend}\n\n'
    )
    summary += (
        '3. Visualizations Generated:\n'
        '    [OK] 01_sales_overview.png - Overview charts (4-panel analysis)\n'
        '    [OK] 02_model_region_heatmap.png - Performance matrix\n'
        '    [OK] 05_interactive_dashboard.html - Main interactive dashboard\n'
        '    [OK] 06_model_heatmap_interactive.html - Interactive heatmap\n'
        '    [OK] 07_all_outputs.html - Aggregated outputs page\n\n'
    )
    summary += (
        '4. Data Files Generated:\n'
        '    [OK] sales_report_[timestamp].txt - Detailed report\n'
        '    [OK] ANALYSIS_SUMMARY.txt - This summary\n\n'
    )
    summary += f'5. Top Insights:\n    ‚Ä¢ Top Model: {top_model}\n    ‚Ä¢ Top Region: {top_region}\n\n'
    summary += ('='*80) + '\nPROJECT STATUS: ANALYSIS COMPLETE (Forecasting & Alerts Removed)\n' + ('='*80) + '\n'

    print(summary)
    summary_file = out_path('ANALYSIS_SUMMARY.txt')
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write(summary)
    print(f"\n[OK] Saved: {summary_file}")
# --- end reporting ---

In [None]:
# --- aggregator.py (embedded) ---
import os
import webbrowser
from pathlib import Path

def create_aggregator_html():
    """Write ``07_all_outputs.html``, a page embedding every PNG and HTML output.

    The page is written into ``OUTPUT_DIR`` and references its sibling files
    by name. After writing, the page and the two interactive dashboards are
    opened in the default browser on a best-effort basis: failures are
    reported on stdout and never raised.

    Nothing is written when ``OUTPUT_DIR`` contains no PNG or HTML outputs.
    """
    out_html = '07_all_outputs.html'
    exclude_names = {out_html, 'commit_messages-can-change-values.html'}
    # All matches live directly in OUTPUT_DIR, so sorting by name is enough.
    pngs = sorted(p.name for p in OUTPUT_DIR.glob('*.png'))
    htmls = sorted(p.name for p in OUTPUT_DIR.glob('*.html') if p.name not in exclude_names)

    if not pngs and not htmls:
        print(f'No output PNG or HTML files found in {OUTPUT_DIR}.')
        return

    parts = [
        '<!doctype html>',
        '<html lang="en">',
        '<head>',
        '<meta charset="utf-8"/>',
        '<meta name="viewport" content="width=device-width, initial-scale=1"/>',
        '<title>All Outputs - BMW Sales Forecast</title>',
        '<style>body{font-family:system-ui,Segoe UI,Roboto,Helvetica,Arial,sans-serif;margin:20px} h2{margin-top:1.2rem} figure{margin:12px 0} img{max-width:100%;height:auto;border:1px solid #ddd;padding:4px;background:#fff} .filelink{margin-bottom:8px;display:inline-block}</style>',
        '</head>',
        '<body>',
        '<h1>BMW Sales Forecast ‚Äî Generated Outputs</h1>',
        f'<p>Repository path: {Path().resolve()}</p>',
    ]
    if pngs:
        parts.append('<h2>PNG Visualizations</h2>')
        for safe in pngs:
            parts.append(f'<figure><figcaption>{safe}</figcaption><img src="{safe}" alt="{safe}"/></figure>')
    if htmls:
        parts.append('<h2>Interactive HTML Outputs</h2>')
        for safe in htmls:
            parts.append(f'<div class="filelink"><a href="{safe}" target="_blank">Open {safe} in new tab</a></div>')
            parts.append(f'<div style="margin:12px 0; border:1px solid #ccc;"><iframe src="{safe}" style="width:100%;height:640px;border:0"></iframe></div>')
    parts.append('</body>')
    parts.append('</html>')

    out_path_full = OUTPUT_DIR / out_html
    with open(out_path_full, 'w', encoding='utf-8') as f:
        f.write('\n'.join(parts))
    abs_path = out_path_full.resolve()
    print(f'‚úÖ Created aggregator: {abs_path}')

    try:
        print(f'\nüåê Opening {out_html} in your default browser...')
        # webbrowser.open reports failure through its return value, not an exception.
        if not webbrowser.open(abs_path.as_uri()):
            print('‚ö†Ô∏è Could not open browser automatically: no usable browser was found')
            print(f'   You can manually open: {abs_path}')
            return
        print(f'‚úÖ Opened aggregator: {abs_path}')

        dashboards = (
            ('05', '05_interactive_dashboard.html'),
            ('06', '06_model_heatmap_interactive.html'),
        )
        for tag, filename in dashboards:
            dash = (OUTPUT_DIR / filename).resolve()
            try:
                if dash.exists():
                    print(f'üåê Opening dashboard: {dash.name} in a new tab...')
                    webbrowser.open_new_tab(dash.as_uri())
                else:
                    print(f'   ‚Ä¢ {dash} not found; skipping open for {tag}')
            except Exception as err:
                # One dashboard failing must not prevent the next from opening.
                print(f'‚ö†Ô∏è Could not open {dash}: {err}')
        print('‚úÖ Browser open actions complete.')
    except Exception as e:
        # Opening a browser is a convenience only; never fail the run over it.
        print(f'‚ö†Ô∏è Could not open browser automatically: {e}')
        print(f'   You can manually open: {abs_path}')
# --- end aggregator ---

In [None]:
# Initialize variables
# Pipeline state shared by the driver cells below. Everything starts as None so
# that a later cell can tell whether the stage that produces a value has run.
df = None
df_clean = None
df_yearly = None
ts_data = None
ts_years = None
df_model_yearly = None
df_region_yearly = None

# Clean output directory
# Removes everything left in OUTPUT_DIR by a previous run.
clean_outputs()

## 1. Data Loading & Preprocessing

In [None]:
# Stage 1: download the CSV if it is not present locally, load it, and
# normalise the column names. Leaves df and df_clean populated.
if ENABLE_DATA_PROCESSING:
    download_required_files()
    df = load_and_explore_data(DATA_CSV_FILE)
    df_clean = preprocess_data(df)
    
    # Exploratory statistics are print-only and need the cleaned frame, hence
    # the nesting under the data-processing flag.
    if ENABLE_EXPLORATORY_ANALYSIS:
        exploratory_data_analysis(df_clean)

## 2. Time Series Aggregation

In [None]:
# Stage 2: yearly aggregates. Needs the cleaned frame from stage 1, so the
# stage is skipped with a notice (instead of crashing on None) when stage 1
# did not run.
if ENABLE_TIME_SERIES:
    if df_clean is None:
        print("Skipping time series aggregation: no cleaned data available "
              "(is ENABLE_DATA_PROCESSING disabled?).")
    else:
        df_yearly, ts_data, ts_years, df_model_yearly, df_region_yearly = aggregate_time_series(df_clean)

## 3. Static Visualizations

In [None]:
# Stage 3: static PNG charts. Each chart is drawn only when the data it reads
# is available, so disabling an earlier stage does not crash this one.
if ENABLE_STATIC_PLOTS:
    if df_clean is None:
        print("Skipping static plots: no cleaned data available "
              "(is ENABLE_DATA_PROCESSING disabled?).")
    else:
        if df_yearly is None:
            # The overview also needs the yearly totals and growth figures.
            print("Skipping sales overview: no yearly aggregation available "
                  "(is ENABLE_TIME_SERIES disabled?).")
        else:
            create_overview_visualizations(df_yearly, df_clean)
        create_heatmap(df_clean)

## 4. Reporting

In [None]:
# Stage 4: plain-text monthly report, printed and saved with a timestamped name.
if ENABLE_REPORTING:
    if df_clean is None:
        # The report groups the cleaned frame by model and region, so there is
        # nothing to report when the data-processing stage did not run.
        print("Skipping monthly report: no cleaned data available "
              "(is ENABLE_DATA_PROCESSING disabled?).")
    else:
        # Fall back to 0 when the yearly aggregation stage was skipped.
        average_sales = df_yearly['Total_Sales'].mean() if df_yearly is not None else 0

        monthly_report = generate_monthly_report(df_clean, average_sales)
        print(monthly_report)

        report_filename = out_path(f"sales_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt")
        with open(report_filename, 'w', encoding='utf-8') as f:
            f.write(monthly_report)
        print(f"\n‚úÖ Saved: {report_filename}")

## 5. Interactive Dashboards

In [None]:
# Stage 5: interactive HTML dashboards. Each one is built only when every
# input it reads is available, so disabling an earlier stage does not crash
# this one.
if ENABLE_DASHBOARDS:
    dashboard_inputs = (ts_years, ts_data, df_yearly, df_clean)
    if any(value is None for value in dashboard_inputs):
        print("Skipping interactive dashboard: cleaned data or yearly aggregation missing "
              "(are ENABLE_DATA_PROCESSING and ENABLE_TIME_SERIES enabled?).")
    else:
        create_interactive_dashboard(ts_years, ts_data, df_yearly, df_clean)

    if df_model_yearly is None:
        print("Skipping interactive heatmap: no per-model yearly aggregation available "
              "(is ENABLE_TIME_SERIES disabled?).")
    else:
        create_heatmap_interactive(df_model_yearly)

## 6. Aggregator & Final Summary

In [None]:
# Stage 6: final summary, aggregator page and archive.
# The summary is written first so that ANALYSIS_SUMMARY.txt already exists
# when zip_all_outputs() collects the *.txt files; zipping first would leave
# it out of the archive.
if ENABLE_REPORTING:
    average_sales = df_yearly['Total_Sales'].mean() if df_yearly is not None else 0

    # Substitute placeholder series when the time-series stage was skipped.
    if ts_data is None:
        ts_data = np.array([0, 0])
    if ts_years is None:
        ts_years = np.array([2020, 2021])

    generate_final_summary(df_clean, average_sales, ts_years, ts_data)

if ENABLE_AGGREGATOR:
    create_aggregator_html()
    zip_all_outputs()