# Retail Store Sales Optimization

**Goal**: Find optimal interventions to increase store sales by 20%

This notebook demonstrates how to use the Intervention Search system to identify the best ways to improve retail store performance through causal interventions.

## 1. Load Data and Setup

In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import warnings
warnings.filterwarnings('ignore')

# Read the retail dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('data/retail_data.csv')

# Sanity summary: row count (reported as one store per row) and column names,
# followed by a preview of the first rows as the cell's displayed output.
store_count = len(df)
column_names = list(df.columns)
print(f"Loaded {store_count} retail stores")
print(f"\nColumns: {column_names}")
df.head()

## 2. Define Causal Graph

**Causal Structure:**
- `store_location → foot_traffic → sales`
- `store_size → inventory_level → sales`
- `marketing_spend → foot_traffic`
- `price_discount → conversion_rate → sales`
- `staff_count → customer_satisfaction → sales`
- `competitor_proximity → foot_traffic`

In [None]:
# Variables of the causal model. The list order fixes the row/column order of
# the adjacency matrix built below.
nodes = [
    'store_location', 'store_size', 'marketing_spend', 'price_discount',
    'staff_count', 'competitor_proximity',
    'foot_traffic', 'inventory_level', 'conversion_rate', 'customer_satisfaction',
    'sales',
]

# Direct causes of each non-root node (child -> list of parents).
parents_by_child = {
    'foot_traffic': ['store_location', 'marketing_spend', 'competitor_proximity'],
    'inventory_level': ['store_size'],
    'conversion_rate': ['price_discount'],
    'customer_satisfaction': ['staff_count'],
    'sales': ['foot_traffic', 'inventory_level', 'conversion_rate', 'customer_satisfaction'],
}

# Flatten into (parent, child) pairs; dict insertion order determines the list order.
edges = [
    (parent, child)
    for child, parents in parents_by_child.items()
    for parent in parents
]

# Adjacency matrix: entry [parent, child] is 1 when parent -> child is an edge, else 0.
position = {name: idx for idx, name in enumerate(nodes)}
adjacency = np.zeros((len(nodes), len(nodes)), dtype='int64')
parent_rows = [position[parent] for parent, _ in edges]
child_cols = [position[child] for _, child in edges]
adjacency[parent_rows, child_cols] = 1
adj_matrix = pd.DataFrame(adjacency, index=nodes, columns=nodes)

print("Causal Graph:")
print(adj_matrix)

## 3. Train Causal Model

In [None]:
from ht_categ import HT, HTConfig

# Configure the HT model with the adjacency matrix defined above and the
# 'XGBoost' model type, then train it on the loaded retail data.
config = HTConfig(graph=adj_matrix, model_type='XGBoost')
ht_model = HT(config)
ht_model.train(df)

print("✓ Causal model trained")
print("\nModel metrics (R² scores):")

# Only entries of model_metrics that carry an 'r2' value are reported.
r2_by_node = {
    node: metrics['r2']
    for node, metrics in ht_model.model_metrics.items()
    if 'r2' in metrics
}
for node, r2 in r2_by_node.items():
    print(f"  {node}: {r2:.3f}")

## 4. Find Optimal Interventions

**Objective**: Increase sales by 20% with high confidence

In [None]:
from intervention_search import InterventionSearch

# Build the searcher on top of the trained model and its graph.
searcher = InterventionSearch(graph=ht_model.graph, ht_model=ht_model, n_simulations=1000)

# Search settings, gathered in one place so they are easy to read and tweak.
search_settings = dict(
    target_outcome='sales',
    target_change=20.0,         # +20% increase
    tolerance=3.0,              # ±3% tolerance
    confidence_level=0.90,
    max_intervention_pct=30.0,
    verbose=True,
)

# Search for interventions that move sales by the requested amount.
results = searcher.find_interventions(**search_settings)

## 5. Analyze Best Intervention

In [None]:
# Report the top-ranked intervention: which nodes to change, by how much, and
# the predicted effect on sales with its uncertainty.
# `.get` is used instead of indexing so that a search which returned no usable
# intervention produces a readable message rather than a KeyError/TypeError.
best = results.get('best_intervention')


def _format_level(value):
    """Render a baseline or post-intervention level for display.

    Magnitudes of 100 or more keep whole-number formatting. Smaller values are
    shown with three significant digits so that small-magnitude variables
    (for example rates, if they are stored as fractions) do not collapse to
    "from 0 to 0".
    """
    return f"{value:.0f}" if abs(value) >= 100 else f"{value:.3g}"


if not best:
    print("\nNo intervention was returned by the search; nothing to report.")
else:
    print("\n" + "="*70)
    print("RECOMMENDED INTERVENTION")
    print("="*70)
    print(f"\nIntervene on: {', '.join(best['nodes'])}")
    print(f"\nRequired changes:")
    for node, change in best['required_pct_changes'].items():
        # `change` is a percentage; apply it to the node's baseline mean to get the new level.
        baseline = ht_model.baseline_stats[node]['mean']
        new_value = baseline * (1 + change/100)
        print(f"  • {node}: {change:+.1f}% "
              f"(from {_format_level(baseline)} to {_format_level(new_value)})")

    print(f"\nExpected Impact:")
    # NOTE: the "+20.0%" target and the "90%" label mirror the values passed to
    # find_interventions in the search cell; update them together if those change.
    print(f"  • Predicted sales change: {best['actual_effect']:+.1f}% (target: +20.0%)")
    print(f"  • 90% Confidence Interval: [{best['ci_90'][0]:+.1f}%, {best['ci_90'][1]:+.1f}%]")
    print(f"  • 50% Confidence Interval: [{best['ci_50'][0]:+.1f}%, {best['ci_50'][1]:+.1f}%]")
    print(f"  • Confidence Score: {best['confidence']:.0%}")
    print(f"  • Status: {'✅ APPROVED' if best['within_tolerance'] else '❌ NOT APPROVED'}")
    print("="*70)

## 6. Compare Top Interventions

In [None]:
# Show the first five candidates returned by the search, one summary per candidate.
print("\nTop 5 Interventions:\n")
for i, candidate in enumerate(results['all_candidates'][:5], 1):
    # The quality cell of this notebook treats 'quality' as optional and reads the
    # grade under 'quality_grade', whereas this cell originally hard-indexed
    # 'overall_grade'. The actual schema is not visible from the notebook, so accept
    # either key and fall back to 'N/A' instead of raising KeyError.
    quality = candidate.get('quality') or {}
    grade = quality.get('overall_grade', quality.get('quality_grade', 'N/A'))

    print(f"{i}. {', '.join(candidate['nodes'])}")
    print(f"   Effect: {candidate['actual_effect']:+.1f}% | "
          f"Confidence: {candidate['confidence']:.0%} | "
          f"Quality: {grade}")
    print(f"   Changes: {candidate['required_pct_changes']}\n")

## 7. Path Analysis

Understanding which causal paths contribute most to the effect

In [None]:
# Show which causal paths carry the effect. Use the path analysis attached to the
# search results when present; otherwise decompose the best intervention's effect here.
rule = "=" * 70
path_info = results.get('path_analysis')

if path_info is not None:
    print("\n" + rule)
    print("CAUSAL PATH SENSITIVITY ANALYSIS (RCA)")
    print(rule)

    if 'path_contributions' in path_info:
        print("\nPath Contributions to Total Effect:")
        # Only the first five listed paths are shown.
        top_paths = path_info['path_contributions'][:5]
        for rank, entry in enumerate(top_paths, start=1):
            print(f"{rank}. {entry.get('path', 'Unknown')}")
            print(f"   Contribution: {entry.get('contribution_pct', 0):.1f}%")
            print(f"   Path Quality: {entry.get('quality_score', 0):.2f}")
            print()

    if 'total_paths' in path_info:
        print(f"Total Causal Paths: {path_info['total_paths']}")
        print(f"High Quality Paths (>0.7): {path_info.get('high_quality_paths', 'N/A')}")
        print(f"Average Path Quality: {path_info.get('avg_path_quality', 0):.3f}")

else:
    print("\nPath analysis not available for this intervention.")
    print("Using direct path decomposition instead...")

    # Fallback: run the path analyzer directly on the best intervention.
    from intervention_search.core.path_analyzer import PathSensitivityAnalyzer

    best = results['best_intervention']
    # Only the first intervened node is decomposed, even if several are listed.
    intervention_node = best['nodes'][0]
    target_node = 'sales'
    total_effect = best['actual_effect']

    analyzer = PathSensitivityAnalyzer(
        graph=ht_model.graph,
        model_metrics=ht_model.model_metrics,
        # Fall back to an empty mapping when the model exposes no edge elasticities.
        edge_elasticities=getattr(ht_model, 'edge_elasticities', {}),
    )

    path_analysis = analyzer.decompose_total_effect(intervention_node, target_node, total_effect)

    print("\n" + rule)
    print("ROOT CAUSE ANALYSIS - Path Decomposition")
    print(rule)
    print(f"\nIntervention: {intervention_node} → {target_node}")
    print(f"Total Effect: {total_effect:+.1f}%")
    print("\nDirect vs. Indirect Effects:")
    print(f"  • Direct Effect: {path_analysis.get('direct_effect', 0):.1f}%")
    print(f"  • Indirect Effect: {path_analysis.get('indirect_effect', 0):.1f}%")
    print(f"  • Number of Paths: {path_analysis.get('num_paths', 0)}")

    if 'path_contributions' in path_analysis:
        print("\nTop Contributing Paths:")
        leading_paths = path_analysis['path_contributions'][:3]
        for rank, entry in enumerate(leading_paths, start=1):
            print(f"  {rank}. {entry.get('path', 'N/A')} ({entry.get('contribution_pct', 0):.1f}% of effect)")

## 8. Model Quality Assessment

Understanding the reliability of predictions through quality grading

In [None]:
# Summarise model quality: first the report attached to the search results,
# then the quality details of the best intervention.
rule = "=" * 70
quality_report = results.get('quality_report', {})

print("\n" + rule)
print("MODEL QUALITY REPORT")
print(rule)

# Headline figures; each falls back to a default when the report lacks the key.
print(f"\nOverall Grade: {quality_report.get('overall_grade', 'N/A')}")
print(f"Total Models: {quality_report.get('total_models', 0)}")
print(f"Regression Models: {quality_report.get('regression_models', 0)}")
print(f"Mean R²: {quality_report.get('mean_r2', 0):.3f}")

if 'grade_distribution' in quality_report:
    print("\nGrade Distribution:")
    distribution = quality_report['grade_distribution']
    for grade in 'ABCDF':
        count = distribution.get(grade, 0)
        # Grades with no models are left out of the listing.
        if not count > 0:
            continue
        print(f"  Grade {grade}: {count} models")

# Quality details for the best intervention, when the search attached them.
best = results['best_intervention']
if 'quality' in best:
    quality = best['quality']
    weakest = quality.get('weakest_link', {})

    print(f"\n{'-'*70}")
    print("Best Intervention Quality:")
    print(f"  Path: {quality.get('path', 'N/A')}")
    print(f"  Overall Grade: {quality.get('quality_grade', 'N/A')}")
    print(f"  Geometric Mean Quality: {quality.get('quality_score_geom_mean', 0):.3f}")
    print(f"  Weakest Link: {weakest.get('node', 'N/A')} "
          f"(Grade: {weakest.get('grade', 'N/A')})")

    # At most the first three warnings are shown.
    notes = quality.get('warnings')
    if notes:
        print("\n  ⚠️  Warnings:")
        for note in notes[:3]:
            print(f"    - {note}")
print(rule)

## 9. Business Interpretation

### Key Insights:

1. **Primary Levers**: The analysis identifies which operational variables have the strongest causal impact on sales
2. **Confidence Levels**: High confidence scores indicate reliable predictions based on strong model quality
3. **Uncertainty**: Confidence intervals account for model uncertainty through Monte Carlo simulation
4. **Feasibility**: Interventions are checked for out-of-distribution values and against practical constraints

### Recommended Actions:

Based on the best intervention identified:
- Implement the recommended changes gradually
- Monitor actual vs. predicted outcomes
- Consider multi-node interventions for robust improvements
- Focus on high-quality causal paths for maximum reliability

## Summary

This notebook demonstrated:
- ✅ Loading and preparing retail data
- ✅ Defining causal graph structure
- ✅ Training causal models with HT
- ✅ Finding optimal interventions with uncertainty quantification
- ✅ Analyzing causal paths and model quality
- ✅ Interpreting results for business decisions