In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# session, including deprecation/convergence warnings raised by the
# libraries used below. Consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
# (this seeds NumPy's legacy global generator only; other libraries keep
# their own random state)
np.random.seed(42)

# Configure plotting: seaborn grid style and a default figure size of
# 12 x 6 inches for every matplotlib figure created afterwards
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Import HT and Intervention Search (project-local packages; they must be
# importable from the kernel's environment)
from ht_categ import HT, HTConfig
from intervention_search import (
    InterventionSearch,
    DOOperator,
    verify_do_operator_properties,
    TimeSeriesInterventionAnalyzer,
    create_intervention_report
)

# Reaching this line means every import above resolved
print("‚úÖ All imports successful!")

‚úÖ All imports successful!


In [2]:
# Load retail store data.
# The path is relative, so the CSV must sit in the kernel's working directory.
df = pd.read_csv('test_profitops.csv')
# Show the first rows as a quick sanity check of the loaded columns
df.head()

Unnamed: 0,Price,CompetitorPriceIndex,SellThrough,StoreSales,StoreTraffic,DemandPotential,InventoryAvailability,StockLevel,PromoFlag,CompetitorPromoFlag,CompetitorStockPresence,Week,SKU,Region
0,53.97,0.92,8.62,197.56,974.07,6.6,0.88,73.39,0,0,0,2023-03-20T00:00:00.000Z,SKU_1,West
1,41.83,0.77,9.31,147.74,888.37,8.53,0.93,123.55,0,0,1,2023-03-20T00:00:00.000Z,SKU_10,South
2,63.47,0.91,4.35,112.78,1138.87,2.28,1.0,132.05,0,0,1,2023-03-20T00:00:00.000Z,SKU_11,West
3,48.62,1.17,8.74,270.38,914.02,7.44,0.81,110.33,0,0,0,2023-03-20T00:00:00.000Z,SKU_12,South
4,51.87,0.89,-1.28,20.49,1007.51,2.5,1.0,79.55,1,1,1,2023-03-20T00:00:00.000Z,SKU_13,East


In [3]:
# Parse the week stamp and extract its ISO week-of-year number.
df['Week'] = pd.to_datetime(df['Week'])
df['Week_Num'] = df['Week'].dt.isocalendar().week

# Cyclical (sine/cosine) encoding of the week number over a 52-week period.
week_angle = 2 * np.pi * df['Week_Num'] / 52
df['Week_Sin'] = np.sin(week_angle)
df['Week_Cos'] = np.cos(week_angle)

# Replace the string identifiers with integer category codes
# (codes follow the sorted order of the distinct labels).
for label_col, code_col in (('Region', 'Region_Code'), ('SKU', 'SKU_Code')):
    df[code_col] = df[label_col].astype('category').cat.codes

In [4]:
# Nodes of the causal graph. Every name must be a column of `df`:
# the engineered *_Code / Week_* features plus the raw retail metrics.
nodes = [
    "SKU_Code", "Region_Code", "Week_Sin", "Week_Cos", 
    "Price", "StockLevel", "DemandPotential", "PromoFlag", 
    "StoreTraffic", "InventoryAvailability", 
    "CompetitorPriceIndex", "CompetitorPromoFlag", "CompetitorStockPresence", 
    "StoreSales", "SellThrough"
]

# Directed edges of the causal graph as (parent, child) pairs.
# Author's note: the edge set was revised based on AutoML results.
edges = [
    # --- Hierarchical Drivers ---
    ("SKU_Code", "Price"),
    ("SKU_Code", "StockLevel"),
    ("SKU_Code", "DemandPotential"),
    ("SKU_Code", "PromoFlag"),
    
    ("Region_Code", "DemandPotential"),
    ("Region_Code", "StoreTraffic"),
    ("Region_Code", "StockLevel"),
    ("Region_Code", "PromoFlag"),
    # REMOVED: Region -> Competitor nodes (author's note: R^2 was 0.0, which
    # the author read as no causal link)

    # --- Temporal/Seasonality Drivers ---
    # Week -> Price was added to improve Price's low R^2 (0.35, per the author)
    ("Week_Sin", "Price"),
    ("Week_Cos", "Price"),
    ("Week_Sin", "DemandPotential"),
    ("Week_Cos", "DemandPotential"),
    ("Week_Sin", "StoreTraffic"),
    ("Week_Cos", "StoreTraffic"),

    # --- Inventory Logic ---
    ("DemandPotential", "StockLevel"),
    ("StockLevel", "InventoryAvailability"),
    ("StockLevel", "PromoFlag"),

    # --- Competitor Dynamics (Exogenous Impact) ---
    # Competitor nodes have no incoming edges here: they act as root inputs
    # that influence our variables but are not influenced by them
    ("CompetitorPriceIndex", "Price"),
    ("CompetitorPriceIndex", "StoreTraffic"),
    ("CompetitorPriceIndex", "StoreSales"),
    
    ("CompetitorPromoFlag", "PromoFlag"),
    ("CompetitorPromoFlag", "StoreTraffic"),
    ("CompetitorPromoFlag", "StoreSales"),
    
    ("CompetitorStockPresence", "StoreSales"),

    # --- The Sales Funnel (High Confidence Section) ---
    ("PromoFlag", "StoreTraffic"),
    ("PromoFlag", "StoreSales"),
    ("StoreTraffic", "StoreSales"),
    ("InventoryAvailability", "StoreSales"),
    ("Price", "StoreSales"),

    # --- Outcomes ---
    ("StoreSales", "SellThrough"),
    ("StockLevel", "SellThrough")
]

In [5]:
# Build the square adjacency matrix for the causal graph:
# rows are parents, columns are children, 1 marks a directed edge.

# Guard against typos in the edge list. Assigning through `.loc` with a label
# that is not in the index would silently enlarge the frame (adding a row or
# column of NaN) instead of failing, leaving a non-square, float-typed matrix.
known_nodes = set(nodes)
unknown_endpoints = sorted(
    {name for edge in edges for name in edge if name not in known_nodes}
)
if unknown_endpoints:
    raise ValueError(
        f"Edges reference nodes that are missing from `nodes`: {unknown_endpoints}"
    )

adj_matrix = pd.DataFrame(0, index=nodes, columns=nodes)
for parent, child in edges:
    adj_matrix.loc[parent, child] = 1

In [6]:
# Turn the adjacency matrix into a directed graph and make sure it has no
# cycles before it is handed to the causal model.
G = nx.from_pandas_adjacency(adj_matrix, create_using=nx.DiGraph())
graph_is_acyclic = nx.is_directed_acyclic_graph(G)
assert graph_is_acyclic, "‚ùå Graph contains cycles!"
print("   ‚Ä¢ DAG structure: Valid ‚úì")

   ‚Ä¢ DAG structure: Valid ‚úì


In [7]:
# Preview only the columns used as graph nodes. Selecting by `nodes` also
# raises a KeyError right here if any node name is not a column of `df`.
df[nodes].head()

Unnamed: 0,SKU_Code,Region_Code,Week_Sin,Week_Cos,Price,StockLevel,DemandPotential,PromoFlag,StoreTraffic,InventoryAvailability,CompetitorPriceIndex,CompetitorPromoFlag,CompetitorStockPresence,StoreSales,SellThrough
0,0,3,0.992709,0.120537,53.97,73.39,6.6,0,974.07,0.88,0.92,0,0,197.56,8.62
1,1,2,0.992709,0.120537,41.83,123.55,8.53,0,888.37,0.93,0.77,0,1,147.74,9.31
2,2,3,0.992709,0.120537,63.47,132.05,2.28,0,1138.87,1.0,0.91,0,1,112.78,4.35
3,3,2,0.992709,0.120537,48.62,110.33,7.44,0,914.02,0.81,1.17,0,0,270.38,8.74
4,4,0,0.992709,0.120537,51.87,79.55,2.5,1,1007.51,1.0,0.89,1,1,20.49,-1.28


In [8]:
# Estimators offered to the AutoML step.
automl_candidates = ['LinearRegression', 'RandomForest', 'Xgboost', 'LightGBM']

# Model configuration; the graph structure is the adjacency matrix `adj_matrix`.
config = HTConfig(
    graph=adj_matrix,
    model_type='AutoML',  # per the original note: enables automatic model selection
    auto_ml=True,
    auto_ml_models=automl_candidates,
    aggregator='max',
    root_cause_top_k=3,
)

# Instantiate the model and fit it on the full frame with cross-validation
# and verbose AutoML logging switched on.
print("üéØ Training causal model with AutoML...\n")
ht_model = HT(config)
ht_model.train(df, perform_cv=True, verbose_automl=True)

üéØ Training causal model with AutoML...

üéì TRAINING MODELS WITH QUALITY ASSESSMENT

üìä Detecting variable types...
   ‚úì SKU_Code: CONTINUOUS
   ‚úì Region_Code: CATEGORICAL (4 classes: [np.int8(0), np.int8(1), np.int8(2), np.int8(3)]...)
   ‚úì Week_Sin: CONTINUOUS
   ‚úì Week_Cos: CONTINUOUS
   ‚úì Price: CONTINUOUS
   ‚úì StockLevel: CONTINUOUS
   ‚úì DemandPotential: CONTINUOUS
   ‚úì PromoFlag: CATEGORICAL (2 classes: [np.int64(0), np.int64(1)]...)
   ‚úì StoreTraffic: CONTINUOUS
   ‚úì InventoryAvailability: CONTINUOUS
   ‚úì CompetitorPriceIndex: CONTINUOUS
   ‚úì CompetitorPromoFlag: CATEGORICAL (2 classes: [np.int64(0), np.int64(1)]...)
   ‚úì CompetitorStockPresence: CATEGORICAL (2 classes: [np.int64(0), np.int64(1)]...)
   ‚úì StoreSales: CONTINUOUS
   ‚úì SellThrough: CONTINUOUS

ü§ñ Training models (AUTO-ML mode: 4 models per node)...
   ‚úì SKU_Code: Root node (no parents) - baseline scaling only
   ‚úì Region_Code: Root node (no parents) - baseline scaling only


In [9]:
# Fetch the model quality report and print its headline figures.
quality_report = ht_model.get_model_quality_report()
separator = "=" * 70

print("\nüìä MODEL QUALITY REPORT")
print(separator)

# Trust indicators: overall grade and graph coverage.
trust = quality_report['trust_indicators']
print(f"\nüéØ Overall Quality Grade: {trust['quality_grade']}")
print(f"üìà Graph Coverage: {trust['graph_coverage']}%")

# R^2 summary statistics reported under `regression_performance`.
reg_perf = quality_report['overall_summary']['regression_performance']
print("\nüìä Regression Performance:")
print(f"   ‚Ä¢ Mean R¬≤:   {reg_perf['mean_r2']:.4f}")
print(f"   ‚Ä¢ Median R¬≤: {reg_perf['median_r2']:.4f}")
print(f"   ‚Ä¢ Min R¬≤:    {reg_perf['min_r2']:.4f}")
print(f"   ‚Ä¢ Max R¬≤:    {reg_perf['max_r2']:.4f}")

print("\n" + separator)


üìä MODEL QUALITY REPORT

üéØ Overall Quality Grade: A (Excellent)
üìà Graph Coverage: 53.3%

üìä Regression Performance:
   ‚Ä¢ Mean R¬≤:   0.8093
   ‚Ä¢ Median R¬≤: 0.8302
   ‚Ä¢ Min R¬≤:    0.6138
   ‚Ä¢ Max R¬≤:    0.9525



In [10]:
# Initialize intervention search.
# The Monte Carlo sample count lives in one constant so the value handed to
# the searcher and the value reported below cannot drift apart (the message
# previously claimed 2000 while the searcher was built with 100).
N_SIMULATIONS = 100

searcher = InterventionSearch(
    graph=ht_model.graph,
    ht_model=ht_model,
    n_simulations=N_SIMULATIONS,  # Monte Carlo samples for uncertainty
    random_seed=42,
    strict_quality_mode=True
)

print("‚úÖ Intervention search initialized")
print(f"   ‚Ä¢ Monte Carlo simulations: {N_SIMULATIONS}")
# The target itself is passed to `find_interventions` in the next cell;
# this line is informational only.
print(f"   ‚Ä¢ Target: +20% sales increase")

‚úÖ Intervention search initialized
   ‚Ä¢ Monte Carlo simulations: 2000
   ‚Ä¢ Target: +20% sales increase


In [11]:
# Run the intervention search against StoreSales.
print("\nüîç Searching for optimal intervention...")

# All search settings gathered in one place, then unpacked into the call.
search_settings = dict(
    target_outcome='StoreSales',
    target_change=20.0,          # +20% increase in sales
    tolerance=3.0,               # accept +/-3% error
    max_intervention_pct=30.0,   # don't change any node by more than +/-30%
    allow_combinations=False,    # single-node interventions only
    max_candidates=10,           # return top 10 candidates
    confidence_level=0.90,       # 90% confidence intervals
    verbose=True,
    min_model_quality=0.7,
    candidate_nodes=['Price', 'StoreTraffic', 'StockLevel'],
    # StockLevel can be changed between 5% and 25%
    intervention_bounds={"StockLevel": (5, 25)},
)

results = searcher.find_interventions(**search_settings)


üîç Searching for optimal intervention...

üéØ INTERVENTION SEARCH v2.0 (Production Grade)
Target: +20.0% change in StoreSales
Tolerance: ¬±3.0% points
Max intervention: ¬±30.0%
Monte Carlo simulations: 100

üìä Pre-flight checks...
   Candidate nodes: 3
   Overall model quality: F
   ‚ö†Ô∏è  1 low-quality models detected

üîç Searching 3 candidates...
   Testing: Price... ‚úì -30.0% ‚Üí +18.2%
   Testing: StoreTraffic... ‚úì +23.3% ‚Üí +17.1%
   Testing: StockLevel... ‚úì +18.3% ‚Üí +21.4%

‚úÖ Validating 3 candidates...

‚úÖ SEARCH COMPLETE

Best Intervention:
   Type: single
   Variables: StockLevel
   ‚îî‚îÄ StockLevel: +18.35%

   Predicted Effect: +21.4% (target: +20.0%)
   90% Confidence Interval: [-5.5%, +80.1%]
   50% Confidence Interval: [+0.0%, +29.0%]
   Confidence Score: 21%

   Status: ‚úÖ APPROVED

   Total Candidates Found: 3

