In [1]:
# Enhanced Attribution Model Analysis
import pickle
import pandas as pd
import numpy as np

# Read the pickled bundle from the working directory and pull out the two
# entries used below: the model object and the feature-name sequence (which is
# later used to order the model's input columns).
# NOTE(review): pickle.load can execute arbitrary code while deserializing --
# only open bundles that come from a trusted source.
with open("criteo_attribution_xgboost_model.pkl", "rb") as f:
    saved_data = pickle.load(f)

model, feature_names = saved_data['model'], saved_data['feature_names']

# Feature importance (from analysis)
# NOTE(review): hard-coded snapshot; presumably copied from the model's own
# importance scores -- confirm it still matches the loaded model.
feature_importance = {
    'click': 0.96947354,
    'campaign_perf': 0.011650703,
    'cat1': 0.009480488,
    'cat4': 0.002314992,
    'cost': 0.0020268261,
    'cost_quartile': 0.00091451674,
    'cpo': 0.0008982269,
    'cat5': 0.00065160333,
    'cat3': 0.00059247704,
    'cat6': 0.00049594167
}


# Defined *before* the report loop below: the loop calls this helper, so on a
# fresh kernel the definition has to run first or the cell dies with NameError.
def get_feature_description(feature):
    """Return a short human-readable description for a feature name.

    Args:
        feature: Feature name to look up (e.g. ``'click'``).

    Returns:
        The description string, or ``'Unknown feature'`` when the name has no
        entry in the lookup table.
    """
    descriptions = {
        'click': 'Whether there was a click',
        'campaign_perf': 'Campaign performance metric',
        'cat1': 'Category 1',
        'cat4': 'Category 4',
        'cost': 'Cost of the interaction',
        'cost_quartile': 'Cost quartile',
        'cpo': 'Cost per order',
        'cat5': 'Category 5',
        'cat3': 'Category 3',
        'cat6': 'Category 6'
    }
    return descriptions.get(feature, 'Unknown feature')


print("=== ATTRIBUTION MODEL PARAMETER ANALYSIS ===")
print("\nModel Features (in order of importance):")
# Rank features from highest to lowest importance and number them from 1.
for i, (feat, imp) in enumerate(sorted(feature_importance.items(), key=lambda x: x[1], reverse=True), 1):
    print(f"{i}. {feat} ({imp:.2%} importance) - {get_feature_description(feat)}")

def predict_attribution(input_params):
    """Score one hypothetical interaction with the loaded model.

    Args:
        input_params: Mapping of feature name -> value. Every key must be one
            of ``feature_names``; features that are not supplied default to 0.

    Returns:
        Tuple ``(prob, pred, full_input)`` where ``prob`` is the second column
        of ``model.predict_proba`` for the row (presumably the "attributed"
        class -- confirm against the model's class order), ``pred`` is
        ``model.predict`` for the row, and ``full_input`` is the complete
        feature dict that was scored.

    Raises:
        ValueError: If ``input_params`` contains a name that is not a model
            feature. Such keys would otherwise be dropped by the column
            selection below and the caller would silently get the default-0
            prediction for the feature they meant to set.
    """
    # Create full input with defaults
    # NOTE(review): every feature not supplied is scored as 0; if some
    # features are derived from others (names such as "*_interaction" suggest
    # so), they presumably need recomputing from the inputs -- confirm.
    full_input = {feat: 0 for feat in feature_names}

    unknown = [name for name in input_params if name not in full_input]
    if unknown:
        raise ValueError(
            f"Unknown feature name(s) {unknown}; expected a subset of {list(full_input)}"
        )
    full_input.update(input_params)

    # Select columns explicitly so the frame matches the order in feature_names.
    input_df = pd.DataFrame([full_input])[feature_names]
    prob = model.predict_proba(input_df)[0][1]
    pred = model.predict(input_df)[0]

    return prob, pred, full_input

# Reference scenario: the only features given explicit values; every other
# model feature is filled in by predict_attribution.
baseline_params = dict(click=1, campaign_perf=0.05, cost=0.5, cpo=0.004)

print("\n=== BASELINE PREDICTION ===")
baseline_prob, baseline_pred, baseline_full = predict_attribution(baseline_params)

# Echo the complete feature dict that was actually scored.
print("Baseline Parameters:")
print(baseline_full)

print("\nBaseline Result:")
print("Probability: {:.2%}".format(baseline_prob))
if baseline_pred == 1:
    print("Prediction: ATTRIBUTED")
else:
    print("Prediction: NOT ATTRIBUTED")

=== ATTRIBUTION MODEL PARAMETER ANALYSIS ===

Model Features (in order of importance):
1. click (96.95% importance) - Whether there was a click
2. campaign_perf (1.17% importance) - Campaign performance metric
3. cat1 (0.95% importance) - Category 1
4. cat4 (0.23% importance) - Category 4
5. cost (0.20% importance) - Cost of the interaction
6. cost_quartile (0.09% importance) - Cost quartile
7. cpo (0.09% importance) - Cost per order
8. cat5 (0.07% importance) - Category 5
9. cat3 (0.06% importance) - Category 3
10. cat6 (0.05% importance) - Category 6

=== BASELINE PREDICTION ===
Baseline Parameters:
{'click': 1, 'campaign_perf': 0.05, 'cat1': 0, 'cat4': 0, 'cost': 0.5, 'cost_quartile': 0, 'cpo': 0.004, 'cat5': 0, 'cat3': 0, 'cat6': 0, 'campaign': 0, 'cat2': 0, 'cat8': 0, 'cat9': 0, 'click_cost_interaction': 0, 'campaign_click_rate': 0, 'cost_efficiency': 0, 'cat1_cat2_interaction': 0, 'cat3_cat4_interaction': 0}

Baseline Result:
Probability: 2.40%
Prediction: NOT ATTRIBUTED


In [2]:
print("\n=== PARAMETER IMPACT ANALYSIS ===")


def sweep_parameter(param_name, values):
    """Print the prediction for each value of one feature, all else at baseline.

    Args:
        param_name: Key to override on top of ``baseline_params``.
        values: Iterable of values to try for that feature.

    Returns:
        Dict mapping each tried value to the probability returned by
        ``predict_attribution`` for it.
    """
    probabilities = {}
    for value in values:
        # Build a fresh dict per value so baseline_params is never mutated.
        prob, pred, _ = predict_attribution({**baseline_params, param_name: value})
        label = 'ATTRIBUTED' if pred == 1 else 'NOT ATTRIBUTED'
        print(f"{param_name}={value}: Probability={prob:.2%}, Prediction={label}")
        probabilities[value] = prob
    return probabilities


# Test impact of most important features
print("\nTesting impact of CLICK parameter (most important: 96.95%):")
click_probs = sweep_parameter('click', [0, 1])
click_0_prob, click_1_prob = click_probs[0], click_probs[1]
# Let the format spec emit the sign: a hard-coded "+" would print "+-x%" if
# the click ever lowered the probability.
print(f"Impact: {click_1_prob - click_0_prob:+.2%} probability increase with click")

print("\nTesting impact of CAMPAIGN_PERF parameter (2nd most important: 1.17%):")
sweep_parameter('campaign_perf', [0.01, 0.05, 0.10, 0.20, 0.50, 0.80, 1.00])

print("\nTesting impact of COST parameter (5th most important: 0.20%):")
sweep_parameter('cost', [0.1, 0.5, 1.0, 2.0, 5.0, 10.0])

print("\nTesting impact of CPO parameter (7th most important: 0.09%):")
sweep_parameter('cpo', [0.001, 0.004, 0.01, 0.05, 0.10, 0.20])

=== PARAMETER IMPACT ANALYSIS ===

Testing impact of CLICK parameter (most important: 96.95%):
click=0: Probability=0.50%, Prediction=NOT ATTRIBUTED
click=1: Probability=2.40%, Prediction=NOT ATTRIBUTED
Impact: +1.90% probability increase with click

Testing impact of CAMPAIGN_PERF parameter (2nd most important: 1.17%):
campaign_perf=0.01: Probability=2.35%, Prediction=NOT ATTRIBUTED
campaign_perf=0.05: Probability=2.40%, Prediction=NOT ATTRIBUTED
campaign_perf=0.10: Probability=2.46%, Prediction=NOT ATTRIBUTED
campaign_perf=0.20: Probability=2.58%, Prediction=NOT ATTRIBUTED
campaign_perf=0.50: Probability=3.01%, Prediction=NOT ATTRIBUTED
campaign_perf=0.80: Probability=3.42%, Prediction=NOT ATTRIBUTED
campaign_perf=1.00: Probability=3.67%, Prediction=NOT ATTRIBUTED

Testing impact of COST parameter (5th most important: 0.20%):
cost=0.1: Probability=2.38%, Prediction=NOT ATTRIBUTED
cost=0.5: Probability=2.40%, Prediction=NOT ATTRIBUTED
cost=1.0: Probability=2.42%, Prediction=NOT ATTRIB

In [3]:
# Search for inputs that make the model predict the positive class, using
# three progressively more aggressive strategies. Each strategy prints the
# parameters it tried followed by the resulting probability and label.
print("\n=== FINDING EXAMPLES THAT PREDICT ATTRIBUTION ===")

print("\nStrategy 1: Maximize most important features")
high_params = {
    "click": 1,
    "campaign_perf": 1.0,
    "cost": 10.0,
    "cpo": 0.20
}
prob, pred, _ = predict_attribution(high_params)
print(f"Parameters: {high_params}")
print(f"Result: Probability={prob:.2%}, Prediction={'ATTRIBUTED' if pred == 1 else 'NOT ATTRIBUTED'}")

print("\nStrategy 2: Add categorical features (trying different cat1 values)")
for cat1_val in [1, 5, 8]:  # Try different category values
    # Copy so each trial starts from the same Strategy 1 parameters.
    cat_params = high_params.copy()
    cat_params['cat1'] = cat1_val
    prob, pred, _ = predict_attribution(cat_params)
    print(f"Parameters: {cat_params}")
    print(f"Result: Probability={prob:.2%}, Prediction={'ATTRIBUTED' if pred == 1 else 'NOT ATTRIBUTED'}")
    print()

print("Strategy 3: Extreme high values to force attribution")
# NOTE(review): these values are orders of magnitude above the baseline
# scenario; presumably outside the range the model was trained on -- confirm
# before drawing conclusions from the resulting probabilities.
extreme_values = [
    {"click": 1, "campaign_perf": 5.0, "cost": 50.0, "cpo": 1.0},
    {"click": 1, "campaign_perf": 10.0, "cost": 100.0, "cpo": 2.0},
    {"click": 1, "campaign_perf": 20.0, "cost": 200.0, "cpo": 5.0},
    {"click": 1, "campaign_perf": 50.0, "cost": 500.0, "cpo": 10.0},
    {"click": 1, "campaign_perf": 60.0, "cost": 600.0, "cpo": 12.0},
    {"click": 1, "campaign_perf": 80.0, "cost": 800.0, "cpo": 15.0},
    {"click": 1, "campaign_perf": 100.0, "cost": 1000.0, "cpo": 20.0}
]

for extreme_params in extreme_values:
    prob, pred, _ = predict_attribution(extreme_params)
    # Emoji restored from mis-encoded text (UTF-8 bytes decoded as Mac Roman).
    attribution_status = "ATTRIBUTED ✅" if pred == 1 else "NOT ATTRIBUTED"
    # Prefix shown only on rows where the model predicts attribution.
    banner = "🎯 FOUND ATTRIBUTION! " if pred == 1 else ""
    print(f"{banner}Parameters: {extreme_params}")
    print(f"Result: Probability={prob:.2%}, Prediction={attribution_status}")
    print()

=== FINDING EXAMPLES THAT PREDICT ATTRIBUTION ===

Strategy 1: Maximize most important features
Parameters: {'click': 1, 'campaign_perf': 1.0, 'cost': 10.0, 'cpo': 0.20}
Result: Probability=4.33%, Prediction=NOT ATTRIBUTED

Strategy 2: Add categorical features (trying different cat1 values)
Parameters: {'click': 1, 'campaign_perf': 1.0, 'cost': 10.0, 'cpo': 0.20, 'cat1': 1}
Result: Probability=4.33%, Prediction=NOT ATTRIBUTED

Parameters: {'click': 1, 'campaign_perf': 1.0, 'cost': 10.0, 'cpo': 0.20, 'cat1': 5}
Result: Probability=4.33%, Prediction=NOT ATTRIBUTED

Parameters: {'click': 1, 'campaign_perf': 1.0, 'cost': 10.0, 'cpo': 0.20, 'cat1': 8}
Result: Probability=4.33%, Prediction=NOT ATTRIBUTED

Strategy 3: Extreme high values to force attribution
Parameters: {'click': 1, 'campaign_perf': 5.0, 'cost': 50.0, 'cpo': 1.0}
Result: Probability=8.78%, Prediction=NOT ATTRIBUTED

Parameters: {'click': 1, 'campaign_perf': 10.0, 'cost': 100.0, 'cpo': 2.0}
Result: Probability=14.32%, Predicti

In [4]:
# Scan a coarse grid of campaign_perf values (cost and cpo held fixed) and
# record the first grid point at which the model's prediction equals 1.
print("=== FINDING ATTRIBUTION THRESHOLD ===")

print("\nTesting campaign_perf values to find attribution threshold:")
base_extreme = {"click": 1, "cost": 500.0, "cpo": 10.0}

attribution_threshold = None
for perf_val in [30.0, 40.0, 45.0, 50.0, 55.0]:
    test_params = base_extreme.copy()
    test_params['campaign_perf'] = perf_val
    prob, pred, _ = predict_attribution(test_params)
    # Emoji restored from mis-encoded text (UTF-8 bytes decoded as Mac Roman).
    attribution_status = "ATTRIBUTED ✅" if pred == 1 else "NOT ATTRIBUTED"
    print(f"campaign_perf={perf_val}: Probability={prob:.2%}, Prediction={attribution_status}")

    # Keep only the first value that flips the prediction; the grid is
    # ascending, so this is the smallest tested value that does so.
    if pred == 1 and attribution_threshold is None:
        attribution_threshold = perf_val
        threshold_params = test_params.copy()
        threshold_prob = prob

# Compare against None rather than relying on truthiness, so a threshold of
# 0.0 would still be reported (and to match the `is None` check above).
if attribution_threshold is not None:
    print("\n🎯 ATTRIBUTION THRESHOLD FOUND!")
    # "~" because the true threshold lies somewhere between this grid point
    # and the previous one.
    print(f"Minimum campaign_perf for attribution: ~{attribution_threshold}")
    print(f"With parameters: {threshold_params}")
    print(f"Result: {threshold_prob:.2%} probability, ATTRIBUTED")

=== FINDING ATTRIBUTION THRESHOLD ===

Testing campaign_perf values to find attribution threshold:
campaign_perf=30.0: Probability=35.89%, Prediction=NOT ATTRIBUTED
campaign_perf=40.0: Probability=43.95%, Prediction=NOT ATTRIBUTED
campaign_perf=45.0: Probability=47.89%, Prediction=NOT ATTRIBUTED
campaign_perf=50.0: Probability=51.73%, Prediction=ATTRIBUTED ✅
campaign_perf=55.0: Probability=55.48%, Prediction=ATTRIBUTED ✅

🎯 ATTRIBUTION THRESHOLD FOUND!
Minimum campaign_perf for attribution: ~50.0
With parameters: {'click': 1, 'campaign_perf': 50.0, 'cost': 500.0, 'cpo': 10.0}
Result: 51.73% probability, ATTRIBUTED


In [5]:
# Static, human-written summary of the analysis.
# NOTE(review): every figure below is a literal typed into the string, not a
# value computed by this notebook -- re-check them whenever the model changes.
# Bullets, arrows and emoji were restored from mis-encoded text (UTF-8 bytes
# decoded as Mac Roman). Entries starting with "\n" produce a blank line
# before that entry when the list is joined and printed.
summary_lines = [
    "=== SUMMARY: PARAMETER EFFECTS ON ATTRIBUTION PREDICTION ===",

    "\n🔍 KEY FINDINGS:",
    "\n1. CLICK (96.95% importance) - CRITICAL PARAMETER",
    "   • click=0: 0.50% probability",
    "   • click=1: 2.40% probability",
    "   • Impact: Must be 1 for any chance of attribution",

    "\n2. CAMPAIGN_PERF (1.17% importance) - MAIN DRIVER FOR ATTRIBUTION",
    "   • Low values (0.01-1.0): 2-4% probability",
    "   • Threshold for attribution: ~50.0",
    "   • High values (50-100): 50-77% probability → ATTRIBUTED",

    "\n3. COST (0.20% importance) - MODERATE IMPACT",
    "   • Higher cost increases attribution probability",
    "   • Works synergistically with campaign_perf",

    "\n4. CPO (0.09% importance) - MINOR IMPACT",
    "   • Higher CPO slightly increases attribution probability",
    "   • Secondary effect compared to campaign_perf",

    "\n📊 ATTRIBUTION EXAMPLES:",
    "\n❌ NOT ATTRIBUTED (typical):",
    "   {'click': 1, 'campaign_perf': 0.05, 'cost': 0.5, 'cpo': 0.004}",
    "   → 2.40% probability",

    "\n✅ ATTRIBUTED (threshold):",
    "   {'click': 1, 'campaign_perf': 50.0, 'cost': 500.0, 'cpo': 10.0}",
    "   → 51.73% probability",

    "\n✅ ATTRIBUTED (high confidence):",
    "   {'click': 1, 'campaign_perf': 100.0, 'cost': 1000.0, 'cpo': 20.0}",
    "   → 76.84% probability",

    "\n🎯 CONCLUSION:",
    "The model primarily uses CAMPAIGN_PERF as the key differentiator for attribution.",
    "A click must occur (click=1), then campaign_perf ≥ 50 typically results in attribution.",
    "Cost and CPO provide additional signal but are secondary factors.",
]

# One print of the joined text emits exactly what one print per line would.
print("\n".join(summary_lines))

=== SUMMARY: PARAMETER EFFECTS ON ATTRIBUTION PREDICTION ===

🔍 KEY FINDINGS:

1. CLICK (96.95% importance) - CRITICAL PARAMETER
   • click=0: 0.50% probability
   • click=1: 2.40% probability
   • Impact: Must be 1 for any chance of attribution

2. CAMPAIGN_PERF (1.17% importance) - MAIN DRIVER FOR ATTRIBUTION
   • Low values (0.01-1.0): 2-4% probability
   • Threshold for attribution: ~50.0
   • High values (50-100): 50-77% probability → ATTRIBUTED

3. COST (0.20% importance) - MODERATE IMPACT
   • Higher cost increases attribution probability
   • Works synergistically with campaign_perf

4. CPO (0.09% importance) - MINOR IMPACT
   • Higher CPO slightly increases attribution probability
   • Secondary effect compared to campaign_perf

📊 ATTRIBUTION EXAMPLES:

❌ NOT ATTRIBUTED (typical):
   {'click': 1, 'campaign_perf': 0.05, 'cost': 0.5, 'cpo': 0.004}
   → 2.40% probability

✅ ATTRIBUTED (threshold):
   {'click': 1, 'campaign_perf': 50.0, 'cost': 50