In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu

In [2]:
# Load the users CSV, then split its rows by the value of the `device` column.
df = pd.read_csv('../data/google_merch_users.csv')

desktop, mobile = (
    df.loc[df['device'].eq(device_kind)]
    for device_kind in ('desktop', 'mobile')
)

In [3]:
# Guardrail metrics: quantities that must NOT degrade.
# Each entry is keyed by the DataFrame column it is read from and carries:
#   name             - label used in printed/saved reports
#   threshold        - lowest acceptable relative change (-0.10 == -10%)
#   higher_is_better - direction in which the metric counts as an improvement
#   description      - short note on what the metric represents
guardrails = dict(
    page_views=dict(
        name='Page Views per User',
        threshold=-0.10,  # a drop of up to 10% is acceptable
        higher_is_better=True,
        description='Overall engagement',
    ),
    num_sessions=dict(
        name='Sessions per User',
        threshold=-0.10,
        higher_is_better=True,
        description='Return visits',
    ),
    product_views=dict(
        name='Product Views per User',
        threshold=-0.15,
        higher_is_better=True,
        description='Product interest',
    ),
)

In [4]:
# Evaluate every guardrail by comparing desktop users against the mobile
# baseline. Produces `results` (one dict per guardrail) and `all_passed`.
results = []

for metric, config in guardrails.items():
    # Drop missing values so NaNs are never fed into the rank test
    # (Series.mean() skips them by default, so the means are unaffected).
    desktop_vals = desktop[metric].dropna()
    mobile_vals = mobile[metric].dropna()

    # Calculate means
    desktop_mean = desktop_vals.mean()
    mobile_mean = mobile_vals.mean()

    # Relative change of desktop vs. the mobile baseline. A zero or missing
    # baseline makes the ratio meaningless; report NaN so the guardrail FAILS
    # instead of silently passing on an infinite "improvement".
    if pd.isna(mobile_mean) or mobile_mean == 0:
        rel_change = float('nan')
    else:
        rel_change = (desktop_mean - mobile_mean) / mobile_mean

    # Statistical test (Mann-Whitney U for non-normal data)
    _, p_val = mannwhitneyu(desktop_vals, mobile_vals, alternative='two-sided')

    # Check threshold, honouring the metric's direction: for a
    # lower-is-better metric an *increase* is the degradation, so the sign
    # is flipped before comparing. Any comparison with NaN is False (-> FAIL).
    if config.get('higher_is_better', True):
        directional_change = rel_change
    else:
        directional_change = -rel_change
    # bool() yields a native Python bool rather than a NumPy bool.
    passes = bool(directional_change >= config['threshold'])

    # Store result
    results.append({
        'metric': config['name'],
        'desktop_mean': desktop_mean,
        'mobile_mean': mobile_mean,
        'change_pct': rel_change * 100,
        'threshold_pct': config['threshold'] * 100,
        'passes': passes,
        'p_value': p_val
    })

    # Print
    status = "✓" if passes else "⚠️"
    print(f"\n{status} {config['name']}")
    print(f"   Desktop: {desktop_mean:.2f}")
    print(f"   Mobile:  {mobile_mean:.2f}")
    print(f"   Change:  {rel_change*100:+.1f}% (threshold: {config['threshold']*100:.0f}%)")
    print(f"   Status:  {'PASS' if passes else 'FAIL'}")

# Overall verdict: True only if every guardrail passed (True when there are none).
all_passed = all(r['passes'] for r in results)



✓ Page Views per User
   Desktop: 4.94
   Mobile:  4.90
   Change:  +0.9% (threshold: -10%)
   Status:  PASS

✓ Sessions per User
   Desktop: 1.30
   Mobile:  1.29
   Change:  +0.3% (threshold: -10%)
   Status:  PASS

✓ Product Views per User
   Desktop: 1.42
   Mobile:  1.40
   Change:  +1.3% (threshold: -15%)
   Status:  PASS


In [8]:
# Summary: print the report header that precedes the pass/fail overview

print("GUARDRAIL SUMMARY")

def convert_np(obj):
    """Recursively replace NumPy values in ``obj`` with native Python ones.

    Used to make a nested result structure safe for ``json.dump``.

    Args:
        obj: Any value. Dicts, lists and tuples are walked recursively;
            NumPy scalars and arrays are converted; everything else is
            returned unchanged.

    Returns:
        A structure of the same shape. Dicts keep their keys, except that
        NumPy-scalar keys become native scalars. Lists stay lists and
        tuples come back as plain tuples. NumPy scalars and size-1 arrays
        become Python scalars; other arrays become (nested) lists.
    """
    if isinstance(obj, dict):
        return {
            # json.dump rejects NumPy integer keys, so unwrap scalar keys too.
            (k.item() if isinstance(k, np.generic) else k): convert_np(v)
            for k, v in obj.items()
        }
    if isinstance(obj, list):
        return [convert_np(i) for i in obj]
    if isinstance(obj, tuple):
        return tuple(convert_np(i) for i in obj)
    if isinstance(obj, np.ndarray):
        # .item() only works on size-1 arrays; use tolist() for the rest
        # instead of raising ValueError.
        return obj.item() if obj.size == 1 else obj.tolist()
    if hasattr(obj, 'item'):
        # NumPy scalars, plus any other scalar-like object exposing .item().
        return obj.item()
    return obj

# Tally outcomes. int() keeps the count a native Python int even when the
# per-guardrail 'passes' flags are NumPy booleans.
passed = int(sum(r['passes'] for r in results))
total = len(results)

if all_passed:
    print(f"\n {passed}/{total} guardrails passed")
    print(f"  → Safe to optimize for primary metric")
    print(f"  → No unintended side effects detected")
else:
    print(f"\n  {total-passed} guardrail(s) failed")
    print(f"  → Review trade-offs before deployment")
    failed = [r for r in results if not r['passes']]
    for failure in failed:
        print(f"  → {failure['metric']}: {failure['change_pct']:.1f}% change")

# Save
guardrail_results = {
    'all_passed': all_passed,
    'passed_count': passed,
    'total_count': total,
    'details': results
}

# Convert all numpy types to native Python types
safe_guardrail_results = convert_np(guardrail_results)

# Create the output directory if needed so a fresh checkout
# (Restart Kernel -> Run All) does not fail with FileNotFoundError.
output_path = Path('../results/guardrail_check.json')
output_path.parent.mkdir(parents=True, exist_ok=True)

with output_path.open('w') as out_file:
    json.dump(safe_guardrail_results, out_file, indent=2)

print(f"\n Results saved to results/guardrail_check.json")


GUARDRAIL SUMMARY

 3/3 guardrails passed
  → Safe to optimize for primary metric
  → No unintended side effects detected

 Results saved to results/guardrail_check.json
