## Phase 8: Testing in Production

### Types of Testing

**1. Data Validation**


In [None]:
# Great Expectations: Validate data quality in production
from great_expectations.dataset import PandasDataset

# Wrap the incoming batch in a Great Expectations dataset.
# NOTE: every expect_* call returns the result of that single check, not the
# dataset, so the wrapper itself must be kept in `expectations` and the
# expectations invoked on it one after another.
expectations = PandasDataset(new_data)

# Check the batch size is plausible (1,000 to 100,000 rows)
expectations.expect_table_row_count_to_be_between(
    min_value=1000,
    max_value=100000
)

# Check age is in valid range
expectations.expect_column_values_to_be_between(
    column='age',
    min_value=18,
    max_value=120
)

# Check no missing values
expectations.expect_column_values_to_not_be_null(column='customer_id')

# Get validation report covering every expectation registered above
validation_result = expectations.validate()
print(validation_result)


**2. Model Performance Monitoring**


In [None]:
# Monitor prediction distribution
# If distribution changes drastically, model might be broken

def monitor_predictions(predictions):
    """
    Summarise one batch of predictions into a statistics snapshot.

    The snapshot holds the batch mean, standard deviation, minimum and
    maximum, the number of predictions above 0.7 and below 0.3, and the
    time at which the snapshot was taken.
    """
    snapshot = {}

    # Location and spread of the batch
    snapshot['mean_churn_prob'] = predictions.mean()
    snapshot['std_churn_prob'] = predictions.std()
    snapshot['min'] = predictions.min()
    snapshot['max'] = predictions.max()

    # Sizes of the two tails, using fixed 0.7 / 0.3 cut-offs
    above_high_cutoff = predictions > 0.7
    snapshot['high_risk_count'] = above_high_cutoff.sum()
    below_low_cutoff = predictions < 0.3
    snapshot['low_risk_count'] = below_low_cutoff.sum()

    snapshot['timestamp'] = pd.Timestamp.now()

    # Next steps (not implemented here): persist the snapshot to a time
    # series database and alert if mean_churn_prob suddenly doubles.
    return snapshot


**3. A/B Testing (Champion vs Challenger)**


In [None]:
# Split traffic between old model (champion) and new model (challenger)
# Compare performance metrics

def run_ab_test(data, champion_model, challenger_model, split_ratio=0.5,
                y_true=None):
    """
    Run A/B test between two models

    The first `split_ratio` share of `data` is scored by the champion and
    the remainder by the challenger; each arm's ROC AUC is computed against
    the matching slice of `y_true`, and the challenger wins only if it beats
    the champion by more than 0.01 AUC.

    Args:
        data: Sliceable collection of input rows (must support len()).
        champion_model: Current model; must expose predict().
        challenger_model: Candidate model; must expose predict().
        split_ratio: Fraction of `data` routed to the champion.
        y_true: Ground-truth labels aligned with `data`. When omitted, a
            module-level variable named `y_true` is used, which is what
            earlier versions of this function relied on implicitly.

    Returns:
        'challenger' if the challenger should be promoted, else 'champion'.

    Raises:
        ValueError: If no labels are available, or if the split leaves one
            of the two arms without any rows.
    """
    if y_true is None:
        # Backward compatibility: fall back to the global the function used
        # to read implicitly, but fail with a clear message if it is absent.
        y_true = globals().get('y_true')
        if y_true is None:
            raise ValueError(
                "y_true must be passed to run_ab_test or defined at module level"
            )

    n = len(data)
    split_point = int(n * split_ratio)
    if not 0 < split_point < n:
        # An empty arm cannot be scored; surface that before calling predict.
        raise ValueError(
            f"split_ratio={split_ratio} leaves an empty group for n={n} rows"
        )

    # Champion (old model)
    champion_preds = champion_model.predict(data[:split_point])

    # Challenger (new model)
    challenger_preds = challenger_model.predict(data[split_point:])

    # Compare metrics
    # NOTE(review): if predict() returns hard class labels, AUC would be more
    # informative on probability scores; confirm what the models return.
    champion_auc = roc_auc_score(y_true[:split_point], champion_preds)
    challenger_auc = roc_auc_score(y_true[split_point:], challenger_preds)

    print(f"Champion AUC: {champion_auc:.4f}")
    print(f"Challenger AUC: {challenger_auc:.4f}")

    if challenger_auc > champion_auc + 0.01:  # 0.01 absolute AUC improvement threshold
        print("Challenger wins! Promote to production.")
        return 'challenger'

    print("Champion still better. Keep current model.")
    return 'champion'


**4. Monitoring Data Drift**


In [None]:
# Data drift: the distribution of the input features changes over time
# Example: a sudden influx of older customers when the model was trained mostly on younger ones

from scipy.stats import ks_2samp

def detect_data_drift(X_train, X_new, alpha=0.05):
    """
    Detect if new data distribution differs from training data

    Runs a two-sample Kolmogorov-Smirnov test (scipy's ks_2samp) on every
    column of `X_train` against the same-named column of `X_new` and reports
    each column whose p-value falls below `alpha`.

    Args:
        X_train: Training features; its `.columns` drive the comparison.
        X_new: New features, indexable by the same column names.
        alpha: Significance level below which a column counts as drifted.

    Returns:
        Dict mapping each drifted column name to its p-value, in the order
        the columns appear in `X_train`. Empty when no drift is detected.
    """
    drifted = {}
    for column in X_train.columns:
        # Only the p-value is needed; the KS statistic itself is discarded.
        _, p_value = ks_2samp(X_train[column], X_new[column])

        if p_value < alpha:  # Statistically significant difference
            print(f"Data drift detected in column: {column}")
            print(f"p-value: {p_value}")
            # Action: Retrain model on new data
            drifted[column] = p_value

    return drifted


### Tools Used in Testing

| Tool | Purpose |
|------|---------|
| Great Expectations | Data validation |
| Evidently | Model monitoring |
| Prometheus | Metrics collection |
| Grafana | Monitoring dashboards |
| MLflow | Model tracking |

---
