# Day 6: Model Deployment

## Learning Objectives
- Build an inference pipeline
- Monitor model performance and detect data drift
- Understand the components of a production-ready architecture

---

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from scipy import stats
import json
import datetime
import warnings
# NOTE(review): this silences every warning raised in the kernel (NumPy
# RuntimeWarnings, library deprecations, ...). Narrow the filter if a cell
# misbehaves and you need to see what the libraries are complaining about.
warnings.filterwarnings('ignore')

# Seed the legacy global NumPy RNG so the synthetic data is reproducible.
np.random.seed(42)

# The status glyphs were stored as mojibake (UTF-8 bytes decoded with the wrong
# codec); they are restored here to the intended characters.
print("✅ Libraries loaded!")
print("📚 Day 6: Model Deployment")

## Part 1: Model Serialization

In [None]:
# ============================================================
# TRAIN AND SAVE MODEL
# ============================================================

print("MODEL TRAINING AND SERIALIZATION")
print("="*60)

# Generate training data: synthetic daily returns compounded into a price path.
# Re-seeding here keeps the cell reproducible when it is re-run on its own.
np.random.seed(42)
n_days = 1000

returns = np.random.normal(0.0003, 0.015, n_days)
prices = 100 * np.cumprod(1 + returns)
df = pd.DataFrame({'price': prices, 'returns': returns})

# Features: 5-day price return, 5-day return volatility, 5-day summed return.
df['ret_5d'] = df['price'].pct_change(5)
df['vol_5d'] = df['returns'].rolling(5).std()
df['mom_5d'] = df['returns'].rolling(5).sum()

# Target: 1 when the NEXT day's return is positive, else 0.
next_return = df['returns'].shift(-1)
df['target'] = (next_return > 0).astype(int)

# The final row has no next-day return. `NaN > 0` evaluates to False, so that
# row would be silently labelled 0 and survive dropna(); remove it explicitly
# together with the warm-up rows whose rolling features are NaN.
df = df[next_return.notna()].dropna()

feature_cols = ['ret_5d', 'vol_5d', 'mom_5d']
X = df[feature_cols].values
y = df['target'].values

# Chronological 70/30 split (no shuffling) so the test set is strictly later
# in time than the training set.
split = int(len(X) * 0.7)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Scaler and model: the scaler is fitted on the training rows only and then
# applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = GradientBoostingClassifier(n_estimators=50, max_depth=3, random_state=42)
model.fit(X_train_scaled, y_train)

print(f"Model trained with accuracy: {model.score(X_test_scaled, y_test):.3f}")

# Model artifact: everything the inference pipeline needs, bundled in one dict.
# training_stats records the raw (unscaled) training feature statistics.
# NOTE(review): the artifact is only kept in memory here; nothing in this cell
# writes it to disk despite the section title.
model_artifact = {
    'model': model,
    'scaler': scaler,
    'feature_names': feature_cols,
    'training_stats': {
        'X_mean': X_train.mean(axis=0).tolist(),
        'X_std': X_train.std(axis=0).tolist(),
        'n_samples': len(X_train),
        'train_date': datetime.datetime.now().isoformat()
    }
}

print("\n✅ Model artifact created")

## Part 2: Inference Pipeline

In [None]:
# ============================================================
# INFERENCE PIPELINE
# ============================================================

print("INFERENCE PIPELINE")
print("="*60)

class TradingInference:
    """Inference wrapper around a fitted scaler + classifier artifact.

    The artifact is a dict with the keys 'model', 'scaler', 'feature_names'
    and 'training_stats'. Every call to `predict` appends its result dict to
    `prediction_log`.
    """

    def __init__(self, model_artifact):
        self.model = model_artifact['model']
        self.scaler = model_artifact['scaler']
        self.feature_names = model_artifact['feature_names']
        self.training_stats = model_artifact['training_stats']

        self.prediction_log = []

    def _to_row(self, features):
        """Convert a feature dict or array-like into a (1, n_features) array."""
        if isinstance(features, dict):
            # Order the values by the training feature order; a missing key
            # raises KeyError naming the absent feature.
            row = np.array([[features[f] for f in self.feature_names]])
        else:
            row = np.array(features).reshape(1, -1)

        expected = len(self.feature_names)
        if row.shape[1] != expected:
            raise ValueError(
                f"Expected {expected} feature values "
                f"({', '.join(map(str, self.feature_names))}), got {row.shape[1]}"
            )
        return row

    def _prob_up(self, proba):
        """Return the probability assigned to class label 1 ("up")."""
        classes = getattr(self.model, 'classes_', None)
        if classes is None:
            # No class metadata available: fall back to the second column.
            return float(proba[1])
        hits = np.flatnonzero(np.asarray(classes) == 1)
        # A model that never saw label 1 assigns it zero probability.
        return float(proba[hits[0]]) if hits.size else 0.0

    def predict(self, features, timestamp=None):
        """
        Generate prediction with logging.

        Args:
            features: Dict keyed by feature name, or array-like holding one
                sample with the features in `feature_names` order
            timestamp: Optional datetime; defaults to the current time

        Returns:
            Dict with keys 'timestamp', 'features', 'prediction',
            'confidence', 'prob_up' and 'signal' ('BUY' or 'SELL'). The same
            dict object is appended to `prediction_log`.

        Raises:
            KeyError: a feature dict is missing one of `feature_names`
            ValueError: the number of feature values does not match
        """
        if timestamp is None:
            timestamp = datetime.datetime.now()

        # Parse features
        X = self._to_row(features)

        # Scale
        X_scaled = self.scaler.transform(X)

        # Predict
        pred_class = self.model.predict(X_scaled)[0]
        pred_proba = self.model.predict_proba(X_scaled)[0]

        # Build result
        result = {
            'timestamp': timestamp.isoformat(),
            'features': X[0].tolist(),
            'prediction': int(pred_class),
            'confidence': float(max(pred_proba)),
            'prob_up': self._prob_up(pred_proba),
            'signal': 'BUY' if pred_class == 1 else 'SELL'
        }

        # Log
        self.prediction_log.append(result)

        return result

    def get_signal(self, features, threshold=0.55, timestamp=None):
        """
        Get trading signal with confidence threshold.

        Args:
            features: Same as `predict`
            threshold: Minimum confidence required to act; below it the
                signal becomes 'HOLD' with position_size 0
            timestamp: Optional datetime forwarded to `predict`, so replayed
                or back-tested signals are logged with their own time

        Returns:
            The `predict` result dict with 'position_size' added. Because it
            is the same object that `predict` logged, the HOLD override and
            position size are also visible in `prediction_log`.
        """
        result = self.predict(features, timestamp=timestamp)

        if result['confidence'] < threshold:
            result['signal'] = 'HOLD'
            result['position_size'] = 0
        else:
            # Scale position by confidence: |p_up - 0.5| mapped onto [0, 1].
            # The size is unsigned; direction is carried by 'signal'.
            edge = abs(result['prob_up'] - 0.5) * 2
            result['position_size'] = edge

        return result

# Smoke-test the pipeline with the artifact built in the training cell.
inference = TradingInference(model_artifact)

# One hand-written feature vector, keyed by feature name.
test_features = dict(ret_5d=0.02, vol_5d=0.012, mom_5d=0.03)

result = inference.get_signal(test_features)

# Print every field of the result on its own indented line.
print("\nPrediction Result:")
print("\n".join(f"  {k}: {v}" for k, v in result.items()))

## Part 3: Drift Detection

In [None]:
# ============================================================
# DRIFT DETECTION
# ============================================================

print("\nDRIFT DETECTION")
print("="*60)

class DriftDetector:
    """Detect feature (data) drift against a reference sample.

    Each feature column of new data is compared with the same column of the
    reference data using a two-sample Kolmogorov-Smirnov test and the
    Population Stability Index. Only input distributions are examined; model
    targets/outputs are not, so concept drift is not covered here.
    """

    def __init__(self, reference_data, feature_names):
        """
        Args:
            reference_data: 2-D array-like (samples x features), e.g. the
                unscaled training matrix
            feature_names: Names of the leading columns, in column order

        Raises:
            ValueError: reference_data is not 2-D, is empty, or has fewer
                columns than feature_names
        """
        self.reference = self._as_matrix(reference_data, len(feature_names), "reference_data")
        self.feature_names = feature_names
        self.reference_stats = self._compute_stats(self.reference)

    @staticmethod
    def _as_matrix(data, n_features, label):
        """Coerce array-likes (lists, DataFrames, arrays) to a 2-D float array."""
        arr = np.asarray(data, dtype=float)
        if arr.ndim != 2:
            raise ValueError(f"{label} must be 2-D (samples x features), got shape {arr.shape}")
        if arr.shape[0] == 0:
            raise ValueError(f"{label} must contain at least one sample")
        if arr.shape[1] < n_features:
            raise ValueError(
                f"{label} has {arr.shape[1]} columns but {n_features} features are expected"
            )
        return arr

    def _compute_stats(self, data):
        """Compute per-column mean, std, min and max."""
        return {
            'mean': np.mean(data, axis=0),
            'std': np.std(data, axis=0),
            'min': np.min(data, axis=0),
            'max': np.max(data, axis=0)
        }

    def detect_drift(self, new_data, threshold=0.05):
        """
        Detect drift using Kolmogorov-Smirnov test.

        Args:
            new_data: 2-D array-like with the same column order as the
                reference data
            threshold: p-value below which a feature is flagged as drifted

        Returns:
            Dict keyed by feature name; each value holds 'ks_statistic',
            'p_value', 'psi' (floats) and 'drift' (bool). The 'drift' flag
            depends on the KS p-value only; PSI is reported alongside it.
            All values are native Python types, so the result can be passed
            to json.dumps.

        Raises:
            ValueError: new_data is not 2-D, is empty, or has too few columns
        """
        new_data = self._as_matrix(new_data, len(self.feature_names), "new_data")
        results = {}

        # NOTE: each feature is tested independently at `threshold`; no
        # multiple-comparison correction is applied.
        for i, feat in enumerate(self.feature_names):
            ref_vals = self.reference[:, i]
            new_vals = new_data[:, i]

            # KS test
            ks_stat, p_value = stats.ks_2samp(ref_vals, new_vals)

            # Population Stability Index
            psi = self._compute_psi(ref_vals, new_vals)

            results[feat] = {
                'ks_statistic': float(ks_stat),
                'p_value': float(p_value),
                'psi': float(psi),
                # bool() converts numpy.bool_, which json cannot serialise.
                'drift': bool(p_value < threshold)
            }

        return results

    def _compute_psi(self, expected, actual, n_bins=10):
        """
        Compute Population Stability Index.

        PSI < 0.1: No significant change
        PSI 0.1-0.25: Moderate change
        PSI > 0.25: Significant change
        """
        # Create bins from expected: equal-frequency edges taken from the
        # reference percentiles, with the outer edges opened to +/-inf so
        # values outside the reference range still land in a bin.
        bins = np.percentile(expected, np.linspace(0, 100, n_bins + 1))
        bins[0] = -np.inf
        bins[-1] = np.inf

        # Fraction of samples in each bin
        expected_counts = np.histogram(expected, bins=bins)[0] / len(expected)
        actual_counts = np.histogram(actual, bins=bins)[0] / len(actual)

        # Avoid division by zero / log(0) for empty bins
        expected_counts = np.clip(expected_counts, 0.001, None)
        actual_counts = np.clip(actual_counts, 0.001, None)

        psi = np.sum((actual_counts - expected_counts) * np.log(actual_counts / expected_counts))
        return float(psi)

# Create drift detector with the unscaled training features as the reference.
drift_detector = DriftDetector(X_train, feature_cols)

# Test with test data (should be similar)
drift_results = drift_detector.detect_drift(X_test)

# The status glyphs were stored as mojibake; restored to the intended
# warning sign and check mark.
print("\nDrift Detection Results (Test Data):")
for feat, result in drift_results.items():
    status = '⚠️ DRIFT' if result['drift'] else '✓ OK'
    print(f"  {feat:<10} KS={result['ks_statistic']:.3f}  PSI={result['psi']:.3f}  {status}")

In [None]:
# Test with synthetic drifted data: add a constant offset to each of the three
# feature columns of the test set and re-run the detector.
print("\nDrift Detection (Synthetic Shifted Data):")
X_drifted = X_test + np.array([0.02, 0.005, 0.01])  # Shift features

drift_results_shifted = drift_detector.detect_drift(X_drifted)

# The status glyphs were stored as mojibake; restored to the intended
# warning sign and check mark.
for feat, result in drift_results_shifted.items():
    status = '⚠️ DRIFT' if result['drift'] else '✓ OK'
    print(f"  {feat:<10} KS={result['ks_statistic']:.3f}  PSI={result['psi']:.3f}  {status}")

## Part 4: Performance Monitoring

In [None]:
# ============================================================
# PERFORMANCE MONITORING
# ============================================================

print("\nPERFORMANCE MONITORING")
print("="*60)

class PerformanceMonitor:
    """Monitor model performance over time.

    Stores every logged prediction/actual pair and reports accuracy over the
    most recent `window` pairs.
    """

    def __init__(self, window=50):
        """
        Args:
            window: Number of most recent pairs used for the rolling accuracy

        Raises:
            ValueError: window is not positive (a zero or negative window
                would make the tail slice select the wrong rows)
        """
        if window <= 0:
            raise ValueError(f"window must be a positive integer, got {window!r}")
        self.window = window
        self.predictions = []
        self.actuals = []
        self.timestamps = []

    def log(self, prediction, actual, timestamp=None):
        """Log a prediction-actual pair; timestamp defaults to the current time."""
        self.predictions.append(prediction)
        self.actuals.append(actual)
        # Compare against None so falsy-but-valid timestamps (e.g. epoch 0)
        # are kept instead of being replaced by "now".
        self.timestamps.append(timestamp if timestamp is not None else datetime.datetime.now())

    def get_metrics(self):
        """Calculate current metrics.

        Returns:
            Dict with 'rolling_accuracy' (float; NaN when nothing has been
            logged yet), 'total_predictions' and 'window'.
        """
        # A negative-start slice already returns the whole list when fewer
        # than `window` pairs exist, so no length check is needed.
        recent_preds = self.predictions[-self.window:]
        recent_actual = self.actuals[-self.window:]

        if recent_preds:
            accuracy = float(np.mean(np.asarray(recent_preds) == np.asarray(recent_actual)))
        else:
            # Nothing logged: report NaN directly rather than via the mean of
            # an empty array (which also emits a RuntimeWarning).
            accuracy = float('nan')

        return {
            'rolling_accuracy': accuracy,
            'total_predictions': len(self.predictions),
            'window': self.window
        }

    def check_alert(self, accuracy_threshold=0.48, min_samples=1):
        """Check if performance is below threshold.

        Args:
            accuracy_threshold: Alert when rolling accuracy is below this
            min_samples: Minimum number of pairs in the current window before
                an alert may fire; raise it to avoid alerts on a handful of
                early predictions

        Returns:
            Dict with 'alert' (bool) and 'metrics'; when alerting, also a
            human-readable 'message'.
        """
        metrics = self.get_metrics()
        n_scored = min(metrics['total_predictions'], self.window)
        if n_scored >= min_samples and metrics['rolling_accuracy'] < accuracy_threshold:
            return {
                'alert': True,
                'message': f"Model accuracy {metrics['rolling_accuracy']:.1%} below threshold {accuracy_threshold:.1%}",
                'metrics': metrics
            }
        return {'alert': False, 'metrics': metrics}

# Simulate monitoring
monitor = PerformanceMonitor(window=50)

# Log predictions from test set. Predict the whole test matrix in one batched
# call instead of invoking the model once per row, then replay the pairs in
# order into the monitor.
test_preds = model.predict(X_test_scaled)
for pred, actual in zip(test_preds, y_test):
    monitor.log(pred, actual)

metrics = monitor.get_metrics()
alert = monitor.check_alert()

# The status glyphs were stored as mojibake; restored to the intended
# warning sign and check mark.
print("\nMonitoring Status:")
print(f"  Total predictions: {metrics['total_predictions']}")
print(f"  Rolling accuracy: {metrics['rolling_accuracy']:.1%}")
print(f"  Alert status: {'⚠️ ALERT' if alert['alert'] else '✓ OK'}")

In [None]:
def build_completion_banner(title, items, width=66):
    """Render a double-line box with a centred title and check-marked items.

    The original banner was a hand-padded literal whose box-drawing and
    check-mark characters had been stored as mojibake. Building the rows
    programmatically restores the intended glyphs and keeps the right-hand
    border aligned regardless of item length.

    Args:
        title: Text centred in the header row
        items: Iterable of strings, one check-marked row each
        width: Inner width of the box in characters

    Returns:
        The banner as a single newline-joined string (no trailing newline).
    """
    top = "╔" + "═" * width + "╗"
    divider = "╠" + "═" * width + "╣"
    bottom = "╚" + "═" * width + "╝"

    rows = [top, "║" + title.center(width) + "║", divider]
    rows.extend("║" + f"  ✓ {item}".ljust(width) + "║" for item in items)
    rows.append(bottom)
    return "\n".join(rows)


completion_banner = build_completion_banner(
    "DAY 6 COMPLETE: MODEL DEPLOYMENT",
    [
        "Model artifact and serialization",
        "Inference pipeline with logging",
        "Data drift detection (KS test, PSI)",
        "Performance monitoring and alerts",
    ],
)

# Leading blank line, banner, blank line, then the teaser for the next day.
print(f"\n{completion_banner}\n\nTomorrow: Day 7 - Complete Trading System\n")