## 8. SOFTWARE INTEGRATION CHALLENGES

### Definition
**Deploying ML models to production** and integrating them with existing systems are complex, error-prone tasks.

### Challenge 1: Version Control & Reproducibility


In [None]:
# Problem: Can't reproduce model results
# ❌ Not reproducible = different results each training run!

np.random.seed()  # Called without a value, so no fixed seed is pinned
model_a = RandomForestClassifier()
model_a.fit(X, y)
model_b = RandomForestClassifier()
model_b.fit(X, y)

# Two unseeded training runs can build different forests.
# (Calling predict twice on the SAME fitted model is always deterministic,
# so the comparison has to be made across separate fits.)
pred1 = model_a.predict(X_test)
pred2 = model_b.predict(X_test)  # May differ from pred1!

# ✅ Solution: Set seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

model = RandomForestClassifier(random_state=42)
model.fit(X, y)
model_repeat = RandomForestClassifier(random_state=42)
model_repeat.fit(X, y)

# Same results on every training run
pred1 = model.predict(X_test)
pred2 = model_repeat.predict(X_test)  # Identical!


### Challenge 2: Environment Parity


In [None]:
# Problem: Model works locally but fails in production
# Causes:
# - Different Python version
# - Different package versions
# - Different OS
# - Different architecture (CPU vs GPU)

# Local: Python 3.9, NumPy 1.21
# Production: Python 3.10, NumPy 1.24
# Result: Model crashes or gives wrong predictions!

# ✅ Solution: Use containers (Docker)
import subprocess

# Container recipe, kept as text: base image, pinned dependency install,
# then the model artifact and the API entry point.
dockerfile = "\n".join((
    "",
    "FROM python:3.9-slim",
    "",
    "WORKDIR /app",
    "",
    "COPY requirements.txt .",
    "RUN pip install -r requirements.txt",
    "",
    "COPY model.pkl .",
    "COPY api.py .",
    "",
    'CMD ["python", "api.py"]',
    "",
))

# Exact version pins that the Dockerfile's `pip install -r` step consumes.
requirements_txt = "\n".join((
    "",
    "numpy==1.21.0",
    "scikit-learn==0.24.2",
    "flask==2.0.1",
    "",
))

# Now everyone has identical environment!

# ✅ Solution: Document environment
import pkg_resources

def save_environment(path='requirements.txt'):
    """Write the installed distributions to *path* as ``name==version`` pins.

    Uses the standard library's ``importlib.metadata`` rather than the
    deprecated ``pkg_resources``, so it also works on interpreters where
    setuptools is not installed.

    Args:
        path: Destination file. Defaults to ``requirements.txt`` in the
            current working directory; an existing file is overwritten.

    Returns:
        None. The result is the file written to *path*, one pin per line,
        sorted by name.
    """
    import re
    from importlib import metadata

    pins = {}
    for dist in metadata.distributions():
        name = dist.metadata.get('Name')
        version = dist.metadata.get('Version')
        if not name or not version:
            # Incomplete metadata cannot be expressed as a usable pin.
            continue
        # Lower-case, dash-separated spelling keeps the ordering stable no
        # matter how each project capitalises or punctuates its name.
        key = re.sub(r'[^A-Za-z0-9.]+', '-', name).lower()
        # The same distribution can be discovered more than once when several
        # search-path entries contain it; keep the first one found.
        pins.setdefault(key, version)

    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(f"{key}=={version}\n" for key, version in sorted(pins.items()))

save_environment()

# Others can install same versions:
# pip install -r requirements.txt


### Challenge 3: Model Serving Infrastructure


In [None]:
# Problem: Model needs to handle real-time requests
# Solutions:

# Option 1: Flask API (simple; the built-in server is for development only, so run it behind a WSGI server such as gunicorn in production)
from flask import Flask, request, jsonify
import pickle

app = Flask(__name__)

# Load the serialized model once, when this cell/module runs. The context
# manager closes the file handle as soon as unpickling finishes instead of
# leaving it open for the lifetime of the process.
# NOTE: unpickling can execute arbitrary code -- only load model files that
# come from a trusted source.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

@app.route('/predict', methods=['POST'])
def predict():
    """Score a single feature vector posted as JSON.

    Expects a request body of the form ``{"features": [...]}`` and answers
    with ``{"prediction": <float>}``.

    Returns:
        A JSON response with the prediction, or a ``(response, 400)`` pair
        carrying an ``error`` message when the body is not a JSON object or
        lacks the ``features`` field.
    """
    # silent=True returns None for a missing or unparsable JSON body instead
    # of raising, so the bad-input case can be answered explicitly below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'features' not in data:
        return jsonify({'error': "Request body must be a JSON object with a 'features' field"}), 400

    X = data['features']
    # predict() is called with a one-sample batch; [0] unwraps that sample.
    prediction = model.predict([X])[0]
    return jsonify({'prediction': float(prediction)})

# Option 2: TensorFlow Serving (production-grade)
# Handles multiple versions, A/B testing, etc.

# Option 3: Ray Serve (distributed serving)
from ray import serve
import ray

serve.start()

@serve.deployment
class Model:
    """Ray Serve deployment that answers requests with the pickled model."""

    def __init__(self):
        # Load the model when the deployment instance is constructed; the
        # context manager closes the file once unpickling is done.
        # NOTE: unpickling can execute arbitrary code -- trusted files only.
        with open('model.pkl', 'rb') as model_file:
            self.model = pickle.load(model_file)

    async def __call__(self, request):
        """Predict for the ``features`` entry of *request*.

        Args:
            request: Either an HTTP request object exposing an awaitable
                ``json()`` method (what Ray Serve passes for HTTP traffic),
                or an already-decoded mapping such as ``{"features": [...]}``
                (e.g. when invoked through a deployment handle).

        Returns:
            The result of ``self.model.predict`` on a one-sample batch.

        Raises:
            KeyError: If the payload has no ``features`` entry.
        """
        if callable(getattr(request, 'json', None)):
            # HTTP path: the body must be decoded before it can be indexed.
            payload = await request.json()
        else:
            payload = request
        features = payload['features']
        return self.model.predict([features])

serve.run(Model.bind())

# Option 4: KServe (Kubernetes-native)
# Orchestrates model serving on K8s clusters


### Challenge 4: Data Pipeline Issues


In [None]:
# Problem: Features computed differently in training vs production
# Training: Features computed offline, batch
# Production: Features computed in real-time, online
# Result: Feature mismatch = poor predictions!

# Example: per-user average spending, computed two different ways
# Training side: mean over everything in `df`
average_spending = (
    df.groupby('user_id')['amount']
    .agg('mean')
)
# Result: {user_1: 100, user_2: 150, ...}

# Production side:
# the API only sees the last 30 days of data
average_spending_prod = (
    last_30_days.groupby('user_id')['amount']
    .agg('mean')
)
# Result: {user_1: 120, user_2: 130, ...}  ← DIFFERENT!

# ✅ Solution: Centralized feature store
from feast import FeatureStore

# Define features once: open the feature repository located in the current directory
feature_store = FeatureStore(repo_path='.')

# Use the same store in both training and production.
# The `...` arguments are placeholders; a real call has to supply the actual
# query arguments (see the Feast documentation for each method).
features_training = feature_store.get_historical_features(...)
features_production = feature_store.get_online_features(...)
# Both paths now read from one shared set of feature definitions, so the
# feature logic is no longer written twice and cannot silently drift apart.


### Challenge 5: Monitoring & Debugging


In [None]:
# Problem: Can't debug what's wrong when model fails in production

# ✅ Solution: Comprehensive logging
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def predict_with_logging(X):
    """Run ``model.predict`` on *X* and log the input, the output and failures.

    Args:
        X: Input batch. Must expose ``shape``, ``mean()`` and ``std()``
            (presumably a NumPy array -- confirm against callers).

    Returns:
        Whatever ``model.predict(X)`` returns.

    Raises:
        Exception: Any error raised by ``model.predict`` is logged together
            with its traceback and the offending input, then re-raised.
    """
    # %-style arguments defer message formatting until a handler actually
    # emits the record.
    logger.info("Input shape: %s", X.shape)
    logger.info("Input values: mean=%.2f, std=%.2f", X.mean(), X.std())

    try:
        prediction = model.predict(X)
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback,
        # which a plain logger.error call would drop.
        logger.exception("Prediction failed: %s", e)
        logger.error("Input data: %s", X)
        raise

    logger.info("Prediction: %s", prediction)
    return prediction

# ✅ Solution: Performance monitoring
from prometheus_client import Counter, Histogram
import time

prediction_counter = Counter('predictions_total', 'Total predictions')
prediction_latency = Histogram('prediction_latency_seconds', 'Prediction latency')

def predict_monitored(X):
    """Run ``model.predict`` on *X* and record count and latency metrics.

    Args:
        X: Input passed unchanged to ``model.predict``.

    Returns:
        Whatever ``model.predict(X)`` returns.

    Note:
        Both metrics are updated only after a successful prediction; if
        ``model.predict`` raises, the exception propagates and nothing is
        recorded.
    """
    # perf_counter is a monotonic clock meant for measuring elapsed time;
    # time.time() follows the wall clock, which can be adjusted mid-measurement.
    start = time.perf_counter()

    prediction = model.predict(X)

    prediction_counter.inc()
    prediction_latency.observe(time.perf_counter() - start)

    return prediction

# ✅ Solution: Alerting
def alert_if_needed(metric_value, threshold):
    """Raise an alert when *metric_value* is strictly greater than *threshold*.

    The alert text is handed to ``send_alert``, which is not defined in this
    section. Nothing happens when the value is at or below the threshold.
    """
    exceeded = metric_value > threshold
    if not exceeded:
        return
    send_alert(f"⚠️  Alert: {metric_value} exceeds {threshold}")


### Deployment Checklist:


In [None]:
# Pre-deployment checklist: category name -> items to tick off, in display order.
deployment_checklist = {
    "Code": [
        "Code reviewed and tested",
        "All edge cases handled",
        "Error handling in place",
        "Logging comprehensive",
    ],
    "Model": [
        "Model tested offline",
        "Cross-validation done",
        "Performance meets requirements",
        "Model serialized and versioned",
    ],
    "Infrastructure": [
        "Containerized (Docker)",
        "Environment documented",
        "CI/CD pipeline set up",
        "Rollback plan ready",
    ],
    "Monitoring": [
        "Logging configured",
        "Metrics tracked",
        "Alerts set up",
        "Dashboard created",
    ],
    "Data": [
        "Data validation in place",
        "Feature store available",
        "Data quality monitored",
        "Backup and recovery plan",
    ],
}

# Print each category as a heading (preceded by a blank line) followed by
# one unchecked box per item.
for section, tasks in deployment_checklist.items():
    print(f"\n{section}:", *(f"  ☐ {task}" for task in tasks), sep="\n")


---
