# Model Training & MLflow Integration

Train fraud detection models and register them in MLflow for the ML Pipeline Platform.

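**Prerequisites**: Run `data_exploration.ipynb` first, or have training data ready at `../sample_data/demo/datasets/fraud_detection_processed.csv`. If that file is missing, the notebook generates synthetic sample data instead.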

In [None]:
# Core imports
import warnings
from pathlib import Path

import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Suppress every warning for the rest of the session. This keeps the notebook
# output compact, but it also hides deprecation and metric warnings raised by
# the libraries imported above.
warnings.filterwarnings('ignore')

print("Environment ready")

## 1. Load Training Data

In [None]:
# Use the processed dataset when it is on disk; otherwise fabricate a
# reproducible synthetic sample so the rest of the notebook can still run.
data_path = Path('../sample_data/demo/datasets/fraud_detection_processed.csv')

if not data_path.exists():
    print("Generating sample training data...")
    np.random.seed(42)
    n_samples = 5000

    # The columns are drawn top to bottom from the seeded global RNG, so the
    # order of the entries below determines the generated values.
    synthetic_columns = {
        'amount': np.random.lognormal(4, 2, n_samples),
        'merchant_category': np.random.choice(
            ['electronics', 'grocery', 'gas', 'restaurant', 'online'],
            n_samples,
        ),
        'hour_of_day': np.random.randint(0, 24, n_samples),
        'is_weekend': np.random.choice([0, 1], n_samples),
        'risk_score': np.random.beta(2, 5, n_samples),
        'days_since_last': np.random.exponential(5, n_samples),
        'num_transactions_today': np.random.poisson(3, n_samples),
        'label': np.random.choice([0, 1], n_samples, p=[0.95, 0.05]),
    }
    df = pd.DataFrame(synthetic_columns)

    # Scale up risk score and amount on the fraud rows so the two classes
    # are separable.
    fraud_idx = df['label'].eq(1)
    df.loc[fraud_idx, 'risk_score'] = df.loc[fraud_idx, 'risk_score'] * 2.5
    df.loc[fraud_idx, 'amount'] = df.loc[fraud_idx, 'amount'] * 1.8
else:
    df = pd.read_csv(data_path)
    print(f"Loaded {len(df)} records from {data_path}")

print(f"\nDataset shape: {df.shape}")
print(f"Fraud rate: {df['label'].mean()*100:.1f}%")
df.head()

## 2. Feature Preparation

In [None]:
# Prepare features for training
categorical_cols = ['merchant_category']
numerical_cols = ['amount', 'hour_of_day', 'is_weekend', 'risk_score',
                  'days_since_last', 'num_transactions_today']

# Encode categorical variables with one encoder per column. A single shared
# LabelEncoder would be refit on every iteration and only remember the classes
# of the last column, so earlier columns could not be encoded or decoded
# consistently afterwards. `label_encoders` keeps every fitted encoder by
# column name; `le` stays bound to the most recently fitted one.
# NOTE(review): anything that scores raw transactions must apply the same
# category-to-integer mapping; confirm the serving side does so.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[f'{col}_encoded'] = le.fit_transform(df[col])
    label_encoders[col] = le
    # The encoded column joins the model's feature list alongside the
    # numeric columns.
    numerical_cols.append(f'{col}_encoded')

# Select features and target
X = df[numerical_cols]
y = df['label']

# Hold out 20% for evaluation; stratifying on the label keeps the class
# proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")
print(f"\nFeatures: {list(X.columns)}")

## 3. MLflow Setup

In [None]:
# Configure MLflow
import os

# Honour MLFLOW_TRACKING_URI when it is set (for example inside Docker);
# otherwise fall back to a tracking server on localhost.
mlflow_uri = os.environ.get('MLFLOW_TRACKING_URI', 'http://localhost:5000')
experiment_name = "fraud_detection_notebook"

# Point the client at the server, then select the experiment for the runs
# started later in the notebook.
mlflow.set_tracking_uri(mlflow_uri)
mlflow.set_experiment(experiment_name)

print(f"MLflow tracking URI: {mlflow_uri}")
print(f"Experiment: {experiment_name}")

# Look the experiment up again to report its ID.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment:
    print(f"Experiment ID: {experiment.experiment_id}")

## 4. Train Multiple Models

In [None]:
def train_and_log_model(name, model, X_train, X_test, y_train, y_test):
    """Train a classifier, evaluate it, and record everything in one MLflow run.

    Inside a run named ``name`` the function fits ``model`` on the training
    split, scores it on the test split, and logs the estimator parameters,
    the evaluation metrics and the fitted model (registered under the name
    ``fraud_detector``). When the estimator exposes ``feature_importances_``
    a ``feature_importance.csv`` artifact is logged as well. A summary of the
    metrics is printed.

    Args:
        name: Run name used in MLflow and in the printed summary.
        model: Estimator providing ``fit``, ``predict`` and ``predict_proba``.
        X_train: Training features; ``.columns`` is read when the estimator
            has feature importances.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        dict mapping ``accuracy``, ``precision``, ``recall``, ``f1`` and
        ``auc_roc`` to their values on the test split.
    """
    import tempfile

    with mlflow.start_run(run_name=name):
        # Train model
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)
        # Column 1 is taken as the positive-class probability; this assumes
        # binary labels with 1 as the positive class.
        y_pred_proba = model.predict_proba(X_test)[:, 1]

        # Calculate metrics. zero_division=0 makes the "no positive
        # predictions" case score 0 explicitly instead of relying on the
        # library default plus a warning.
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred, zero_division=0),
            'recall': recall_score(y_test, y_pred, zero_division=0),
            'f1': f1_score(y_test, y_pred, zero_division=0),
            'auc_roc': roc_auc_score(y_test, y_pred_proba)
        }

        # Log parameters
        if hasattr(model, 'get_params'):
            mlflow.log_params(model.get_params())

        # Log metrics
        mlflow.log_metrics(metrics)

        # Log model
        mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path="model",
            registered_model_name="fraud_detector"
        )

        # Log feature importance if available
        if hasattr(model, 'feature_importances_'):
            importance = pd.DataFrame({
                'feature': X_train.columns,
                'importance': model.feature_importances_
            }).sort_values('importance', ascending=False)

            # Write the CSV into a private temporary directory rather than a
            # fixed /tmp path: it works on every platform, cannot collide
            # with another run, and is removed once the artifact is logged.
            with tempfile.TemporaryDirectory() as tmp_dir:
                importance_path = Path(tmp_dir) / 'feature_importance.csv'
                importance.to_csv(importance_path, index=False)
                mlflow.log_artifact(str(importance_path))

        print(f"\n{name} Results:")
        print("-" * 40)
        for metric, value in metrics.items():
            print(f"{metric:10s}: {value:.4f}")

        return metrics

In [None]:
# Random-forest configurations to compare, keyed by MLflow run name. Every
# model shares the same random_state.
rf_configs = {
    "RF_baseline": dict(n_estimators=50, max_depth=5),
    "RF_medium": dict(n_estimators=100, max_depth=10),
    "RF_advanced": dict(n_estimators=200, max_depth=15, min_samples_split=5),
}
models_to_train = [
    (run_name, RandomForestClassifier(random_state=42, **config))
    for run_name, config in rf_configs.items()
]

# Train and log each configuration, collecting its metrics by run name.
results = {}
for run_name, estimator in models_to_train:
    print(f"\nTraining {run_name}...")
    results[run_name] = train_and_log_model(
        run_name, estimator, X_train, X_test, y_train, y_test
    )

## 5. Compare Model Performance

In [None]:
# Create comparison dataframe: `results` maps model name -> metrics dict, so
# transposing gives one row per model and one column per metric.
comparison_df = pd.DataFrame(results).T
comparison_df = comparison_df.round(4)

print("\nMODEL COMPARISON")
print("="*60)
print(comparison_df)

# Find best model by F1 score (idxmax returns the row label, i.e. the
# model name).
best_model = comparison_df['f1'].idxmax()
print(f"\n🏆 Best model (by F1 score): {best_model}")
print(f"   F1 Score: {comparison_df.loc[best_model, 'f1']:.4f}")
print(f"   AUC-ROC: {comparison_df.loc[best_model, 'auc_roc']:.4f}")

In [None]:
# Visualize comparison
import matplotlib.pyplot as plt

# Two panels side by side: classification metrics on the left, AUC-ROC on
# the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
metrics_ax, auc_ax = axes

# Left panel: one group of bars per model.
comparison_df[['accuracy', 'precision', 'recall', 'f1']].plot.bar(ax=metrics_ax)
metrics_ax.set(title='Model Performance Metrics', ylabel='Score', xlabel='Model')
metrics_ax.legend(loc='best')
metrics_ax.grid(True, alpha=0.3)

# Right panel: a single AUC-ROC bar per model.
comparison_df['auc_roc'].plot.bar(ax=auc_ax, color='orange')
auc_ax.set(title='AUC-ROC Scores', ylabel='AUC-ROC', xlabel='Model')
auc_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Promote Best Model to Production

In [None]:
# Get MLflow client
from mlflow.tracking import MlflowClient

client = MlflowClient()

# Get all versions of the model
model_name = "fraud_detector"

# Bind `versions` before the lookup so that code reading it afterwards finds
# an empty list, not an undefined name, when the registry is unreachable.
versions = []

try:
    # Sort explicitly, newest version first, rather than relying on the
    # order in which the registry returns results.
    versions = sorted(
        client.search_model_versions(f"name='{model_name}'"),
        key=lambda mv: int(mv.version),
        reverse=True,
    )
    print(f"Found {len(versions)} versions of {model_name}\n")

    # Display version info
    for v in versions[:5]:  # Show the 5 most recent versions
        run = client.get_run(v.run_id)
        metrics = run.data.metrics
        # Runs without an F1 metric are reported as N/A.
        f1_text = f"{metrics['f1']:.4f}" if 'f1' in metrics else "N/A"
        print(f"Version {v.version}:")
        print(f"  Run Name: {run.info.run_name}")
        print(f"  F1 Score: {f1_text}")
        print(f"  Stage: {v.current_stage}")
        print()

except Exception as e:
    # Best-effort cell: report the problem and keep the notebook running.
    print(f"Note: {e}")
    print("Model will be registered when you run the training cells above.")

In [None]:
# Transition best model to production
try:
    # Most recently registered version (highest version number); used as the
    # fallback when no version has an F1 metric to compare.
    latest_version = (
        max(versions, key=lambda mv: int(mv.version)).version if versions else None
    )

    # Rank the registered versions by the F1 score logged on their source
    # run, so that the best model is promoted rather than whichever version
    # happens to be first in the list. Ties go to the newer version.
    scored_versions = []
    for mv in versions:
        if not mv.run_id:
            continue  # no source run, so no metrics to compare
        run_f1 = client.get_run(mv.run_id).data.metrics.get('f1')
        if run_f1 is not None:
            scored_versions.append((run_f1, int(mv.version), mv.version))

    best_version = max(scored_versions)[2] if scored_versions else latest_version

    if best_version:
        # Transition to production and archive whatever was there before.
        # NOTE(review): model stages are deprecated in recent MLflow
        # releases in favour of aliases; confirm against the MLflow version
        # this platform runs.
        client.transition_model_version_stage(
            name=model_name,
            version=best_version,
            stage="Production",
            archive_existing_versions=True
        )

        print(f"✅ Model version {best_version} promoted to Production!")
        print("\nThe API will automatically load this model within 60 seconds.")
        print(f"\nYou can verify at: {mlflow_uri}/#/models/{model_name}")
    else:
        print("No model versions found. Train a model first.")

except Exception as e:
    # Best-effort cell: report the problem and keep the notebook running.
    print(f"Could not promote model: {e}")
    print("Make sure MLflow server is running and accessible.")

## 7. Test Model Loading

In [None]:
# Smoke-test the registry: load whatever is in the Production stage and
# score the first few held-out rows.
try:
    model_uri = f"models:/{model_name}/Production"
    loaded_model = mlflow.sklearn.load_model(model_uri)

    # Score the first five held-out rows.
    sample = X_test.iloc[:5]
    predictions = loaded_model.predict(sample)
    probabilities = loaded_model.predict_proba(sample)[:, 1]

    print("Production Model Test:")
    print("=" * 50)
    print(f"Model: {model_uri}")
    print("\nSample predictions:")

    # Pair each prediction with the true label of the same row.
    scored_rows = zip(predictions, probabilities, y_test.iloc[:5])
    for number, (pred, prob, actual) in enumerate(scored_rows, start=1):
        print(f"  Sample {number}: Predicted={pred}, Probability={prob:.3f}, Actual={actual}")

except Exception as err:
    print(f"Could not load production model: {err}")
    print("Make sure a model is in Production stage.")

## 8. Generate API Request Examples

In [None]:
# Generate example API requests
import json

# Two example request payloads: an ordinary daytime purchase and a large
# night-time purchase with a high risk score.
normal_transaction = {
    "features": {
        "amount": 50.0,
        "merchant_category": "grocery",
        "hour_of_day": 14,
        "is_weekend": 0,
        "risk_score": 0.2,
        "days_since_last": 2,
        "num_transactions_today": 3
    },
    "model_name": "fraud_detector",
    "return_probabilities": True
}

suspicious_transaction = {
    "features": {
        "amount": 2500.0,
        "merchant_category": "electronics",
        "hour_of_day": 3,
        "is_weekend": 1,
        "risk_score": 0.8,
        "days_since_last": 30,
        "num_transactions_today": 15
    },
    "model_name": "fraud_detector",
    "return_probabilities": True
}

print("Example API Requests:")
print("="*60)
print("\n1. Normal Transaction:")
print(json.dumps(normal_transaction, indent=2))
print("\n2. Suspicious Transaction:")
print(json.dumps(suspicious_transaction, indent=2))

# Save to files
output_dir = Path('../sample_data/demo/requests')
output_dir.mkdir(parents=True, exist_ok=True)

example_requests = {
    'normal_transaction.json': normal_transaction,
    'suspicious_transaction.json': suspicious_transaction,
}
for filename, payload in example_requests.items():
    # Explicit UTF-8 so the files do not depend on the platform's default
    # text encoding.
    with open(output_dir / filename, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2)

print(f"\n💾 Saved example requests to {output_dir}")

## 9. Test with API

In [None]:
# Test API endpoint (if running)
import requests

api_url = "http://localhost:8000"
# Bound every HTTP call so an unresponsive service cannot hang the notebook.
request_timeout = 10  # seconds

try:
    # Check API health
    response = requests.get(f"{api_url}/health", timeout=request_timeout)

    if response.status_code == 200:
        print("✅ API is running!\n")

        # Test prediction with the example request built earlier
        response = requests.post(
            f"{api_url}/predict",
            json=normal_transaction,
            timeout=request_timeout
        )

        if response.status_code == 200:
            result = response.json()
            print("API Prediction Result:")
            print(json.dumps(result, indent=2))
        else:
            print(f"Prediction failed: {response.status_code}")
            print(response.text)
    else:
        print("⚠️ API is not responding. Make sure Docker services are running.")
        print("Run: docker-compose up -d")

except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    # A refused connection and a timed-out one get the same guidance.
    print(f"⚠️ Cannot connect to API at {api_url}")
    print("\nTo start the API:")
    print("1. Run: docker-compose up -d")
    print("2. Wait 30 seconds for services to start")
    print("3. Try again")

## Summary

This notebook demonstrated:

1. **Data Preparation**: Loading and preparing features for model training
2. **MLflow Integration**: Tracking experiments and model versions
3. **Model Training**: Training multiple model configurations
4. **Model Comparison**: Evaluating and comparing model performance
5. **Model Promotion**: Transitioning the best model to production
6. **API Integration**: Testing model serving through the FastAPI endpoint

**Next Steps**:
- Monitor model performance with `performance_monitoring.ipynb`
- Set up automated retraining pipeline
- Configure alerts for model drift
- Deploy to production environment