# MLOps Transformer Pipeline - Production Execution

This notebook executes the complete MLOps pipeline for sensor failure prediction.

In [None]:
# Cell 1: Setup
import sys
sys.path.append('../src')

from utils.config import Config
from utils.logging import MLOpsLogger
from utils.reproducibility import set_seed, get_device
from data.dataset import create_dataloaders
from models.transformer import TransformerClassifier
from training.trainer import Trainer
import torch
import pickle
import mlflow

# Load the base configuration, then bring up logging, seeding and the device.
config = Config('../configs/base.yaml')
logger = MLOpsLogger("main", log_dir="../logs/training")

# Seed from the configured value before the device is selected.
set_seed(config['data']['random_seed'])
device = get_device()

# Announce the run between two 80-character rules, then record the raw config.
banner_rule = "=" * 80
for banner_line in (
    banner_rule,
    "MLOPS TRANSFORMER PIPELINE - PRODUCTION EXECUTION",
    banner_rule,
):
    logger.info(banner_line)
logger.log_config(config._config)

In [None]:
# Cell 2: Data Preparation
# Builds the train/val/test loaders and pickles the scaler returned alongside
# them so it can be reused at deployment time.
from pathlib import Path

logger.info("Creating dataloaders...")
train_loader, val_loader, test_loader, scaler = create_dataloaders(
    config._config,
    batch_size=config['training']['batch_size']
)

logger.info(f"Train batches: {len(train_loader)}")
logger.info(f"Val batches: {len(val_loader)}")
logger.info(f"Test batches: {len(test_loader)}")

# Save scaler for deployment. Create the target directory first: on a fresh
# checkout ../models may not exist yet, and open() would then raise
# FileNotFoundError before anything is written.
scaler_path = Path('../models/scaler.pkl')
scaler_path.parent.mkdir(parents=True, exist_ok=True)
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)

In [None]:
# Cell 3: Build Model
logger.info("Building Transformer model...")

# Read the input width first, then pull the transformer hyperparameters out
# once instead of re-indexing the config for every argument.
n_features = config['data']['n_features']
transformer_cfg = config['model']['transformer']

model = TransformerClassifier(
    n_features=n_features,
    d_model=transformer_cfg['d_model'],
    num_heads=transformer_cfg['num_heads'],
    num_layers=transformer_cfg['num_layers'],
    d_ff=transformer_cfg['d_ff'],
    n_classes=2,
    dropout=transformer_cfg['dropout'],
)

# Total element count across every parameter tensor of the model.
total_params = 0
for param in model.parameters():
    total_params += param.numel()
logger.info(f"Model parameters: {total_params:,}")

In [None]:
# Cell 4: Train Model
logger.info("Starting training...")

# The trainer is kept as a notebook global; the evaluation cell reuses it.
trainer = Trainer(model, config, device, logger)

experiment_name = config['mlflow']['experiment_name']
best_f1 = trainer.fit(train_loader, val_loader, experiment_name=experiment_name)

logger.info(f"Training complete. Best validation F1: {best_f1:.4f}")

In [None]:
# Cell 5: Hyperparameter Optimization (Optional)
# The search only runs when the 'automl' section of the config enables it.
automl_cfg = config['automl']
if automl_cfg['enabled']:
    logger.info("Starting AutoML hyperparameter search...")
    # Imported here so the optimizer module is only loaded when the search runs.
    from automl.optuna_optimizer import run_hyperparameter_search

    n_trials = automl_cfg['n_trials']
    best_params = run_hyperparameter_search(
        config._config, train_loader, val_loader, device, n_trials=n_trials
    )

In [None]:
# Cell 6: Final Evaluation
logger.info("Evaluating on test set...")

# Restore the checkpoint stored at ../models/saved/best_model.pth before
# scoring (presumably written during training — confirm against the trainer).
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU host still deserializes when this cell is run on
# a CPU-only machine instead of failing inside torch.load.
best_state = torch.load('../models/saved/best_model.pth', map_location=device)
model.load_state_dict(best_state)
test_metrics = trainer.validate(test_loader)

logger.info("TEST SET RESULTS:")
logger.log_metrics(test_metrics)

In [None]:
# Cell 7: Model Export
from pathlib import Path

logger.info("Exporting model for deployment...")

# Export to ONNX
# Make sure the output directory exists; the export cannot create it and
# would otherwise fail when ../models/onnx is missing.
onnx_path = "../models/onnx/model.onnx"
Path(onnx_path).parent.mkdir(parents=True, exist_ok=True)

# Trace with a single example window. The feature width is read from the same
# config value the model is built with, rather than a hard-coded 4, so the
# export stays valid if the configured feature count changes.
# NOTE(review): the window length of 100 is still a literal — confirm it
# matches the sequence length used for training.
dummy_input = torch.randn(1, 100, config['data']['n_features']).to(device)
torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    input_names=['input'],
    output_names=['output'],
    # Only axis 0 (named batch_size) is declared dynamic.
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
)

logger.info("✓ Model exported to ONNX")

In [None]:
# Cell 8: Register Model in MLflow
mlflow.set_experiment(config['mlflow']['experiment_name'])

with mlflow.start_run() as run:
    # Log the model artifact and the test metrics under this run.
    mlflow.pytorch.log_model(model, "production_model")
    mlflow.log_metrics(test_metrics)

    # Register model: point the registry at the artifact logged just above.
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/production_model"
    mlflow.register_model(model_uri, "sensor_failure_model")

logger.info("✓ Model registered in MLflow Model Registry")

In [None]:
# Cell 9: Drift Monitoring
logger.info("Setting up drift monitoring...")


def _as_feature_rows(loader):
    """Return the loader's dataset array and its (-1, n_features) reshape.

    Assumes ``loader.dataset.X`` exposes ``.numpy()`` (e.g. a CPU tensor) —
    TODO confirm against the dataset class.
    """
    raw = loader.dataset.X.numpy()
    return raw, raw.reshape(-1, config['data']['n_features'])


# Get reference data (training set)
X_ref, X_ref_flat = _as_feature_rows(train_loader)

from monitoring.drift_detector import DriftDetector
drift_detector = DriftDetector(X_ref_flat, threshold=0.05)

# Simulate production data (use test set as example)
X_prod, X_prod_flat = _as_feature_rows(test_loader)

drift_detector.monitor(X_prod_flat)

## Deployment Instructions

### Start API Server:
```bash
cd deploy/api
uvicorn app:app --host 0.0.0.0 --port 5000 --reload
```

### Test API:
```bash
curl -X POST http://localhost:5000/predict \
  -H "Content-Type: application/json" \
  -d '{
    "temperature": [...100 values...],
    "vibration": [...100 values...],
    "pressure": [...100 values...],
    "rpm": [...100 values...]
  }'
```

### MLflow UI:
```bash
mlflow ui --port 5001
```

In [None]:
# Closing banner followed by the follow-up checklist, logged line by line.
closing_rule = "=" * 80
closing_lines = [
    closing_rule,
    "PIPELINE COMPLETE ✓",
    closing_rule,
    "Next steps:",
    "1. Review MLflow UI: mlflow ui --port 5001",
    "2. Deploy API: cd deploy/api && uvicorn app:app",
    "3. Monitor production: Use drift_detector on incoming data",
    "4. Retrain when drift detected or performance degrades",
]
for closing_line in closing_lines:
    logger.info(closing_line)