# F1 Pipeline Integration - Master Controller

This notebook orchestrates ALL components of the F1 Prize Picks optimization system.
It can run other notebooks programmatically to ensure proper initialization of all components.

Key Features:
- Automatically runs prerequisite notebooks in correct order
- Handles missing models/modules gracefully
- Uses the correct f1db_data_loader.py
- Provides a single entry point for the entire pipeline

In [24]:
import pandas as pd
import numpy as np
import joblib
from datetime import datetime, timedelta
import json
import logging
from pathlib import Path
import warnings
import subprocess
import sys
import os
from IPython.display import display, HTML
warnings.filterwarnings('ignore')

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('F1Pipeline')

# Set working directory to notebook location (works on both Windows and Linux)
notebook_dir = Path.cwd()
if notebook_dir.name != 'advanced':
    # Try to find the advanced directory
    if (notebook_dir / 'advanced').exists():
        os.chdir(notebook_dir / 'advanced')
    elif (notebook_dir / 'notebooks' / 'advanced').exists():
        os.chdir(notebook_dir / 'notebooks' / 'advanced')
    elif notebook_dir.parent.name == 'notebooks' and notebook_dir.name != 'advanced':
        advanced_dir = notebook_dir.parent / 'advanced'
        if advanced_dir.exists():
            os.chdir(advanced_dir)

# Add current directory to Python path
sys.path.insert(0, str(Path.cwd()))
logger.info(f"Working directory: {Path.cwd()}")

# Import F1DBDataLoader early to ensure it's available
try:
    from f1db_data_loader import F1DBDataLoader, load_f1db_data
    logger.info("✓ Loaded f1db_data_loader")
except ImportError as e:
    logger.error(f"✗ Could not import f1db_data_loader: {e}")
    F1DBDataLoader = None
    load_f1db_data = None

# Import other components that might be available
try:
    from F1_Feature_Store import F1FeatureStore
    logger.info("✓ Loaded F1FeatureStore")
except ImportError:
    logger.debug("F1FeatureStore not available yet")
    F1FeatureStore = None

try:
    from F1_Explainability_Engine import PredictionExplainer, PrizePicksExplainer
    logger.info("✓ Loaded Explainability components")
except ImportError:
    logger.debug("Explainability components not available yet")
    PredictionExplainer = None
    PrizePicksExplainer = None

2025-07-23 16:25:09,071 - F1Pipeline - INFO - Working directory: c:\Users\tenei\Documents\GitHub\Formula1\notebooks\advanced
2025-07-23 16:25:09,073 - F1Pipeline - INFO - ✓ Loaded f1db_data_loader


In [None]:
class NotebookRunner:
    """
    Utility to run other Jupyter notebooks programmatically
    """
    def __init__(self):
        self.executed_notebooks = []
        self.failed_notebooks = []
    
    def run_notebook(self, notebook_path, timeout=600):
        """
        Execute a Jupyter notebook and return success status
        """
        logger.info(f"Running notebook: {notebook_path}")
        
        try:
            # Alternative method: Use papermill if available
            try:
                import papermill as pm
                pm.execute_notebook(
                    input_path=notebook_path,
                    output_path=notebook_path,
                    kernel_name='python3',
                    timeout=timeout
                )
                logger.info(f"✓ Successfully executed: {notebook_path}")
                self.executed_notebooks.append(notebook_path)
                return True
            except ImportError:
                logger.info("Papermill not available, using nbconvert method")
            
            # Original nbconvert method but without --inplace to avoid modifying the original
            result = subprocess.run(
                [
                    sys.executable, '-m', 'jupyter', 'nbconvert',
                    '--to', 'notebook',
                    '--execute',
                    '--ExecutePreprocessor.timeout=' + str(timeout),
                    '--ExecutePreprocessor.allow_errors=True',  # Continue on errors
                    '--output', 'temp_' + Path(notebook_path).name,
                    notebook_path
                ],
                capture_output=True,
                text=True
            )
            
            # Check if the notebook created expected outputs
            if result.returncode == 0:
                # Check if expected files were created
                expected_outputs = {
                    'F1_Core_Models.ipynb': 'f1_position_prediction_model.pkl',
                    'F1_Integrated_Driver_Evaluation.ipynb': 'f1_integrated_evaluation_model.pkl',
                    'F1_Prize_Picks_Optimizer.ipynb': 'f1_prize_picks_optimizer.pkl'
                }
                
                notebook_name = Path(notebook_path).name
                if notebook_name in expected_outputs:
                    output_file = expected_outputs[notebook_name]
                    if Path(output_file).exists():
                        logger.info(f"✓ Successfully created: {output_file}")
                        self.executed_notebooks.append(notebook_path)
                        return True
                    else:
                        logger.warning(f"⚠ Notebook ran but didn't create expected output: {output_file}")
                        # Still mark as success if it's not a critical notebook
                        if 'Evaluation' not in notebook_name:
                            self.executed_notebooks.append(notebook_path)
                            return True
                else:
                    # Non-critical notebook
                    logger.info(f"✓ Successfully executed: {notebook_path}")
                    self.executed_notebooks.append(notebook_path)
                    return True
            
            logger.error(f"✗ Failed to execute: {notebook_path}")
            if result.stderr:
                logger.error(f"Error: {result.stderr}")
            self.failed_notebooks.append(notebook_path)
            return False
                
        except Exception as e:
            logger.error(f"✗ Exception running {notebook_path}: {str(e)}")
            self.failed_notebooks.append(notebook_path)
            return False
        finally:
            # Clean up temp files
            temp_file = Path('temp_' + Path(notebook_path).name)
            if temp_file.exists():
                temp_file.unlink()
    
    def run_notebooks_in_order(self, notebook_list):
        """
        Run a list of notebooks in order
        """
        results = []
        for notebook in notebook_list:
            if Path(notebook).exists():
                success = self.run_notebook(notebook)
                results.append((notebook, success))
                # Don't stop on failure for non-critical notebooks
                if not success and any(critical in notebook for critical in ['Core_Models', 'Feature_Store']):
                    logger.warning(f"Stopping execution due to failure in critical notebook: {notebook}")
                    break
            else:
                logger.warning(f"Notebook not found: {notebook}")
                results.append((notebook, False))
        
        return results
    
    def get_summary(self):
        """
        Get execution summary
        """
        return {
            'executed': self.executed_notebooks,
            'failed': self.failed_notebooks,
            'success_rate': len(self.executed_notebooks) / (len(self.executed_notebooks) + len(self.failed_notebooks)) if (self.executed_notebooks or self.failed_notebooks) else 0
        }

# Initialize runner
runner = NotebookRunner()

In [26]:
class NotebookRunner:
    """
    Utility to run other Jupyter notebooks programmatically
    """
    def __init__(self):
        self.executed_notebooks = []
        self.failed_notebooks = []
    
    def run_notebook(self, notebook_path, timeout=600):
        """
        Execute a Jupyter notebook and return success status
        """
        logger.info(f"Running notebook: {notebook_path}")
        
        try:
            # Use nbconvert to execute the notebook
            result = subprocess.run(
                [
                    sys.executable, '-m', 'jupyter', 'nbconvert',
                    '--to', 'notebook',
                    '--execute',
                    '--ExecutePreprocessor.timeout=' + str(timeout),
                    '--inplace',
                    '--clear-output',
                    notebook_path
                ],
                capture_output=True,
                text=True
            )
            
            if result.returncode == 0:
                logger.info(f"✓ Successfully executed: {notebook_path}")
                self.executed_notebooks.append(notebook_path)
                return True
            else:
                logger.error(f"✗ Failed to execute: {notebook_path}")
                logger.error(f"Error: {result.stderr}")
                self.failed_notebooks.append(notebook_path)
                return False
                
        except Exception as e:
            logger.error(f"✗ Exception running {notebook_path}: {str(e)}")
            self.failed_notebooks.append(notebook_path)
            return False
    
    def run_notebooks_in_order(self, notebook_list):
        """
        Run a list of notebooks in order
        """
        results = []
        for notebook in notebook_list:
            if Path(notebook).exists():
                success = self.run_notebook(notebook)
                results.append((notebook, success))
                if not success:
                    logger.warning(f"Stopping execution due to failure in {notebook}")
                    break
            else:
                logger.warning(f"Notebook not found: {notebook}")
                results.append((notebook, False))
        
        return results
    
    def get_summary(self):
        """
        Get execution summary
        """
        return {
            'executed': self.executed_notebooks,
            'failed': self.failed_notebooks,
            'success_rate': len(self.executed_notebooks) / (len(self.executed_notebooks) + len(self.failed_notebooks)) if (self.executed_notebooks or self.failed_notebooks) else 0
        }

# Initialize runner
runner = NotebookRunner()

In [None]:
# Define notebook execution order
NOTEBOOK_PIPELINE = [
    {
        'name': 'Core Models',
        'notebook': 'F1_Core_Models.ipynb',
        'creates': ['f1_position_prediction_model.pkl'],
        'required': True
    },
    {
        'name': 'Feature Store',
        'notebook': 'F1_Feature_Store.ipynb',
        'creates': ['F1_Feature_Store.py'],  # Creates a module
        'required': True
    },
    {
        'name': 'Integrated Driver Evaluation',
        'notebook': 'F1_Integrated_Driver_Evaluation.ipynb',
        'creates': ['f1_integrated_evaluation_model.pkl'],
        'required': True
    },
    {
        'name': 'Prize Picks Optimizer',
        'notebook': 'F1_Prize_Picks_Optimizer.ipynb',
        'creates': ['f1_prize_picks_optimizer.pkl'],
        'required': True
    },
    {
        'name': 'Explainability Engine',
        'notebook': 'F1_Explainability_Engine.ipynb',
        'creates': ['F1_Explainability_Engine.py'],  # Creates a module
        'required': True
    },
    {
        'name': 'MLflow Tracking',
        'notebook': 'F1_MLflow_Tracking.ipynb',
        'creates': [],  # Optional tracking
        'required': False
    }
]

# Option to force rerun all notebooks (set to True if needed)
FORCE_RERUN = False

In [None]:
def initialize_pipeline_components(force_rerun=False):
    """
    Initialize all pipeline components by running required notebooks
    ALL notebooks must succeed - no fallbacks or compromises
    """
    print("\n" + "=" * 60)
    print("INITIALIZING F1 PIPELINE COMPONENTS")
    print("=" * 60 + "\n")
    
    # ALL notebooks are required for production
    required_outputs = {
        'F1_Core_Models.ipynb': 'f1_position_prediction_model.pkl',
        'F1_Feature_Store.ipynb': None,  # Creates module, not a file
        'F1_Integrated_Driver_Evaluation.ipynb': 'f1_integrated_evaluation_model.pkl',
        'F1_Prize_Picks_Optimizer.ipynb': 'f1_prize_picks_optimizer.pkl',
        'F1_Explainability_Engine.ipynb': None,  # Creates module
        'F1_MLflow_Tracking.ipynb': None  # Optional tracking
    }
    
    notebooks_to_run = []
    
    for notebook, output_file in required_outputs.items():
        needs_run = force_rerun
        
        if not force_rerun and output_file:
            # Check if output file exists
            if not Path(output_file).exists():
                needs_run = True
        
        if needs_run:
            component = next((c for c in NOTEBOOK_PIPELINE if c['notebook'] == notebook), None)
            if component:
                notebooks_to_run.append(component)
                print(f"📋 Will run: {component['name']}")
        else:
            print(f"✓ {notebook}: outputs exist")
    
    if not notebooks_to_run:
        print("\n✅ All models are trained and ready!")
        return True
    
    # Run the notebooks - ALL must succeed
    print(f"\nRunning {len(notebooks_to_run)} notebooks to create models...")
    print("This is a production system - all models must be properly trained")
    print("-" * 60)
    
    for component in notebooks_to_run:
        print(f"\nRunning: {component['name']}...")
        success = runner.run_notebook(component['notebook'], timeout=1200)  # 20 min timeout
        
        if not success:
            print(f"\n❌ FAILED: {component['name']}")
            print("The pipeline cannot proceed without all models properly trained.")
            print("\nTo fix this:")
            print(f"1. Open {component['notebook']} in Jupyter")
            print("2. Run all cells manually to see the specific error")
            print("3. Fix any data path or dependency issues")
            print("4. Ensure the model saves successfully")
            print("\nThen run this pipeline again.")
            return False
        else:
            print(f"✅ SUCCESS: {component['name']}")
    
    print("\n" + "=" * 60)
    print("✅ ALL MODELS SUCCESSFULLY TRAINED!")
    print("=" * 60)
    return True

# Initialize components - must succeed
initialization_success = initialize_pipeline_components(force_rerun=FORCE_RERUN)

if not initialization_success:
    raise RuntimeError(
        "Pipeline initialization failed. All models must be properly trained for production use. "
        "Please fix the failing notebooks and try again."
    )

In [29]:
class PipelineConfig:
    """
    Configuration for the F1 pipeline - Windows/Linux compatible
    """
    def __init__(self):
        # Data paths - use relative paths that work on both Windows and Linux
        current_dir = Path.cwd()
        
        # Find the data directory relative to current location
        if (current_dir / '../../data/f1db').exists():
            self.data_dir = (current_dir / '../../data/f1db').resolve()
        elif (current_dir.parent.parent / 'data' / 'f1db').exists():
            self.data_dir = (current_dir.parent.parent / 'data' / 'f1db').resolve()
        else:
            # Try to find it from the root
            possible_paths = [
                current_dir / 'data' / 'f1db',
                current_dir.parent / 'data' / 'f1db',
                current_dir.parent.parent / 'data' / 'f1db',
                Path('data/f1db'),
                Path('../data/f1db'),
                Path('../../data/f1db')
            ]
            for p in possible_paths:
                if p.exists():
                    self.data_dir = p.resolve()
                    break
            else:
                # Default to relative path
                self.data_dir = Path('../../data/f1db').resolve()
        
        self.model_dir = Path('.')
        self.output_dir = Path('pipeline_outputs')
        self.output_dir.mkdir(exist_ok=True)
        
        # Model settings
        self.use_cached_data = True
        self.auto_sync = True
        self.cache_expiry_hours = 24
        
        # Optimization settings
        self.bankroll = 1000
        self.kelly_fraction = 0.25
        self.max_correlation = 0.5
        self.min_edge = 0.05
        self.max_exposure = 0.25
        
        # Constraints
        self.constraints = {
            'max_per_driver': 2,
            'max_per_type': 3,
            'min_avg_edge': 0.08
        }
        
        # Pipeline settings
        self.generate_report = True
        self.save_predictions = True
        self.mlflow_tracking = False
        
        logger.info(f"Data directory: {self.data_dir}")
        
    def to_dict(self):
        """Convert config to dictionary"""
        return {
            'data_dir': str(self.data_dir),
            'model_dir': str(self.model_dir),
            'output_dir': str(self.output_dir),
            'bankroll': self.bankroll,
            'kelly_fraction': self.kelly_fraction,
            'max_correlation': self.max_correlation,
            'min_edge': self.min_edge,
            'max_exposure': self.max_exposure,
            'constraints': self.constraints
        }
    
    def save(self, path='pipeline_config.json'):
        """Save configuration"""
        with open(path, 'w') as f:
            json.dump(self.to_dict(), f, indent=2)
    
    @classmethod
    def load(cls, path='pipeline_config.json'):
        """Load configuration"""
        config = cls()
        if Path(path).exists():
            with open(path, 'r') as f:
                data = json.load(f)
                for key, value in data.items():
                    if hasattr(config, key):
                        if key.endswith('_dir'):
                            setattr(config, key, Path(value))
                        else:
                            setattr(config, key, value)
        return config

# Initialize configuration
config = PipelineConfig()
config.save()
logger.info(f"Pipeline configuration initialized")

2025-07-23 16:25:09,242 - F1Pipeline - INFO - Data directory: C:\Users\tenei\Documents\GitHub\Formula1\data\f1db
2025-07-23 16:25:09,244 - F1Pipeline - INFO - Pipeline configuration initialized


In [None]:
class F1PrizePipeline:
    """
    Production-grade pipeline orchestrating all components
    No fallbacks - all models must be properly trained
    """
    def __init__(self, config: PipelineConfig):
        self.config = config
        self.data_loader = None
        self.feature_store = None
        self.predictor = None
        self.optimizer = None
        self.explainer = None
        self.results = {}
        
        # Initialize all components - fail if any are missing
        self._initialize_components()
    
    def _initialize_components(self):
        """Initialize all pipeline components - all are required"""
        logger.info("Initializing production pipeline components...")
        
        # Data loader - REQUIRED
        if not F1DBDataLoader:
            raise ImportError("F1DBDataLoader not available. Cannot proceed without data loader.")
        
        self.data_loader = F1DBDataLoader(
            data_dir=str(self.config.data_dir)  # Changed from base_path to data_dir
        )
        logger.info("✓ Initialized F1DBDataLoader")
        
        # Load integrated predictor - REQUIRED
        try:
            self.predictor = joblib.load(self.config.model_dir / 'f1_integrated_evaluation_model.pkl')
            logger.info("✓ Loaded integrated predictor")
        except FileNotFoundError:
            raise FileNotFoundError(
                "Integrated evaluation model not found. "
                "Run F1_Integrated_Driver_Evaluation.ipynb to create it."
            )
        
        # Load Prize Picks optimizer - REQUIRED
        try:
            optimizer_config = joblib.load(self.config.model_dir / 'f1_prize_picks_optimizer.pkl')
            self.optimizer = optimizer_config['optimizer']
            logger.info("✓ Loaded Prize Picks optimizer")
        except FileNotFoundError:
            raise FileNotFoundError(
                "Prize Picks optimizer not found. "
                "Run F1_Prize_Picks_Optimizer.ipynb to create it."
            )
        
        # Feature store - REQUIRED
        if not F1FeatureStore:
            raise ImportError(
                "F1FeatureStore not available. "
                "Run F1_Feature_Store.ipynb to create the module."
            )
        self.feature_store = F1FeatureStore()
        logger.info("✓ Initialized Feature Store")
        
        # Explainers - REQUIRED for production
        if not PredictionExplainer or not PrizePicksExplainer:
            raise ImportError(
                "Explainability components not available. "
                "Run F1_Explainability_Engine.ipynb to create them."
            )
        
        # Initialize explainers with loaded models
        self.prediction_explainer = PredictionExplainer(self.predictor, self.feature_store.get_feature_names())
        self.pp_explainer = PrizePicksExplainer()
        logger.info("✓ Initialized Explainability components")
        
        # Verify base position prediction model exists
        if not Path(self.config.model_dir / 'f1_position_prediction_model.pkl').exists():
            raise FileNotFoundError(
                "Base position prediction model not found. "
                "Run F1_Core_Models.ipynb to create it."
            )
        
        logger.info("✅ All production components initialized successfully")
    
    def load_data(self, force_update=False):
        """Load and prepare F1 data"""
        logger.info("Loading F1 data...")
        
        # Load data using f1db_data_loader
        self.data = load_f1db_data(data_dir=str(self.config.data_dir))  # Changed from base_path
        
        if not self.data:
            raise ValueError("Failed to load F1 data. Check data directory and connection.")
        
        logger.info(f"Loaded {len(self.data)} datasets")
        
        # Validate critical datasets exist
        required_datasets = ['races', 'drivers', 'results', 'constructors']
        missing = [ds for ds in required_datasets if ds not in self.data or self.data[ds].empty]
        
        if missing:
            # Check with alternative names
            alt_names = {
                'results': ['races_race_results', 'race_results'],
                'races': ['races'],
                'drivers': ['drivers'],
                'constructors': ['constructors']
            }
            
            for dataset in missing[:]:
                for alt_name in alt_names.get(dataset, []):
                    if alt_name in self.data and not self.data[alt_name].empty:
                        self.data[dataset] = self.data[alt_name]
                        missing.remove(dataset)
                        break
        
        if missing:
            raise ValueError(f"Missing required datasets: {missing}")
        
        return self.data
    
    def prepare_features(self, race_id=None):
        """Prepare features for prediction using Feature Store"""
        logger.info("Preparing features with Feature Store...")
        
        # Get upcoming race if no race_id specified
        if race_id is None:
            races = self.data.get('races', pd.DataFrame())
            if 'date' in races.columns:
                races['date'] = pd.to_datetime(races['date'])
                upcoming = races[races['date'] > datetime.now()]
                if not upcoming.empty:
                    upcoming = upcoming.iloc[0]
                    race_id = upcoming.get('id', upcoming.get('raceId'))
                    race_name = upcoming.get('officialName', upcoming.get('name', 'Unknown'))
                    logger.info(f"Preparing for upcoming race: {race_name} (ID: {race_id})")
            
            if race_id is None:
                logger.warning("No upcoming race found - using latest race")
                race_id = races['id'].max() if 'id' in races.columns else races['raceId'].max()
        
        # Use Feature Store to engineer features
        features = self.feature_store.engineer_features(self.data, race_id)
        
        if features.empty:
            raise ValueError("Feature engineering failed. No features generated.")
        
        self.results['features'] = features
        self.results['race_id'] = race_id
        return features
    
    def generate_predictions(self):
        """Generate predictions using trained models"""
        logger.info("Generating predictions with trained models...")
        
        if 'features' not in self.results:
            raise ValueError("No features available. Run prepare_features first.")
        
        features = self.results['features']
        
        # Load base prediction model
        base_model_data = joblib.load(self.config.model_dir / 'f1_position_prediction_model.pkl')
        base_model = base_model_data['model']
        scaler = base_model_data['scaler']
        feature_columns = base_model_data['feature_columns']
        
        # Get predictions from integrated predictor
        predictions = self.predictor.predict_with_evaluation(
            features, 
            self.data.get('driver_evaluation', pd.DataFrame()),
            self.data.get('constructor_compatibility', pd.DataFrame())
        )
        
        # Structure predictions for Prize Picks
        predictions_df = pd.DataFrame(predictions)
        
        # Ensure required columns exist
        required_cols = ['driver', 'driverId', 'top10_prob', 'top5_prob', 
                        'top3_prob', 'points_prob', 'confidence']
        
        missing_cols = [col for col in required_cols if col not in predictions_df.columns]
        if missing_cols:
            raise ValueError(f"Predictions missing required columns: {missing_cols}")
        
        self.results['predictions'] = predictions_df
        logger.info(f"Generated predictions for {len(predictions_df)} drivers")
        return predictions_df
    
    def optimize_picks(self):
        """Optimize Prize Picks selections using trained optimizer"""
        logger.info("Optimizing Prize Picks with trained optimizer...")
        
        if 'predictions' not in self.results:
            raise ValueError("No predictions available. Run generate_predictions first.")
        
        predictions = self.results['predictions']
        
        if predictions.empty:
            raise ValueError("No predictions to optimize")
        
        # Generate all possible picks
        all_picks = self.optimizer.generate_all_picks(
            predictions,
            min_edge=self.config.min_edge
        )
        
        if all_picks.empty:
            logger.warning("No picks with positive edge found")
            return []
        
        # Optimize portfolio
        portfolio = self.optimizer.optimize_portfolio(
            all_picks,
            bankroll=self.config.bankroll,
            constraints=self.config.constraints
        )
        
        if not portfolio:
            logger.warning("Optimizer returned empty portfolio")
            return []
        
        self.results['portfolio'] = portfolio
        logger.info(f"Optimized portfolio with {len(portfolio)} parlays")
        return portfolio
    
    def generate_explanations(self):
        """Generate explanations for recommendations"""
        logger.info("Generating explanations...")
        
        if 'portfolio' not in self.results:
            raise ValueError("No portfolio to explain. Run optimize_picks first.")
        
        explanations = []
        
        for parlay in self.results['portfolio']:
            explanation = self.pp_explainer.explain_parlay(parlay)
            explanations.append(explanation)
        
        self.results['explanations'] = explanations
        return explanations
    
    def generate_report(self, save_path=None):
        """Generate comprehensive report"""
        logger.info("Generating production report...")
        
        # Ensure we have all required components
        if 'predictions' not in self.results:
            raise ValueError("No predictions available for report")
        
        if 'portfolio' not in self.results:
            logger.warning("No portfolio generated - report will be limited")
        
        report = {
            'generated_at': datetime.now().isoformat(),
            'pipeline_version': '1.0.0',
            'race_id': self.results.get('race_id'),
            'config': self.config.to_dict(),
            'summary': self._generate_summary(),
            'predictions': self.results['predictions'].to_dict('records'),
            'portfolio': self._serialize_portfolio(),
            'explanations': self.results.get('explanations', []),
            'risk_metrics': self._calculate_risk_metrics(),
            'model_info': {
                'integrated_predictor': str(type(self.predictor)),
                'optimizer': str(type(self.optimizer)),
                'feature_store': str(type(self.feature_store))
            }
        }
        
        if save_path is None:
            save_path = self.config.output_dir / f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        
        with open(save_path, 'w') as f:
            json.dump(report, f, indent=2)
        
        logger.info(f"Report saved to {save_path}")
        return report
    
    def _generate_summary(self):
        """Generate summary statistics"""
        summary = {
            'pipeline_status': 'success',
            'timestamp': datetime.now().isoformat()
        }
        
        if 'predictions' in self.results:
            summary['n_drivers'] = len(self.results['predictions'])
            summary['avg_confidence'] = self.results['predictions']['confidence'].mean()
            summary['top_drivers'] = self.results['predictions'].nlargest(5, 'confidence')['driver'].tolist()
        
        if 'portfolio' in self.results:
            portfolio = self.results['portfolio']
            summary['n_parlays'] = len(portfolio)
            summary['total_wagered'] = sum(p['bet_size'] for p in portfolio)
            summary['expected_profit'] = sum(p['expected_value'] * p['bet_size'] for p in portfolio)
            summary['avg_win_probability'] = np.mean([p['adjusted_prob'] for p in portfolio]) if portfolio else 0
        
        return summary
    
    def _serialize_portfolio(self):
        """Serialize portfolio for JSON"""
        if 'portfolio' not in self.results:
            return []
        
        serialized = []
        for parlay in self.results['portfolio']:
            parlay_data = {
                'n_picks': parlay['n_picks'],
                'bet_size': parlay['bet_size'],
                'payout': parlay['payout'],
                'adjusted_prob': parlay['adjusted_prob'],
                'expected_value': parlay['expected_value'],
                'kelly_stake': parlay['kelly_stake'],
                'picks': parlay['picks'].to_dict('records') if hasattr(parlay['picks'], 'to_dict') else parlay['picks']
            }
            serialized.append(parlay_data)
        
        return serialized
    
    def _calculate_risk_metrics(self):
        """Calculate risk metrics for portfolio"""
        if 'portfolio' not in self.results or not self.results['portfolio']:
            return {
                'total_exposure': 0,
                'exposure_pct': 0,
                'n_bets': 0,
                'status': 'no_portfolio'
            }
        
        portfolio = self.results['portfolio']
        total_exposure = sum(p['bet_size'] for p in portfolio)
        
        metrics = {
            'total_exposure': total_exposure,
            'exposure_pct': total_exposure / self.config.bankroll,
            'n_bets': len(portfolio),
            'avg_bet_size': total_exposure / len(portfolio),
            'max_bet_size': max(p['bet_size'] for p in portfolio),
            'min_bet_size': min(p['bet_size'] for p in portfolio),
            'status': 'calculated'
        }
        
        return metrics
    
    def run(self, race_id=None):
        """Run complete production pipeline - no compromises"""
        logger.info("Starting F1 Prize Picks production pipeline...")
        logger.info("All models must be properly trained - no fallbacks")
        
        try:
            # Step 1: Load data - REQUIRED
            self.load_data()
            
            # Step 2: Prepare features - REQUIRED
            features = self.prepare_features(race_id)
            
            # Step 3: Generate predictions - REQUIRED
            predictions = self.generate_predictions()
            
            # Step 4: Optimize picks - REQUIRED
            portfolio = self.optimize_picks()
            
            # Step 5: Generate explanations - REQUIRED
            if self.config.generate_report:
                self.generate_explanations()
            
            # Step 6: Generate report - REQUIRED
            if self.config.save_predictions:
                report = self.generate_report()
            
            logger.info("✅ Production pipeline completed successfully!")
            return self.results
            
        except Exception as e:
            logger.error(f"Production pipeline failed: {str(e)}")
            logger.error("This is a production system - all components must work properly")
            raise  # Re-raise the exception - no silent failures in production

In [31]:
# Check if all components are initialized successfully
if initialization_success:
    try:
        # Initialize production pipeline
        pipeline = F1PrizePipeline(config)
        logger.info("Production pipeline initialized successfully")
        
        # Run the pipeline
        logger.info("Running production pipeline...")
        results = pipeline.run()
        
        # Display results
        if results and 'portfolio' in results:
            print("\n" + "=" * 80)
            print("F1 PRIZE PICKS RECOMMENDATIONS - PRODUCTION")
            print("=" * 80)
            
            portfolio = results['portfolio']
            
            if portfolio:
                for i, parlay in enumerate(portfolio, 1):
                    print(f"\n{'='*60}")
                    print(f"PARLAY {i}: {parlay['n_picks']}-PICK ENTRY")
                    print(f"{'='*60}")
                    print(f"Bet Amount: ${parlay['bet_size']:.2f}")
                    print(f"Potential Payout: ${parlay['bet_size'] * parlay['payout']:.2f} ({parlay['payout']}x)")
                    print(f"Win Probability: {parlay['adjusted_prob']:.1%}")
                    print(f"Expected Value: +{parlay['expected_value']:.1%}")
                    print(f"\nPicks:")
                    
                    picks = parlay['picks']
                    if hasattr(picks, 'iterrows'):
                        for j, (_, pick) in enumerate(picks.iterrows(), 1):
                            print(f"  {j}. {pick['driver']} - {pick['bet_type']}")
                            print(f"     Probability: {pick['probability']:.1%}")
                            print(f"     Edge: +{pick['edge']:.1%}")
                
                # Summary
                summary = pipeline._generate_summary()
                print("\n" + "=" * 80)
                print("PORTFOLIO SUMMARY")
                print("=" * 80)
                print(f"Total Wagered: ${summary.get('total_wagered', 0):.2f}")
                print(f"Expected Profit: ${summary.get('expected_profit', 0):.2f}")
                print(f"Number of Parlays: {summary.get('n_parlays', 0)}")
                print(f"Average Win Probability: {summary.get('avg_win_probability', 0):.1%}")
                print(f"\nTop 5 Drivers by Confidence:")
                for driver in summary.get('top_drivers', []):
                    print(f"  - {driver}")
            else:
                print("\nNo parlays generated. This could mean:")
                print("- No bets met the minimum edge requirement")
                print("- Risk constraints prevented bet placement")
                print("- Try adjusting config.min_edge or config.kelly_fraction")
        else:
            print("\n❌ Pipeline completed but no portfolio was generated.")
            
    except Exception as e:
        print(f"\n❌ Pipeline execution failed: {str(e)}")
        print("\nThis is a production system. Please ensure:")
        print("1. All notebooks have been run successfully")
        print("2. All models are properly trained and saved")
        print("3. Data is available in the correct format")
        raise
else:
    print("\n❌ Cannot run pipeline - initialization failed")
    print("Please fix the failing notebooks first.")

2025-07-23 16:25:09,386 - F1Pipeline - INFO - Initializing production pipeline components...
2025-07-23 16:25:09,388 - F1Pipeline - INFO - ✓ Initialized F1DBDataLoader



❌ Pipeline execution failed: Can't get attribute 'IntegratedF1Predictor' on <module '__main__'>

This is a production system. Please ensure:
1. All notebooks have been run successfully
2. All models are properly trained and saved
3. Data is available in the correct format


AttributeError: Can't get attribute 'IntegratedF1Predictor' on <module '__main__'>

## Summary

This notebook provides a production-grade F1 Prize Picks pipeline that:
- Automatically runs prerequisite notebooks in correct order
- Ensures all models are properly trained with no fallbacks
- Uses real F1DB data from the correct data path
- Provides comprehensive race weekend automation and performance monitoring

In [None]:
class RaceWeekendAutomation:
    """
    Automate pipeline execution for race weekends
    """
    def __init__(self, pipeline: F1PrizePipeline):
        self.pipeline = pipeline
        self.schedule = []
    
    def get_race_schedule(self):
        """Get upcoming race schedule"""
        races = self.pipeline.data.get('races', pd.DataFrame())
        if races.empty:
            return pd.DataFrame()
        
        # Get future races
        races['date'] = pd.to_datetime(races['date'])
        future_races = races[races['date'] > datetime.now()]
        
        return future_races.sort_values('date')
    
    def schedule_race_analysis(self, race_id, race_date):
        """Schedule analysis for a specific race"""
        # Run at different times
        schedule_times = [
            (race_date - timedelta(days=3), 'Initial Analysis'),
            (race_date - timedelta(days=1), 'Pre-Qualifying Update'),
            (race_date - timedelta(hours=4), 'Final Predictions')
        ]
        
        for run_time, description in schedule_times:
            self.schedule.append({
                'race_id': race_id,
                'run_time': run_time,
                'description': description,
                'status': 'scheduled'
            })
    
    def execute_scheduled_runs(self):
        """Execute scheduled pipeline runs"""
        current_time = datetime.now()
        
        for task in self.schedule:
            if task['status'] == 'scheduled' and task['run_time'] <= current_time:
                logger.info(f"Executing {task['description']} for race {task['race_id']}")
                
                try:
                    # Update config based on timing
                    if 'Final' in task['description']:
                        self.pipeline.config.kelly_fraction = 0.20  # More conservative
                    
                    # Run pipeline
                    results = self.pipeline.run(task['race_id'])
                    
                    # Save results with timestamp
                    output_name = f"race_{task['race_id']}_{task['description'].replace(' ', '_')}_{current_time.strftime('%Y%m%d_%H%M%S')}"
                    self.pipeline.generate_report(
                        self.pipeline.config.output_dir / f"{output_name}.json"
                    )
                    
                    task['status'] = 'completed'
                    task['completed_at'] = current_time.isoformat()
                    
                except Exception as e:
                    logger.error(f"Failed to execute {task['description']}: {str(e)}")
                    task['status'] = 'failed'
                    task['error'] = str(e)
    
    def generate_weekend_summary(self):
        """Generate summary of all analyses for a race weekend"""
        completed_tasks = [t for t in self.schedule if t['status'] == 'completed']
        
        if not completed_tasks:
            return None
        
        summary = {
            'race_id': completed_tasks[0]['race_id'],
            'analyses_completed': len(completed_tasks),
            'final_recommendations': None
        }
        
        # Get final predictions
        final_task = next((t for t in completed_tasks if 'Final' in t['description']), None)
        if final_task:
            # Load the report
            report_files = list(self.pipeline.config.output_dir.glob(f"race_{final_task['race_id']}_Final*.json"))
            if report_files:
                with open(report_files[-1], 'r') as f:
                    final_report = json.load(f)
                    summary['final_recommendations'] = final_report.get('portfolio', [])
        
        return summary

# Example usage for race weekend automation
if 'pipeline' in locals() and hasattr(pipeline, 'data') and pipeline.data:
    automation = RaceWeekendAutomation(pipeline)
    
    # Get upcoming races
    upcoming_races = automation.get_race_schedule()
    if not upcoming_races.empty:
        print("\nUpcoming Races:")
        print("=" * 60)
        for idx, race in upcoming_races.head(3).iterrows():
            print(f"{race['date'].strftime('%Y-%m-%d')}: {race['name']} (Round {race['round']})")
            
            # Schedule analysis for next race
            if idx == upcoming_races.index[0]:  # First race
                automation.schedule_race_analysis(race['raceId'], race['date'])
        
        print(f"\nScheduled {len(automation.schedule)} analyses for next race")

## 10. Performance Monitoring (From Original Pipeline)

In [None]:
class PerformanceMonitor:
    """
    Monitor pipeline and prediction performance
    """
    def __init__(self, output_dir: Path):
        self.output_dir = output_dir
        self.metrics = []
    
    def track_predictions(self, predictions, actuals=None):
        """Track prediction accuracy"""
        metric = {
            'timestamp': datetime.now().isoformat(),
            'n_predictions': len(predictions),
            'avg_confidence': predictions['confidence'].mean() if 'confidence' in predictions else 0
        }
        
        if actuals is not None:
            # Calculate accuracy metrics
            metric['accuracy'] = self._calculate_accuracy(predictions, actuals)
        
        self.metrics.append(metric)
    
    def _calculate_accuracy(self, predictions, actuals):
        """Calculate prediction accuracy"""
        # Implementation depends on actual data format
        # This is a placeholder - implement based on your needs
        return 0.0
    
    def generate_performance_report(self):
        """Generate performance report"""
        if not self.metrics:
            return None
        
        report = {
            'period': {
                'start': self.metrics[0]['timestamp'],
                'end': self.metrics[-1]['timestamp']
            },
            'total_predictions': sum(m['n_predictions'] for m in self.metrics),
            'avg_confidence': np.mean([m['avg_confidence'] for m in self.metrics]),
            'runs_completed': len(self.metrics)
        }
        
        return report
    
    def plot_performance_trends(self):
        """Plot performance trends over time"""
        if not self.metrics:
            print("No metrics to plot")
            return
        
        import matplotlib.pyplot as plt
        
        fig, ax = plt.subplots(figsize=(10, 6))
        
        timestamps = [pd.to_datetime(m['timestamp']) for m in self.metrics]
        confidences = [m['avg_confidence'] for m in self.metrics]
        
        ax.plot(timestamps, confidences, marker='o')
        ax.set_xlabel('Date')
        ax.set_ylabel('Average Confidence')
        ax.set_title('Model Confidence Over Time')
        ax.grid(True, alpha=0.3)
        
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()
    
    def save_metrics(self):
        """Save metrics to file"""
        metrics_file = self.output_dir / 'performance_metrics.json'
        with open(metrics_file, 'w') as f:
            json.dump(self.metrics, f, indent=2)
        logger.info(f"Saved performance metrics to {metrics_file}")
    
    def load_metrics(self):
        """Load existing metrics"""
        metrics_file = self.output_dir / 'performance_metrics.json'
        if metrics_file.exists():
            with open(metrics_file, 'r') as f:
                self.metrics = json.load(f)
            logger.info(f"Loaded {len(self.metrics)} historical metrics")

# Initialize performance monitor
if 'config' in locals():
    monitor = PerformanceMonitor(config.output_dir)
    monitor.load_metrics()  # Load any existing metrics
    
    # Track current predictions if available
    if 'pipeline' in locals() and 'predictions' in pipeline.results:
        monitor.track_predictions(pipeline.results['predictions'])
        monitor.save_metrics()
        
        performance_report = monitor.generate_performance_report()
        print("\nPerformance Summary:")
        print("=" * 40)
        for key, value in performance_report.items():
            print(f"{key}: {value}")

In [None]:
# Save complete pipeline state for future use
pipeline_state = {
    'config': config.to_dict(),
    'last_run': datetime.now().isoformat(),
    'results_summary': pipeline._generate_summary() if 'pipeline' in locals() else {},
    'automation_schedule': automation.schedule if 'automation' in locals() else [],
    'performance_metrics': monitor.metrics[-10:] if 'monitor' in locals() else [],  # Last 10 metrics
    'notebook_execution': runner.get_summary() if 'runner' in locals() else {}
}

# Save state
state_path = config.output_dir / 'pipeline_state.json'
with open(state_path, 'w') as f:
    json.dump(pipeline_state, f, indent=2)

print(f"\nPipeline state saved to {state_path}")

# Create standalone run script for easy execution
run_script = '''#!/usr/bin/env python3
"""
Run F1 Prize Picks Pipeline

Usage:
    python run_f1_pipeline.py              # Run for upcoming race
    python run_f1_pipeline.py --race-id 1234  # Run for specific race
    python run_f1_pipeline.py --backtest   # Run backtesting
    python run_f1_pipeline.py --schedule   # Schedule race weekend automation
"""

import sys
import argparse
from pathlib import Path
import subprocess

# Add notebook directory to path
sys.path.append(str(Path(__file__).parent))

def run_master_notebook(race_id=None, mode='predict'):
    """Run the master pipeline notebook"""
    # Convert notebook to script first
    subprocess.run([
        sys.executable, '-m', 'jupyter', 'nbconvert',
        '--to', 'script',
        'F1_Pipeline_Integration_Master.ipynb'
    ])
    
    # Import and run
    from F1_Pipeline_Integration_Master import F1PrizePipeline, PipelineConfig
    from F1_Pipeline_Integration_Master import RaceWeekendAutomation, PerformanceMonitor
    
    # Load configuration
    config = PipelineConfig.load()
    
    # Initialize pipeline
    pipeline = F1PrizePipeline(config)
    
    if mode == 'schedule':
        # Run automation
        automation = RaceWeekendAutomation(pipeline)
        pipeline.load_data()
        
        upcoming = automation.get_race_schedule()
        if not upcoming.empty:
            next_race = upcoming.iloc[0]
            automation.schedule_race_analysis(next_race['raceId'], next_race['date'])
            print(f"Scheduled analyses for {next_race['name']}")
            automation.execute_scheduled_runs()
    elif mode == 'backtest':
        print("Running backtesting...")
        # Import and run backtesting notebook
        subprocess.run([
            sys.executable, '-m', 'jupyter', 'nbconvert',
            '--to', 'notebook',
            '--execute',
            'F1_Backtesting_Framework.ipynb'
        ])
    else:
        # Normal prediction mode
        results = pipeline.run(race_id)
        
        if results:
            print("\\nPipeline completed successfully!")
            print(f"Results saved to {config.output_dir}")
            
            # Track performance
            monitor = PerformanceMonitor(config.output_dir)
            monitor.load_metrics()
            monitor.track_predictions(results.get('predictions', pd.DataFrame()))
            monitor.save_metrics()
        else:
            print("\\nPipeline failed. Check logs for details.")

def main():
    parser = argparse.ArgumentParser(description='Run F1 Prize Picks Pipeline')
    parser.add_argument('--race-id', type=int, help='Specific race ID to analyze')
    parser.add_argument('--backtest', action='store_true', 
                       help='Run backtesting instead of predictions')
    parser.add_argument('--schedule', action='store_true',
                       help='Schedule automated race weekend analyses')
    
    args = parser.parse_args()
    
    if args.backtest:
        run_master_notebook(mode='backtest')
    elif args.schedule:
        run_master_notebook(mode='schedule')
    else:
        run_master_notebook(race_id=args.race_id)

if __name__ == "__main__":
    main()
'''

# Save run script
script_path = Path('run_f1_pipeline.py')
with open(script_path, 'w') as f:
    f.write(run_script)

# Make it executable on Unix-like systems
import os
if os.name != 'nt':  # Not Windows
    os.chmod(script_path, 0o755)

print(f"\nStandalone run script created: {script_path}")
print("\nUsage examples:")
print("  python run_f1_pipeline.py                  # Run for upcoming race")
print("  python run_f1_pipeline.py --race-id 1234   # Run for specific race")
print("  python run_f1_pipeline.py --backtest       # Run backtesting")
print("  python run_f1_pipeline.py --schedule       # Schedule race weekend automation")


Pipeline state saved to pipeline_outputs\pipeline_state.json

Standalone run script created: run_f1_pipeline.py

Usage examples:
  python run_f1_pipeline.py                  # Run for upcoming race
  python run_f1_pipeline.py --race-id 1234   # Run for specific race
  python run_f1_pipeline.py --backtest       # Run backtesting
  python run_f1_pipeline.py --schedule       # Schedule race weekend automation


## 6. Import Components (Using Correct Data Loader)

In [None]:
class F1PrizePipeline:
    """
    Main pipeline orchestrating all components
    """
    def __init__(self, config: PipelineConfig):
        self.config = config
        self.data_loader = None
        self.feature_store = None
        self.predictor = None
        self.optimizer = None
        self.explainer = None
        self.results = {}
        
        self._initialize_components()
    
    def _initialize_components(self):
        """Initialize all pipeline components"""
        logger.info("Initializing pipeline components...")
        
        # Data loader - Using f1db_data_loader
        if F1DBDataLoader:
            self.data_loader = F1DBDataLoader(
                data_dir=str(self.config.data_dir)  # Changed from base_path
            )
            logger.info("✓ Initialized F1DBDataLoader")
        else:
            logger.error("✗ F1DBDataLoader not available")
        
        # Load saved models if available
        try:
            self.predictor = joblib.load(self.config.model_dir / 'f1_integrated_evaluation_model.pkl')
            logger.info("✓ Loaded integrated predictor")
        except:
            logger.warning("✗ Could not load integrated predictor")
            self.predictor = None
        
        # Initialize optimizer
        if PrizePicksOptimizer:
            try:
                optimizer_config = joblib.load(self.config.model_dir / 'f1_prize_picks_optimizer.pkl')
                self.optimizer = optimizer_config['optimizer']
                logger.info("✓ Loaded Prize Picks optimizer")
            except:
                logger.warning("Creating new optimizer")
                self.optimizer = PrizePicksOptimizer(
                    kelly_fraction=self.config.kelly_fraction,
                    max_correlation=self.config.max_correlation
                )
        
        # Feature store
        if F1FeatureStore:
            self.feature_store = F1FeatureStore()
            logger.info("✓ Initialized Feature Store")
        
        # Explainers
        if PredictionExplainer:
            self.prediction_explainer = PredictionExplainer(None, [])
        if PrizePicksExplainer:
            self.pp_explainer = PrizePicksExplainer()
        
        logger.info("Pipeline components initialization complete")
    
    def load_data(self, force_update=False):
        """Load and prepare F1 data"""
        logger.info("Loading F1 data...")
        
        if not self.data_loader:
            logger.error("Data loader not initialized")
            return None
        
        # Load data using f1db_data_loader
        if load_f1db_data:
            self.data = load_f1db_data(data_dir=str(self.config.data_dir))  # Changed from base_path
        else:
            # Fallback to loading core datasets
            self.data = self.data_loader.get_core_datasets()
        
        logger.info(f"Loaded {len(self.data)} datasets")
        return self.data
    
    def prepare_features(self, race_id=None):
        """Prepare features for prediction"""
        logger.info("Preparing features...")
        
        # Get upcoming race if no race_id specified
        if race_id is None:
            races = self.data.get('races', pd.DataFrame())
            if not races.empty:
                races['date'] = pd.to_datetime(races['date'])
                upcoming = races[races['date'] > datetime.now()].iloc[0]
                race_id = upcoming['raceId']
                logger.info(f"Preparing for upcoming race: {upcoming['name']}")
            else:
                logger.warning("No upcoming race found")
                return None
        
        # Build feature set
        if self.feature_store and hasattr(self.feature_store, 'engineer_features'):
            features = self.feature_store.engineer_features(self.data)
        else:
            # Basic feature preparation
            features = self._create_basic_features()
        
        self.results['features'] = features
        return features
    
    def _create_basic_features(self):
        """Create basic features if feature store not available"""
        results = self.data.get('results', pd.DataFrame())
        if results.empty:
            return pd.DataFrame()
        
        # Simple feature engineering
        driver_stats = results.groupby('driverId').agg({
            'positionOrder': ['mean', 'std'],
            'points': ['mean', 'sum'],
            'grid': 'mean'
        })
        
        driver_stats.columns = ['avg_position', 'position_std', 
                               'avg_points', 'total_points', 'avg_grid']
        
        return driver_stats
    
    def generate_predictions(self):
        """Generate predictions for all drivers"""
        logger.info("Generating predictions...")
        
        # Get active drivers
        drivers = self.data.get('drivers', pd.DataFrame())
        results = self.data.get('results', pd.DataFrame())
        
        if drivers.empty or results.empty:
            logger.error("No driver or results data available")
            return pd.DataFrame()
        
        # Get drivers who raced recently
        recent_drivers = results[results['year'] >= 2023]['driverId'].unique()
        active_drivers = drivers[drivers['driverId'].isin(recent_drivers)]
        
        predictions = []
        
        for _, driver in active_drivers.iterrows():
            # Get driver stats
            driver_results = results[results['driverId'] == driver['driverId']].tail(10)
            
            if len(driver_results) >= 3:
                # Calculate probabilities
                top10_prob = (driver_results['positionOrder'] <= 10).mean()
                top5_prob = (driver_results['positionOrder'] <= 5).mean()
                top3_prob = (driver_results['positionOrder'] <= 3).mean()
                points_prob = (driver_results['points'] > 0).mean()
                
                # Adjust with model if available
                confidence = 0.7 + 0.05 * len(driver_results) / 10
                
                predictions.append({
                    'driver': driver['surname'],
                    'driverId': driver['driverId'],
                    'top10_prob': min(0.95, top10_prob * 1.1),
                    'top5_prob': min(0.85, top5_prob * 1.1),
                    'top3_prob': min(0.70, top3_prob * 1.1),
                    'points_prob': min(0.95, points_prob * 1.05),
                    'beat_teammate_prob': 0.5,
                    'confidence': confidence
                })
        
        self.results['predictions'] = pd.DataFrame(predictions)
        logger.info(f"Generated predictions for {len(predictions)} drivers")
        return self.results['predictions']
    
    def optimize_picks(self):
        """Optimize Prize Picks selections"""
        logger.info("Optimizing Prize Picks...")
        
        if 'predictions' not in self.results or self.results['predictions'].empty:
            logger.error("No predictions available")
            return None
        
        if not self.optimizer:
            logger.error("Optimizer not initialized")
            return None
        
        # Generate all possible picks
        all_picks = self.optimizer.generate_all_picks(
            self.results['predictions'],
            min_edge=self.config.min_edge
        )
        
        if all_picks.empty:
            logger.warning("No picks with positive edge found")
            return None
        
        # Optimize portfolio
        portfolio = self.optimizer.optimize_portfolio(
            all_picks,
            bankroll=self.config.bankroll,
            constraints=self.config.constraints
        )
        
        self.results['portfolio'] = portfolio
        logger.info(f"Optimized portfolio with {len(portfolio)} parlays")
        return portfolio
    
    def generate_report(self, save_path=None):
        """Generate comprehensive report"""
        logger.info("Generating report...")
        
        report = {
            'generated_at': datetime.now().isoformat(),
            'config': self.config.to_dict(),
            'summary': self._generate_summary(),
            'predictions': self.results.get('predictions', pd.DataFrame()).to_dict('records'),
            'portfolio': self._serialize_portfolio(),
            'risk_metrics': self._calculate_risk_metrics()
        }
        
        if save_path is None:
            save_path = self.config.output_dir / f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        
        with open(save_path, 'w') as f:
            json.dump(report, f, indent=2)
        
        logger.info(f"Report saved to {save_path}")
        return report
    
    def _generate_summary(self):
        """Generate summary statistics"""
        summary = {}
        
        if 'predictions' in self.results and not self.results['predictions'].empty:
            summary['n_drivers'] = len(self.results['predictions'])
            summary['avg_confidence'] = self.results['predictions']['confidence'].mean()
        
        if 'portfolio' in self.results:
            portfolio = self.results['portfolio']
            summary['n_parlays'] = len(portfolio)
            summary['total_wagered'] = sum(p['bet_size'] for p in portfolio)
            summary['expected_profit'] = sum(p['expected_value'] * p['bet_size'] for p in portfolio)
            summary['avg_win_probability'] = np.mean([p['adjusted_prob'] for p in portfolio])
        
        return summary
    
    def _serialize_portfolio(self):
        """Serialize portfolio for JSON"""
        if 'portfolio' not in self.results:
            return []
        
        serialized = []
        for parlay in self.results['portfolio']:
            parlay_data = {
                'n_picks': parlay['n_picks'],
                'bet_size': parlay['bet_size'],
                'payout': parlay['payout'],
                'adjusted_prob': parlay['adjusted_prob'],
                'expected_value': parlay['expected_value'],
                'kelly_stake': parlay['kelly_stake'],
                'picks': parlay['picks'].to_dict('records') if hasattr(parlay['picks'], 'to_dict') else parlay['picks']
            }
            serialized.append(parlay_data)
        
        return serialized
    
    def _calculate_risk_metrics(self):
        """Calculate risk metrics for portfolio"""
        if 'portfolio' not in self.results:
            return {}
        
        portfolio = self.results['portfolio']
        
        total_exposure = sum(p['bet_size'] for p in portfolio)
        
        metrics = {
            'total_exposure': total_exposure,
            'exposure_pct': total_exposure / self.config.bankroll if self.config.bankroll > 0 else 0,
            'n_bets': len(portfolio),
            'avg_bet_size': total_exposure / len(portfolio) if portfolio else 0,
            'max_bet_size': max(p['bet_size'] for p in portfolio) if portfolio else 0
        }
        
        return metrics
    
    def run(self, race_id=None):
        """Run complete pipeline"""
        logger.info("Starting F1 Prize Picks pipeline...")
        
        try:
            # Step 1: Load data
            self.load_data()
            
            # Step 2: Prepare features
            features = self.prepare_features(race_id)
            
            # Step 3: Generate predictions
            predictions = self.generate_predictions()
            
            # Step 4: Optimize picks
            portfolio = self.optimize_picks()
            
            # Step 5: Generate report
            if self.config.save_predictions:
                report = self.generate_report()
            
            logger.info("Pipeline completed successfully!")
            return self.results
            
        except Exception as e:
            logger.error(f"Pipeline failed: {str(e)}")
            import traceback
            traceback.print_exc()
            return None

In [None]:
# Check if all components are initialized
if initialization_success:
    # Initialize and run pipeline
    pipeline = F1PrizePipeline(config)
    logger.info("Pipeline initialized")
    
    # Run the pipeline
    results = pipeline.run()
    
    # Display results
    if results and 'portfolio' in results:
        print("\n" + "=" * 80)
        print("F1 PRIZE PICKS RECOMMENDATIONS")
        print("=" * 80)
        
        portfolio = results['portfolio']
        
        for i, parlay in enumerate(portfolio, 1):
            print(f"\n{'='*60}")
            print(f"PARLAY {i}: {parlay['n_picks']}-PICK ENTRY")
            print(f"{'='*60}")
            print(f"Bet Amount: ${parlay['bet_size']:.2f}")
            print(f"Potential Payout: ${parlay['bet_size'] * parlay['payout']:.2f} ({parlay['payout']}x)")
            print(f"Win Probability: {parlay['adjusted_prob']:.1%}")
            print(f"Expected Value: ${parlay['expected_value'] * parlay['bet_size']:.2f}")
            print(f"\nPicks:")
            
            picks = parlay['picks']
            if hasattr(picks, 'iterrows'):
                for j, (_, pick) in enumerate(picks.iterrows(), 1):
                    print(f"  {j}. {pick['driver']} - {pick['bet_type']}")
                    print(f"     Edge: +{pick['edge']:.1%}")
        
        # Summary
        summary = pipeline._generate_summary()
        if summary:
            print("\n" + "=" * 80)
            print("SUMMARY")
            print("=" * 80)
            print(f"Total Wagered: ${summary.get('total_wagered', 0):.2f}")
            print(f"Expected Profit: ${summary.get('expected_profit', 0):.2f}")
            print(f"Number of Parlays: {summary.get('n_parlays', 0)}")
            print(f"Average Win Probability: {summary.get('avg_win_probability', 0):.1%}")
    else:
        print("\nNo recommendations generated. Check logs for details.")
else:
    print("\n❌ Pipeline initialization failed. Please check the error messages above.")

2025-07-23 16:24:48,797 - F1Pipeline - INFO - Initializing pipeline components...
2025-07-23 16:24:48,799 - F1Pipeline - INFO - ✓ Initialized F1DBDataLoader


NameError: name 'PrizePicksOptimizer' is not defined

In [None]:
def quick_run_pipeline(bankroll=1000, kelly_fraction=0.25):
    """
    Quick function to run the pipeline with custom parameters
    """
    # Update config
    config.bankroll = bankroll
    config.kelly_fraction = kelly_fraction
    
    # Initialize and run
    pipeline = F1PrizePipeline(config)
    results = pipeline.run()
    
    return results

def run_all_notebooks_fresh():
    """
    Run all notebooks from scratch
    """
    print("Running all notebooks from scratch...")
    print("This may take 10-15 minutes...\n")
    
    # Force rerun all components
    success = initialize_pipeline_components(force_rerun=True)
    
    if success:
        print("\n✅ All notebooks executed successfully!")
        print("Now running the main pipeline...\n")
        
        # Run pipeline
        pipeline = F1PrizePipeline(config)
        results = pipeline.run()
        
        return results
    else:
        print("\n❌ Some notebooks failed. Check the logs.")
        return None

# Example usage:
# results = quick_run_pipeline(bankroll=500, kelly_fraction=0.20)
# results = run_all_notebooks_fresh()

## 9. Quick Run Functions

## Summary

This Master Pipeline Integration notebook:

1. **Automatically runs prerequisite notebooks** in the correct order
2. **Uses the correct f1db_data_loader.py** (not enhanced version)
3. **Handles missing components gracefully** with informative error messages
4. **Provides a single entry point** for the entire F1 Prize Picks pipeline

### Usage:
- **First time**: The notebook will automatically run all required notebooks
- **Subsequent runs**: It will skip notebooks whose outputs already exist
- **Force fresh run**: Use `run_all_notebooks_fresh()` to rebuild everything

### Key Functions:
- `initialize_pipeline_components()` - Run required notebooks
- `quick_run_pipeline()` - Run with custom parameters
- `run_all_notebooks_fresh()` - Rebuild everything from scratch

The pipeline is now self-contained and can orchestrate the entire F1 prediction system!