# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import nflreadpy as nfl
import pickle
import os
import sys
from datetime import datetime

# --- CONFIGURATION ---
# Page-level settings. The icon literal had been corrupted by an encoding
# round-trip; it is restored to the intended football emoji.
st.set_page_config(page_title="NFL War Room", layout="wide", page_icon="🏈")

# Constants
# Season whose games are shown and predicted.
CURRENT_SEASON = 2025  # Update this as needed
# Previous season; it is loaded alongside the current one by
# load_and_prep_data so early-season rolling windows have history.
LAG_SEASON = CURRENT_SEASON - 1
# First directory load_models searches for the pickled model and scaler.
MODEL_DIR = './models'  # Path relative to where you run the script
# Not referenced in the visible part of this file.
DATA_DIR = './data'

# --- 1. DATA LOADING & ENGINEERING (The "Brain" Pipeline) ---
def _add_team_rolling_means(games, metrics, windows):
    """Append trailing per-team means of each metric, excluding the current game.

    Rows are ordered by team, season and week. For every window ``w`` and
    metric ``m`` a column ``{m}_roll{w}`` is added holding the mean of the
    non-missing values among that team's previous ``w`` rows.
    """
    games = games.sort_values(['team', 'season', 'week']).reset_index(drop=True)
    by_team = games.groupby('team')
    rolled = {}
    for window in windows:
        for col in metrics:
            # Both shift(1) and rolling() must run inside the group: rolling
            # over the flat shifted Series would blend in rows belonging to
            # the team sorted just above.
            rolled[f'{col}_roll{window}'] = by_team[col].transform(
                lambda s, w=window: s.shift(1).rolling(window=w, min_periods=1).mean()
            )
    # Build all new columns at once instead of inserting them one by one.
    return pd.concat([games, pd.DataFrame(rolled, index=games.index)], axis=1)


def _add_matchup_features(matchups, metrics, windows):
    """Append home_/away_ copies of the rolling metrics and offense-vs-defense gaps.

    ``matchups`` holds one row per game with the home side's columns
    unsuffixed and the away side's columns suffixed ``_away``.
    """
    new_cols = {}
    for window in windows:
        for col in metrics:
            home_vals = matchups[f'{col}_roll{window}']
            away_vals = matchups[f'{col}_roll{window}_away']
            new_cols[f'home_{col}_roll{window}'] = home_vals
            new_cols[f'away_{col}_roll{window}'] = away_vals

            if not col.startswith('off_'):
                continue

            # Pair each offensive metric with the defensive metric it faces.
            if col == 'off_turnovers':
                def_col = 'def_turnovers_forced'
            else:
                def_col = col.replace('off_', 'def_', 1)

            away_def = f'{def_col}_roll{window}_away'
            if away_def in matchups.columns:
                new_cols[f'home_{col}_matchup_roll{window}'] = home_vals - matchups[away_def]
                new_cols[f'away_{col}_matchup_roll{window}'] = (
                    away_vals - matchups[f'{def_col}_roll{window}']
                )
    return pd.concat([matchups, pd.DataFrame(new_cols, index=matchups.index)], axis=1)


@st.cache_data(ttl=3600)  # Cache data for 1 hour to prevent constant redownloading
def load_and_prep_data(current_season, lag_season):
    """
    Fetch play-by-play and schedule data and rebuild the matchup feature matrix.

    Regular-season plays from both seasons are aggregated to per-team,
    per-game offensive stats; each team's defensive stats are its opponent's
    offensive stats in the same game. Rolling means over the previous 3, 5
    and 8 games are then computed per team and joined into one row per game.

    Args:
        current_season: Season whose games appear in the returned frame.
        lag_season: Earlier season loaded only to provide rolling history.

    Returns:
        DataFrame with one row per ``current_season`` regular-season game.
        Home-side columns are unsuffixed (including ``week``, ``team``,
        ``opponent``, ``spread_line``), away-side columns carry ``_away``,
        and ``home_*`` / ``away_*`` rolling, ``*_matchup_*`` and rest columns
        are appended.
    """
    with st.spinner(f'Fetching Data for {lag_season}-{current_season}...'):
        # 1. Fetch PBP & Schedule
        pbp = nfl.load_pbp(seasons=[lag_season, current_season]).to_pandas()
        # Keep only the columns used below and take a real copy so the
        # column assignments that follow do not write into a filtered view.
        pbp_cols = ['game_id', 'posteam', 'play_type', 'epa', 'success',
                    'fumble_lost', 'interception']
        pbp = pbp.loc[pbp['season_type'] == 'REG', pbp_cols].copy()

        schedule = nfl.load_schedules().to_pandas()
        schedule = schedule[
            (schedule['season'].isin([lag_season, current_season])) &
            (schedule['game_type'] == 'REG')
        ].copy()

        # 2. Feature Engineering (Condensed Version)
        pbp['pass_epa'] = np.where(pbp['play_type'] == 'pass', pbp['epa'], np.nan)
        pbp['rush_epa'] = np.where(pbp['play_type'] == 'run', pbp['epa'], np.nan)
        # Treat a missing flag as 0 so one NaN cannot hide the other flag.
        pbp['turnover'] = pbp['fumble_lost'].fillna(0) + pbp['interception'].fillna(0)

        # game_id already identifies season and week, so those are left out
        # here; carrying them would collide with the schedule's own
        # season/week columns in the merges below.
        game_stats = (
            pbp.groupby(['game_id', 'posteam'])
            .agg(
                off_epa_per_play=('epa', 'mean'),
                off_success_rate=('success', 'mean'),
                off_pass_epa=('pass_epa', 'mean'),
                off_run_epa=('rush_epa', 'mean'),
                off_turnovers=('turnover', 'sum'),
            )
            .reset_index()
            .rename(columns={'posteam': 'team'})
        )

        # Same numbers relabelled: joined on the opponent below, a team's
        # def_* values are what its opponent's offense produced in that game.
        def_stats = game_stats.rename(columns={
            'team': 'defteam', 'off_epa_per_play': 'def_epa_per_play', 'off_success_rate': 'def_success_rate',
            'off_pass_epa': 'def_pass_epa', 'off_run_epa': 'def_run_epa', 'off_turnovers': 'def_turnovers_forced'
        })

        # Schedule & Rest
        schedule['gameday'] = pd.to_datetime(schedule['gameday'])
        # Identify Home/Away for the base skeleton (two rows per game)
        skeleton_cols = ['game_id', 'season', 'week', 'spread_line']
        base_games = pd.concat([
            schedule[skeleton_cols + ['home_team']].rename(columns={'home_team': 'team'}).assign(is_home=1),
            schedule[skeleton_cols + ['away_team']].rename(columns={'away_team': 'team'}).assign(is_home=0)
        ], ignore_index=True)

        # Calculate Rest (game_id must be selected for the merge key to exist)
        team_sched = base_games[['game_id', 'season', 'week', 'team']].merge(
            schedule[['game_id', 'gameday']], on='game_id'
        )
        team_sched = team_sched.sort_values(['team', 'season', 'week'])
        previous_gameday = team_sched.groupby('team')['gameday'].shift(1)
        # NOTE(review): the previous game may belong to lag_season, so a
        # team's first current-season game gets a very large rest value;
        # confirm this matches how the model was trained.
        team_sched['rest_days'] = (team_sched['gameday'] - previous_gameday).dt.days.fillna(7)

        # Merge Stats & Rest back to Base
        base_games = base_games.merge(game_stats, on=['game_id', 'team'], how='left')

        # Map Opponents for Defense Stats
        opp_map = pd.concat([
            schedule[['game_id', 'home_team', 'away_team']].rename(columns={'home_team': 'team', 'away_team': 'opponent'}),
            schedule[['game_id', 'away_team', 'home_team']].rename(columns={'away_team': 'team', 'home_team': 'opponent'})
        ], ignore_index=True)
        base_games = base_games.merge(opp_map, on=['game_id', 'team'], how='left')
        base_games = base_games.merge(def_stats, left_on=['game_id', 'opponent'], right_on=['game_id', 'defteam'], how='left')
        base_games = base_games.merge(team_sched[['game_id', 'team', 'rest_days']], on=['game_id', 'team'], how='left')

        # Rolling Calculations
        rolling_metrics = [
            'off_epa_per_play', 'off_success_rate', 'off_pass_epa', 'off_run_epa', 'off_turnovers',
            'def_epa_per_play', 'def_success_rate', 'def_pass_epa', 'def_run_epa', 'def_turnovers_forced'
        ]
        windows = [3, 5, 8]
        base_games = _add_team_rolling_means(base_games, rolling_metrics, windows)

        # Final Matchup Matrix (Home vs Away Row)
        in_season = base_games['season'] == current_season
        home_df = base_games[in_season & (base_games['is_home'] == 1)]
        away_df = base_games[in_season & (base_games['is_home'] == 0)]
        final_df = home_df.merge(away_df, on='game_id', suffixes=('', '_away'))

        # Differentials
        final_df = _add_matchup_features(final_df, rolling_metrics, windows)

        final_df['home_rest'] = final_df['rest_days']
        final_df['away_rest'] = final_df['rest_days_away']
        final_df['rest_advantage'] = final_df['home_rest'] - final_df['away_rest']

        return final_df

# --- 2. LOAD MODELS ---
@st.cache_resource
def load_models(models_dir):
    """Locate and unpickle the NV ensemble model and its scaler.

    Searches ``models_dir`` first, then two fallback directories, for
    ``ensemble/nv_nuclear_ensemble.pkl`` and ``baseline/NV/nv_scaler.pkl``.
    The first directory containing BOTH files is used.

    Args:
        models_dir: Preferred directory to search for the model files.

    Returns:
        ``(model, scaler)`` on success. When no directory holds both files an
        error is shown in the app and ``(None, None)`` is returned.
    """
    # Adjust paths based on your actual folder structure
    candidate_dirs = (
        models_dir,
        "/content/drive/MyDrive/NFL_Prediction_System/models",  # Colab path
        "models",  # Local path
    )

    for base_dir in candidate_dirs:
        nv_path = os.path.join(base_dir, "ensemble", "nv_nuclear_ensemble.pkl")
        scaler_path = os.path.join(base_dir, "baseline", "NV", "nv_scaler.pkl")
        # Require the scaler as well: a directory with only the ensemble
        # would otherwise fail later with FileNotFoundError.
        if os.path.isfile(nv_path) and os.path.isfile(scaler_path):
            break
    else:
        # NOTE(review): st.cache_resource presumably caches this
        # (None, None) result too, so files added later may not be picked up
        # until the cache is cleared - confirm.
        st.error("❌ Could not find model files! Please check paths.")
        return None, None

    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only point these paths at model files you produced or fully trust.
    with open(nv_path, 'rb') as f:
        model = pickle.load(f)
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)

    return model, scaler

# --- 3. UI & LOGIC ---

# Sidebar: Controls
# The emoji in the header and title were corrupted by an encoding round-trip
# and are restored here.
st.sidebar.header("💰 Bankroll Manager")
# min_value keeps the bankroll from going negative.
bankroll = st.sidebar.number_input("Current Bankroll ($)", min_value=0, value=1000, step=100)
kelly_fraction = st.sidebar.slider(
    "Kelly Fraction (Risk Tolerance)", min_value=0.1, max_value=1.0, value=0.25, step=0.05
)
# The label says "%", but the value is a fraction (0.05 == 5 percentage
# points); it is compared directly against a probability difference below.
min_edge = st.sidebar.slider(
    "Minimum Edge %", min_value=0.0, max_value=0.20, value=0.05, step=0.01
)

# Main Title
st.title("🏈 NFL War Room: Prediction Dashboard")
st.markdown(f"**Season:** {CURRENT_SEASON} | **Model:** NV Nuclear Ensemble (60% Acc)")

# Load Everything
# Feature matrix for the current season (one row per game).
data = load_and_prep_data(CURRENT_SEASON, LAG_SEASON)
# Both are None when the model files could not be found.
model, scaler = load_models(MODEL_DIR)

if data is not None and model is not None:
    # --- Prediction Logic ---

    # 1. Feature Alignment
    # We need to ensure we only use the columns the model was trained on
    try:
        # Try to get features from XGBoost estimator
        model_features = model.estimators_[0].feature_names_in_
    except:
        # Fallback: manually define or load a reference
        st.warning("‚ö†Ô∏è Could not read feature names from model. Using dynamic alignment.")
        # Create a dummy list based on dataframe intersection
        model_features = [c for c in data.columns if np.issubdtype(data[c].dtype, np.number)]
        # This is risky in production, usually you load a feature_list.pkl

    # Create input matrix
    X = pd.DataFrame(0, index=data.index, columns=model_features)
    for col in model_features:
        if col in data.columns:
            X[col] = data[col]

    # Scale
    X_scaled = scaler.transform(X)

    # Predict
    probs = model.predict_proba(X_scaled)[:, 1]

    # Implied Vegas Prob
    vegas_probs = 1 / (1 + 10 ** (data['spread_line'] / 14.5))

    # Create Display Dataframe
    display_df = data[['week', 'team', 'opponent', 'spread_line']].copy()
    display_df.columns = ['Week', 'Home', 'Away', 'Spread']
    display_df['Model_Win_Prob'] = probs
    display_df['Vegas_Win_Prob'] = vegas_probs
    display_df['Edge'] = display_df['Model_Win_Prob'] - display_df['Vegas_Win_Prob']

    # --- UI: Week Selector ---
    available_weeks = sorted(display_df['Week'].unique())
    selected_week = st.selectbox("Select Week", available_weeks, index=len(available_weeks)-1)

    # Filter by Week
    week_data = display_df[display_df['Week'] == selected_week].copy()

    # --- CALCULATE BETS ---
    bets = []
    for idx, row in week_data.iterrows():
        edge = row['Edge']

        # Filter by Min Edge
        if abs(edge) < min_edge:
            rec = "No Bet"
            size = 0.0
            pick = "-"
        else:
            # Direction
            if edge > 0:
                pick = f"{row['Home']} (Home)"
                my_p = row['Model_Win_Prob']
            else:
                pick = f"{row['Away']} (Away)"
                my_p = 1 - row['Model_Win_Prob']

            # Kelly Calc
            b = 0.909
            q = 1 - my_p
            kelly = (b * my_p - q) / b

            # Constraints
            wager_pct = max