<a href="https://colab.research.google.com/github/Dave356w/Dave356w/blob/main/NBA_LINEUPS_2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install numpy==1.23.5 thefuzz nba_api pykalman pulp fuzzywuzzy

Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Collecting thefuzz
  Downloading thefuzz-0.22.1-py3-none-any.whl.metadata (3.9 kB)
Collecting nba_api
  Downloading nba_api-1.8.0-py3-none-any.whl.metadata (5.7 kB)
Collecting pykalman
  Downloading pykalman-0.10.1-py2.py3-none-any.whl.metadata (9.5 kB)
Collecting pulp
  Downloading PuLP-3.0.2-py3-none-any.whl.metadata (6.7 kB)
Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting rapidfuzz<4.0.0,>=3.0.0 (from thefuzz)
  Downloading rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting scikit-base<0.13.0 (from pykalman)
  Downloading scikit_base-0.12.0-py3-none-any.whl.metadata (8.5 kB)
Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31

In [None]:
import os
import sys
import random
import logging
from collections import defaultdict
from contextlib import redirect_stdout
from dataclasses import dataclass
from typing import List, Set, Optional

import pandas as pd
import pulp

# Configure logging
# NOTE(review): logging.basicConfig does nothing when the root logger already has
# handlers (which can be the case in notebook runtimes) — confirm this format applies.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('NBA_DFS_Optimizer')

# Configuration dictionary - Update with your paths
CONFIG = {
    "salary_cap": 50000,  # Upper bound on a lineup's total salary
    "min_salary": 49000,  # Lower bound on a lineup's total salary
    "min_player_salary": 3300,  # Players with a positive salary below this are dropped
    "max_players_per_team": 3,  # Upper bound on players from one team in a lineup
    "lineup_variance": 0.10,  # Base half-width of the random projection adjustment
    "n_lineup_attempts": 5000,  # Hard cap on solver attempts across all lineups
    "target_lineups": 20,  # Number of lineups to generate
    "max_exposure": 0.50,  # Default per-player share of lineups (Player.max_exposure)
    # Update this to your player projections CSV path
    "projections_path": '/content/drive/My Drive/NBA_PerGame_Data/NBA_Player_Data_M_player_projections.csv',
    "output_path": './NBA_Projections',  # Directory that receives optimized_lineups.csv
    "max_overlap": 4,  # Max players a new lineup may share with any earlier lineup
    "exclude_inactive": True,  # Passed to create_player_objects as exclude_inactive
    "min_stack_size": 2,  # Players from one team needed to count as a stack
    "min_stacks_required": 2,  # Number of such stacks every lineup must contain
    "zero_salary_handling": "filter"  # Options: "filter", "keep", or "set_to_min"
}

@dataclass
class Player:
    """A single player on the slate plus the mutable state the optimizer keeps for them."""
    id: int  # Player identifier; keys the LP variables and lineup signatures
    name: str
    team: str
    positions: List[str]  # List of eligible positions
    salary: float
    projected_fp: float  # Unadjusted fantasy-point projection
    opponent: str
    is_starter: bool
    is_home: bool
    minutes: float  # Projected minutes
    value: float  # FP per $1000
    adj_projected_fp: float = 0  # For optimization (rewritten by apply_variance before each solve)
    times_used: int = 0  # Number of accepted lineups containing this player
    max_exposure: float = CONFIG["max_exposure"]  # Read once, when the class is defined

def get_positions_from_raw(position_value):
    """Convert a raw position value into the list of roster slots it can fill.

    Accepted inputs:
      * str   - one position or several separated by '/', e.g. "PG/SG" or
                "PG / SG"; tokens are stripped and upper-cased, empty tokens
                are dropped.
      * int / float - numeric code 1..5 mapped to PG, SG, SF, PF, C; any other
                number falls back to 'C'. NaN (a missing cell) is treated as
                "no position known" and yields only 'UTIL', matching the
                'UTIL' default used when the POSITION column is absent.
      * any other iterable - copied into a list as-is.
      * anything else (e.g. None) - treated as unknown, yields only 'UTIL'.

    The result always keeps the base positions first, then adds 'G' for any
    guard, 'F' for any forward, and finally 'UTIL'.
    """
    if isinstance(position_value, str):
        # Strip each token: "PG / SG" would otherwise produce 'PG ' and ' SG',
        # which match no roster slot.
        tokens = (tok.strip().upper() for tok in position_value.split('/'))
        positions = [tok for tok in tokens if tok]
    elif isinstance(position_value, (int, float)):
        if position_value != position_value:
            # NaN is the only value unequal to itself: missing data, not a code.
            positions = []
        else:
            # This is a guess - you may need to adjust based on your data
            pos_map = {1: 'PG', 2: 'SG', 3: 'SF', 4: 'PF', 5: 'C'}
            # Default to C if the numeric code is unknown
            positions = [pos_map.get(float(position_value), 'C')]
    else:
        try:
            positions = list(position_value)
        except TypeError:
            # Not iterable (e.g. None): only the UTIL slot is available
            positions = []

    # Expand to the combined guard / forward slots
    expanded = list(positions)
    for pos in positions:
        if pos in ('PG', 'SG') and 'G' not in expanded:
            expanded.append('G')
        elif pos in ('SF', 'PF') and 'F' not in expanded:
            expanded.append('F')

    # Every player is eligible for UTIL
    if 'UTIL' not in expanded:
        expanded.append('UTIL')

    return expanded

def load_projection_data(file_path: str) -> pd.DataFrame:
    """Read the projections CSV and log a summary of its SALARY column.

    Null salaries are replaced with 0. Zero salaries and positive salaries
    below CONFIG["min_player_salary"] are only counted and logged here; no
    rows are removed. Any failure (unreadable file, missing SALARY column,
    ...) is logged and an empty DataFrame is returned instead of raising.
    """
    try:
        frame = pd.read_csv(file_path)
        logger.info(f"Loaded {len(frame)} players from {file_path}")

        # Null salaries become 0 so they fall under the zero-salary handling
        n_null = frame['SALARY'].isna().sum()
        if n_null > 0:
            logger.warning(f"Found {n_null} players with null salary.")
            frame['SALARY'] = frame['SALARY'].fillna(0)

        salaries = frame['SALARY']

        # Report zero salaries (how they are treated is decided later)
        n_zero = (salaries == 0).sum()
        if n_zero > 0:
            logger.warning(f"Found {n_zero} players with zero salary.")

        # Report positive salaries under the configured floor
        is_positive = salaries > 0
        is_under_floor = salaries < CONFIG["min_player_salary"]
        n_under = (is_positive & is_under_floor).sum()
        if n_under > 0:
            logger.warning(f"Found {n_under} players with salary below minimum ${CONFIG['min_player_salary']}.")

        # Log salary distribution
        logger.info(f"Salary range: ${salaries.min():,.0f} to ${salaries.max():,.0f}")
    except Exception as e:
        logger.error(f"Failed to load data: {e}")
        return pd.DataFrame()
    return frame

def create_player_objects(df: pd.DataFrame, exclude_inactive: bool = True,
                          min_minutes: float = 20) -> List[Player]:
    """Turn projection rows into Player objects, applying activity and salary filters.

    Args:
        df: Projection data. Missing required columns are back-filled with
            defaults on an internal copy; the caller's frame is not modified.
        exclude_inactive: When True, drop rows whose projection is 0 or whose
            projected minutes are below ``min_minutes``.
        min_minutes: Minutes threshold used by the inactive filter.

    Returns:
        The players that survived all filters.

    Missing (NaN) PROJ_FP, PROJ_MINUTES and SALARY values are treated as 0, so
    they hit the same filters as explicit zeros instead of reaching the solver
    as NaN coefficients. Rows without a PLAYER_ID are skipped with a warning.
    """
    players = []

    # Work on a copy: adding default columns must not leak into the caller's frame
    df = df.copy()

    # Check for required columns and add defaults if missing
    required_cols = ['PLAYER_ID', 'PLAYER_NAME', 'TEAM', 'POSITION', 'SALARY', 'PROJ_FP',
                     'OPPONENT', 'IS_STARTER', 'IS_HOME', 'PROJ_MINUTES']

    for col in required_cols:
        if col in df.columns:
            continue
        logger.warning(f"Column {col} not found in DataFrame. Adding default values.")
        if col == 'PLAYER_ID':
            df[col] = range(1, len(df) + 1)  # Generate IDs starting from 1
        elif col == 'POSITION':
            df[col] = 'UTIL'  # Default position
        elif col == 'OPPONENT':
            df[col] = 'UNK'  # Unknown opponent
        else:
            df[col] = 0  # Flags, minutes and every other column default to 0

    min_player_salary = CONFIG["min_player_salary"]
    zero_salary_handling = CONFIG["zero_salary_handling"]

    # Counters for player filtering
    filtered_salary_count = 0
    filtered_inactive_count = 0

    for _, row in df.iterrows():
        # Normalise numeric inputs first so NaN behaves like 0 everywhere below
        proj_fp = 0.0 if pd.isna(row['PROJ_FP']) else float(row['PROJ_FP'])
        minutes = 0.0 if pd.isna(row['PROJ_MINUTES']) else float(row['PROJ_MINUTES'])
        salary = 0.0 if pd.isna(row['SALARY']) else float(row['SALARY'])

        # Filter out inactive players
        if exclude_inactive and (proj_fp == 0.0 or minutes < min_minutes):
            filtered_inactive_count += 1
            continue

        # Filter out players with a positive salary below the minimum
        if 0 < salary < min_player_salary:
            filtered_salary_count += 1
            continue

        # Zero-salary players: drop, keep as-is, or lift to the minimum
        if salary == 0:
            if zero_salary_handling == "filter":
                filtered_salary_count += 1
                continue
            if zero_salary_handling == "set_to_min":
                salary = float(min_player_salary)

        if pd.isna(row['PLAYER_ID']):
            # int(NaN) would raise; a player without an id cannot be tracked
            logger.warning(f"Skipping {row['PLAYER_NAME']}: missing PLAYER_ID")
            continue

        # Calculate value (FP per $1000)
        value = proj_fp / (salary / 1000) if salary > 0 else 0

        is_starter = bool(row['IS_STARTER']) if not pd.isna(row['IS_STARTER']) else False
        is_home = bool(row['IS_HOME']) if not pd.isna(row['IS_HOME']) else False

        players.append(Player(
            id=int(row['PLAYER_ID']),
            name=str(row['PLAYER_NAME']),
            team=str(row['TEAM']),
            positions=get_positions_from_raw(row['POSITION']),
            salary=salary,
            projected_fp=proj_fp,
            opponent=str(row['OPPONENT']),
            is_starter=is_starter,
            is_home=is_home,
            minutes=minutes,
            value=value,
            adj_projected_fp=proj_fp
        ))

    logger.info(f"Filtered out {filtered_inactive_count} inactive players and {filtered_salary_count} players due to salary criteria")
    logger.info(f"Created {len(players)} player objects after applying filters")
    return players

def apply_variance(players: List[Player], lineup_num: int):
    """Randomly perturb each player's projection in place to diversify lineups.

    The base spread CONFIG["lineup_variance"] widens with ``lineup_num`` and is
    scaled down per player as their usage count approaches their exposure
    allowance. The result is written to ``adj_projected_fp``; ``projected_fp``
    is left untouched.
    """
    # Spread grows slightly for each subsequent lineup
    spread = CONFIG["lineup_variance"] * (1 + (lineup_num / 50))

    for player in players:
        # The small constant keeps the divisor non-zero when max_exposure is 0
        allowance = CONFIG["target_lineups"] * player.max_exposure + 0.001
        # Heavily used players get a smaller share of the random swing
        damping = 1 - (player.times_used / allowance)
        swing = random.uniform(-spread, +spread) * damping
        player.adj_projected_fp = player.projected_fp * (1 + swing)

def check_overlap(new_lineup: List[tuple], all_lineups: List[List[tuple]], max_overlap: int) -> bool:
    """Return True when ``new_lineup`` shares at most ``max_overlap`` players with every earlier lineup.

    Lineups are lists of ``(slot, player)`` pairs; players are compared by ``id``.
    """
    candidate_ids = {player.id for _, player in new_lineup}
    return all(
        sum(1 for _, player in earlier if player.id in candidate_ids) <= max_overlap
        for earlier in all_lineups
    )

def _build_lineup_problem(eligible_players, lineup_slots, lineup_num, excluded_teams):
    """Build the binary program for one lineup and return ``(problem, x)``.

    ``x`` maps ``(player.id, slot)`` to the binary variable that is 1 when the
    player fills that slot.
    """
    prob = pulp.LpProblem(f"DFS_Lineup_Optimization_{lineup_num}", pulp.LpMaximize)

    # Slots each player may fill. dict.fromkeys de-duplicates while keeping
    # order, so a repeated position cannot add the same variable to a sum twice.
    player_slots = [
        (p, [pos for pos in dict.fromkeys(p.positions) if pos in lineup_slots])
        for p in eligible_players
    ]
    x = {
        (p.id, pos): pulp.LpVariable(f"x_{p.id}_{pos}", cat="Binary")
        for p, slots in player_slots
        for pos in slots
    }

    def total(pairs, weight=lambda p: 1):
        # Fresh weighted sum of the assignment variables of the given players
        return pulp.lpSum(x[(p.id, pos)] * weight(p) for p, slots in pairs for pos in slots)

    # Objective: maximize adjusted projected fantasy points
    prob += total(player_slots, lambda p: p.adj_projected_fp)

    # Position constraints: exactly one player per slot
    for slot in lineup_slots:
        prob += pulp.lpSum(x[(p.id, slot)] for p, slots in player_slots if slot in slots) == 1

    # Player uniqueness: each player can only be used once
    for p, slots in player_slots:
        if slots:
            prob += pulp.lpSum(x[(p.id, pos)] for pos in slots) <= 1

    # Salary constraints
    prob += total(player_slots, lambda p: p.salary) <= CONFIG["salary_cap"]
    prob += total(player_slots, lambda p: p.salary) >= CONFIG["min_salary"]

    # Team constraints
    by_team = defaultdict(list)
    for pair in player_slots:
        by_team[pair[0].team].append(pair)

    for team_pairs in by_team.values():
        prob += total(team_pairs) <= CONFIG["max_players_per_team"]

    # Stacking: a team's indicator may only be 1 when the lineup holds at least
    # min_stack_size of its players, and enough indicators must be 1.
    team_stack_vars = {team: pulp.LpVariable(f"stack_{team}", cat="Binary")
                       for team in by_team if team not in excluded_teams}
    for team, team_var in team_stack_vars.items():
        prob += total(by_team[team]) >= CONFIG["min_stack_size"] * team_var
    prob += pulp.lpSum(team_stack_vars.values()) >= CONFIG["min_stacks_required"]

    return prob, x


def _extract_lineup(x, eligible_players, lineup_slots):
    """Read the solved variables back into a list of ``(slot, player)`` pairs."""
    selected = []
    used_ids = set()
    for slot in lineup_slots:
        for p in eligible_players:
            if p.id in used_ids:
                continue
            var = x.get((p.id, slot))
            # Solvers report binaries as floats (e.g. 0.9999999), so test
            # against 0.5 rather than for equality with 1.
            if var is not None and (pulp.value(var) or 0) > 0.5:
                selected.append((slot, p))
                used_ids.add(p.id)
                break
    return selected


def run_lineup_optimizer(players: List[Player], target_lineups: int = CONFIG["target_lineups"],
                         excluded_teams: Optional[Set[str]] = None) -> List[List[tuple]]:
    """Generate up to ``target_lineups`` distinct lineups and print a report.

    Each attempt perturbs the projections (apply_variance), solves a binary
    program for the eight roster slots under the salary, per-team and stacking
    limits in CONFIG, and accepts the result only if it is new, within the
    overlap limit against every accepted lineup, and contains the required
    stacks. Player usage counters (``times_used``) are updated only for
    accepted lineups.

    Args:
        players: Candidate players; their ``adj_projected_fp`` and
            ``times_used`` attributes are modified in place.
        target_lineups: Number of lineups to produce.
        excluded_teams: Team codes whose players are removed before solving.

    Returns:
        The accepted lineups, each a list of ``(slot, Player)`` pairs. May be
        shorter than requested (or empty) if attempts run out.
    """
    if excluded_teams is None:
        excluded_teams = set()

    print("\n" + "="*70)
    print(f"Running NBA DFS Optimizer to generate {target_lineups} lineups")
    print(f"Salary cap: ${CONFIG['salary_cap']:,}")
    print(f"Minimum lineup salary: ${CONFIG['min_salary']:,}")
    print(f"Filtering players with salary below ${CONFIG['min_player_salary']:,}")
    print(f"Zero salary handling: {CONFIG['zero_salary_handling']}")
    print(f"Max exposure: {CONFIG['max_exposure']*100}%")
    print(f"Max overlap: {CONFIG['max_overlap']} players between lineups")
    print(f"Team stacking: Min {CONFIG['min_stack_size']} players per team, {CONFIG['min_stacks_required']} stacks required")
    print("="*70)

    # Filter out excluded teams
    players = [p for p in players if p.team not in excluded_teams]
    print(f"Excluding teams: {', '.join(excluded_teams) if excluded_teams else 'None'}. {len(players)} players remaining.")

    # Check if we have enough players
    if len(players) < 8:
        print(f"Error: Not enough players ({len(players)}) to form a valid lineup. Need at least 8.")
        return []

    # Standard DFS lineup positions
    lineup_slots = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL']

    all_lineups = []
    lineup_signatures = set()  # To track unique lineups
    attempts = 0
    lineup_attempts = 0  # Attempts since the last accepted lineup
    max_consecutive_fails = 500  # Progress is printed every this many attempts

    while len(all_lineups) < target_lineups and attempts < CONFIG["n_lineup_attempts"]:
        attempts += 1
        lineup_attempts += 1
        lineup_num = len(all_lineups) + 1

        if lineup_attempts == 1 or lineup_attempts % max_consecutive_fails == 0:
            print(f"\nGenerating lineup {lineup_num} of {target_lineups} (Attempt {attempts})")

        # Get players still eligible based on exposure
        eligible_players = [p for p in players if p.times_used < p.max_exposure * target_lineups]
        if len(eligible_players) < 8:
            print(f"Not enough eligible players ({len(eligible_players)}) to form lineup {lineup_num}")
            break

        # Apply variance to create different lineups
        apply_variance(eligible_players, lineup_num)

        prob, x = _build_lineup_problem(eligible_players, lineup_slots, lineup_num, excluded_teams)

        # Solve silently
        solver = pulp.PULP_CBC_CMD(msg=False, timeLimit=60)
        with open(os.devnull, 'w') as f, redirect_stdout(f):
            result_status = prob.solve(solver)

        if pulp.LpStatus[result_status] != "Optimal":
            if lineup_attempts % max_consecutive_fails == 0:
                print(f"   - Still trying to find a valid lineup (made {lineup_attempts} attempts)")
            continue

        selected_lineup = _extract_lineup(x, eligible_players, lineup_slots)

        # Check if lineup is valid and unique
        if len(selected_lineup) != len(lineup_slots):
            continue

        signature = tuple(sorted(p.id for _, p in selected_lineup))
        if signature in lineup_signatures or not check_overlap(selected_lineup, all_lineups, CONFIG["max_overlap"]):
            continue

        # Check for stacks BEFORE touching any shared state, so a rejected
        # lineup never consumes exposure or blocks its own signature.
        team_counts = defaultdict(int)
        for _, p in selected_lineup:
            team_counts[p.team] += 1

        stacks_found = sum(1 for count in team_counts.values() if count >= CONFIG["min_stack_size"])
        if stacks_found < CONFIG["min_stacks_required"]:
            continue

        # Lineup accepted: record it and update player usage
        lineup_signatures.add(signature)
        for _, p in selected_lineup:
            p.times_used += 1

        # Reset lineup attempts counter since we found a valid lineup
        lineup_attempts = 0

        total_salary = sum(p.salary for _, p in selected_lineup)
        total_fp = sum(p.projected_fp for _, p in selected_lineup)
        avg_value = sum(p.value for _, p in selected_lineup) / len(selected_lineup)

        # Print lineup details
        print(f"\n--- Lineup #{lineup_num} ---")
        print(f"Total Projected Points: {total_fp:.1f}")
        print(f"Salary Used: ${total_salary:,.0f}")
        print(f"Avg Value: {avg_value:.2f}")

        print("\nTeam Stacks:")
        for team, count in sorted(team_counts.items(), key=lambda item: item[1], reverse=True):
            if count > 1:
                print(f"{team}: {count} players")

        print("\nPlayers:")
        for pos, p in selected_lineup:
            starter_status = "S" if p.is_starter else "B"
            home_away = "H" if p.is_home else "A"
            print(f"  [{pos:<4}] {p.name:<22} {p.team} {starter_status}/{home_away} ({p.minutes:.1f} min) | {p.projected_fp:>5.1f} pts (${p.salary:,.0f}) | Value: {p.value:.2f}")

        all_lineups.append(selected_lineup)

    if len(all_lineups) < target_lineups:
        print(f"\nNote: Only generated {len(all_lineups)} of the requested {target_lineups} lineups after {attempts} attempts.")
    else:
        print(f"\nSuccessfully generated all {target_lineups} lineups in {attempts} attempts.")

    # Print exposure report
    print("\n" + "="*100)
    print("Player Exposure Report")
    print("="*100)
    print(f"{'Player Name':<22} {'Team':<5} {'Pos':<12} {'Salary':>8} {'Min':>5} {'Proj FP':>8} {'Value':>6} {'Used':>5} {'Exp %':>6}")
    print("-" * 100)

    used_players = [p for p in players if p.times_used > 0]
    for p in sorted(used_players, key=lambda player: player.times_used, reverse=True):
        # Guard the division: counters can be non-zero from an earlier run
        exposure_pct = (p.times_used / target_lineups) * 100 if target_lineups else 0.0
        pos_str = '/'.join([pos for pos in p.positions if pos in ['PG', 'SG', 'SF', 'PF', 'C']])
        print(f"{p.name:<22} {p.team:<5} {pos_str:<12} ${p.salary:>7,.0f} {p.minutes:>5.1f} {p.projected_fp:>8.1f} {p.value:>6.2f} {p.times_used:>5} {exposure_pct:>5.1f}%")

    # Print stacking stats
    stack_sizes = []
    for lineup in all_lineups:
        team_counts = defaultdict(int)
        for _, p in lineup:
            team_counts[p.team] += 1
        stacks = [count for count in team_counts.values() if count >= CONFIG["min_stack_size"]]
        stack_sizes.append(len(stacks))

    avg_stacks = sum(stack_sizes) / len(stack_sizes) if stack_sizes else 0
    print(f"\nStacking Statistics:")
    print(f"Average stacks per lineup: {avg_stacks:.2f}")
    print(f"Stack distribution: {dict(sorted([(size, stack_sizes.count(size)) for size in set(stack_sizes)]))}")

    return all_lineups

def main():
    """Interactive entry point: load projections, ask for exclusions, optimize, save.

    Reads the CSV at CONFIG["projections_path"], lists the teams found in its
    TEAM / OPPONENT columns, prompts for team numbers to exclude, runs the
    optimizer and writes the lineups to ``optimized_lineups.csv`` inside
    CONFIG["output_path"].
    """
    # Local import keeps this block self-contained; csv handles quoting of
    # names that contain commas or quotes, which hand-built rows would corrupt.
    import csv

    print("=" * 70)
    print("NBA DFS Lineup Optimizer")
    print("=" * 70)

    # Load projection data
    df = load_projection_data(CONFIG["projections_path"])
    if df.empty:
        logger.error("No data loaded. Exiting.")
        return

    # Show available teams
    # Create a combined set of teams (both team and opponent)
    all_teams = set()
    for column in ('TEAM', 'OPPONENT'):
        if column in df.columns:
            all_teams.update(df[column].dropna().unique())

    active_teams = sorted(t for t in all_teams if isinstance(t, str) and t.strip())

    print("\nAvailable teams with games today:")
    for i, team in enumerate(active_teams, 1):
        print(f"{i}. {team}")

    # Get teams to exclude
    exclude_input = input("\nEnter team numbers to exclude (comma-separated) or press Enter to skip: ").strip()
    excluded_teams = set()
    if exclude_input:
        try:
            # isdigit() also accepts characters such as superscripts that int()
            # rejects, hence the ValueError guard.
            excluded_indices = [int(x.strip()) - 1 for x in exclude_input.split(',') if x.strip().isdigit()]
            excluded_teams = {active_teams[i] for i in excluded_indices if 0 <= i < len(active_teams)}
        except ValueError as e:
            logger.error(f"Error processing excluded teams input: {e}")

    # Sorted so the echo is deterministic (set iteration order is not)
    print(f"\nExcluded Teams: {', '.join(sorted(excluded_teams)) if excluded_teams else 'None'}")

    # Create player objects and run optimizer
    players = create_player_objects(df, exclude_inactive=CONFIG["exclude_inactive"])

    if not players:
        logger.error("No valid players created. Check your CSV format and filters.")
        return

    lineups = run_lineup_optimizer(players, target_lineups=CONFIG["target_lineups"], excluded_teams=excluded_teams)

    # Save lineups to CSV
    if not lineups:
        print("\nNo valid lineups were generated.")
        return

    os.makedirs(CONFIG["output_path"], exist_ok=True)
    output_file = os.path.join(CONFIG["output_path"], "optimized_lineups.csv")

    # newline='' lets the csv module control line endings; '\n' matches the
    # previous hand-written output.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(["Lineup", "Position", "Player_ID", "Player_Name", "Team", "Opponent",
                         "Starter", "Home", "Minutes", "Salary", "Projected_FP", "Value"])
        for lineup_num, lineup in enumerate(lineups, 1):
            for pos, player in lineup:
                writer.writerow([
                    lineup_num, pos, player.id, player.name, player.team, player.opponent,
                    1 if player.is_starter else 0, 1 if player.is_home else 0,
                    f"{player.minutes:.1f}", player.salary,
                    f"{player.projected_fp:.1f}", f"{player.value:.2f}",
                ])

    print(f"\nLineups saved to {output_file}")

# Start the interactive optimizer only when this code runs as the main module,
# not when it is imported.
if __name__ == '__main__':
    main()



NBA DFS Lineup Optimizer

Available teams with games today:
1. ATL
2. BKN
3. CHI
4. GSW
5. IND
6. LAL
7. MIL
8. NYK
9. SAC
10. WAS

Excluded Teams: NYK, ATL, WAS, IND, BKN, GSW

Running NBA DFS Optimizer to generate 20 lineups
Salary cap: $50,000
Minimum lineup salary: $49,000
Filtering players with salary below $3,300
Zero salary handling: filter
Max exposure: 50.0%
Max overlap: 4 players between lineups
Team stacking: Min 2 players per team, 2 stacks required
Excluding teams: NYK, ATL, WAS, IND, BKN, GSW. 20 players remaining.

Generating lineup 1 of 20 (Attempt 1)

--- Lineup #1 ---
Total Projected Points: 268.5
Salary Used: $49,700
Avg Value: 5.40

Team Stacks:
MIL: 3 players
SAC: 2 players
CHI: 2 players

Players:
  [PG  ] Malik Monk             SAC S/A (35.9 min) |  41.6 pts ($8,100) | Value: 5.13
  [SG  ] Taurean Prince         MIL S/H (28.0 min) |  19.0 pts ($3,700) | Value: 5.15
  [SF  ] LeBron James           LAL S/A (34.2 min) |  54.6 pts ($10,500) | Value: 5.20
  [PF  ] Kyl

In [None]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import re
import unicodedata
from fuzzywuzzy import fuzz
from typing import Dict, List, Tuple
import logging
import os

# Configure logging
# NOTE(review): logging.basicConfig is a no-op once the root logger has handlers;
# an earlier cell in this notebook already calls it, so when both cells run in
# the same process this format (with %(name)s) presumably never applies — confirm.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('nba_projections')

# Path configuration (adjust paths as needed)
# Per-game historical player data read by load_and_match_historical_data
HISTORICAL_DATA_PATH = '/content/drive/My Drive/NBA_PerGame_Data/NBA_Per_Game_Player_Data.csv'
# NOTE(review): not referenced in the visible part of this cell — presumably the output CSV
OUTPUT_PATH = '/content/drive/My Drive/NBA_PerGame_Data/NBA_Player_Data_M.csv'
# Default page scraped by extract_rotogrinders_data
RG_URL = 'https://rotogrinders.com/lineups/nba'

def weighted_average(series: pd.Series, alpha: float = 0.75) -> float:
    """Compute an exponentially weighted average of a series.

    The last element gets weight 1, the one before it ``alpha``, then
    ``alpha**2`` and so on, so later entries count more when ``alpha < 1``.

    Args:
        series: Values ordered oldest to newest.
        alpha: Decay factor applied per step back from the last element.

    Returns:
        The weighted mean as a float, or NaN for an empty series (previously
        this raised ZeroDivisionError because the weights summed to zero).
    """
    n = len(series)
    if n == 0:
        return float('nan')
    # Exponents n-1, n-2, ..., 0 so the final observation has weight alpha**0 == 1
    weights = alpha ** np.arange(n - 1, -1, -1)
    return float(np.average(series, weights=weights))

def get_dynamic_min_starter_minutes(historical_df, lookback_games=30):
    """Return the 10th percentile of starter MINUTES over the most recent games.

    The window is the ``lookback_games`` most recent distinct GAME_ID values in
    the whole frame (ordered by GAME_DATE). Falls back to 15.0 when no starter
    rows (STARTER == 1) fall inside that window.
    """
    by_recency = historical_df.sort_values('GAME_DATE', ascending=False)
    window_ids = by_recency['GAME_ID'].unique()[:lookback_games]

    in_window = by_recency['GAME_ID'].isin(window_ids)
    is_starter = by_recency['STARTER'] == 1
    starter_rows = by_recency[in_window & is_starter]

    if starter_rows.empty:
        min_starter_minutes = 15.0
    else:
        min_starter_minutes = np.percentile(starter_rows['MINUTES'], 10)

    logger.info(f"Dynamic MIN_STARTER_MINUTES: {min_starter_minutes:.1f}")
    return min_starter_minutes

def get_dynamic_default_bench_minutes(historical_df, min_games=15):
    """Return the median MINUTES of bench rows for players with few bench games.

    Only rows with STARTER == 0 are considered. A player qualifies when they
    have fewer than ``min_games`` distinct GAME_ID values among those rows.
    Falls back to 10.0 when no such rows exist.
    """
    bench_rows = historical_df[historical_df['STARTER'] == 0]

    # Distinct bench games per player
    games_per_player = bench_rows.groupby('PLAYER_ID')['GAME_ID'].nunique()
    has_few_games = games_per_player < min_games
    sparse_ids = games_per_player[has_few_games].index

    sparse_rows = bench_rows[bench_rows['PLAYER_ID'].isin(sparse_ids)]
    if sparse_rows.empty:
        default_bench_minutes = 10.0
    else:
        default_bench_minutes = sparse_rows['MINUTES'].median()

    logger.info(f"Dynamic DEFAULT_BENCH_MINUTES: {default_bench_minutes:.1f}")
    return default_bench_minutes

def get_dynamic_max_player_minutes(historical_df, team=None, lookback_games=30):
    """Return the 95th percentile of starter MINUTES over recent games.

    Args:
        historical_df: Per-player game rows with GAME_DATE, GAME_ID, STARTER,
            MINUTES and TEAM columns.
        team: Optional team code. When given, the lookback window is that
            team's own most recent games.
        lookback_games: Number of most recent distinct GAME_ID values to use.

    Returns:
        The percentile, or 38.0 when no starter minutes are available.

    The team filter is applied before the window is chosen. Previously the
    window was always the league's most recent game IDs, so a single team
    contributed only the few games it happened to play in that span.
    Missing MINUTES values are ignored rather than turning the result into NaN.
    """
    pool = historical_df
    if team:
        pool = pool[pool['TEAM'] == team]
    pool = pool.sort_values('GAME_DATE', ascending=False)

    recent_ids = pool['GAME_ID'].unique()[:lookback_games]
    starters = pool[pool['GAME_ID'].isin(recent_ids) & (pool['STARTER'] == 1)]

    minutes = starters['MINUTES'].dropna()
    max_minutes = np.percentile(minutes, 95) if not minutes.empty else 38.0
    logger.info(f"Dynamic max_player_minutes for {team or 'league'}: {max_minutes:.1f}")
    return max_minutes

def get_dynamic_max_bench_minutes(historical_df, team=None, lookback_games=30):
    """Return the 90th percentile of bench MINUTES over recent games.

    Args:
        historical_df: Per-player game rows with GAME_DATE, GAME_ID, STARTER,
            MINUTES and TEAM columns.
        team: Optional team code. When given, the lookback window is that
            team's own most recent games.
        lookback_games: Number of most recent distinct GAME_ID values to use.

    Returns:
        The percentile, or 25.0 when no bench minutes are available.

    The team filter is applied before the window is chosen. Previously the
    window was always the league's most recent game IDs, so a single team
    contributed only the few games it happened to play in that span.
    Missing MINUTES values are ignored rather than turning the result into NaN.
    """
    pool = historical_df
    if team:
        pool = pool[pool['TEAM'] == team]
    pool = pool.sort_values('GAME_DATE', ascending=False)

    recent_ids = pool['GAME_ID'].unique()[:lookback_games]
    bench = pool[pool['GAME_ID'].isin(recent_ids) & (pool['STARTER'] == 0)]

    minutes = bench['MINUTES'].dropna()
    max_bench_minutes = np.percentile(minutes, 90) if not minutes.empty else 25.0
    logger.info(f"Dynamic MAX_BENCH_MINUTES for {team or 'league'}: {max_bench_minutes:.1f}")
    return max_bench_minutes

def get_dynamic_home_advantage(historical_df, stat='FP', lookback_days=60):
    """Return a ratio of two recent per-game averages of ``stat``; 1.03 as fallback.

    Rows from the last ``lookback_days`` days (relative to the newest
    GAME_DATE) are summed per (TEAM, GAME_ID) and per (OPPONENT, GAME_ID),
    averaged per team / per opponent, then averaged again. The result is the
    first average divided by the second, or 1.03 when the second is not
    positive.

    NOTE(review): neither grouping uses a home/away flag, so the numerator is
    "stat produced by a team" and the denominator "stat produced against an
    opponent" across all venues — confirm this is the intended measure of
    home advantage.
    """
    newest = historical_df['GAME_DATE'].max()
    window_start = newest - pd.Timedelta(days=lookback_days)
    window = historical_df[historical_df['GAME_DATE'] >= window_start]

    per_team_game = window.groupby(['TEAM', 'GAME_ID'])[stat].sum()
    home_performance = per_team_game.groupby('TEAM').mean()

    per_opp_game = window.groupby(['OPPONENT', 'GAME_ID'])[stat].sum()
    away_performance = per_opp_game.groupby('OPPONENT').mean()

    avg_home = home_performance.mean()
    avg_away = away_performance.mean()

    if avg_away > 0:
        home_advantage = avg_home / avg_away
    else:
        home_advantage = 1.03

    logger.info(f"Dynamic home_advantage ({stat}): {home_advantage:.3f}")
    return home_advantage

def _rg_parse_salary(salary_elem) -> float:
    """Parse a salary tag such as "$8.1K" into dollars; 0.0 when absent or unparseable."""
    if salary_elem is None:
        return 0.0
    text = salary_elem.text.strip()
    # Assume salaries with "K" represent thousands; anything else is treated as unknown
    if 'K' not in text:
        return 0.0
    try:
        return float(text.replace('$', '').replace('K', '')) * 1000
    except ValueError:
        # One malformed salary must not abort the whole scrape
        return 0.0


def _rg_team_abbr(nameplate, team_mapping):
    """Return the team code for a nameplate tag, 'UNK' if unmapped, None if city/mascot is missing."""
    city = nameplate.find('span', class_='team-nameplate-city')
    mascot = nameplate.find('span', class_='team-nameplate-mascot')
    if not city or not mascot:
        return None
    return team_mapping.get(f"{city.text.strip()} {mascot.text.strip()}", 'UNK')


def extract_rotogrinders_data(url: str = RG_URL) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Extract lineup data from RotoGrinders.

    Args:
        url: Lineups page to fetch.

    Returns:
        ``(players_df, matchups_df)``. ``players_df`` has one row per listed
        player (PLAYER_NAME, TEAM, POSITION, SALARY, IS_STARTER, STARTER_INDEX,
        OPPONENT, VALUE_STARTER, TEAM_SIZE, THIN_TEAM); ``matchups_df`` has
        HOME_TEAM / AWAY_TEAM columns. Both are empty DataFrames when the
        request fails, the page yields no players, or parsing raises.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124'}
    team_mapping = {
        'Atlanta Hawks': 'ATL', 'Boston Celtics': 'BOS', 'Brooklyn Nets': 'BKN', 'Charlotte Hornets': 'CHA',
        'Chicago Bulls': 'CHI', 'Cleveland Cavaliers': 'CLE', 'Dallas Mavericks': 'DAL', 'Denver Nuggets': 'DEN',
        'Detroit Pistons': 'DET', 'Golden State Warriors': 'GSW', 'Houston Rockets': 'HOU', 'Indiana Pacers': 'IND',
        'Los Angeles Clippers': 'LAC', 'Los Angeles Lakers': 'LAL', 'Memphis Grizzlies': 'MEM', 'Miami Heat': 'MIA',
        'Milwaukee Bucks': 'MIL', 'Minnesota Timberwolves': 'MIN', 'New Orleans Pelicans': 'NOP', 'New York Knicks': 'NYK',
        'Oklahoma City Thunder': 'OKC', 'Orlando Magic': 'ORL', 'Philadelphia 76ers': 'PHI', 'Phoenix Suns': 'PHX',
        'Portland Trail Blazers': 'POR', 'Sacramento Kings': 'SAC', 'San Antonio Spurs': 'SAS', 'Toronto Raptors': 'TOR',
        'Utah Jazz': 'UTA', 'Washington Wizards': 'WAS'
    }
    try:
        # Without a timeout requests can wait on a stalled server indefinitely
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        players, matchups = [], {}

        for game_card in soup.find_all('div', class_='module game-card'):
            teams = game_card.find_all('div', class_='team-nameplate')
            lineup_cards = game_card.find_all('div', class_='lineup-card')
            if len(teams) != 2 or len(lineup_cards) != 2:
                continue

            abbrs = [_rg_team_abbr(nameplate, team_mapping) for nameplate in teams]

            for idx, lineup_card in enumerate(lineup_cards):
                team_abbr = abbrs[idx]
                if team_abbr is None:
                    continue
                # A broken opponent nameplate degrades to 'UNK' instead of raising
                team_abbr_opp = abbrs[1 - idx] or 'UNK'

                # NOTE(review): assumes the first nameplate on a card is the home team — confirm
                is_home = (idx == 0)
                matchups[team_abbr] = {'opponent': team_abbr_opp, 'is_home': is_home}

                player_elems = lineup_card.find_all('li', class_='lineup-card-player')
                for player_idx, elem in enumerate(player_elems):
                    name_elem = elem.find('a', class_='player-nameplate-name')
                    salary_elem = elem.find('span', class_='player-nameplate-salary')
                    stats_elem = elem.find('span', class_='player-nameplate-stats')
                    pos_tag = stats_elem.find('span', class_='small muted') if stats_elem else None
                    players.append({
                        'PLAYER_NAME': name_elem.text.strip() if name_elem else 'Unknown',
                        'TEAM': team_abbr,
                        'POSITION': pos_tag.text.strip() if pos_tag else '',
                        'SALARY': _rg_parse_salary(salary_elem),
                        # The first five listed players are treated as starters
                        'IS_STARTER': int(player_idx < 5),
                        'STARTER_INDEX': player_idx,
                        'OPPONENT': team_abbr_opp
                    })

        if not players:
            # An empty frame has no columns; say so instead of failing on a KeyError below
            logger.warning("No players found on the RotoGrinders page")
            return pd.DataFrame(), pd.DataFrame()

        players_df = pd.DataFrame(players)
        players_df['VALUE_STARTER'] = (players_df['IS_STARTER'] == 1) & (players_df['SALARY'] < 4500)
        team_sizes = players_df.groupby('TEAM').size().rename("TEAM_SIZE")
        avg_team_size = team_sizes.mean()
        players_df = players_df.merge(team_sizes.reset_index(), on='TEAM')
        players_df['THIN_TEAM'] = players_df['TEAM_SIZE'] < avg_team_size

        # One row per game, taken from the home side's entry
        matchup_data = [{'HOME_TEAM': team, 'AWAY_TEAM': info['opponent']}
                        for team, info in matchups.items() if info['is_home']]
        matchups_df = pd.DataFrame(matchup_data).drop_duplicates()
        logger.info(f"Extracted {len(players_df)} players and {len(matchups_df)} matchups from RotoGrinders")
        return players_df, matchups_df
    except Exception as e:
        # Top-level boundary for the scrape: log and hand back empty frames
        logger.error(f"Failed to extract RotoGrinders data: {e}")
        return pd.DataFrame(), pd.DataFrame()

def standardize_name(name: str) -> str:
    """Reduce a player name to a lowercase ASCII key used for joining sources.

    Steps, in order: strip accents (NFKD decomposition, non-ASCII dropped),
    lowercase, delete every character that is not ``a-z`` or whitespace,
    delete generational suffix tokens (jr, sr, ii, iii, iv, v, junior,
    senior) and finally collapse runs of whitespace.

    Note that a suffix token is removed wherever it follows whitespace, not
    only at the end of the name (``"John V Smith"`` becomes ``"john smith"``).
    """
    ascii_text = unicodedata.normalize('NFKD', name).encode('ASCII', 'ignore').decode('ASCII')
    letters_only = re.sub(r'[^a-z\s]', '', ascii_text.lower().strip())
    without_suffix = re.sub(
        r'\s+(jr|sr|ii|iii|iv|v|junior|senior)\b\.?', '', letters_only, flags=re.IGNORECASE
    )
    return re.sub(r'\s+', ' ', without_suffix).strip()

def fuzzy_match(name: str, historical_names: list, threshold: int = 80) -> Optional[str]:
    """Return the historical name most similar to ``name``, or None.

    Both sides are passed through ``standardize_name`` before being compared
    with ``fuzz.ratio``. Ties go to the candidate that appears first in
    ``historical_names``.

    Args:
        name: Name to look up.
        historical_names: Candidate names (original spelling).
        threshold: Minimum ``fuzz.ratio`` score for a match to be accepted.

    Returns:
        The winning candidate in its original spelling, or ``None`` when the
        candidate list is empty, the winner is an empty string, or the best
        score is below ``threshold``.
    """
    std_name = standardize_name(name)
    best_match = None
    best_score = -1  # below any real ratio so the first candidate always wins initially
    for candidate in historical_names:
        # Score each candidate exactly once; the winner's score is reused below.
        score = fuzz.ratio(std_name, standardize_name(candidate))
        if score > best_score:  # strict '>' keeps the first of equally good candidates
            best_match, best_score = candidate, score
    if best_match and best_score >= threshold:
        logger.debug(f"Fuzzy matched '{name}' to '{best_match}'")
        return best_match
    return None

def load_and_match_historical_data(rotogrinders_df: pd.DataFrame, historical_path: str) -> pd.DataFrame:
    """Attach a historical PLAYER_ID to every scraped RotoGrinders player.

    Matching runs in passes, each one only touching rows that still lack an ID:
      1. exact join on standardized name + team,
      2. standardized name only (first historical row carrying that name),
      3. fuzzy name match via ``fuzzy_match``,
      4. a fresh placeholder ID (historical max + 1, + 2, ...) for the rest.

    Args:
        rotogrinders_df: Scraped players; must contain PLAYER_NAME and TEAM.
            The caller's frame is not modified.
        historical_path: CSV with at least GAME_DATE, PLAYER_ID, PLAYER_NAME
            and TEAM columns.

    Returns:
        The RotoGrinders rows plus STD_NAME, an integer PLAYER_ID and
        PLAYER_NAME_hist, de-duplicated on (PLAYER_ID, TEAM). If anything
        fails the error is logged and an empty DataFrame is returned.
    """
    try:
        historical_df = pd.read_csv(historical_path)
        historical_df['GAME_DATE'] = pd.to_datetime(historical_df['GAME_DATE'])
        historical_df['STD_NAME'] = historical_df['PLAYER_NAME'].apply(standardize_name)
        # assign() builds a new frame, so the caller's DataFrame does not
        # silently gain a STD_NAME column as a side effect of this call.
        rotogrinders_df = rotogrinders_df.assign(
            STD_NAME=rotogrinders_df['PLAYER_NAME'].apply(standardize_name)
        )
        rotogrinders_df = rotogrinders_df.drop_duplicates(subset=['STD_NAME', 'TEAM'], keep='first')

        # Pass 1: exact standardized-name + team join. The left merge resets the
        # index, so the row labels used below always refer to merged_df.
        merged_df = rotogrinders_df.merge(
            historical_df[['PLAYER_ID', 'PLAYER_NAME', 'TEAM', 'STD_NAME']].drop_duplicates(),
            on=['STD_NAME', 'TEAM'],
            how='left',
            suffixes=('', '_hist')
        )

        # Pass 2: same standardized name on any team (e.g. the player changed teams).
        unmatched = merged_df[merged_df['PLAYER_ID'].isna()].copy()
        if not unmatched.empty:
            logger.warning(f"Initial {len(unmatched)} players unmatched: {unmatched['PLAYER_NAME'].tolist()}")
            for i, row in unmatched.iterrows():
                name_matches = historical_df[historical_df['STD_NAME'] == row['STD_NAME']]
                if not name_matches.empty:
                    best_match = name_matches.iloc[0]
                    merged_df.loc[i, 'PLAYER_ID'] = best_match['PLAYER_ID']
                    merged_df.loc[i, 'PLAYER_NAME_hist'] = best_match['PLAYER_NAME']
                    logger.info(f"Matched {row['PLAYER_NAME']} ({row['TEAM']}) to {best_match['PLAYER_NAME']} ({best_match['TEAM']}) by name only")

        # Pass 3: fuzzy name match against every distinct historical name.
        still_unmatched = merged_df[merged_df['PLAYER_ID'].isna()]
        fuzzy_matches = 0
        if not still_unmatched.empty:
            historical_names = historical_df['PLAYER_NAME'].unique().tolist()
            for i, row in still_unmatched.iterrows():
                match = fuzzy_match(row['PLAYER_NAME'], historical_names)
                if match:
                    match_row = historical_df[historical_df['PLAYER_NAME'] == match].iloc[0]
                    merged_df.loc[i, 'PLAYER_ID'] = match_row['PLAYER_ID']
                    merged_df.loc[i, 'PLAYER_NAME_hist'] = match_row['PLAYER_NAME']
                    fuzzy_matches += 1

        # Pass 4: whoever is left gets a placeholder ID above every known ID.
        final_unmatched = merged_df[merged_df['PLAYER_ID'].isna()]
        if not final_unmatched.empty:
            logger.warning(f"After fuzzy matching, {len(final_unmatched)} players unmatched: {final_unmatched['PLAYER_NAME'].tolist()}")
            next_id = historical_df['PLAYER_ID'].max() + 1
            for i, row in final_unmatched.iterrows():
                merged_df.loc[i, 'PLAYER_ID'] = next_id
                merged_df.loc[i, 'PLAYER_NAME_hist'] = row['PLAYER_NAME']
                logger.info(f"Created placeholder ID {next_id} for unmatched player {row['PLAYER_NAME']} ({row['TEAM']})")
                next_id += 1

        # Every row has an ID by now, so the float column (NaN-capable) can become int.
        merged_df['PLAYER_ID'] = merged_df['PLAYER_ID'].astype(int)
        merged_df = merged_df.drop_duplicates(subset=['PLAYER_ID', 'TEAM'], keep='first')
        logger.info(f"Matched {len(merged_df)} players to historical data ({fuzzy_matches} via fuzzy matching)")
        return merged_df
    except Exception as e:
        # Deliberate best-effort boundary: callers receive an empty frame instead of an exception.
        logger.error(f"Failed to load/match historical data: {e}")
        return pd.DataFrame()

def load_historical_data(historical_path: str) -> pd.DataFrame:
    """Read the historical player CSV and parse its GAME_DATE column to datetimes."""
    raw = pd.read_csv(historical_path)
    # Assigning to an existing column keeps its position in the frame.
    return raw.assign(GAME_DATE=pd.to_datetime(raw['GAME_DATE']))

def get_team_average_points(team: str, historical_df: pd.DataFrame, lookback_days: int = 60) -> float:
    """Return a team's average points per game over the recent window.

    The window is the ``lookback_days`` ending at the latest GAME_DATE in
    ``historical_df``. Each game contributes the TEAM_PTS_SCORED value of the
    team's first row for that GAME_ID. When the team has no usable games in
    the window, the mean of the per-game mean TEAM_PTS_SCORED across all
    recent rows is returned instead.

    Args:
        team: Team code as stored in the TEAM column.
        historical_df: Player-game rows with GAME_DATE (datetime), TEAM,
            GAME_ID and TEAM_PTS_SCORED columns.
        lookback_days: Size of the recent window in days.
    """
    cutoff_date = historical_df['GAME_DATE'].max() - pd.Timedelta(days=lookback_days)
    recent_df = historical_df[historical_df['GAME_DATE'] >= cutoff_date]
    team_games = recent_df[recent_df['TEAM'] == team]

    # One pass instead of re-filtering the frame twice per game id: keep the
    # first row of every game. Rows without a GAME_ID are ignored.
    first_rows = team_games.dropna(subset=['GAME_ID']).drop_duplicates(subset='GAME_ID', keep='first')
    pts_per_game = first_rows['TEAM_PTS_SCORED'].tolist()
    if pts_per_game:
        return sum(pts_per_game) / len(pts_per_game)

    # No games for this team in the window: fall back to the recent league-wide level.
    return recent_df.groupby('GAME_ID')['TEAM_PTS_SCORED'].mean().mean()

def tune_team_projection_range(team: str, historical_df: pd.DataFrame, lookback_days: int = 60) -> Tuple[float, float]:
    """Return the 5th and 95th percentile of a team's recent per-game points.

    Per-game points are the first TEAM_PTS_SCORED value of each GAME_ID among
    the team's rows inside the lookback window. If the team has no rows in
    the window, the per-game mean TEAM_PTS_SCORED over the whole of
    ``historical_df`` (not just the window) is used instead.
    """
    window_start = historical_df['GAME_DATE'].max() - pd.Timedelta(days=lookback_days)
    is_team = historical_df['TEAM'] == team
    in_window = historical_df['GAME_DATE'] >= window_start
    team_df = historical_df[is_team & in_window]

    if team_df.empty:
        game_totals = historical_df.groupby('GAME_ID')['TEAM_PTS_SCORED'].mean()
    else:
        game_totals = team_df.groupby('GAME_ID')['TEAM_PTS_SCORED'].first()

    min_proj = np.percentile(game_totals, 5)
    max_proj = np.percentile(game_totals, 95)
    logger.info(f"Dynamic projection range for {team}: min {min_proj:.1f}, max {max_proj:.1f}")
    return min_proj, max_proj

def calculate_team_defense_factors(historical_df: pd.DataFrame, lookback_days: int = 60) -> Dict[str, float]:
    """Map each recent opponent to points allowed relative to the league level.

    For every team appearing in OPPONENT within the lookback window, the
    factor is the mean TEAM_PTS_SCORED of rows played against that team,
    divided by the league baseline (mean of per-game mean TEAM_PTS_SCORED).
    Every factor is 1.0 when the baseline is not a positive number.
    """
    window_start = historical_df['GAME_DATE'].max() - pd.Timedelta(days=lookback_days)
    recent_df = historical_df[historical_df['GAME_DATE'] >= window_start]
    league_avg_pts = recent_df.groupby('GAME_ID')['TEAM_PTS_SCORED'].mean().mean()
    baseline_usable = league_avg_pts > 0  # also False for NaN

    team_defense = {}
    for opponent in recent_df['OPPONENT'].unique():
        if baseline_usable:
            allowed = recent_df.loc[recent_df['OPPONENT'] == opponent, 'TEAM_PTS_SCORED'].mean()
            team_defense[opponent] = allowed / league_avg_pts
        else:
            team_defense[opponent] = 1.0

    logger.info(f"Calculated defense factors for {len(team_defense)} teams")
    return team_defense

def calculate_positional_defense(historical_df: pd.DataFrame, lookback_days: int = 60) -> Dict[str, Dict[str, float]]:
    """Build ``{opponent: {position: factor}}`` from rows in the lookback window.

    A factor is the mean PTS scored by a position against that opponent,
    divided by the mean PTS of that position across all recent rows. It is
    1.0 when the opponent faced nobody at that position or the position
    baseline is not a positive number.
    """
    window_start = historical_df['GAME_DATE'].max() - pd.Timedelta(days=lookback_days)
    recent_df = historical_df[historical_df['GAME_DATE'] >= window_start]
    positions = recent_df['POSITION'].unique()

    league_pts_by_pos = {}
    for pos in positions:
        league_pts_by_pos[pos] = recent_df.loc[recent_df['POSITION'] == pos, 'PTS'].mean()

    def _factor(opponent, pos):
        """Relative PTS allowed by ``opponent`` to ``pos``; 1.0 when undefined."""
        rows = recent_df[(recent_df['OPPONENT'] == opponent) & (recent_df['POSITION'] == pos)]
        baseline = league_pts_by_pos[pos]
        if rows.empty or not baseline > 0:
            return 1.0
        return rows['PTS'].mean() / baseline

    return {
        opponent: {pos: _factor(opponent, pos) for pos in positions}
        for opponent in recent_df['OPPONENT'].unique()
    }

def analyze_team_distribution_patterns(team: str, historical_df: pd.DataFrame, lookback_games: int = 30) -> Dict:
    """Summarise how a team spread minutes and fantasy points over recent games.

    The team's most recent ``lookback_games`` games (by GAME_DATE) are
    inspected one at a time. Per game, MINUTES and FP are totalled per
    position and per starter/bench group; the result holds the across-game
    averages of those totals and of the FP-per-minute ratios. Averages are 0
    when there is nothing to average.

    Returns:
        Dict with keys ``team``, ``games_analyzed``, ``position_minutes``,
        ``position_fp_per_min``, ``avg_starter_minutes``,
        ``avg_bench_minutes``, ``avg_starter_fp_per_min``,
        ``avg_bench_fp_per_min`` and ``total_rotation_minutes``.
    """
    position_keys = ('PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', '')

    def _avg(values):
        """Plain mean of a list, 0 for an empty list."""
        return sum(values) / len(values) if values else 0

    team_rows = historical_df[historical_df['TEAM'] == team]
    by_recency = team_rows.sort_values('GAME_DATE', ascending=False)
    recent_game_ids = by_recency['GAME_ID'].unique()[:lookback_games]

    pos_minutes = {key: [] for key in position_keys}
    pos_rates = {key: [] for key in position_keys}
    group_minutes = {1: [], 0: []}  # keyed by the STARTER flag
    group_rates = {1: [], 0: []}
    n_games = 0

    for gid in recent_game_ids:
        game_rows = team_rows[team_rows['GAME_ID'] == gid]
        if game_rows.empty:
            continue
        n_games += 1

        for key in position_keys:
            at_pos = game_rows[game_rows['POSITION'] == key]
            if at_pos.empty:
                continue
            minutes = at_pos['MINUTES'].sum()
            fantasy = at_pos['FP'].sum()
            pos_minutes[key].append(minutes)
            if minutes > 0:
                pos_rates[key].append(fantasy / minutes)

        for flag in (1, 0):
            group = game_rows[game_rows['STARTER'] == flag]
            minutes = group['MINUTES'].sum()
            group_minutes[flag].append(minutes)
            if minutes > 0:
                group_rates[flag].append(group['FP'].sum() / minutes)

    avg_starter_minutes = _avg(group_minutes[1])
    avg_bench_minutes = _avg(group_minutes[0])
    return {
        'team': team,
        'games_analyzed': n_games,
        'position_minutes': {key: _avg(vals) for key, vals in pos_minutes.items()},
        'position_fp_per_min': {key: _avg(vals) for key, vals in pos_rates.items()},
        'avg_starter_minutes': avg_starter_minutes,
        'avg_bench_minutes': avg_bench_minutes,
        'avg_starter_fp_per_min': _avg(group_rates[1]),
        'avg_bench_fp_per_min': _avg(group_rates[0]),
        'total_rotation_minutes': avg_starter_minutes + avg_bench_minutes,
    }

def get_player_production_rate(player_id: int, historical_df: pd.DataFrame, lookback_games: int = 30) -> Dict:
    """Return a player's per-minute FP/PTS/REB/AST rates from recent games.

    The player's ``lookback_games`` most recent rows are reduced with
    ``weighted_average`` (defined elsewhere in this file); each stat's
    weighted value is divided by the weighted MINUTES. All rates are 0.0 when
    the player has no rows or the weighted minutes are not positive.
    ``games_played`` is the number of rows that were considered.
    """
    rate_columns = (('fp_per_min', 'FP'), ('pts_per_min', 'PTS'), ('reb_per_min', 'REB'), ('ast_per_min', 'AST'))

    def _zero_rates(games_played):
        """Result used whenever no meaningful rate can be computed."""
        zeros = {rate_key: 0.0 for rate_key, _ in rate_columns}
        zeros['games_played'] = games_played
        return zeros

    history = historical_df[historical_df['PLAYER_ID'] == player_id].sort_values('GAME_DATE', ascending=False)
    if history.empty:
        return _zero_rates(0)

    recent = history.head(lookback_games)
    weighted_minutes = weighted_average(recent['MINUTES'])
    if weighted_minutes <= 0:
        return _zero_rates(len(recent))

    # Weighted stat totals divided by weighted minutes give per-minute rates.
    rates = {rate_key: weighted_average(recent[column]) / weighted_minutes for rate_key, column in rate_columns}
    rates['games_played'] = len(recent)
    return rates

def get_salary_based_fp_rate(salary: float, is_starter: bool) -> float:
    """
    Fallback fantasy-points-per-minute estimate chosen by salary tier.

    Tiers are checked from cheapest to most expensive; a salary at or above
    the last ceiling (or one that compares false against every ceiling, such
    as NaN) yields 1.0 regardless of role.
    (These thresholds are illustrative; adjust as needed.)
    """
    # (exclusive salary ceiling, bench rate, starter rate)
    tiers = (
        (3500, 0.75, 0.85),
        (6000, 0.85, 0.95),
    )
    for ceiling, bench_rate, starter_rate in tiers:
        if salary < ceiling:
            return starter_rate if is_starter else bench_rate
    return 1.0

def project_minutes_distribution(team: str, lineup_df: pd.DataFrame, historical_df: pd.DataFrame, min_starter_minutes: float, default_bench_minutes: float) -> pd.DataFrame:
    """Project minutes for all players in a lineup with dynamic caps and role-based adjustments.

    Args:
        team: Team code; only ``lineup_df`` rows with this TEAM are projected.
        lineup_df: Lineup rows; PLAYER_ID, TEAM, IS_STARTER and POSITION are read here.
        historical_df: Player-game history; PLAYER_ID, GAME_DATE and MINUTES are read here.
        min_starter_minutes: Floor applied to starters' projected minutes.
        default_bench_minutes: Fallback/ceiling for bench players without history.

    Returns:
        A copy of the team's lineup rows with a PROJ_MINUTES column added
        (starters first, then bench). If the team has no rows, the empty
        frame is returned as-is, without PROJ_MINUTES.

    The steps below are clip/rescale passes whose order matters; because caps
    are re-applied after the last rescale, the final total is not guaranteed
    to equal the 240-minute budget.
    """
    team_patterns = analyze_team_distribution_patterns(team, historical_df)
    # Per-team caps come from helpers defined elsewhere in this file.
    # NOTE(review): presumably per-player minute ceilings for any player / bench players - confirm.
    tuned_max_player_minutes = get_dynamic_max_player_minutes(historical_df, team)
    tuned_max_bench_minutes = get_dynamic_max_bench_minutes(historical_df, team)
    team_players = lineup_df[lineup_df['TEAM'] == team].copy()
    if team_players.empty:
        logger.warning(f"No players found for team {team} in lineup data")
        return team_players
    # Team minute budget the projections are normalised towards
    # (presumably 5 players x 48 regulation minutes - confirm).
    total_available_minutes = 240
    # Step 1: a raw estimate per player.
    for i, player in team_players.iterrows():
        player_id = player['PLAYER_ID']
        player_games = historical_df[historical_df['PLAYER_ID'] == player_id].sort_values('GAME_DATE', ascending=False)
        if not player_games.empty:
            # Player has history: weighted average of the 15 most recent games, capped.
            recent_games = player_games.head(15)
            avg_minutes = weighted_average(recent_games['MINUTES'])
            team_players.loc[i, 'RAW_MINUTES'] = min(avg_minutes, tuned_max_player_minutes)
        else:
            is_starter = bool(player['IS_STARTER'])
            position = player['POSITION']
            # Use team patterns for position if available, otherwise use default bench minutes or scaled starter minutes.
            # NOTE(review): analyze_team_distribution_patterns always returns the keys
            # PG/SG/SF/PF/C/G/F/'' (0 when no data), so the .get() fallbacks below only
            # apply to other position labels; a known position without data yields 0.
            if is_starter:
                base = team_patterns['position_minutes'].get(position, team_patterns['avg_starter_minutes'] / 5)
                team_players.loc[i, 'RAW_MINUTES'] = min(base * 0.8, tuned_max_player_minutes)
            else:
                base = team_patterns['position_minutes'].get(position, default_bench_minutes)
                team_players.loc[i, 'RAW_MINUTES'] = min(base * 0.2, default_bench_minutes)
    # Step 2: starters keep their (capped) raw minutes; the bench shares what is
    # left of the budget in proportion to raw minutes, then is capped.
    starters = team_players[team_players['IS_STARTER'] == 1].copy()
    bench = team_players[team_players['IS_STARTER'] == 0].copy()
    starters['CAPPED_MINUTES'] = starters['RAW_MINUTES'].clip(upper=tuned_max_player_minutes)
    starter_total = starters['CAPPED_MINUTES'].sum()
    remaining_minutes = total_available_minutes - starter_total
    bench_total_raw = bench['RAW_MINUTES'].sum()
    if bench_total_raw > 0 and not bench.empty:
        bench_scaling = remaining_minutes / bench_total_raw
        bench['FINAL_MINUTES'] = bench['RAW_MINUTES'] * bench_scaling
        bench['FINAL_MINUTES'] = bench['FINAL_MINUTES'].clip(upper=tuned_max_bench_minutes)
    else:
        # Nothing to scale (no bench rows, or all raw minutes are 0).
        bench['FINAL_MINUTES'] = bench['RAW_MINUTES']
    # Step 3: recombine (starters first) and rescale the whole roster onto the budget.
    team_players = pd.concat([starters.assign(PROJ_MINUTES=lambda df: df['CAPPED_MINUTES']),
                              bench.assign(PROJ_MINUTES=lambda df: df['FINAL_MINUTES'])])
    current_total = team_players['PROJ_MINUTES'].sum()
    if current_total != total_available_minutes and current_total > 0:
        scaling_factor = total_available_minutes / current_total
        team_players['PROJ_MINUTES'] *= scaling_factor
    # The rescale can push bench players back over their cap, so cap them again.
    bench_indices = team_players[team_players['IS_STARTER'] == 0].index
    team_players.loc[bench_indices, 'PROJ_MINUTES'] = team_players.loc[bench_indices, 'PROJ_MINUTES'].clip(upper=tuned_max_bench_minutes)
    # Step 4: lift starters below the floor and take those minutes from the bench
    # in proportion to each bench player's current minutes.
    starters_low = team_players[(team_players['IS_STARTER'] == 1) & (team_players['PROJ_MINUTES'] < min_starter_minutes)]
    if not starters_low.empty:
        total_increase_needed = (min_starter_minutes - starters_low['PROJ_MINUTES']).sum()
        team_players.loc[starters_low.index, 'PROJ_MINUTES'] = min_starter_minutes
        bench_indices = team_players[team_players['IS_STARTER'] == 0].index
        total_bench_minutes = team_players.loc[bench_indices, 'PROJ_MINUTES'].sum()
        if total_bench_minutes > 0:
            adjustment = team_players.loc[bench_indices, 'PROJ_MINUTES'] / total_bench_minutes * total_increase_needed
            team_players.loc[bench_indices, 'PROJ_MINUTES'] -= adjustment
            # NOTE(review): only an upper clip is applied; if total_increase_needed exceeds
            # total_bench_minutes the bench values become negative here.
            team_players.loc[bench_indices, 'PROJ_MINUTES'] = team_players.loc[bench_indices, 'PROJ_MINUTES'].clip(upper=tuned_max_bench_minutes)
        current_total = team_players['PROJ_MINUTES'].sum()
        if current_total != total_available_minutes and current_total > 0:
            scaling_factor = total_available_minutes / current_total
            team_players['PROJ_MINUTES'] *= scaling_factor
            # Rescaling may have pulled starters under the floor again; restore it.
            team_players.loc[team_players['IS_STARTER'] == 1, 'PROJ_MINUTES'] = team_players.loc[team_players['IS_STARTER'] == 1, 'PROJ_MINUTES'].apply(lambda x: max(x, min_starter_minutes))
    # Log redistribution details for debugging
    logger.debug(f"After redistribution for team {team}: total minutes = {team_players['PROJ_MINUTES'].sum():.1f}")
    # Step 5: final hard caps per role (applied last, so they always hold).
    team_players.loc[team_players['IS_STARTER'] == 1, 'PROJ_MINUTES'] = team_players.loc[team_players['IS_STARTER'] == 1, 'PROJ_MINUTES'].clip(upper=tuned_max_player_minutes)
    team_players.loc[team_players['IS_STARTER'] == 0, 'PROJ_MINUTES'] = team_players.loc[team_players['IS_STARTER'] == 0, 'PROJ_MINUTES'].clip(upper=tuned_max_bench_minutes)
    # Working columns are internal to this function.
    team_players.drop(columns=['RAW_MINUTES', 'CAPPED_MINUTES', 'FINAL_MINUTES'], errors='ignore', inplace=True)
    logger.debug(f"Team {team} minutes distributed: Total = {team_players['PROJ_MINUTES'].sum():.1f}")
    return team_players

def project_player_minute_based(
    player_id: int,
    player_name: str,
    team: str,
    position: str,
    is_starter: bool,
    opponent: str,
    projected_minutes: float,
    historical_df: pd.DataFrame,
    defense_factors: Dict[str, float],
    positional_defense: Dict[str, Dict[str, float]],
    team_patterns: Dict,
    salary: float,
    is_home: bool,
    home_advantage: float
) -> Dict:
    """Turn projected minutes into projected PTS/REB/AST/FP for one player.

    Per-minute rates come from ``get_player_production_rate``. When the
    player has no games on record the projection is flagged ``ESTIMATED``:
    FP/min is taken from the team's positional average, or from
    ``get_salary_based_fp_rate`` when that average is missing or not
    positive, and PTS/REB/AST rates are fixed shares (0.6/0.2/0.1) of it.

    PTS and FP are multiplied by team defense x positional defense x home
    factor; REB and AST only by the team defense factor. Missing opponents
    or positions in the factor tables count as 1.0.
    """
    production_rates = get_player_production_rate(player_id, historical_df)
    team_defense_factor = defense_factors.get(opponent, 1.0)
    pos_defense_factor = positional_defense.get(opponent, {}).get(position, 1.0)
    home_factor = home_advantage if is_home else 1.0
    combined_factor = team_defense_factor * pos_defense_factor * home_factor

    estimated = not production_rates['games_played'] > 0
    if estimated:
        # No history: start from the team's positional average, then the salary tier.
        games_used = 0
        fp_per_min = team_patterns['position_fp_per_min'].get(position, None)
        if fp_per_min is None or fp_per_min <= 0:
            # Fallback based on salary tier (this is an illustrative approach)
            fp_per_min = get_salary_based_fp_rate(salary, is_starter)
        pts_per_min = fp_per_min * 0.6
        reb_per_min = fp_per_min * 0.2
        ast_per_min = fp_per_min * 0.1
    else:
        games_used = production_rates['games_played']
        fp_per_min = production_rates['fp_per_min']
        pts_per_min = production_rates['pts_per_min']
        reb_per_min = production_rates['reb_per_min']
        ast_per_min = production_rates['ast_per_min']
        if not is_starter and fp_per_min > 1.0:
            # Bench players above 1.0 FP/min are damped.
            # NOTE(review): the component rates are scaled as if the cap were 1.0
            # while FP/min itself is set to 0.95 - confirm whether the scaling
            # factor was meant to be 0.95 / fp_per_min.
            scaling_factor = 1.0 / fp_per_min
            fp_per_min = 0.95
            pts_per_min = pts_per_min * scaling_factor
            reb_per_min = reb_per_min * scaling_factor
            ast_per_min = ast_per_min * scaling_factor

    return {
        'PLAYER_ID': player_id,
        'PLAYER_NAME': player_name,
        'TEAM': team,
        'POSITION': position,
        'OPPONENT': opponent,
        'IS_STARTER': is_starter,
        'IS_HOME': is_home,
        'PROJ_MINUTES': projected_minutes,
        'PROJ_PTS': pts_per_min * projected_minutes * combined_factor,
        'PROJ_REB': reb_per_min * projected_minutes * team_defense_factor,
        'PROJ_AST': ast_per_min * projected_minutes * team_defense_factor,
        'PROJ_FP': fp_per_min * projected_minutes * combined_factor,
        'GAMES_USED': games_used,
        'DEFENSE_FACTOR': team_defense_factor,
        'POS_DEFENSE_FACTOR': pos_defense_factor,
        'HOME_FACTOR': home_factor,
        'ESTIMATED': estimated,
        'FP_PER_MIN': fp_per_min,
        'SALARY': salary,
    }

def get_dynamic_default_starter_pct(historical_df, lookback_games=30):
    """Return the median share (in percent) of team PTS scored by starters.

    The share is computed once per team per game over the ``lookback_games``
    most recent GAME_IDs (by GAME_DATE); team-games with no points are
    skipped. Falls back to 65.0 when no share could be computed.
    """
    by_recency = historical_df.sort_values('GAME_DATE', ascending=False)
    latest_game_ids = by_recency['GAME_ID'].unique()[:lookback_games]
    window = by_recency[by_recency['GAME_ID'].isin(latest_game_ids)]

    starter_pcts = []
    # One group per (game, team); the median below does not depend on group order.
    for _, team_game in window.groupby(['GAME_ID', 'TEAM'], sort=False):
        total_pts = team_game['PTS'].sum()
        if total_pts > 0:
            starter_pts = team_game.loc[team_game['STARTER'] == 1, 'PTS'].sum()
            starter_pcts.append((starter_pts / total_pts) * 100)

    default_pct = np.median(starter_pcts) if starter_pcts else 65.0
    logger.info(f"Dynamic default starter scoring percentage: {default_pct:.1f}%")
    return default_pct

def analyze_team_starter_scoring_patterns(team: str, historical_df: pd.DataFrame, default_pct: float, lookback_games: int = 30) -> float:
    """Return the minimum starter share of team PTS (percent) to enforce for a team.

    Starter shares are computed for the team's ``lookback_games`` most recent
    games and averaged with weights 0.75**i (i = 0 for the most recent game).
    The result is that average minus 5 points, but never below 50.0. When no
    share can be computed, ``default_pct`` is returned unchanged.
    """
    team_rows = historical_df[historical_df['TEAM'] == team]
    recent_game_ids = team_rows.sort_values('GAME_DATE', ascending=False)['GAME_ID'].unique()[:lookback_games]

    starter_percentages = []
    for gid in recent_game_ids:
        game_rows = team_rows[team_rows['GAME_ID'] == gid]
        if game_rows.empty:
            continue
        total_team_pts = game_rows['PTS'].sum()
        if total_team_pts > 0:
            starter_pts = game_rows.loc[game_rows['STARTER'] == 1, 'PTS'].sum()
            starter_percentages.append((starter_pts / total_team_pts) * 100)

    if not starter_percentages:
        logger.warning(f"No historical starter percentage data for {team}, using default {default_pct:.1f}%")
        return default_pct

    # Most recent game first, so it carries the largest weight.
    weights = np.array([0.75 ** i for i in range(len(starter_percentages))])
    weighted_avg = np.average(starter_percentages, weights=weights)
    team_specific_min = max(50.0, weighted_avg - 5.0)
    logger.info(f"Team {team} historical starter scoring: {weighted_avg:.1f}%, using {team_specific_min:.1f}% as minimum")
    return team_specific_min

def get_league_starter_scoring_patterns(historical_df: pd.DataFrame, default_pct: float) -> Dict[str, float]:
    """Return the starter-scoring minimum for every team.

    Teams present in ``historical_df`` get the value from
    ``analyze_team_starter_scoring_patterns``. Any of the 30 hard-coded team
    codes that is missing receives the average of the computed values
    (``default_pct`` if nothing was computed).
    """
    all_teams = ['ATL', 'BOS', 'BKN', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW', 'HOU', 'IND', 'LAC', 'LAL', 'MEM',
                 'MIA', 'MIL', 'MIN', 'NOP', 'NYK', 'OKC', 'ORL', 'PHI', 'PHX', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS']

    team_patterns = {}
    for team in historical_df['TEAM'].unique():
        team_patterns[team] = analyze_team_starter_scoring_patterns(team, historical_df, default_pct)

    # The average is taken before any gaps are filled, so fillers do not affect it.
    if team_patterns:
        league_avg = sum(team_patterns.values()) / len(team_patterns)
    else:
        league_avg = default_pct

    missing_teams = [team for team in all_teams if team not in team_patterns]
    for team in missing_teams:
        team_patterns[team] = league_avg
        logger.warning(f"No data for {team}, using league average: {league_avg:.1f}%")
    return team_patterns

def project_team_minute_based_dynamic(
    team: str,
    opponent: str,
    lineup_df: pd.DataFrame,
    historical_df: pd.DataFrame,
    defense_factors: Dict[str, float],
    positional_defense: Dict[str, Dict[str, float]],
    team_starter_patterns: Dict[str, float],
    is_home: bool,
    min_starter_minutes: float,
    default_bench_minutes: float,
    home_advantage: float
) -> Tuple[float, float, List[Dict]]:
    """Project one team's points, fantasy points and per-player lines.

    Steps:
      1. distribute minutes and project each player,
      2. if the summed PTS fall outside the team's tuned range, scale every
         player's PROJ_PTS/PROJ_FP towards the midpoint between the violated
         bound and the team's recent average,
      3. if starters hold less than the team's minimum share of PTS, boost
         starters and shrink the bench (bench factor never below 0.5).

    Only PROJ_PTS and PROJ_FP are rescaled; PROJ_REB and PROJ_AST stay as
    first projected.

    Returns:
        ``(team_total_pts, team_total_fp, player_projections)``; ``(0.0, 0.0, [])``
        when the team has no players in the lineup.
    """
    team_patterns = analyze_team_distribution_patterns(team, historical_df)
    roster = project_minutes_distribution(team, lineup_df, historical_df, min_starter_minutes, default_bench_minutes)
    if roster.empty:
        logger.warning(f"No players found for team {team}!")
        return 0.0, 0.0, []

    player_projections = [
        project_player_minute_based(
            player_id=player['PLAYER_ID'], player_name=player['PLAYER_NAME'], team=team, position=player['POSITION'],
            is_starter=bool(player['IS_STARTER']), opponent=opponent, projected_minutes=player['PROJ_MINUTES'],
            historical_df=historical_df, defense_factors=defense_factors, positional_defense=positional_defense,
            team_patterns=team_patterns, salary=player['SALARY'], is_home=is_home, home_advantage=home_advantage
        )
        for _, player in roster.iterrows()
    ]

    team_total_pts = 0
    team_total_fp = 0
    for proj in player_projections:
        team_total_pts += proj['PROJ_PTS']
        team_total_fp += proj['PROJ_FP']

    def _rescale(starter_factor, bench_factor):
        """Scale PROJ_PTS/PROJ_FP in place by role and return the new team totals."""
        pts_total = 0
        fp_total = 0
        for proj in player_projections:
            factor = starter_factor if proj['IS_STARTER'] else bench_factor
            proj['PROJ_PTS'] *= factor
            proj['PROJ_FP'] *= factor
            pts_total += proj['PROJ_PTS']
            fp_total += proj['PROJ_FP']
        return pts_total, fp_total

    min_proj, max_proj = tune_team_projection_range(team, historical_df)
    historical_team_avg = get_team_average_points(team, historical_df)
    # Scale team totals to align with historical and league expectations.
    too_low = team_total_pts < min_proj
    too_high = team_total_pts > max_proj
    if too_low or too_high:
        violated_bound = min_proj if too_low else max_proj
        target_pts = (violated_bound + historical_team_avg) / 2
        logger.info(f"Adjusting {team} projection from {team_total_pts:.1f} to {target_pts:.1f}")
        adjustment_factor = target_pts / team_total_pts if team_total_pts > 0 else 1.0
        team_total_pts, team_total_fp = _rescale(adjustment_factor, adjustment_factor)

    team_min_starter_pct = team_starter_patterns.get(team, 65.0)
    starters = [p for p in player_projections if p['IS_STARTER']]
    bench = [p for p in player_projections if not p['IS_STARTER']]
    starter_pts = sum(p['PROJ_PTS'] for p in starters)
    starter_pct = (starter_pts / team_total_pts * 100) if team_total_pts > 0 else 0
    if starter_pct < team_min_starter_pct and team_total_pts > 0:
        logger.warning(f"Starters for {team} only projected for {starter_pct:.1f}% of points (below {team_min_starter_pct:.1f}%). Redistributing.")
        target_starter_pts = team_total_pts * (team_min_starter_pct / 100)
        extra_pts_needed = target_starter_pts - starter_pts
        bench_pts = sum(p['PROJ_PTS'] for p in bench)
        if bench_pts > 0 and bench and starters:
            # The bench gives up what the starters need, but never more than half its points.
            bench_reduction_factor = max(0.5, (bench_pts - extra_pts_needed) / bench_pts)
            starter_boost_factor = target_starter_pts / starter_pts if starter_pts > 0 else 1.0
            team_total_pts, team_total_fp = _rescale(starter_boost_factor, bench_reduction_factor)
    return team_total_pts, team_total_fp, player_projections

def project_all_games_dynamic(
    lineup_df: pd.DataFrame,
    matchups_df: pd.DataFrame,
    historical_df: pd.DataFrame,
    team_starter_patterns: Dict[str, float],
    min_starter_minutes: float,
    default_bench_minutes: float,
    home_advantage: float
) -> Dict:
    """Project every matchup of the slate.

    Defense factors are computed once and shared by all games. Each row of
    ``matchups_df`` (HOME_TEAM / AWAY_TEAM) yields one entry keyed
    ``"AWAY@HOME"`` holding both teams' projected points, fantasy points,
    the game total, the spread (home minus away) and the player projections.
    """
    # Arguments that are identical for every team projection on the slate.
    shared_kwargs = dict(
        lineup_df=lineup_df,
        historical_df=historical_df,
        defense_factors=calculate_team_defense_factors(historical_df),
        positional_defense=calculate_positional_defense(historical_df),
        team_starter_patterns=team_starter_patterns,
        min_starter_minutes=min_starter_minutes,
        default_bench_minutes=default_bench_minutes,
        home_advantage=home_advantage,
    )

    game_projections = {}
    for _, matchup in matchups_df.iterrows():
        home_team, away_team = matchup['HOME_TEAM'], matchup['AWAY_TEAM']
        home_total_pts, home_total_fp, home_players = project_team_minute_based_dynamic(
            team=home_team, opponent=away_team, is_home=True, **shared_kwargs
        )
        away_total_pts, away_total_fp, away_players = project_team_minute_based_dynamic(
            team=away_team, opponent=home_team, is_home=False, **shared_kwargs
        )
        game_projections[f"{away_team}@{home_team}"] = {
            'HOME_TEAM': home_team,
            'AWAY_TEAM': away_team,
            'PROJ_HOME_PTS': home_total_pts,
            'PROJ_AWAY_PTS': away_total_pts,
            'PROJ_HOME_FP': home_total_fp,
            'PROJ_AWAY_FP': away_total_fp,
            'PROJ_TOTAL': home_total_pts + away_total_pts,
            'PROJ_SPREAD': home_total_pts - away_total_pts,
            'HOME_PLAYERS': home_players,
            'AWAY_PLAYERS': away_players,
        }
    return game_projections

def main():
    """Run the projection pipeline end to end and report the results.

    Loads the historical data, derives the dynamic thresholds from it, pulls
    the slate's players and matchups, projects every game, writes the game
    and player projections to CSV files derived from ``OUTPUT_PATH`` and
    prints console summaries.

    Returns early (after logging an error) when the historical data file is
    missing.  Any exception raised during the run is logged with its
    traceback instead of being re-raised.
    """
    logger.info("Starting NBA projection system with dynamic variables")
    if not os.path.exists(HISTORICAL_DATA_PATH):
        logger.error(f"Historical data file not found: {HISTORICAL_DATA_PATH}")
        return

    # Horizontal rules reused by the console reports below.
    heavy_rule = "=" * 100
    light_rule = "-" * 100

    try:
        historical_df = load_historical_data(HISTORICAL_DATA_PATH)
        logger.info(f"Loaded {len(historical_df)} historical game records")

        # Thresholds derived from the historical data
        starter_minutes_floor = get_dynamic_min_starter_minutes(historical_df)
        bench_minutes_default = get_dynamic_default_bench_minutes(historical_df)
        home_edge = get_dynamic_home_advantage(historical_df)
        starter_pct_default = get_dynamic_default_starter_pct(historical_df)

        logger.info(f"Extracting lineup data from {RG_URL}")
        rg_players, rg_matchups = extract_rotogrinders_data(RG_URL)
        logger.info(f"Extracted {len(rg_players)} players and {len(rg_matchups)} matchups")

        logger.info(f"Loading and matching historical data from {HISTORICAL_DATA_PATH}")
        matched_players = load_and_match_historical_data(rg_players, HISTORICAL_DATA_PATH)
        logger.info(f"Matched {len(matched_players)} players to historical data")

        team_starter_patterns = get_league_starter_scoring_patterns(historical_df, starter_pct_default)
        logger.info("Generating projections for all games with dynamic thresholds")

        projections = project_all_games_dynamic(
            matched_players, rg_matchups, historical_df, team_starter_patterns,
            starter_minutes_floor, bench_minutes_default, home_edge
        )

        # Flatten the per-game dictionaries into one row per game and one
        # row per player.
        summary_keys = ('HOME_TEAM', 'AWAY_TEAM', 'PROJ_HOME_PTS',
                        'PROJ_AWAY_PTS', 'PROJ_TOTAL', 'PROJ_SPREAD')
        game_rows = []
        player_rows = []
        for game_id, game in projections.items():
            game_row = {'GAME_ID': game_id}
            for key in summary_keys:
                game_row[key] = game[key]
            game_rows.append(game_row)
            player_rows.extend(game['HOME_PLAYERS'] + game['AWAY_PLAYERS'])

        game_df = pd.DataFrame(game_rows)
        player_df = pd.DataFrame(player_rows)

        # Empty placeholder columns written out alongside the projections.
        for placeholder in ('VEGAS_TOTAL', 'VEGAS_SPREAD', 'TOTAL_DIFF', 'SPREAD_DIFF'):
            game_df[placeholder] = None
        game_df = game_df.sort_values('PROJ_TOTAL', ascending=False)

        game_output = OUTPUT_PATH.replace('.csv', '_game_projections.csv')
        player_output = OUTPUT_PATH.replace('.csv', '_player_projections.csv')
        game_df.to_csv(game_output, index=False)
        player_df.to_csv(player_output, index=False)
        logger.info(f"Game projections saved to: {game_output}")
        logger.info(f"Player projections saved to: {player_output}")

        # ---- Console report 1: one line per game ----
        print("\nGame Projections Summary (Sorted by Total):")
        print(heavy_rule)
        print(f"{'Away Team':<7} @ {'Home Team':<7}: {'Away':<6}-{'Home':<6} {'Total':<7} {'Spread':<7}")
        print(light_rule)
        for _, g in game_df.iterrows():
            print(f"{g['AWAY_TEAM']:<7} @ {g['HOME_TEAM']:<7}: {g['PROJ_AWAY_PTS']:>6.1f}-{g['PROJ_HOME_PTS']:>6.1f} {g['PROJ_TOTAL']:>7.1f} {g['PROJ_SPREAD']:>7.1f}")

        # ---- Console report 2: projected minutes per team ----
        print("\nMinutes Distribution by Team:")
        print(heavy_rule)
        team_codes = sorted(player_df['TEAM'].unique())
        for team in team_codes:
            roster = player_df[player_df['TEAM'] == team]
            roster = roster.sort_values(['IS_STARTER', 'PROJ_MINUTES'], ascending=[False, False])
            team_minutes = roster['PROJ_MINUTES'].sum()
            print(f"\n{team} Minutes Distribution (Total: {team_minutes:.1f})")
            print("-" * 60)
            for _, p in roster.iterrows():
                if p['IS_STARTER']:
                    starter_status = "S"
                else:
                    starter_status = "B"
                print(f"{p['PLAYER_NAME']:<25} {p['POSITION']:<3} {starter_status}: {p['PROJ_MINUTES']:>5.1f} min | Salary: ${p['SALARY']:,.0f}")

        # ---- Console report 3: starters and bench per team ----
        def print_role_rows(frame, role):
            # One formatted line per player, labelled with the given role.
            for _, p in frame.iterrows():
                print(f"{p['PLAYER_NAME']:<25} {role:<8} {p['POSITION']:<3} {p['PROJ_MINUTES']:>5.1f} {p['PROJ_PTS']:>5.1f} {p['PROJ_FP']:>6.1f} {p['FP_PER_MIN']:>7.2f} ${p['SALARY']:>7,.0f}")

        print("\nComplete Team Projections (Starters and Bench):")
        print(heavy_rule)
        for team in team_codes:
            roster = player_df[player_df['TEAM'] == team]
            roster = roster.sort_values(['IS_STARTER', 'PROJ_PTS'], ascending=[False, False])
            starters = roster[roster['IS_STARTER'] == 1]
            bench = roster[roster['IS_STARTER'] == 0]
            starters_total = starters['PROJ_PTS'].sum()
            bench_total = bench['PROJ_PTS'].sum()
            team_total = starters_total + bench_total
            if team_total > 0:
                bench_pct = bench_total / team_total * 100
            else:
                bench_pct = 0
            print(f"\n{team} Projection: {team_total:.1f} pts (Starters: {starters_total:.1f}, Bench: {bench_total:.1f}, {bench_pct:.1f}% bench)")
            print("-" * 70)
            print(f"{'Player':<25} {'Role':<8} {'Pos':<3} {'Min':>5} {'Pts':>5} {'FP':>6} {'FP/Min':>7} {'Salary':>8}")
            print("-" * 70)
            print_role_rows(starters, 'Starter')
            if not bench.empty:
                print(f"--- Bench Players ({bench_total:.1f} pts) ---")
                print_role_rows(bench, 'Bench')

        # ---- Console report 4: top players by projected points ----
        print("\nTop 50 Overall Player Projections:")
        print(heavy_rule)
        top_players = player_df.sort_values('PROJ_PTS', ascending=False).head(50)
        print(f"{'Player Name':<25} {'Team':<5} {'Pos':<3} {'vs':<3} {'Opp':<5}: {'Min':>5} {'Pts':>5} {'FP':>6} {'FP/Min':>7} {'Salary':>8}")
        print(light_rule)
        for _, p in top_players.iterrows():
            print(f"{p['PLAYER_NAME']:<25} {p['TEAM']:<5} {p['POSITION']:<3} vs {p['OPPONENT']:<5}: {p['PROJ_MINUTES']:>5.1f} {p['PROJ_PTS']:>5.1f} {p['PROJ_FP']:>6.1f} {p['FP_PER_MIN']:>7.2f} ${p['SALARY']:>7,.0f}")
    except Exception as e:
        logger.error(f"Error running projection system: {e}", exc_info=True)

# Run the projection pipeline only when this code executes as the top-level
# module (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()




Game Projections Summary (Sorted by Total):
Away Team @ Home Team: Away  -Home   Total   Spread 
----------------------------------------------------------------------------------------------------
SAC     @ MIL    :  124.1- 112.7   236.8   -11.4
ATL     @ GSW    :  107.1- 112.1   219.2     5.1
LAL     @ CHI    :  117.2-  99.1   216.3   -18.1
NYK     @ WAS    :  105.4-  96.7   202.1    -8.6
IND     @ BKN    :  107.3-  94.5   201.8   -12.8

Minutes Distribution by Team:

ATL Minutes Distribution (Total: 240.0)
------------------------------------------------------------
Trae Young                PG  S:  29.6 min | Salary: $9,900
Dyson Daniels             PG/SG S:  29.6 min | Salary: $7,000
Onyeka Okongwu            C   S:  29.6 min | Salary: $7,600
Zaccharie Risacher        SF/PF S:  27.7 min | Salary: $4,600
Mouhamed Gueye            PF/C S:  17.9 min | Salary: $3,400
Vit Krejci                SG/SF B:  24.4 min | Salary: $4,200
Georges Niang             PF  B:  22.9 min | Salary: $4,

In [None]:
import pandas as pd
import numpy as np
from nba_api.live.nba.endpoints import boxscore
from nba_api.stats.endpoints import TeamGameLog, CommonPlayerInfo
from nba_api.stats.static import teams
import time
import random
import json
from datetime import datetime
import os
from google.colab import drive
import warnings
# Silence warnings whose message starts with "Could not infer format"
# (presumably the pandas date-parsing warning triggered by pd.to_datetime
# below; TODO confirm).
warnings.filterwarnings("ignore", message="Could not infer format")

# Mount Google Drive (for Colab)
# NOTE(review): the mount call is commented out, yet the analyzer below writes
# under "/content/drive/My Drive"; confirm Drive is mounted by another cell.
#drive.mount('/content/drive')

class SimpleLineupAnalyzer:
    """Build a per-player, per-game dataset from NBA box scores.

    For every team the most recent ``games_back`` regular-season games are
    requested through ``nba_api``.  Each player who logged at least one
    minute in a game becomes one row holding box-score statistics, a fantasy
    point total and the player's position.  Positions are looked up once per
    player and cached in a JSON file inside ``data_folder``.
    """

    def __init__(self, games_back=30):
        """Set up scoring weights, the data folder and the position cache.

        Args:
            games_back: Number of most recent games to collect per team.
        """
        self.current_season = "2024-25"
        self.games_back = games_back
        # Fantasy scoring weights
        self.scoring = {
            'PTS': 1.0,
            'FG3M': 0.5,
            'REB': 1.25,
            'AST': 1.5,
            'STL': 2.0,
            'BLK': 2.0,
            'TOV': -0.5
        }

        # Initialize data folders (exist_ok avoids a check-then-create race)
        self.data_folder = "/content/drive/My Drive/NBA_PerGame_Data"
        os.makedirs(self.data_folder, exist_ok=True)

        # Initialize position cache; any problem reading it falls back to an
        # empty cache so collection can still proceed.
        self.position_cache_file = os.path.join(self.data_folder, 'player_positions.json')
        try:
            if os.path.exists(self.position_cache_file):
                with open(self.position_cache_file, 'r') as f:
                    self.position_cache = json.load(f)
                print(f"Loaded position cache with {len(self.position_cache)} players")
            else:
                self.position_cache = {}
                print("Created new position cache")
        except Exception as e:
            print(f"Error loading position cache: {e}")
            self.position_cache = {}

        print(f"Initialized for season {self.current_season} with last {self.games_back} games.")

    def add_delay(self, min_seconds=1.0, max_seconds=2.0):
        """Add a random delay to avoid API rate limiting"""
        time.sleep(random.uniform(min_seconds, max_seconds))

    def save_position_cache(self):
        """Save the position cache to disk; failures are printed, not raised."""
        try:
            with open(self.position_cache_file, 'w') as f:
                json.dump(self.position_cache, f)
            print(f"Saved position cache with {len(self.position_cache)} players")
        except Exception as e:
            print(f"Error saving position cache: {e}")

    def get_player_info(self, player_id):
        """Return the player's position, fetching it from the NBA API if needed.

        Args:
            player_id: NBA player identifier.

        Returns:
            The cached or freshly fetched position, or None when the API
            returned no row for the player or the request failed.
        """
        # Cache keys are strings because the cache is persisted as JSON.
        player_id_str = str(player_id)

        # Return from cache if available
        if player_id_str in self.position_cache:
            return self.position_cache[player_id_str]

        try:
            player_info = CommonPlayerInfo(player_id=player_id)
            info_df = player_info.get_data_frames()[0]
            has_row = not info_df.empty
            position = info_df['POSITION'].iloc[0] if has_row else None

            # Save to cache (a missing row is cached as None)
            self.position_cache[player_id_str] = position

            # Periodically save cache to disk to prevent data loss
            if len(self.position_cache) % 10 == 0:
                self.save_position_cache()

            if has_row:
                print(f"Fetched position for {info_df['DISPLAY_FIRST_LAST'].iloc[0]}: {position}")
            else:
                # Reading the display name from an empty frame would raise
                # IndexError, so report the empty result explicitly.
                print(f"No player info returned for player {player_id}")
            return position
        except Exception as e:
            print(f"Error fetching position for player {player_id}: {e}")
            return None

    def calculate_fantasy_points(self, stats):
        """Return the weighted sum of ``stats`` using ``self.scoring``.

        Args:
            stats: Mapping that provides a value for every key in
                ``self.scoring``.
        """
        return sum(stats[stat] * weight for stat, weight in self.scoring.items())

    def get_team_games(self, team_id):
        """Return the team's most recent ``games_back`` regular-season games.

        Games are sorted newest first.  An empty DataFrame is returned when
        the request or parsing fails.
        """
        try:
            team_games = TeamGameLog(
                team_id=team_id,
                season=self.current_season,
                season_type_all_star='Regular Season'
            ).get_data_frames()[0]
            team_games['GAME_DATE'] = pd.to_datetime(team_games['GAME_DATE'])
            team_games = team_games.sort_values('GAME_DATE', ascending=False)
            return team_games.head(self.games_back).copy()
        except Exception as e:
            print(f"Error fetching games for team {team_id}: {e}")
            return pd.DataFrame()

    @staticmethod
    def _parse_minutes(min_str):
        """Convert a "PT27M"-style string to minutes; anything else gives 0."""
        if isinstance(min_str, str) and min_str.startswith("PT") and min_str.endswith("M"):
            return float(min_str[2:-1])
        return 0

    def get_game_boxscore(self, game_id):
        """Return one row per player who played at least a minute in a game.

        Args:
            game_id: NBA game identifier passed to the live box-score endpoint.

        Returns:
            DataFrame with player statistics, fantasy points, position and
            team/opponent scoring context, or None when fetching or parsing
            the box score fails.
        """
        try:
            box = boxscore.BoxScore(game_id=game_id)
            game_data = box.game.get_dict()

            home_team = game_data['homeTeam']
            away_team = game_data['awayTeam']

            # Capture team scores
            home_team_id = home_team['teamId']
            away_team_id = away_team['teamId']
            home_team_score = home_team['score']
            away_team_score = away_team['score']

            # Create a mapping of team_id to points scored and allowed
            team_points = {
                home_team_id: {'PTS_SCORED': home_team_score, 'PTS_ALLOWED': away_team_score, 'OPPONENT': away_team_id},
                away_team_id: {'PTS_SCORED': away_team_score, 'PTS_ALLOWED': home_team_score, 'OPPONENT': home_team_id}
            }

            # Create team ID to tricode mapping
            team_to_tricode = {
                home_team_id: home_team['teamTricode'],
                away_team_id: away_team['teamTricode']
            }

            all_players = []

            # Process players from both teams
            for team_data in (home_team, away_team):
                team_id = team_data['teamId']
                opponent_id = team_points[team_id]['OPPONENT']

                for player in team_data['players']:
                    stats = player['statistics']
                    minutes = self._parse_minutes(stats['minutesCalculated'])

                    # Skip players who didn't play meaningful minutes
                    if minutes < 1:
                        continue

                    player_id = player['personId']

                    # get_player_info serves cached positions itself; the
                    # delay is only needed when an API request was made.
                    needs_fetch = str(player_id) not in self.position_cache
                    position = self.get_player_info(player_id)
                    if needs_fetch:
                        self.add_delay(0.5, 1.0)

                    player_data = {
                        'GAME_ID': game_id,
                        'GAME_DATE': game_data['gameTimeLocal'][:10],
                        'TEAM_ID': team_id,
                        'TEAM': team_data['teamTricode'],
                        'PLAYER_ID': player_id,
                        'PLAYER_NAME': player['name'],
                        'POSITION': position,
                        'STARTER': player.get('starter', False),
                        'MINUTES': minutes,
                        'PTS': stats['points'],
                        'FG3M': stats['threePointersMade'],
                        'REB': stats['reboundsTotal'],
                        'AST': stats['assists'],
                        'STL': stats['steals'],
                        'BLK': stats['blocks'],
                        'TOV': stats['turnovers'],
                        'FGA': stats['fieldGoalsAttempted'],
                        'FTA': stats['freeThrowsAttempted'],
                        'TEAM_PTS_SCORED': team_points[team_id]['PTS_SCORED'],
                        'TEAM_PTS_ALLOWED': team_points[team_id]['PTS_ALLOWED'],
                        'OPPONENT_ID': opponent_id,
                        'OPPONENT': team_to_tricode[opponent_id]
                    }
                    player_data['FP'] = self.calculate_fantasy_points(player_data)
                    all_players.append(player_data)

            return pd.DataFrame(all_players)

        except Exception as e:
            print(f"Error fetching boxscore for game {game_id}: {e}")
            return None

    @staticmethod
    def _is_checkpoint(team_idx, checkpoint_every):
        """Return True when an intermediate save is due after this team.

        ``team_idx`` is zero-based; a falsy or non-positive
        ``checkpoint_every`` disables checkpoints.
        """
        if not checkpoint_every or checkpoint_every <= 0:
            return False
        return (team_idx + 1) % checkpoint_every == 0

    def _save_checkpoint(self, per_game_data, folder_path, teams_done):
        """Write everything collected so far to an intermediate CSV file."""
        print("Saving intermediate data...")
        os.makedirs(folder_path, exist_ok=True)
        temp_df = pd.concat(per_game_data, ignore_index=True)
        temp_filename = os.path.join(folder_path, f"NBA_Per_Game_Data_Intermediate_{teams_done}_teams.csv")
        temp_df.to_csv(temp_filename, index=False)
        print(f"Saved intermediate data to: {temp_filename}")

    def _finalize_per_game_data(self, per_game_data, save_to_drive, folder_path, filename):
        """Combine, optionally save and summarise the collected frames."""
        if not per_game_data:
            print("No data was collected")
            return pd.DataFrame()

        print("Finalizing data collection...")
        per_game_df = pd.concat(per_game_data, ignore_index=True)
        per_game_df.sort_values(['TEAM', 'GAME_DATE'], inplace=True)

        # Save the position cache one final time
        self.save_position_cache()

        if save_to_drive:
            if filename is None:
                filename = "NBA_Per_Game_Player_Data.csv"

            os.makedirs(folder_path, exist_ok=True)

            full_path = os.path.join(folder_path, filename)
            per_game_df.to_csv(full_path, index=False)
            print(f"Saved per-game data to: {full_path}")

        # Print summary stats
        print("\nData Collection Summary:")
        print(f"Total players: {per_game_df['PLAYER_ID'].nunique()}")
        print(f"Total games: {per_game_df['GAME_ID'].nunique()}")
        print(f"Total teams: {per_game_df['TEAM'].nunique()}")
        print(f"Position data collected: {sum(~per_game_df['POSITION'].isna())} of {len(per_game_df)} entries")
        print(f"Total position cache size: {len(self.position_cache)} players")

        # Print position distribution
        if 'POSITION' in per_game_df.columns:
            pos_counts = per_game_df['POSITION'].value_counts()
            print("\nPosition distribution:")
            for pos, count in pos_counts.items():
                print(f"  {pos}: {count} entries")

        return per_game_df

    def generate_per_game_player_data(self, save_to_drive=True, folder_path=None, filename=None,
                                      checkpoint_every=5):
        """Generate one row per player per game across the league.

        Each team's recent games are fetched in turn; a game shared by two
        teams is processed once.  Whatever has been collected is combined and
        (optionally) saved even when collection is interrupted or fails.

        Args:
            save_to_drive: Write the final CSV, and the intermediate
                checkpoint CSVs, to ``folder_path``.
            folder_path: Output folder; defaults to ``self.data_folder``.
            filename: Final CSV name; defaults to
                "NBA_Per_Game_Player_Data.csv".
            checkpoint_every: Write an intermediate CSV after every this many
                teams.  ``0`` or ``None`` disables checkpoints.

        Returns:
            DataFrame of all collected rows sorted by team and game date, or
            an empty DataFrame when nothing was collected.
        """
        folder_path = folder_path or self.data_folder
        all_nba_teams = teams.get_teams()
        team_full_names = {t['id']: t['full_name'] for t in all_nba_teams}
        per_game_data = []
        processed_games = set()

        print("Generating per-game player data across all teams...")
        print("This will include position data for each player.")

        result = pd.DataFrame()
        try:
            for team_idx, team in enumerate(all_nba_teams):
                team_id = team['id']
                team_abbr = team['abbreviation']
                team_full = team['full_name']

                print(f"Processing team {team_idx+1}/{len(all_nba_teams)}: {team_full} ({team_abbr})")

                # Retrieve recent games for this team
                games = self.get_team_games(team_id)
                if games.empty:
                    print(f"No games found for {team_full}")
                    continue

                print(f"Found {len(games)} recent games for {team_full}")

                for game_idx, (_, game_row) in enumerate(games.iterrows()):
                    game_id = game_row['Game_ID']

                    # Skip games we've already processed
                    if game_id in processed_games:
                        continue

                    print(f"Processing game {game_idx+1}/{len(games)}: {game_id} on {game_row['GAME_DATE']}")

                    box_df = self.get_game_boxscore(game_id)
                    if box_df is None or box_df.empty:
                        print(f"No boxscore data for game {game_id}")
                        continue

                    # Add the game to processed set
                    processed_games.add(game_id)

                    # Process all players from the game (both teams)
                    for team_in_game_id in box_df['TEAM_ID'].unique():
                        team_box = box_df[box_df['TEAM_ID'] == team_in_game_id].copy()
                        team_box['TEAM_FULL'] = team_full_names.get(team_in_game_id)
                        per_game_data.append(team_box)

                    # Save position cache after each game to prevent data loss
                    self.save_position_cache()

                    # Add a delay between games to avoid rate limiting
                    self.add_delay()

                # Periodic checkpoint.  The previous guard compared
                # ``team_idx % 5`` with 30, which can never be true, so no
                # intermediate file was ever written.
                if save_to_drive and per_game_data and self._is_checkpoint(team_idx, checkpoint_every):
                    self._save_checkpoint(per_game_data, folder_path, team_idx + 1)

        except KeyboardInterrupt:
            print("Process interrupted. Saving collected data so far...")

        except Exception as e:
            print(f"Error during data collection: {e}")
            import traceback
            traceback.print_exc()

        finally:
            # Save whatever data we've collected.  The result is returned
            # after the ``finally`` block, not from inside it, so an
            # unhandled exception (e.g. SystemExit) still propagates.
            result = self._finalize_per_game_data(per_game_data, save_to_drive, folder_path, filename)

        return result

# -------------------------
# Main block to run per-game analysis
# -------------------------
if __name__ == "__main__":
    # Collect the last 30 games per team and write the per-game CSV using the
    # analyzer's default folder and filename.
    analyzer = SimpleLineupAnalyzer(games_back=30)
    print("Generating per-game player CSV output with position data...")
    # The resulting frame stays bound at module level as `per_game_df`.
    per_game_df = analyzer.generate_per_game_player_data()
    print("Per-game player data generation complete.")

Loaded position cache with 567 players
Initialized for season 2024-25 with last 30 games.
Generating per-game player CSV output with position data...
Generating per-game player data across all teams...
This will include position data for each player.
Processing team 1/30: Atlanta Hawks (ATL)
Found 30 recent games for Atlanta Hawks
Processing game 1/30: 0022400993 on 2025-03-18 00:00:00
Saved position cache with 567 players
Processing game 2/30: 0022400978 on 2025-03-16 00:00:00
Saved position cache with 567 players
Processing game 3/30: 0022400960 on 2025-03-14 00:00:00
Saved position cache with 567 players
Processing game 4/30: 0022400945 on 2025-03-12 00:00:00
Saved position cache with 567 players
Processing game 5/30: 0022400928 on 2025-03-10 00:00:00
Saved position cache with 567 players
Processing game 6/30: 0022400914 on 2025-03-08 00:00:00
Saved position cache with 567 players
Processing game 7/30: 0022400899 on 2025-03-06 00:00:00
Saved position cache with 567 players
Processin

In [None]:
import os
import sys
import random
import logging
from collections import defaultdict
from typing import List, Set, Optional

import pandas as pd

# Configure logging: only ERROR and above are requested for this cell.
# NOTE(review): logging.basicConfig does nothing when the root logger already
# has handlers (e.g. an earlier cell configured logging at INFO), so the ERROR
# level may not take effect in a shared notebook session - confirm whether
# force=True is wanted here.
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('NBA_DFS_Optimizer')

# Configuration dictionary read by the lineup-generation functions below.
CONFIG = {
    "salary_cap": 50000,  # Upper bound on a lineup's total salary
    "min_salary": 49000,  # Lower bound on a lineup's total salary
    "min_player_salary": 3100,  # Players below this salary go through zero_salary_handling
    "min_player_minutes": 20,  # Minimum player minutes threshold
    "max_players_per_team": 3,  # Maximum players from one team in a lineup
    "lineup_variance": 0.1,  # Projections are scaled by a random factor within +/- this fraction
    "n_lineup_attempts": 200000,  # Number of lineup attempts (an older note here said 100,000)
    "target_lineups": 20,  # Number of top lineups to keep after filtering
    "max_exposure": 0.50,  # presumably the max share of lineups one player may appear in - TODO confirm
    "projections_path": '/content/drive/My Drive/NBA_PerGame_Data/NBA_Player_Data_M_player_projections.csv',
    "output_path": './NBA_Projections',
    "max_overlap": 3,  # presumably the max players shared between two kept lineups - TODO confirm
    "exclude_inactive": True,
    "min_stack_size": 2,  # A team counts as a stack with at least this many players in a lineup
    "min_stacks_required": 0,
    "zero_salary_handling": "filter"  # "filter" drops low-salary players; "min" raises them to min_player_salary
}

class Player:
    """A single player in the pool used for DFS lineup generation."""

    def __init__(self, player_id, name, team, opponent, positions, salary, projected_fp,
                minutes=0, is_starter=False, is_home=False):
        # Identity and matchup
        self.id = player_id
        self.name = name
        self.team = team
        self.opponent = opponent
        self.is_home = is_home

        # Roster information
        self.positions = positions  # List of eligible positions
        self.salary = salary
        self.minutes = minutes
        self.is_starter = is_starter

        # Projections; the adjusted copy is the one perturbed for Monte Carlo
        self.projected_fp = projected_fp
        self.adj_projected_fp = projected_fp

        # Exposure tracking
        self.times_used = 0

        # Projected fantasy points per $1,000 of salary (0 when salary is not positive)
        if salary > 0:
            self.value = projected_fp / (salary / 1000)
        else:
            self.value = 0

    def __repr__(self):
        summary = (
            f"{self.name} ({self.team}) - {self.positions} - ${self.salary:,}"
            f" - {self.projected_fp:.1f}pts - {self.minutes:.1f}min"
        )
        return summary

def get_positions_from_raw(raw_position):
    """Convert raw position string to list of eligible DFS positions.

    Specific positions (PG, SG, SF, PF, C) are detected by substring match on
    the upper-cased input.  When none of them is present, the generic guard
    ("G") and forward ("F") markers are both checked, so "G/F" is eligible at
    both G and F.

    Args:
        raw_position: Position text such as "PG/SG"; any non-string value
            yields an empty list.

    Returns:
        List of eligible slot names without duplicates, in first-seen order
        (the order is therefore the same on every run).
    """
    if not isinstance(raw_position, str):
        return []

    raw_position = raw_position.upper().strip()
    position_map = {
        'PG': ['PG', 'G', 'UTIL'],
        'SG': ['SG', 'G', 'UTIL'],
        'SF': ['SF', 'F', 'UTIL'],
        'PF': ['PF', 'F', 'UTIL'],
        'C': ['C', 'UTIL'],
        'G': ['G', 'UTIL'],
        'F': ['F', 'UTIL'],
    }

    slots = []
    for code in ('PG', 'SG', 'SF', 'PF', 'C'):
        if code in raw_position:
            slots.extend(position_map[code])

    # Handle weird position cases: only generic markers are present.  Both
    # are checked independently; previously a matched "G" suppressed "F".
    if not slots:
        for code in ('G', 'F'):
            if code in raw_position:
                slots.extend(position_map[code])

    # Remove duplicates while keeping a stable order
    return list(dict.fromkeys(slots))

def load_projection_data(file_path):
    """Read the projections CSV at ``file_path`` into a DataFrame.

    Prints the row count and the column names of the loaded file.  If
    anything goes wrong the error is logged and an empty DataFrame is
    returned instead.
    """
    try:
        projections = pd.read_csv(file_path)
        row_count = len(projections)
        print(f"Loaded {row_count} player projections from {file_path}")

        # List the columns so the CSV layout can be checked at a glance
        print("\nColumn names in data:")
        column_listing = ', '.join(projections.columns)
        print(column_listing)
    except Exception as e:
        logger.error(f"Error loading projection data: {e}")
        return pd.DataFrame()
    return projections

def _flag_is_true(value):
    """Return True when ``value`` spells an affirmative flag.

    Accepts the markers 'true', 'yes', '1', 't', 'y' (case-insensitive) and
    any value whose text parses to the number 1, so a flag stored as the
    float 1.0 is treated the same as the integer 1.
    """
    text = str(value).strip().lower()
    if text in ('true', 'yes', '1', 't', 'y'):
        return True
    try:
        return float(text) == 1.0
    except ValueError:
        return False


def create_player_objects(df, exclude_inactive=True, min_minutes=None, verbose=False):
    """Create Player objects from DataFrame

    Rows are skipped when they lack a name or team, carry an excluded injury
    status, fall below the salary or minutes thresholds, or have no eligible
    position or no positive projection.  A row that raises while being
    processed is logged and skipped.

    Args:
        df (DataFrame): DataFrame with player projection data
        exclude_inactive (bool): Whether to filter out injured/inactive players
        min_minutes (float): Minimum projected minutes threshold
        verbose (bool): Whether to print detailed info about filtered players

    Returns:
        list: Player objects for every row that passed all filters.
    """
    players = []
    count_zero_salary = 0
    count_low_minutes = 0

    for _, row in df.iterrows():
        try:
            # Handle different CSV formats by checking for column existence
            player_id = str(row.get('PLAYER_ID', row.get('ID', f"P{len(players)}")))
            name = row.get('NAME', row.get('PLAYER', row.get('PLAYER_NAME', '')))
            team = row.get('TEAM', '')
            opponent = row.get('OPPONENT', '')

            # Position handling
            raw_position = row.get('POSITION', row.get('POS', ''))
            positions = get_positions_from_raw(raw_position)

            # Numeric values
            salary = row.get('SALARY', row.get('DK_SALARY', 0))
            if isinstance(salary, str):
                # Handle salary with $ or commas
                salary = int(''.join(c for c in salary if c.isdigit()) or 0)
            elif pd.isna(salary):
                # A missing salary compares False against every threshold and
                # would turn lineup salary totals into NaN; treat it as zero
                # so the configured low-salary handling applies.
                salary = 0

            projected_fp = row.get('PROJ_FP', 0)
            if isinstance(projected_fp, str):
                projected_fp = float(projected_fp.replace(',', '.') or 0)

            # Check for minutes column with different possible names
            minutes = row.get('MINUTES', row.get('PROJ_MINUTES', row.get('MIN', 0)))
            if isinstance(minutes, str):
                minutes = float(minutes.replace(',', '.') or 0)

            # Status flags: the first column present wins
            starter_value = ''
            if 'STARTER' in row:
                starter_value = row['STARTER']
            elif 'IS_STARTER' in row:
                starter_value = row['IS_STARTER']
            is_starter = _flag_is_true(starter_value)

            home_value = ''
            if 'HOME' in row:
                home_value = row['HOME']
            elif 'IS_HOME' in row:
                home_value = row['IS_HOME']
            is_home = _flag_is_true(home_value)

            # Skip this player if they have insufficient data
            if not name or not team:
                logger.debug(f"Skipping player with ID {player_id} due to missing basic data")
                continue

            # Filter out inactive players
            if exclude_inactive:
                excluded_statuses = ['out', 'doubtful', 'suspended', 'injured']
                status = str(row.get('STATUS', row.get('INJURY', ''))).lower()
                if any(s in status for s in excluded_statuses):
                    logger.info(f"Excluding {name} - {status}")
                    continue

            # Handle zero or low salary based on configuration
            if salary < CONFIG["min_player_salary"]:
                count_zero_salary += 1
                if CONFIG["zero_salary_handling"] == "filter":
                    continue
                elif CONFIG["zero_salary_handling"] == "min":
                    logger.info(f"Adjusting {name}'s salary from ${salary} to minimum ${CONFIG['min_player_salary']}")
                    salary = CONFIG["min_player_salary"]

            # Filter players with low projected minutes
            if min_minutes is not None and minutes < min_minutes:
                count_low_minutes += 1
                if verbose:
                    fp_per_min = projected_fp / minutes if minutes > 0 else 0
                    logger.info(f"Filtering out {name} ({team}) - Mins: {minutes:.1f} FP: {projected_fp:.1f} ({fp_per_min:.2f}/min) ${salary}")
                else:
                    logger.info(f"Filtering out {name} ({team}) due to low minutes: {minutes:.1f} (min: {min_minutes:.1f})")
                continue

            # Only include players with valid positions and projections
            if positions and projected_fp > 0:
                players.append(Player(player_id, name, team, opponent, positions,
                                      salary, projected_fp, minutes, is_starter, is_home))

        except Exception as e:
            logger.error(f"Error creating player from row {row}: {e}")

    if count_zero_salary > 0:
        logger.warning(f"Found {count_zero_salary} players with insufficient salary")

    if count_low_minutes > 0:
        logger.warning(f"Filtered out {count_low_minutes} players with minutes below {min_minutes}")

    logger.info(f"Created {len(players)} player objects")

    # Debug info about remaining players
    if players:
        logger.info(f"Sample player: {players[0]}")
        position_counts = defaultdict(int)
        team_counts = defaultdict(int)
        for p in players:
            for pos in p.positions:
                position_counts[pos] += 1
            team_counts[p.team] += 1

        logger.info(f"Position distribution: {dict(position_counts)}")
        logger.info(f"Team distribution: {dict(team_counts)}")
    else:
        logger.warning("No players created - check salary configuration and CSV format")

    return players

def check_overlap(new_lineup, existing_lineups, max_overlap):
    """Return True when ``new_lineup`` is distinct enough from every existing lineup.

    Args:
        new_lineup: Sequence of ``(slot, player)`` tuples; each player must
            expose an ``id`` attribute.
        existing_lineups: Iterable of lineups in the same format. A lineup may
            additionally carry a ``("Total_FP", score)`` entry, which is
            ignored.
        max_overlap: Largest number of shared player ids tolerated between
            ``new_lineup`` and any single existing lineup.

    Returns:
        False as soon as one existing lineup shares more than ``max_overlap``
        players with ``new_lineup``; True otherwise (including when
        ``existing_lineups`` is empty).
    """
    # Slot labels ('PG', 'SG', ...) are strings just like the "Total_FP"
    # label, so the score entry must be recognised by its label, not its type.
    # Filtering on ``isinstance(slot, str)`` would discard every player and
    # make the measured overlap always zero.
    new_players = {p.id for slot, p in new_lineup if slot != "Total_FP"}

    for lineup in existing_lineups:
        existing_players = {p.id for slot, p in lineup if slot != "Total_FP"}
        if len(new_players & existing_players) > max_overlap:
            return False

    return True

def apply_variance(players: List[Player]):
    """Randomly perturb every player's projection for one simulation draw.

    Each player's ``adj_projected_fp`` is overwritten with ``projected_fp``
    scaled by ``1 + r``, where ``r`` is drawn uniformly from
    ``[-CONFIG["lineup_variance"], +CONFIG["lineup_variance"]]``, one
    independent draw per player. Players are modified in place and nothing is
    returned.
    """
    spread = CONFIG["lineup_variance"]
    for player in players:
        scale = 1 + random.uniform(-spread, spread)
        player.adj_projected_fp = player.projected_fp * scale

def generate_random_lineup(players: List[Player], lineup_slots: List[str]) -> List[tuple]:
    """Fill each roster slot with a randomly chosen eligible player.

    Slots are filled in the order given by ``lineup_slots``. A player is
    eligible for a slot when the slot label appears in ``player.positions``
    and the player has not already been picked. No salary or team rules are
    applied here.

    Returns:
        A list of ``(slot, player)`` tuples in slot order, or an empty list
        when some slot has no eligible player left.
    """
    remaining = list(players)  # work on a copy so the caller's list is untouched
    taken_ids = set()
    picks = []

    for slot in lineup_slots:
        candidates = [
            candidate
            for candidate in remaining
            if slot in candidate.positions and candidate.id not in taken_ids
        ]
        if len(candidates) == 0:
            # One unfillable slot invalidates the whole lineup.
            return []

        chosen = random.choice(candidates)
        remaining.remove(chosen)
        taken_ids.add(chosen.id)
        picks.append((slot, chosen))

    return picks

def is_valid_lineup(lineup: List[tuple], players_by_team: defaultdict) -> bool:
    """Tell whether a lineup of ``(slot, player)`` tuples satisfies every rule.

    Rules, checked in this order:
      1. exactly 8 entries;
      2. total salary within ``CONFIG["min_salary"]``..``CONFIG["salary_cap"]``;
      3. no team contributes more than ``CONFIG["max_players_per_team"]`` players;
      4. at least ``CONFIG["min_stacks_required"]`` teams contribute
         ``CONFIG["min_stack_size"]`` or more players.

    ``players_by_team`` is accepted for interface compatibility but is not
    consulted by this function.
    """
    if len(lineup) != 8:
        return False

    # Salary window.
    payroll = sum(player.salary for _slot, player in lineup)
    within_budget = CONFIG["min_salary"] <= payroll <= CONFIG["salary_cap"]
    if not within_budget:
        return False

    # Head count per team.
    roster_sizes = defaultdict(int)
    for _slot, player in lineup:
        roster_sizes[player.team] += 1

    team_limit = CONFIG["max_players_per_team"]
    for size in roster_sizes.values():
        if size > team_limit:
            return False

    # Number of teams large enough to count as a stack.
    stack_threshold = CONFIG["min_stack_size"]
    stack_count = 0
    for size in roster_sizes.values():
        if size >= stack_threshold:
            stack_count += 1

    if stack_count < CONFIG["min_stacks_required"]:
        return False
    return True

def run_lineup_optimizer(players: List[Player], target_lineups: int = CONFIG["target_lineups"],
                         excluded_teams: Optional[Set[str]] = None) -> List[List[tuple]]:
    """Randomly generate lineups, score them, and return the best ones.

    Runs ``CONFIG["n_lineup_attempts"]`` attempts. Each attempt redraws the
    per-player variance, builds one random lineup, and keeps it when it passes
    ``is_valid_lineup``, is not a duplicate of an earlier lineup, and passes
    ``check_overlap`` against the lineups kept so far. Kept lineups are ranked
    by their simulated score; the top ``target_lineups`` are printed, followed
    by an exposure report and a warning for players above
    ``CONFIG["max_exposure"]`` (the limit is reported, not enforced).

    Args:
        players: Candidate players. ``adj_projected_fp`` is rewritten on every
            attempt and ``times_used`` is incremented once for each appearance
            in a returned lineup.
        target_lineups: Maximum number of lineups to return.
        excluded_teams: Team codes whose players are dropped before
            generation; ``None`` means no exclusions.

    Returns:
        Up to ``target_lineups`` lineups, best first, each a list of
        ``(slot, player)`` tuples. Empty when fewer than 8 players remain
        after team exclusion or when no attempt produced an acceptable lineup.
    """
    if excluded_teams is None:
        excluded_teams = set()

    print("\n" + "="*70)
    print(f"Running Monte Carlo NBA DFS Optimizer to generate {CONFIG['n_lineup_attempts']} lineups")
    print(f"Filtering to top {target_lineups} lineups by projected FP")
    print(f"Salary cap: ${CONFIG['salary_cap']:,}")
    print(f"Minimum lineup salary: ${CONFIG['min_salary']:,}")
    print(f"Minimum player salary: ${CONFIG['min_player_salary']:,}")
    print(f"Minimum player minutes: {CONFIG['min_player_minutes']:.1f}")
    print(f"Max exposure: {CONFIG['max_exposure']*100:.1f}%")
    print(f"Max overlap: {CONFIG['max_overlap']} players between lineups")
    print(f"Available players: {len(players)} (after minutes filter)")
    print("="*70)

    # Filter out excluded teams
    players = [p for p in players if p.team not in excluded_teams]
    print(f"Excluding teams: {', '.join(excluded_teams) if excluded_teams else 'None'}. {len(players)} players remaining.")

    if len(players) < 8:
        print(f"Error: Not enough players ({len(players)}) to form a valid lineup.")
        return []

    # Standard DFS lineup positions
    lineup_slots = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL']

    # Grouping handed to is_valid_lineup as part of its signature
    players_by_team = defaultdict(list)
    for p in players:
        players_by_team[p.team].append(p)

    all_lineups = []       # accepted lineups, in acceptance order
    lineup_scores = []     # simulated score of all_lineups[i]
    lineup_signatures = set()
    attempts = 0

    print(f"Generating {CONFIG['n_lineup_attempts']} random lineups...")
    while attempts < CONFIG["n_lineup_attempts"]:
        attempts += 1
        if attempts % 50000 == 0:
            print(f"Generated {attempts} lineups so far...")

        # Fresh variance draw for this attempt
        apply_variance(players)

        lineup = generate_random_lineup(players, lineup_slots)
        if not lineup or not is_valid_lineup(lineup, players_by_team):
            continue

        # Check uniqueness and overlap
        signature = tuple(sorted(p.id for _, p in lineup))
        if signature in lineup_signatures or not check_overlap(lineup, all_lineups, CONFIG["max_overlap"]):
            continue

        lineup_signatures.add(signature)
        all_lineups.append(lineup)
        # Score now, while adj_projected_fp still holds this attempt's draw:
        # the next apply_variance call overwrites it for every player, so
        # scoring after the loop would rank all lineups with the last draw only.
        lineup_scores.append(sum(p.adj_projected_fp for _, p in lineup))

    print(f"Generated {len(all_lineups)} valid lineups out of {attempts} attempts.")

    # Rank by simulated score (stable for ties) and keep the top N
    ranked = sorted(zip(lineup_scores, all_lineups), key=lambda pair: pair[0], reverse=True)
    final_ranked = ranked[:target_lineups]

    # Update player usage for exposure tracking
    for _, lineup in final_ranked:
        for _, p in lineup:
            p.times_used += 1

    # Print top lineups
    for i, (total_fp, lineup) in enumerate(final_ranked, 1):
        total_salary = sum(p.salary for _, p in lineup)
        print(f"\n--- Lineup #{i} ---")
        print(f"Total Projected Points: {total_fp:.1f}")
        print(f"Salary Used: ${total_salary:,.0f}")
        for pos, p in lineup:
            print(f"  [{pos:<4}] {p.name:<22} {p.team} | {p.projected_fp:>5.1f} pts (${p.salary:,.0f}) | {p.minutes:>4.1f} min")

    # Exposure is measured against the lineups actually produced, which can be
    # fewer than target_lineups; max(..., 1) avoids dividing by zero.
    lineup_count = max(len(final_ranked), 1)
    used_players = [p for p in players if p.times_used > 0]
    base_positions = ['PG', 'SG', 'SF', 'PF', 'C']

    # Exposure report
    print("\n" + "="*110)
    print("Player Exposure Report")
    print("="*110)
    print(f"{'Player Name':<22} {'Team':<5} {'Pos':<12} {'Salary':>8} {'Proj FP':>8} {'Min':>6} {'Used':>5} {'Exp %':>6}")
    print("-" * 110)
    for p in sorted(used_players, key=lambda x: x.times_used, reverse=True):
        exposure_pct = (p.times_used / lineup_count) * 100
        pos_str = '/'.join([pos for pos in p.positions if pos in base_positions])
        print(f"{p.name:<22} {p.team:<5} {pos_str:<12} ${p.salary:>7,.0f} {p.projected_fp:>8.1f} {p.minutes:>6.1f} {p.times_used:>5} {exposure_pct:>5.1f}%")

    # Check for exposure limits and print warnings
    over_exposed = [
        (p.name, p.team, (p.times_used / lineup_count) * 100)
        for p in used_players
        if p.times_used / lineup_count > CONFIG["max_exposure"]
    ]

    if over_exposed:
        print("\nWARNING: Some players exceeded max exposure limit:")
        for name, team, pct in sorted(over_exposed, key=lambda x: x[2], reverse=True):
            print(f"  {name} ({team}): {pct:.1f}% (limit: {CONFIG['max_exposure']*100}%)")

    return [lineup for _, lineup in final_ranked]

def main():
    """Interactive entry point: load projections, prompt for filters, optimize, export.

    Steps:
      1. Load the projections file named by ``CONFIG["projections_path"]``.
      2. Prompt for a minimum-minutes threshold (stored back into
         ``CONFIG["min_player_minutes"]``) and print a minutes summary.
      3. Prompt for teams to exclude, chosen by their number in the printed list.
      4. Build player objects and run the lineup optimizer.
      5. Write the resulting lineups to ``optimized_lineups.csv`` inside
         ``CONFIG["output_path"]``.

    Returns early (after logging an error) when no data or no players are
    available.
    """
    import csv  # local import: only needed for the export step

    print("=" * 70)
    print("NBA DFS Monte Carlo Lineup Optimizer")
    print("=" * 70)

    df = load_projection_data(CONFIG["projections_path"])
    if df.empty:
        logger.error("No data loaded. Exiting.")
        return

    # Print column names for debugging
    print("\nColumn names found in data:")
    print(', '.join(df.columns))

    # Get user input for minimum minutes
    min_minutes_input = input(f"\nEnter minimum player minutes threshold (default: {CONFIG['min_player_minutes']}): ").strip()
    if min_minutes_input:
        try:
            CONFIG["min_player_minutes"] = float(min_minutes_input)
            print(f"Set minimum player minutes to {CONFIG['min_player_minutes']}")
        except ValueError:
            print(f"Invalid input. Using default: {CONFIG['min_player_minutes']}")

    # Calculate minutes distribution for info (first matching column wins)
    minutes_column = None
    for col in ['MINUTES', 'PROJ_MINUTES', 'MIN']:
        if col in df.columns:
            minutes_column = col
            break

    if minutes_column:
        minutes = df[minutes_column].dropna()
        print(f"\nMinutes distribution in dataset (column: {minutes_column}):")
        print(f"  Min: {minutes.min():.1f}")
        print(f"  Max: {minutes.max():.1f}")
        print(f"  Avg: {minutes.mean():.1f}")
        print(f"  Players with <{CONFIG['min_player_minutes']} min: {(minutes < CONFIG['min_player_minutes']).sum()} of {len(minutes)}")

    # Team exclusion logic: gather every team code seen as TEAM or OPPONENT
    all_teams = set()
    if 'TEAM' in df.columns:
        all_teams.update(df['TEAM'].dropna().unique())
    if 'OPPONENT' in df.columns:
        all_teams.update(df['OPPONENT'].dropna().unique())
    active_teams = sorted([t for t in all_teams if isinstance(t, str) and t.strip()])

    print("\nAvailable teams with games today:")
    for i, team in enumerate(active_teams, 1):
        print(f"{i}. {team}")

    exclude_input = input("\nEnter team numbers to exclude (comma-separated) or press Enter to skip: ").strip()
    excluded_teams = set()
    if exclude_input:
        # Non-numeric tokens and out-of-range numbers are ignored.
        excluded_indices = [int(x.strip()) - 1 for x in exclude_input.split(',') if x.strip().isdigit()]
        excluded_teams = {active_teams[i] for i in excluded_indices if 0 <= i < len(active_teams)}

    # The interactive "detailed filter logs" prompt is disabled; verbose stays off.
    verbose = False

    # Set log level based on verbose setting
    if verbose:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.WARNING)

    players = create_player_objects(df, exclude_inactive=CONFIG["exclude_inactive"],
                                    min_minutes=CONFIG["min_player_minutes"],
                                    verbose=verbose)
    if not players:
        logger.error("No valid players created.")
        return

    lineups = run_lineup_optimizer(players, target_lineups=CONFIG["target_lineups"], excluded_teams=excluded_teams)

    # Save to CSV
    if lineups:
        output_file = os.path.join(CONFIG["output_path"], "optimized_lineups.csv")
        os.makedirs(CONFIG["output_path"], exist_ok=True)
        # utf-8 so accented player names never hit a locale-dependent encode
        # error; csv.writer so a comma or quote inside a field is quoted
        # instead of shifting columns. lineterminator keeps plain "\n" endings.
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, lineterminator="\n")
            writer.writerow(["Lineup", "Position", "Player_ID", "Player_Name", "Team", "Opponent",
                             "Starter", "Home", "Minutes", "Salary", "Projected_FP", "Value"])
            for lineup_num, lineup in enumerate(lineups, 1):
                for pos, player in lineup:
                    # Pre-format every field so numeric formatting is fixed
                    # here rather than left to the writer.
                    writer.writerow([
                        f"{lineup_num}",
                        f"{pos}",
                        f"{player.id}",
                        f"{player.name}",
                        f"{player.team}",
                        f"{player.opponent}",
                        f"{1 if player.is_starter else 0}",
                        f"{1 if player.is_home else 0}",
                        f"{player.minutes:.1f}",
                        f"{player.salary}",
                        f"{player.projected_fp:.1f}",
                        f"{player.value:.2f}",
                    ])
        print(f"\nLineups saved to {output_file}")

# Entry point: call main() only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

NBA DFS Monte Carlo Lineup Optimizer
Loaded 127 player projections from /content/drive/My Drive/NBA_PerGame_Data/NBA_Player_Data_M_player_projections.csv

Column names in data:
PLAYER_ID, PLAYER_NAME, TEAM, POSITION, OPPONENT, IS_STARTER, IS_HOME, PROJ_MINUTES, PROJ_PTS, PROJ_REB, PROJ_AST, PROJ_FP, GAMES_USED, DEFENSE_FACTOR, POS_DEFENSE_FACTOR, HOME_FACTOR, ESTIMATED, FP_PER_MIN, SALARY

Column names found in data:
PLAYER_ID, PLAYER_NAME, TEAM, POSITION, OPPONENT, IS_STARTER, IS_HOME, PROJ_MINUTES, PROJ_PTS, PROJ_REB, PROJ_AST, PROJ_FP, GAMES_USED, DEFENSE_FACTOR, POS_DEFENSE_FACTOR, HOME_FACTOR, ESTIMATED, FP_PER_MIN, SALARY

Enter minimum player minutes threshold (default: 20): 15
Set minimum player minutes to 15.0

Minutes distribution in dataset (column: PROJ_MINUTES):
  Min: 0.0
  Max: 38.5
  Avg: 18.8
  Players with <15.0 min: 55 of 127

Available teams with games today:
1. ATL
2. BKN
3. CHI
4. GSW
5. IND
6. LAL
7. MIL
8. NYK
9. SAC
10. WAS

Enter team numbers to exclude (comma




Running Monte Carlo NBA DFS Optimizer to generate 200000 lineups
Filtering to top 20 lineups by projected FP
Salary cap: $50,000
Minimum lineup salary: $49,000
Minimum player salary: $3,100
Minimum player minutes: 15.0
Max exposure: 50.0%
Max overlap: 3 players between lineups
Available players: 72 (after minutes filter)
Excluding teams: None. 72 players remaining.
Generating 200000 random lineups...
Generated 50000 lineups so far...
Generated 100000 lineups so far...
Generated 150000 lineups so far...
Generated 200000 lineups so far...
Generated 10543 valid lineups out of 200000 attempts.

--- Lineup #1 ---
Total Projected Points: 281.7
Salary Used: $49,800
  [PG  ] Malik Monk             SAC |  41.6 pts ($8,100) | 35.9 min
  [SG  ] Cameron Payne          NYK |  28.5 pts ($3,100) | 18.0 min
  [SF  ] Zach LaVine            SAC |  39.9 pts ($8,700) | 36.9 min
  [PF  ] Kyle Kuzma             MIL |  35.9 pts ($5,400) | 33.5 min
  [C   ] Alexandre Sarr         WAS |  26.0 pts ($6,700) | 2