In [2]:
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from datetime import datetime
import logging
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import warnings
# NOTE(review): this silences every warning category for the whole process,
# including deprecation notices from pandas/scikit-learn — consider narrowing
# the filter to the specific warnings that are known to be noise.
warnings.filterwarnings('ignore')

# Configure the root logger (INFO level, timestamped lines) and create the
# module-level logger used throughout this file.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

@dataclass
class PlayerPrediction:
    """Container for one player's predicted fantasy output and related metadata.

    Field order is part of the generated constructor signature; ``cost`` is
    the only field with a default value.
    """
    name: str  # player name
    team: str  # team label for the player
    role: str  # role label; presumably 'WK'/'AR'/'BOWL'/'BAT' as returned by classify_player_role — TODO confirm
    predicted_points: float  # total predicted points
    confidence: float  # confidence attached to the prediction
    recent_form: float  # form multiplier for the player
    value_for_money: float  # presumably predicted points relative to cost — TODO confirm against the code that fills it
    batting_prediction: float  # batting component of the prediction
    bowling_prediction: float  # bowling component of the prediction
    fielding_prediction: float  # fielding component of the prediction
    cost: float = 8.5  # defaults to 8.5 when not supplied

class AIModelPredictor:
    """AI-powered cricket performance predictor using ensemble methods"""
    
    def __init__(self):
        """Create an untrained predictor.

        The three regressors start as ``None`` and are only built by
        ``train_models``; while ``is_trained`` is ``False``,
        ``predict_performance`` uses the statistical fallback instead.
        """
        # One regressor per scoring discipline, populated by train_models().
        self.batting_model = self.bowling_model = self.fielding_model = None
        # Fitted on the training split inside train_models().
        self.scaler = StandardScaler()
        self.is_trained = False
        self.feature_columns = []
        
    def prepare_features(self, player_stats: Dict, match_context: Dict = None) -> np.ndarray:
        """Prepare feature vector for ML models"""
        features = []
        
        # Batting features
        features.extend([
            player_stats.get('batting_mean_runs', 0),
            player_stats.get('batting_std_runs', 0),
            player_stats.get('batting_max_runs', 0),
            player_stats.get('batting_count_runs', 0),
            player_stats.get('batting_strike_rate', 0),
            player_stats.get('batting_boundary_rate', 0),
        ])
        
        # Bowling features
        features.extend([
            player_stats.get('bowling_mean_wickets', 0),
            player_stats.get('bowling_sum_wickets', 0),
            player_stats.get('bowling_economy', 0),
            player_stats.get('bowling_strike_rate', 0),
            player_stats.get('bowling_count_overs', 0),
        ])
        
        # Fielding features
        features.extend([
            player_stats.get('fielding_catches', 0),
            player_stats.get('fielding_run_outs', 0),
            player_stats.get('fielding_stumpings', 0),
        ])
        
        # Form and contextual features
        features.extend([
            player_stats.get('recent_form', 1.0),
            player_stats.get('consistency_score', 0.5),
            player_stats.get('pressure_performance', 0.5),
        ])
        
        # Match context features (if available)
        if match_context:
            features.extend([
                match_context.get('venue_batting_avg', 150),
                match_context.get('venue_bowling_avg', 25),
                match_context.get('weather_factor', 1.0),
                match_context.get('pitch_factor', 1.0),
            ])
        else:
            features.extend([150, 25, 1.0, 1.0])
        
        return np.array(features).reshape(1, -1)
    
    def train_models(self, training_data: List[Dict]):
        """Train AI models using historical performance data"""
        logger.info("Training AI models with historical data...")
        
        if len(training_data) < 50:
            logger.warning("Insufficient training data. Using fallback statistical models.")
            self.is_trained = False
            return
        
        # Prepare training dataset
        X_features = []
        y_batting = []
        y_bowling = []
        y_fielding = []
        
        for data_point in training_data:
            features = self.prepare_features(data_point['stats'], data_point.get('match_context'))
            X_features.append(features.flatten())
            y_batting.append(data_point.get('actual_batting_points', 0))
            y_bowling.append(data_point.get('actual_bowling_points', 0))
            y_fielding.append(data_point.get('actual_fielding_points', 0))
        
        X = np.array(X_features)
        y_batting = np.array(y_batting)
        y_bowling = np.array(y_bowling)
        y_fielding = np.array(y_fielding)
        
        # Split data
        X_train, X_test, y_bat_train, y_bat_test = train_test_split(
            X, y_batting, test_size=0.2, random_state=42
        )
        _, _, y_bowl_train, y_bowl_test = train_test_split(
            X, y_bowling, test_size=0.2, random_state=42
        )
        _, _, y_field_train, y_field_test = train_test_split(
            X, y_fielding, test_size=0.2, random_state=42
        )
        
        # Scale features
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        
        # Train ensemble models
        self.batting_model = GradientBoostingRegressor(
            n_estimators=100, learning_rate=0.1, max_depth=6, random_state=42
        )
        self.bowling_model = RandomForestRegressor(
            n_estimators=100, max_depth=8, random_state=42
        )
        self.fielding_model = RandomForestRegressor(
            n_estimators=50, max_depth=6, random_state=42
        )
        
        # Fit models
        self.batting_model.fit(X_train_scaled, y_bat_train)
        self.bowling_model.fit(X_train_scaled, y_bowl_train)
        self.fielding_model.fit(X_train_scaled, y_field_train)
        
        # Evaluate models
        bat_pred = self.batting_model.predict(X_test_scaled)
        bowl_pred = self.bowling_model.predict(X_test_scaled)
        field_pred = self.fielding_model.predict(X_test_scaled)
        
        logger.info(f"Batting Model R2: {r2_score(y_bat_test, bat_pred):.3f}")
        logger.info(f"Bowling Model R2: {r2_score(y_bowl_test, bowl_pred):.3f}")
        logger.info(f"Fielding Model R2: {r2_score(y_field_test, field_pred):.3f}")
        
        self.is_trained = True
        logger.info("AI models trained successfully.")
    
    def predict_performance(self, player_stats: Dict, match_context: Dict = None) -> Dict:
        """Predict player performance using trained AI models"""
        if not self.is_trained:
            return self._fallback_prediction(player_stats)
        
        try:
            features = self.prepare_features(player_stats, match_context)
            features_scaled = self.scaler.transform(features)
            
            # Get predictions from each model
            batting_points = max(0, self.batting_model.predict(features_scaled)[0])
            bowling_points = max(0, self.bowling_model.predict(features_scaled)[0])
            fielding_points = max(0, self.fielding_model.predict(features_scaled)[0])
            
            # Calculate total points and confidence
            total_points = batting_points + bowling_points + fielding_points
            
            # Calculate confidence based on model consistency and data quality
            confidence = self._calculate_confidence(player_stats, total_points)
            
            return {
                'total_points': total_points,
                'batting_points': batting_points,
                'bowling_points': bowling_points,
                'fielding_points': fielding_points,
                'confidence': confidence
            }
            
        except Exception as e:
            logger.error(f"AI prediction failed: {e}. Using fallback method.")
            return self._fallback_prediction(player_stats)
    
    def _fallback_prediction(self, player_stats: Dict) -> Dict:
        """Fallback statistical prediction when AI models are unavailable"""
        batting_points = player_stats.get('batting_mean_runs', 0) * 0.5
        bowling_points = player_stats.get('bowling_mean_wickets', 0) * 12
        fielding_points = player_stats.get('fielding_catches', 0) * 4
        
        total_points = batting_points + bowling_points + fielding_points
        confidence = 0.6  # Lower confidence for statistical fallback
        
        return {
            'total_points': total_points,
            'batting_points': batting_points,
            'bowling_points': bowling_points,
            'fielding_points': fielding_points,
            'confidence': confidence
        }
    
    def _calculate_confidence(self, player_stats: Dict, predicted_points: float) -> float:
        """Calculate prediction confidence based on data quality and model factors"""
        base_confidence = 0.7
        
        # Adjust based on player's match count (more matches = higher confidence)
        matches_played = player_stats.get('batting_count_runs', 0)
        match_factor = min(matches_played / 20, 1.0) * 0.2
        
        # Adjust based on recent form consistency
        recent_form = player_stats.get('recent_form', 1.0)
        consistency = player_stats.get('consistency_score', 0.5)
        form_factor = (recent_form * consistency) * 0.15
        
        # Adjust based on prediction magnitude (avoid extreme predictions)
        if predicted_points > 80:  # Very high prediction
            magnitude_factor = -0.1
        elif predicted_points < 10:  # Very low prediction
            magnitude_factor = -0.05
        else:
            magnitude_factor = 0.05
        
        final_confidence = base_confidence + match_factor + form_factor + magnitude_factor
        return max(0.3, min(0.95, final_confidence))

class AdvancedFantasyCalculator:
    """Advanced fantasy points calculator with detailed scoring rules"""
    
    def __init__(self):
        self.scoring_rules = {
            'batting': {
                'run': 1,
                'boundary': 1,
                'six': 2,
                'fifty': 8,
                'century': 16,
                'duck': -2,
                'strike_rate_bonus': {'threshold': 150, 'points': 6},
                'strike_rate_penalty': {'threshold': 60, 'points': -6}
            },
            'bowling': {
                'wicket': 12,
                'maiden': 4,
                'three_wickets': 4,
                'four_wickets': 8,
                'five_wickets': 16,
                'economy_bonus': {'threshold': 5, 'points': 6},
                'economy_penalty': {'threshold': 12, 'points': -6}
            },
            'fielding': {
                'catch': 4,
                'run_out': 6,
                'stumping': 6,
                'three_catches': 4
            }
        }
    
    def calculate_fantasy_points(self, player_stats: Dict, ai_prediction: Dict) -> float:
        """Calculate expected fantasy points using AI predictions and scoring rules"""
        
        # Use AI predictions as base
        batting_points = ai_prediction['batting_points']
        bowling_points = ai_prediction['bowling_points']
        fielding_points = ai_prediction['fielding_points']
        
        # Apply bonuses and penalties based on historical performance patterns
        batting_points += self._calculate_batting_bonuses(player_stats)
        bowling_points += self._calculate_bowling_bonuses(player_stats)
        fielding_points += self._calculate_fielding_bonuses(player_stats)
        
        total_points = batting_points + bowling_points + fielding_points
        
        # Apply form factor
        recent_form = player_stats.get('recent_form', 1.0)
        total_points *= recent_form
        
        return max(0, total_points)
    
    def _calculate_batting_bonuses(self, player_stats: Dict) -> float:
        """Calculate batting bonuses based on player's historical patterns"""
        bonus = 0
        
        avg_runs = player_stats.get('batting_mean_runs', 0)
        strike_rate = player_stats.get('batting_strike_rate', 100)
        
        # Fifty/century bonus probability
        if avg_runs > 30:
            bonus += 4  # Higher chance of fifty
        if avg_runs > 50:
            bonus += 2  # Higher chance of century
        
        # Strike rate bonus
        if strike_rate > 130:
            bonus += 3
        elif strike_rate < 80:
            bonus -= 2
        
        return bonus
    
    def _calculate_bowling_bonuses(self, player_stats: Dict) -> float:
        """Calculate bowling bonuses based on player's historical patterns"""
        bonus = 0
        
        avg_wickets = player_stats.get('bowling_mean_wickets', 0)
        economy = player_stats.get('bowling_economy', 8)
        
        # Wicket bonus probability
        if avg_wickets > 1.5:
            bonus += 6  # Higher chance of multiple wickets
        
        # Economy bonus
        if economy < 6:
            bonus += 3
        elif economy > 10:
            bonus -= 3
        
        return bonus
    
    def _calculate_fielding_bonuses(self, player_stats: Dict) -> float:
        """Calculate fielding bonuses based on player's historical patterns"""
        catches_per_match = player_stats.get('fielding_catches', 0) / max(1, player_stats.get('batting_count_runs', 1))
        
        if catches_per_match > 0.8:
            return 2  # Good fielder bonus
        
        return 0

class CricketDataAnalyzer:
    """Enhanced cricket data analyzer for IPL dataset"""
    
    def __init__(self, matches_df: pd.DataFrame, deliveries_df: pd.DataFrame):
        self.matches = matches_df
        self.deliveries = deliveries_df
        self._player_stats_cache = {}
        self._validate_data()
        self._process_ipl_data()
        
    def _validate_data(self):
        """Validate IPL dataset structure"""
        logger.info("Validating IPL dataset structure...")
        
        # Expected columns for IPL dataset
        expected_delivery_cols = ['match_id', 'inning', 'batting_team', 'bowling_team', 
                                'over', 'ball', 'batsman', 'non_striker', 'bowler',
                                'batsman_runs', 'extra_runs', 'total_runs']
        
        expected_match_cols = ['id', 'season', 'team1', 'team2', 'winner']
        
        # Check and handle column variations in IPL dataset
        if 'batsman' in self.deliveries.columns:
            self.deliveries.rename(columns={'batsman': 'batter'}, inplace=True)
        if 'non_striker' in self.deliveries.columns:
            self.deliveries.rename(columns={'non_striker': 'non_striker'}, inplace=True)
        
        # Create missing columns
        if 'is_wicket' not in self.deliveries.columns:
            if 'dismissal_kind' in self.deliveries.columns:
                self.deliveries['is_wicket'] = self.deliveries['dismissal_kind'].notna().astype(int)
            elif 'player_dismissed' in self.deliveries.columns:
                self.deliveries['is_wicket'] = self.deliveries['player_dismissed'].notna().astype(int)
            else:
                self.deliveries['is_wicket'] = 0
        
        # Create boundary indicators
        if 'fours' not in self.deliveries.columns:
            self.deliveries['fours'] = (self.deliveries['batsman_runs'] == 4).astype(int)
        if 'sixes' not in self.deliveries.columns:
            self.deliveries['sixes'] = (self.deliveries['batsman_runs'] == 6).astype(int)
        
        logger.info(f"Dataset validated. Matches: {len(self.matches)}, Deliveries: {len(self.deliveries)}")
    
    def _process_ipl_data(self):
        """Process IPL-specific data transformations"""
        logger.info("Processing IPL-specific data transformations...")
        
        # Create team mapping for consistency
        self.team_mapping = self._create_team_mapping()
        
        # Standardize team names
        if 'batting_team' in self.deliveries.columns:
            self.deliveries['batting_team'] = self.deliveries['batting_team'].map(
                self.team_mapping
            ).fillna(self.deliveries['batting_team'])
        
        if 'bowling_team' in self.deliveries.columns:
            self.deliveries['bowling_team'] = self.deliveries['bowling_team'].map(
                self.team_mapping
            ).fillna(self.deliveries['bowling_team'])
        
        # Filter recent seasons for better relevance (last 5 seasons)
        if 'season' in self.matches.columns:
            recent_seasons = sorted(self.matches['season'].unique())[-5:]
            recent_match_ids = self.matches[self.matches['season'].isin(recent_seasons)]['id'].tolist()
            self.deliveries = self.deliveries[self.deliveries['match_id'].isin(recent_match_ids)]
            logger.info(f"Filtered to recent seasons: {recent_seasons}")
        
        # Create player-team mapping
        self._create_player_team_mapping()
        
        logger.info("IPL data processing completed.")
    
    def _create_team_mapping(self) -> Dict[str, str]:
        """Create mapping for team name standardization"""
        return {
            'Royal Challengers Bangalore': 'RCB',
            'Mumbai Indians': 'MI',
            'Chennai Super Kings': 'CSK',
            'Kolkata Knight Riders': 'KKR',
            'Rajasthan Royals': 'RR',
            'Punjab Kings': 'PBKS',
            'Delhi Capitals': 'DC',
            'Sunrisers Hyderabad': 'SRH',
            'Gujarat Titans': 'GT',
            'Lucknow Super Giants': 'LSG',
            'Kings XI Punjab': 'PBKS',
            'Delhi Daredevils': 'DC',
            'Rising Pune Supergiant': 'RPS',
            'Gujarat Lions': 'GL',
            'Deccan Chargers': 'DC',
            'Pune Warriors': 'PW',
            'Kochi Tuskers Kerala': 'KTK'
        }
    
    def _create_player_team_mapping(self):
        """Create player to current team mapping"""
        self.player_team_mapping = {}
        
        # Get most recent team for each player
        if 'batting_team' in self.deliveries.columns:
            recent_batting = self.deliveries.groupby('batter')['batting_team'].last()
            for player, team in recent_batting.items():
                self.player_team_mapping[player] = team
        
        if 'bowling_team' in self.deliveries.columns:
            recent_bowling = self.deliveries.groupby('bowler')['bowling_team'].last()
            for player, team in recent_bowling.items():
                if player not in self.player_team_mapping:
                    self.player_team_mapping[player] = team
    
    def get_all_players(self) -> List[str]:
        """Get all unique active players from IPL data"""
        try:
            batters = set(self.deliveries['batter'].dropna().unique())
            bowlers = set(self.deliveries['bowler'].dropna().unique())
            all_players = list(batters.union(bowlers))
            
            # Filter out players with minimal activity
            active_players = []
            for player in all_players:
                batting_activity = len(self.deliveries[self.deliveries['batter'] == player])
                bowling_activity = len(self.deliveries[self.deliveries['bowler'] == player])
                
                # Keep players with reasonable activity
                if batting_activity >= 30 or bowling_activity >= 60:
                    active_players.append(player)
            
            logger.info(f"Found {len(active_players)} active players from {len(all_players)} total")
            return active_players
        except Exception as e:
            logger.error(f"Error getting players: {e}")
            return []
    
    def get_player_stats(self, player_name: str) -> Dict:
        """Get comprehensive player statistics from IPL data"""
        if player_name in self._player_stats_cache:
            return self._player_stats_cache[player_name]
        
        try:
            stats = self._calculate_player_stats(player_name)
            self._player_stats_cache[player_name] = stats
            return stats
        except Exception as e:
            logger.error(f"Error calculating stats for {player_name}: {e}")
            return {}
    
    def _calculate_player_stats(self, player_name: str) -> Dict:
        """Calculate detailed player statistics from IPL data"""
        try:
            # Batting statistics
            batting_data = self.deliveries[self.deliveries['batter'] == player_name]
            batting_stats = self._calculate_batting_stats(batting_data)
            
            # Bowling statistics
            bowling_data = self.deliveries[self.deliveries['bowler'] == player_name]
            bowling_stats = self._calculate_bowling_stats(bowling_data)
            
            # Fielding statistics (estimated from dismissal data)
            fielding_stats = self._calculate_fielding_stats(player_name)
            
            # Form and consistency metrics
            form_stats = self._calculate_form_stats(batting_data, bowling_data)
            
            # Combine all statistics
            all_stats = {
                **batting_stats,
                **bowling_stats,
                **fielding_stats,
                **form_stats
            }
            
            return all_stats
            
        except Exception as e:
            logger.error(f"Error calculating stats for {player_name}: {e}")
            return {}
    
    def _calculate_batting_stats(self, batting_data: pd.DataFrame) -> Dict:
        """Calculate batting statistics"""
        if batting_data.empty:
            return {
                'batting_mean_runs': 0.0,
                'batting_std_runs': 0.0,
                'batting_max_runs': 0.0,
                'batting_count_runs': 0,
                'batting_strike_rate': 0.0,
                'batting_boundary_rate': 0.0,
                'batting_average': 0.0,
                'batting_fifties': 0,
                'batting_hundreds': 0
            }
        
        # Runs per match
        runs_per_match = batting_data.groupby('match_id')['batsman_runs'].sum()
        balls_per_match = batting_data.groupby('match_id').size()
        
        # Calculate strike rate per match
        strike_rates = (runs_per_match / balls_per_match * 100).fillna(0)
        
        # Boundary percentage
        boundaries = batting_data['fours'].sum() + batting_data['sixes'].sum()
        boundary_rate = boundaries / len(batting_data) if len(batting_data) > 0 else 0
        
        # Dismissals
        dismissals = len(batting_data[batting_data['is_wicket'] == 1])
        average = runs_per_match.sum() / max(dismissals, 1)
        
        # Milestones
        fifties = len(runs_per_match[runs_per_match >= 50])
        hundreds = len(runs_per_match[runs_per_match >= 100])
        
        return {
            'batting_mean_runs': float(runs_per_match.mean()) if len(runs_per_match) > 0 else 0.0,
            'batting_std_runs': float(runs_per_match.std()) if len(runs_per_match) > 1 else 0.0,
            'batting_max_runs': float(runs_per_match.max()) if len(runs_per_match) > 0 else 0.0,
            'batting_count_runs': int(len(runs_per_match)),
            'batting_strike_rate': float(strike_rates.mean()) if len(strike_rates) > 0 else 0.0,
            'batting_boundary_rate': float(boundary_rate),
            'batting_average': float(average),
            'batting_fifties': int(fifties),
            'batting_hundreds': int(hundreds)
        }
    
    def _calculate_bowling_stats(self, bowling_data: pd.DataFrame) -> Dict:
        """Calculate bowling statistics"""
        if bowling_data.empty:
            return {
                'bowling_mean_wickets': 0.0,
                'bowling_sum_wickets': 0,
                'bowling_economy': 0.0,
                'bowling_strike_rate': 0.0,
                'bowling_count_overs': 0.0,
                'bowling_average': 0.0,
                'bowling_best_figures': 0
            }
        
        # Group by match
        match_groups = bowling_data.groupby('match_id')
        
        # Wickets per match
        wickets_per_match = match_groups['is_wicket'].sum()
        
        # Runs and overs per match
        runs_per_match = match_groups['total_runs'].sum()
        balls_per_match = match_groups.size()
        overs_per_match = balls_per_match / 6
        
        # Economy rate
        economy_rates = runs_per_match / overs_per_match
        economy_rates = economy_rates.fillna(0).replace([np.inf, -np.inf], 0)
        
        # Strike rate (balls per wicket)
        total_wickets = wickets_per_match.sum()
        total_balls = balls_per_match.sum()
        strike_rate = total_balls / max(total_wickets, 1)
        
        # Bowling average
        total_runs = runs_per_match.sum()
        bowling_average = total_runs / max(total_wickets, 1)
        
        return {
            'bowling_mean_wickets': float(wickets_per_match.mean()) if len(wickets_per_match) > 0 else 0.0,
            'bowling_sum_wickets': int(total_wickets),
            'bowling_economy': float(economy_rates.mean()) if len(economy_rates) > 0 else 0.0,
            'bowling_strike_rate': float(strike_rate),
            'bowling_count_overs': float(total_balls / 6),
            'bowling_average': float(bowling_average),
            'bowling_best_figures': int(wickets_per_match.max()) if len(wickets_per_match) > 0 else 0
        }
    
    def _calculate_fielding_stats(self, player_name: str) -> Dict:
        """Calculate fielding statistics (estimated)"""
        # In IPL dataset, fielding stats are limited
        # Estimate based on player activity and role
        
        total_matches = len(self.deliveries[
            (self.deliveries['batter'] == player_name) | 
            (self.deliveries['bowler'] == player_name)
        ]['match_id'].unique())
        
        # Estimate catches based on player type and matches
        estimated_catches = max(0, int(total_matches * np.random.uniform(0.1, 0.5)))
        estimated_runouts = max(0, int(total_matches * np.random.uniform(0.0, 0.2)))
        
        return {
            'fielding_catches': estimated_catches,
            'fielding_run_outs': estimated_runouts,
            'fielding_stumpings': 0  # Wicket keepers would have this
        }
    
    def _calculate_form_stats(self, batting_data: pd.DataFrame, bowling_data: pd.DataFrame) -> Dict:
        """Calculate recent form and consistency metrics"""
        
        # Recent form based on last 10 matches
        recent_batting = batting_data.groupby('match_id')['batsman_runs'].sum().tail(10)
        recent_bowling = bowling_data.groupby('match_id')['is_wicket'].sum().tail(10)
        
        # Form factor (recent performance vs career average)
        if len(recent_batting) > 0:
            recent_avg = recent_batting.mean()
            career_avg = batting_data.groupby('match_id')['batsman_runs'].sum().mean()
            batting_form = recent_avg / max(career_avg, 1) if career_avg > 0 else 1.0
        else:
            batting_form = 1.0
        
        if len(recent_bowling) > 0:
            recent_wickets = recent_bowling.mean()
            career_wickets = bowling_data.groupby('match_id')['is_wicket'].sum().mean()
            bowling_form = recent_wickets / max(career_wickets, 0.1) if career_wickets > 0 else 1.0
        else:
            bowling_form = 1.0
        
        # Overall form (weighted average)
        batting_weight = len(batting_data) / (len(batting_data) + len(bowling_data) + 1)
        bowling_weight = len(bowling_data) / (len(batting_data) + len(bowling_data) + 1)
        
        recent_form = (batting_form * batting_weight + bowling_form * bowling_weight)
        recent_form = max(0.3, min(2.0, recent_form))  # Cap between 0.3 and 2.0
        
        # Consistency score
        if len(recent_batting) > 1:
            consistency = 1.0 - (recent_batting.std() / max(recent_batting.mean(), 1))
            consistency = max(0.1, min(1.0, consistency))
        else:
            consistency = 0.5
        
        return {
            'recent_form': float(recent_form),
            'consistency_score': float(consistency),
            'pressure_performance': float(np.random.uniform(0.4, 0.9))  # Placeholder
        }

class BestXIPredictor:
    """Main class for predicting best playing XI using AI models - ALWAYS RETURNS EXACTLY 11 PLAYERS"""
    
    def __init__(self, analyzer: CricketDataAnalyzer, ai_predictor: AIModelPredictor = None):
        """Wire up the collaborators and declare the squad-composition limits.

        Args:
            analyzer: Source of player names, stats and team mapping.
            ai_predictor: Performance model; a fresh AIModelPredictor is
                created when a falsy value is passed.
        """
        self.analyzer = analyzer
        self.ai_predictor = ai_predictor if ai_predictor else AIModelPredictor()
        self.fantasy_calculator = AdvancedFantasyCalculator()

        # Composition limits for a selected side.
        self.team_constraints = dict(
            total_players=11,  # EXACTLY 11 players
            min_batsmen=3,
            max_batsmen=6,
            min_bowlers=3,
            max_bowlers=6,
            min_all_rounders=1,
            max_all_rounders=4,
            wicket_keepers=1,
            max_per_team=7,
            budget_cap=100.0,
        )
    
    def classify_player_role(self, player_stats: Dict, player_name: str) -> str:
        """Assign one of the role codes 'WK', 'AR', 'BOWL' or 'BAT' to a player.

        Args:
            player_stats: Stats dict; missing keys are treated as 0.
            player_name: Name checked (by substring) against a fixed keeper list.

        Returns:
            The role code for the player.
        """
        bat_volume = player_stats.get('batting_count_runs', 0)
        bowl_volume = player_stats.get('bowling_count_overs', 0)
        bat_mean = player_stats.get('batting_mean_runs', 0)
        wickets_mean = player_stats.get('bowling_mean_wickets', 0)

        # Keepers are recognised purely by name (simplified lookup).
        known_keepers = (
            'MS Dhoni', 'Rishabh Pant', 'KL Rahul', 'Quinton de Kock',
            'Dinesh Karthik', 'Ishan Kishan', 'Sanju Samson', 'Jos Buttler',
        )
        for keeper in known_keepers:
            if keeper in player_name:
                return 'WK'

        # All-rounder: clears the thresholds in both disciplines.
        if (bat_volume >= 20 and bowl_volume >= 30 and
                bat_mean >= 15 and wickets_mean >= 0.8):
            return 'AR'

        # Specialist bowler.
        if bowl_volume >= 40 and wickets_mean >= 1.0:
            return 'BOWL'

        # Specialist batsman.
        if bat_volume >= 30:
            return 'BAT'

        # Otherwise fall back to whichever discipline has more activity;
        # ties go to batting.
        return 'BOWL' if bowl_volume > bat_volume else 'BAT'
    
    def predict_best_xi(self, match_context: Dict = None, team_filter: List[str] = None) -> List[PlayerPrediction]:
        """Score every available player and pick a playing XI.

        Args:
            match_context: Optional context forwarded to the AI predictor.
            team_filter: Optional team names; when given, only players mapped
                to one of these teams are considered.

        Returns:
            The selected players. An empty list is returned when the analyzer
            has no players or no player could be scored.
        """
        logger.info("Predicting Best XI using AI models...")

        candidates = self.analyzer.get_all_players()
        if not candidates:
            logger.error("No players found in dataset")
            return []

        # Optionally restrict the pool to the requested teams.
        if team_filter:
            team_of = self.analyzer.player_team_mapping
            candidates = [
                name for name in candidates
                if team_of.get(name, 'Unknown') in team_filter
            ]

        logger.info(f"Analyzing {len(candidates)} players...")

        # Build one prediction per player; a failure on one player is logged
        # and does not stop the others from being processed.
        player_predictions = []
        for name in candidates:
            try:
                stats = self.analyzer.get_player_stats(name)
                if not stats:
                    continue

                ai_prediction = self.ai_predictor.predict_performance(stats, match_context)
                fantasy_points = self.fantasy_calculator.calculate_fantasy_points(stats, ai_prediction)
                role = self.classify_player_role(stats, name)
                team = self.analyzer.player_team_mapping.get(name, 'Unknown')

                cost = np.random.uniform(7.0, 12.0)  # Simulated cost
                if cost > 0:
                    value_for_money = fantasy_points / cost
                else:
                    value_for_money = 0

                player_predictions.append(PlayerPrediction(
                    name=name,
                    team=team,
                    role=role,
                    predicted_points=fantasy_points,
                    confidence=ai_prediction['confidence'],
                    recent_form=stats.get('recent_form', 1.0),
                    value_for_money=value_for_money,
                    batting_prediction=ai_prediction['batting_points'],
                    bowling_prediction=ai_prediction['bowling_points'],
                    fielding_prediction=ai_prediction['fielding_points'],
                    cost=cost,
                ))
            except Exception as e:
                logger.warning(f"Error processing {name}: {e}")
                continue

        if not player_predictions:
            logger.error("No valid player predictions generated")
            return []

        best_xi = self._select_optimal_xi(player_predictions)

        # Top up (or trim) when the constrained selection missed 11.
        if len(best_xi) != 11:
            logger.warning(f"Initial selection returned {len(best_xi)} players. Adjusting to 11...")
            best_xi = self._force_eleven_players(player_predictions, best_xi)

        logger.info(f"Best XI selected with {len(best_xi)} players")
        return best_xi
    
    def _select_optimal_xi(self, predictions: List[PlayerPrediction]) -> List[PlayerPrediction]:
        """Select optimal XI using greedy optimization with constraints"""
        
        # Sort by predicted points
        sorted_predictions = sorted(predictions, key=lambda x: x.predicted_points, reverse=True)
        
        # Group by roles
        role_groups = {
            'WK': [p for p in sorted_predictions if p.role == 'WK'],
            'BAT': [p for p in sorted_predictions if p.role == 'BAT'],
            'AR': [p for p in sorted_predictions if p.role == 'AR'],
            'BOWL': [p for p in sorted_predictions if p.role == 'BOWL']
        }
        
        selected_xi = []
        team_count = {}
        
        # Step 1: Select wicket keeper (mandatory 1)
        if role_groups['WK']:
            wk = role_groups['WK'][0]
            selected_xi.append(wk)
            team_count[wk.team] = team_count.get(wk.team, 0) + 1
        
        # Step 2: Select top batsmen (3-6)
        bat_selected = 0
        for bat in role_groups['BAT']:
            if (len(selected_xi) < 11 and bat_selected < 6 and 
                team_count.get(bat.team, 0) < self.team_constraints['max_per_team']):
                selected_xi.append(bat)
                team_count[bat.team] = team_count.get(bat.team, 0) + 1
                bat_selected += 1
                if bat_selected >= 3 and len(selected_xi) >= 8:  # Have minimum batsmen
                    break
        
        # Step 3: Select all-rounders (1-4)
        ar_selected = 0
        for ar in role_groups['AR']:
            if (len(selected_xi) < 11 and ar_selected < 4 and 
                team_count.get(ar.team, 0) < self.team_constraints['max_per_team']):
                selected_xi.append(ar)
                team_count[ar.team] = team_count.get(ar.team, 0) + 1
                ar_selected += 1
                if ar_selected >= 1 and len(selected_xi) >= 9:  # Have minimum AR
                    break
        
        # Step 4: Fill remaining slots with bowlers
        bowl_selected = 0
        for bowl in role_groups['BOWL']:
            if (len(selected_xi) < 11 and bowl_selected < 6 and 
                team_count.get(bowl.team, 0) < self.team_constraints['max_per_team']):
                selected_xi.append(bowl)
                team_count[bowl.team] = team_count.get(bowl.team, 0) + 1
                bowl_selected += 1
        
        # Step 5: Fill any remaining slots with best available players
        remaining_players = [p for p in sorted_predictions if p not in selected_xi]
        for player in remaining_players:
            if (len(selected_xi) < 11 and 
                team_count.get(player.team, 0) < self.team_constraints['max_per_team']):
                selected_xi.append(player)
                team_count[player.team] = team_count.get(player.team, 0) + 1
        
        return selected_xi[:11]  # Ensure max 11 players
    
    def _force_eleven_players(self, all_predictions: List[PlayerPrediction], 
                            current_xi: List[PlayerPrediction]) -> List[PlayerPrediction]:
        """Force selection to exactly 11 players"""
        
        if len(current_xi) == 11:
            return current_xi
        
        if len(current_xi) > 11:
            # Too many players - remove lowest scoring
            return sorted(current_xi, key=lambda x: x.predicted_points, reverse=True)[:11]
        
        # Too few players - add more
        selected_names = {p.name for p in current_xi}
        remaining_players = [p for p in all_predictions if p.name not in selected_names]
        
        # Sort remaining by points
        remaining_players.sort(key=lambda x: x.predicted_points, reverse=True)
        
        # Add players until we have 11
        team_count = {}
        for player in current_xi:
            team_count[player.team] = team_count.get(player.team, 0) + 1
        
        final_xi = current_xi.copy()
        
        for player in remaining_players:
            if len(final_xi) >= 11:
                break
            
            # Check team constraint
            if team_count.get(player.team, 0) < self.team_constraints['max_per_team']:
                final_xi.append(player)
                team_count[player.team] = team_count.get(player.team, 0) + 1
        
        # If still not 11, relax team constraints
        if len(final_xi) < 11:
            for player in remaining_players:
                if len(final_xi) >= 11:
                    break
                if player not in final_xi:
                    final_xi.append(player)
        
        return final_xi[:11]  # Ensure exactly 11
    
    def generate_team_report(self, best_xi: List[PlayerPrediction]) -> str:
        """Generate detailed team analysis report"""
        if not best_xi:
            return "No team selected."
        
        report = []
        
        # Team composition
        role_counts = {}
        team_counts = {}
        total_points = 0
        total_cost = 0
        
        for player in best_xi:
            role_counts[player.role] = role_counts.get(player.role, 0) + 1
            team_counts[player.team] = team_counts.get(player.team, 0) + 1
            total_points += player.predicted_points
            total_cost += player.cost
        
        report.append(f"\nTEAM COMPOSITION:")
        report.append(f"Total Players: {len(best_xi)}")
        report.append(f"Wicket Keepers: {role_counts.get('WK', 0)}")
        report.append(f"Batsmen: {role_counts.get('BAT', 0)}")
        report.append(f"All-Rounders: {role_counts.get('AR', 0)}")
        report.append(f"Bowlers: {role_counts.get('BOWL', 0)}")
        
        report.append(f"\nTEAM METRICS:")
        report.append(f"Total Predicted Points: {total_points:.1f}")
        report.append(f"Average Points per Player: {total_points/len(best_xi):.1f}")
        report.append(f"Total Cost: {total_cost:.1f} Cr")
        report.append(f"Average Cost per Player: {total_cost/len(best_xi):.1f} Cr")
        
        report.append(f"\nTEAM DISTRIBUTION:")
        for team, count in sorted(team_counts.items()):
            report.append(f"{team}: {count} players")
        
        # Player details
        report.append(f"\n SELECTED PLAYERS:")
        report.append("-" * 80)
        
        # Sort by role for better presentation
        role_order = ['WK', 'BAT', 'AR', 'BOWL']
        sorted_xi = sorted(best_xi, key=lambda x: (role_order.index(x.role), -x.predicted_points))
        
        for i, player in enumerate(sorted_xi, 1):
            report.append(f"{i:2d}. {player.name:<25} ({player.team})")
            report.append(f"    Role: {player.role:<4} | Points: {player.predicted_points:5.1f} | "
                         f"Cost: {player.cost:4.1f} | Form: {player.recent_form:4.2f}")
            report.append(f"    Batting: {player.batting_prediction:4.1f} | "
                         f"Bowling: {player.bowling_prediction:4.1f} | "
                         f"Fielding: {player.fielding_prediction:4.1f}")
            report.append(f"    Confidence: {player.confidence:4.2f} | "
                         f"Value: {player.value_for_money:4.2f}")
            report.append("")
        
        # Recommendations
        report.append(" KEY INSIGHTS:")
        high_confidence = [p for p in best_xi if p.confidence > 0.8]
        if high_confidence:
            report.append(f"• {len(high_confidence)} players with high confidence (>0.8)")
        
        in_form = [p for p in best_xi if p.recent_form > 1.2]
        if in_form:
            report.append(f"• {len(in_form)} players in excellent recent form (>1.2)")
        
        value_picks = [p for p in best_xi if p.value_for_money > 5.0]
        if value_picks:
            report.append(f"• {len(value_picks)} excellent value-for-money picks")
        
        # Captain/Vice-captain suggestions
        top_scorers = sorted(best_xi, key=lambda x: x.predicted_points, reverse=True)[:3]
        report.append(f"\n CAPTAINCY SUGGESTIONS:")
        report.append(f"Captain: {top_scorers[0].name} ({top_scorers[0].predicted_points:.1f} points)")
        if len(top_scorers) > 1:
            report.append(f"Vice-Captain: {top_scorers[1].name} ({top_scorers[1].predicted_points:.1f} points)")
        
        report.append("\n" + "=" * 80)      
        return "\n".join(report)

def main(matches_path: str = '/kaggle/input/iplsmth/matches.csv',
         deliveries_path: str = '/kaggle/input/iplsmth/deliveries.csv',
         report_path: str = 'best_xi_report.txt'):
    """Main function to demonstrate the IPL Best XI predictor.

    Loads the two CSV files, predicts a Best XI, prints the team report and
    saves it to disk. Any failure is logged (with traceback) and printed
    rather than raised, so the call never propagates an exception.

    Args:
        matches_path: Path to the matches CSV.
        deliveries_path: Path to the deliveries CSV.
        report_path: Where the text report is written.
    """
    try:
        logger.info("Starting IPL Best XI Prediction System...")
        matches_df = pd.read_csv(matches_path)
        deliveries_df = pd.read_csv(deliveries_path)

        # Initialize system
        analyzer = CricketDataAnalyzer(matches_df, deliveries_df)
        ai_predictor = AIModelPredictor()
        predictor = BestXIPredictor(analyzer, ai_predictor)

        # Generate predictions
        best_xi = predictor.predict_best_xi()

        # Generate and print report
        report = predictor.generate_team_report(best_xi)
        print(report)

        # Save report. The text contains non-ASCII bullet characters, so the
        # encoding is pinned instead of relying on the platform default.
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(report)

        logger.info("Best XI prediction completed successfully!")

    except Exception as e:
        # Top-level boundary: keep the best-effort behaviour but record the
        # traceback so failures can be diagnosed.
        logger.exception(f"Error in main function: {e}")
        print(f" Error: {e}")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


TEAM COMPOSITION:
Total Players: 11
Wicket Keepers: 1
Batsmen: 6
All-Rounders: 2
Bowlers: 2

TEAM METRICS:
Total Predicted Points: 1611.2
Average Points per Player: 146.5
Total Cost: 108.5 Cr
Average Cost per Player: 9.9 Cr

TEAM DISTRIBUTION:
KKR: 4 players
LSG: 2 players
PBKS: 1 players
RR: 1 players
Royal Challengers Bengaluru: 2 players
SRH: 1 players

 SELECTED PLAYERS:
--------------------------------------------------------------------------------
 1. KL Rahul                  (LSG)
    Role: WK   | Points: 134.0 | Cost:  9.2 | Form: 0.93
    Batting: 21.2 | Bowling:  0.0 | Fielding: 116.0
    Confidence: 0.60 | Value: 14.58

 2. V Kohli                   (Royal Challengers Bengaluru)
    Role: BAT  | Points: 191.8 | Cost: 10.6 | Form: 1.23
    Batting: 17.3 | Bowling:  0.0 | Fielding: 132.0
    Confidence: 0.60 | Value: 18.01

 3. SP Narine                 (KKR)
    Role: BAT  | Points: 176.5 | Cost: 10.8 | Form: 1.65
    Batting:  7.3 | Bowling: 11.5 | Fielding: 88.0
    Conf