In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict
import random
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

class CSVBasedPLPredictor:
    def __init__(self, csv_file_path):

        self.csv_file_path = "PremierLeague.csv"
        self.team_stats = defaultdict(dict)
        self.head_to_head_stats = defaultdict(dict)
        self.model = None
        self.label_encoders = {}
        self.team_form = defaultdict(list)
        self.fixtures = []
        
    def load_data(self):
        
        print("Loading data from CSV...")
        self.data = pd.read_csv(self.csv_file_path)
        
        
        self.data.columns = [col.strip() for col in self.data.columns]
        
        if 'Date' in self.data.columns:
            self.data['Date'] = pd.to_datetime(self.data['Date'], errors='coerce')
        
        essential_cols = ['HomeTeam', 'AwayTeam', 'FullTimeHomeTeamGoals', 'FullTimeAwayTeamGoals']
        self.data = self.data.dropna(subset=essential_cols)
        
        print(f"Loaded {len(self.data)} matches")
        print(f"Date range: {self.data['Date'].min()} to {self.data['Date'].max()}")
        print(f"Unique teams: {len(set(self.data['HomeTeam'].unique()) | set(self.data['AwayTeam'].unique()))}")
        
        return self.data
    
    def calculate_team_statistics(self, seasons_to_include=5):
        
        print("Calculating team statistics...")
        
        recent_data = self.data.sort_values('Date').tail(380 * seasons_to_include)  # Last N seasons
        
        for _, match in recent_data.iterrows():
            home_team = match['HomeTeam']
            away_team = match['AwayTeam']
            home_goals = match['FullTimeHomeTeamGoals']
            away_goals = match['FullTimeAwayTeamGoals']
            
            
            for team in [home_team, away_team]:
                if team not in self.team_stats:
                    self.team_stats[team] = {
                        'matches': 0, 'wins': 0, 'draws': 0, 'losses': 0,
                        'goals_for': 0, 'goals_against': 0, 'points': 0,
                        'home_matches': 0, 'home_wins': 0, 'home_goals_for': 0, 'home_goals_against': 0,
                        'away_matches': 0, 'away_wins': 0, 'away_goals_for': 0, 'away_goals_against': 0,
                        'recent_form': []  # Last 6 matches
                    }
            
            
            self.team_stats[home_team]['matches'] += 1
            self.team_stats[home_team]['home_matches'] += 1
            self.team_stats[home_team]['goals_for'] += home_goals
            self.team_stats[home_team]['goals_against'] += away_goals
            self.team_stats[home_team]['home_goals_for'] += home_goals
            self.team_stats[home_team]['home_goals_against'] += away_goals
            
            
            self.team_stats[away_team]['matches'] += 1
            self.team_stats[away_team]['away_matches'] += 1
            self.team_stats[away_team]['goals_for'] += away_goals
            self.team_stats[away_team]['goals_against'] += home_goals
            self.team_stats[away_team]['away_goals_for'] += away_goals
            self.team_stats[away_team]['away_goals_against'] += home_goals
            
            
            if home_goals > away_goals:  
                self.team_stats[home_team]['wins'] += 1
                self.team_stats[home_team]['home_wins'] += 1
                self.team_stats[home_team]['points'] += 3
                self.team_stats[away_team]['losses'] += 1
                
                
                self.team_stats[home_team]['recent_form'].append('W')
                self.team_stats[away_team]['recent_form'].append('L')
                
            elif away_goals > home_goals:  
                self.team_stats[away_team]['wins'] += 1
                self.team_stats[away_team]['away_wins'] += 1
                self.team_stats[away_team]['points'] += 3
                self.team_stats[home_team]['losses'] += 1
                
                
                self.team_stats[away_team]['recent_form'].append('W')
                self.team_stats[home_team]['recent_form'].append('L')
                
            else:  
                self.team_stats[home_team]['draws'] += 1
                self.team_stats[home_team]['points'] += 1
                self.team_stats[away_team]['draws'] += 1
                self.team_stats[away_team]['points'] += 1
                
                
                self.team_stats[home_team]['recent_form'].append('D')
                self.team_stats[away_team]['recent_form'].append('D')
            
        
            for team in [home_team, away_team]:
                if len(self.team_stats[team]['recent_form']) > 6:
                    self.team_stats[team]['recent_form'] = self.team_stats[team]['recent_form'][-6:]
        
    
        for team, stats in self.team_stats.items():
            if stats['matches'] > 0:
                stats['points_per_game'] = stats['points'] / stats['matches']
                stats['goals_per_game'] = stats['goals_for'] / stats['matches']
                stats['goals_against_per_game'] = stats['goals_against'] / stats['matches']
                stats['goal_difference'] = stats['goals_for'] - stats['goals_against']
                
                if stats['home_matches'] > 0:
                    stats['home_goals_per_game'] = stats['home_goals_for'] / stats['home_matches']
                    stats['home_goals_against_per_game'] = stats['home_goals_against'] / stats['home_matches']
                
                if stats['away_matches'] > 0:
                    stats['away_goals_per_game'] = stats['away_goals_for'] / stats['away_matches']
                    stats['away_goals_against_per_game'] = stats['away_goals_against'] / stats['away_matches']
                

                form_points = sum([3 if x == 'W' else 1 if x == 'D' else 0 for x in stats['recent_form']])
                stats['form_score'] = form_points / max(len(stats['recent_form']), 1) if stats['recent_form'] else 0
    
    def calculate_head_to_head(self):

        print("Calculating head-to-head statistics...")
        
        for _, match in self.data.iterrows():
            home_team = match['HomeTeam']
            away_team = match['AwayTeam']
            home_goals = match['FullTimeHomeTeamGoals']
            away_goals = match['FullTimeAwayTeamGoals']
            

            h2h_key = f"{home_team}_vs_{away_team}"
            
            if h2h_key not in self.head_to_head_stats:
                self.head_to_head_stats[h2h_key] = {
                    'matches': 0, 'home_wins': 0, 'draws': 0, 'away_wins': 0,
                    'home_goals': 0, 'away_goals': 0
                }
            
            self.head_to_head_stats[h2h_key]['matches'] += 1
            self.head_to_head_stats[h2h_key]['home_goals'] += home_goals
            self.head_to_head_stats[h2h_key]['away_goals'] += away_goals
            
            if home_goals > away_goals:
                self.head_to_head_stats[h2h_key]['home_wins'] += 1
            elif away_goals > home_goals:
                self.head_to_head_stats[h2h_key]['away_wins'] += 1
            else:
                self.head_to_head_stats[h2h_key]['draws'] += 1
    
    def update_teams_for_2025_26_season(self):
     
        relegated_teams = ['Leicester City', 'Ipswich Town', 'Southampton']
        
        # need to work on bias against promoted teams
        promoted_teams = ['Leeds United', 'Burnley', 'Sunderland']
        
        print(f"Updating for 2025-26 season:")
        print(f"Relegated (REMOVING): {', '.join(relegated_teams)}")
        print(f"Promoted (ADDING): {', '.join(promoted_teams)}")
        
        
        recent_teams = set(self.data['HomeTeam'].tail(380).unique()) | set(self.data['AwayTeam'].tail(380).unique())
        
    
        relegated_alternatives = ['Leicester City', 'Leicester', 'Ipswich Town', 'Ipswich', 'Southampton', 'Southampton FC']
        remaining_pl_teams = [team for team in recent_teams if team not in relegated_alternatives]
        
        if len(remaining_pl_teams) > 17:
            remaining_pl_teams = remaining_pl_teams[:17]

        updated_teams = remaining_pl_teams + promoted_teams
        
        print(f"\nFinal 2025-26 Premier League teams ({len(updated_teams)}):")
        print(f"Continuing teams: {len(remaining_pl_teams)}")
        print(f"Promoted teams: {len(promoted_teams)}")
        
        if len(updated_teams) != 20:
            print(f"⚠️  Warning: Found {len(updated_teams)} teams instead of 20")
    
            if len(updated_teams) > 20:
                excess = len(updated_teams) - 20
                remaining_pl_teams = remaining_pl_teams[:-excess]
                updated_teams = remaining_pl_teams + promoted_teams
        
        return updated_teams

    def analyze_promoted_teams_history(self):
        
        
        promoted_teams = ['Leeds United', 'Burnley', 'Sunderland']
        
        print("\n" + "="*60)
        print("PROMOTED TEAMS - HISTORICAL PL PERFORMANCE ANALYSIS")
        print("="*60)
        
        for team in promoted_teams:
            print(f"\n📊 {team.upper()}:")
            print("-" * 40)
            
            team_matches = self.data[
                (self.data['HomeTeam'] == team) | (self.data['AwayTeam'] == team)
            ].copy()
            
            if len(team_matches) == 0:
                print(f"❌ No historical PL data found for {team}")
                print("📋 Using default promoted team statistics")
                
                self.team_stats[team] = {
                    'matches': 38, 'wins': 12, 'draws': 9, 'losses': 17,
                    'goals_for': 48, 'goals_against': 62, 'points': 45,
                    'home_matches': 19, 'home_wins': 8, 'home_goals_for': 28, 'home_goals_against': 28,
                    'away_matches': 19, 'away_wins': 4, 'away_goals_for': 20, 'away_goals_against': 34,
                    'recent_form': ['L', 'D', 'W', 'L', 'D', 'L'],
                    'points_per_game': 1.18,
                    'goals_per_game': 1.26,
                    'goals_against_per_game': 1.63,
                    'goal_difference': -14,
                    'home_goals_per_game': 1.47,
                    'home_goals_against_per_game': 1.47,
                    'away_goals_per_game': 1.05,
                    'away_goals_against_per_game': 1.79,
                    'form_score': 1.33  
                }
                continue
            
            
            team_matches['Date'] = pd.to_datetime(team_matches['Date'])
            recent_seasons = team_matches.sort_values('Date').tail(76)  
            
            total_matches = len(recent_seasons)
            wins = draws = losses = 0
            goals_for = goals_against = 0
            home_wins = away_wins = 0
            home_goals_for = home_goals_against = 0
            away_goals_for = away_goals_against = 0
            
            for _, match in recent_seasons.iterrows():
                if match['HomeTeam'] == team:
            
                    home_goals = match['FullTimeHomeTeamGoals']
                    away_goals = match['FullTimeAwayTeamGoals']
                    goals_for += home_goals
                    goals_against += away_goals
                    home_goals_for += home_goals
                    home_goals_against += away_goals
                    
                    if home_goals > away_goals:
                        wins += 1
                        home_wins += 1
                    elif home_goals < away_goals:
                        losses += 1
                    else:
                        draws += 1
                else:
    
                    home_goals = match['FullTimeHomeTeamGoals']
                    away_goals = match['FullTimeAwayTeamGoals']
                    goals_for += away_goals
                    goals_against += home_goals
                    away_goals_for += away_goals
                    away_goals_against += home_goals
                    
                    if away_goals > home_goals:
                        wins += 1
                        away_wins += 1
                    elif away_goals < home_goals:
                        losses += 1
                    else:
                        draws += 1
            
    
            points = wins * 3 + draws
            ppg = points / total_matches if total_matches > 0 else 0
            gpg = goals_for / total_matches if total_matches > 0 else 0
            gapg = goals_against / total_matches if total_matches > 0 else 0
            
            print(f"✅ Found historical data: {total_matches} matches")
            print(f"Record: {wins}W-{draws}D-{losses}L")
            print(f"Goals: {goals_for} for, {goals_against} against ({goals_for-goals_against:+})")
            print(f"Points per game: {ppg:.2f}")
            print(f"Goals per game: {gpg:.2f}")
            print(f"Goals against per game: {gapg:.2f}")
            
    
            if total_matches > 0:
                home_matches = len(recent_seasons[recent_seasons['HomeTeam'] == team])
                away_matches = len(recent_seasons[recent_seasons['AwayTeam'] == team])
                
                self.team_stats[team] = {
                    'matches': total_matches,
                    'wins': wins, 'draws': draws, 'losses': losses,
                    'goals_for': goals_for, 'goals_against': goals_against,
                    'points': points,
                    'home_matches': home_matches, 'home_wins': home_wins,
                    'home_goals_for': home_goals_for, 'home_goals_against': home_goals_against,
                    'away_matches': away_matches, 'away_wins': away_wins,
                    'away_goals_for': away_goals_for, 'away_goals_against': away_goals_against,
                    'recent_form': ['L', 'D', 'W', 'L', 'D', 'L'],  # Default promoted team form
                    'points_per_game': ppg,
                    'goals_per_game': gpg,
                    'goals_against_per_game': gapg,
                    'goal_difference': goals_for - goals_against,
                    'home_goals_per_game': home_goals_for / home_matches if home_matches > 0 else 1.4,
                    'home_goals_against_per_game': home_goals_against / home_matches if home_matches > 0 else 1.5,
                    'away_goals_per_game': away_goals_for / away_matches if away_matches > 0 else 1.0,
                    'away_goals_against_per_game': away_goals_against / away_matches if away_matches > 0 else 1.8,
                    'form_score': 1.33  # Slightly above relegation form
                }
    
    def create_features(self, home_team, away_team):
        
        features = []
        
    
        home_stats = self.team_stats.get(home_team, {})
        away_stats = self.team_stats.get(away_team, {})
        
        # Basic stats
        features.extend([
            home_stats.get('points_per_game', 0),
            away_stats.get('points_per_game', 0),
            home_stats.get('goals_per_game', 0),
            away_stats.get('goals_per_game', 0),
            home_stats.get('goals_against_per_game', 0),
            away_stats.get('goals_against_per_game', 0),
        ])
        
        # Home/Away specific stats
        features.extend([
            home_stats.get('home_goals_per_game', 0),
            home_stats.get('home_goals_against_per_game', 0),
            away_stats.get('away_goals_per_game', 0),
            away_stats.get('away_goals_against_per_game', 0),
        ])
        
        
        features.extend([
            home_stats.get('form_score', 0),
            away_stats.get('form_score', 0),
        ])
        
     
        h2h_key = f"{home_team}_vs_{away_team}"
        h2h_stats = self.head_to_head_stats.get(h2h_key, {})
        
        if h2h_stats.get('matches', 0) > 0:
            features.extend([
                h2h_stats['home_wins'] / h2h_stats['matches'],
                h2h_stats['draws'] / h2h_stats['matches'],
                h2h_stats['away_wins'] / h2h_stats['matches'],
                h2h_stats['home_goals'] / h2h_stats['matches'],
                h2h_stats['away_goals'] / h2h_stats['matches'],
            ])
        else:
            features.extend([0.33, 0.33, 0.33, 1.5, 1.5])  # Default values
        
        return features
    
    def prepare_training_data(self):
    
        print("Preparing training data...")
        
        X, y = [], []

        training_data = self.data.sort_values('Date').tail(380 * 3)
        
        for _, match in training_data.iterrows():
            home_team = match['HomeTeam']
            away_team = match['AwayTeam']
            home_goals = match['FullTimeHomeTeamGoals']
            away_goals = match['FullTimeAwayTeamGoals']
            
            
            features = self.create_features(home_team, away_team)
            
            
            if home_goals > away_goals:
                target = 2  # Home win
            elif away_goals > home_goals:
                target = 0  # Away win
            else:
                target = 1  # Draw
            
            X.append(features)
            y.append(target)
        
        return np.array(X), np.array(y)
    
    def train_model(self):
        """Fit a random forest on the prepared data and report its quality.

        The data from ``prepare_training_data`` is split 80/20 with a fixed
        seed; a 100-tree ``RandomForestClassifier`` is stored in
        ``self.model`` and fitted on the larger part.  Accuracy on the
        held-out part and the five most important features are printed.
        """
        print("Training prediction model...")

        features, labels = self.prepare_training_data()

        # Hold out a fifth of the matches for evaluation.
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.2, random_state=42
        )

        # Random forest with a fixed seed for repeatable training.
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.model.fit(X_train, y_train)

        # Accuracy on the held-out matches.
        accuracy = accuracy_score(y_test, self.model.predict(X_test))
        print(f"Model accuracy: {accuracy:.3f}")

        # Labels in the same order create_features() emits its values.
        feature_names = [
            'Home_PPG', 'Away_PPG', 'Home_GPG', 'Away_GPG', 'Home_GAPG', 'Away_GAPG',
            'Home_Home_GPG', 'Home_Home_GAPG', 'Away_Away_GPG', 'Away_Away_GAPG',
            'Home_Form', 'Away_Form', 'H2H_Home_Win%', 'H2H_Draw%', 'H2H_Away_Win%',
            'H2H_Home_Goals', 'H2H_Away_Goals'
        ]

        ranked = sorted(
            zip(feature_names, self.model.feature_importances_),
            key=lambda pair: pair[1],
            reverse=True,
        )

        print("\nTop 5 most important features:")
        for name, weight in ranked[:5]:
            print(f"{name}: {weight:.3f}")
    
    def predict_match(self, home_team, away_team):
    
        features = np.array([self.create_features(home_team, away_team)])
        
        # Get probabilities
        probabilities = self.model.predict_proba(features)[0]
        away_win_prob, draw_prob, home_win_prob = probabilities
        
        # Predict result
        prediction = self.model.predict(features)[0]
        result_map = {0: 'Away Win', 1: 'Draw', 2: 'Home Win'}
        
        return {
            'prediction': result_map[prediction],
            'home_win_prob': home_win_prob,
            'draw_prob': draw_prob,
            'away_win_prob': away_win_prob,
            'confidence': max(probabilities)
        }
    
    def generate_season_fixtures(self, teams=None):

        if teams is None:
            
            recent_teams = set(self.data['HomeTeam'].tail(380).unique()) | set(self.data['AwayTeam'].tail(380).unique())
            teams = list(recent_teams)[:20]  # Take top 20 most recent
        
        fixtures = []
        for home_team in teams:
            for away_team in teams:
                if home_team != away_team:
                    fixtures.append((home_team, away_team))
        
        random.shuffle(fixtures)
        self.fixtures = fixtures
        return fixtures
    
    def simulate_season(self, teams=None):
    
        print("Simulating season...")
        
        if not self.fixtures:
            self.generate_season_fixtures(teams)
        
        results = []
        table = defaultdict(lambda: {'matches': 0, 'wins': 0, 'draws': 0, 'losses': 0, 
                                   'goals_for': 0, 'goals_against': 0, 'points': 0})
        
        for home_team, away_team in self.fixtures:
            prediction = self.predict_match(home_team, away_team)
            
            
            home_avg = self.team_stats.get(home_team, {}).get('home_goals_per_game', 1.3)
            away_avg = self.team_stats.get(away_team, {}).get('away_goals_per_game', 1.1)
            
    
            home_goals = max(0, int(np.random.poisson(home_avg * (1 + random.uniform(-0.3, 0.3)))))
            away_goals = max(0, int(np.random.poisson(away_avg * (1 + random.uniform(-0.3, 0.3)))))
            
            
            if prediction['home_win_prob'] > 0.6:
                home_goals += random.choice([0, 1])
            elif prediction['away_win_prob'] > 0.6:
                away_goals += random.choice([0, 1])
            

            table[home_team]['matches'] += 1
            table[away_team]['matches'] += 1
            table[home_team]['goals_for'] += home_goals
            table[home_team]['goals_against'] += away_goals
            table[away_team]['goals_for'] += away_goals
            table[away_team]['goals_against'] += home_goals
            
            if home_goals > away_goals:
                table[home_team]['wins'] += 1
                table[home_team]['points'] += 3
                table[away_team]['losses'] += 1
                result = 'H'
            elif away_goals > home_goals:
                table[away_team]['wins'] += 1
                table[away_team]['points'] += 3
                table[home_team]['losses'] += 1
                result = 'A'
            else:
                table[home_team]['draws'] += 1
                table[home_team]['points'] += 1
                table[away_team]['draws'] += 1
                table[away_team]['points'] += 1
                result = 'D'
            
            results.append({
                'home_team': home_team,
                'away_team': away_team,
                'home_goals': home_goals,
                'away_goals': away_goals,
                'result': result,
                'prediction': prediction
            })
        
     
        final_table = []
        for team, stats in table.items():
            stats['team'] = team
            stats['goal_difference'] = stats['goals_for'] - stats['goals_against']
            final_table.append(stats)
        
        final_table.sort(key=lambda x: (x['points'], x['goal_difference'], x['goals_for']), reverse=True)
        
        return results, final_table
    
    def run_multiple_simulations(self, num_sims=10000, teams=None):
        
        print(f"Running {num_sims} season simulations...")
        
        position_counts = defaultdict(lambda: defaultdict(int))
        points_totals = defaultdict(list)
        champion_counts = defaultdict(int)
        top4_counts = defaultdict(int)
        top6_counts = defaultdict(int)
        relegation_counts = defaultdict(int)
        
        
        for sim in range(num_sims):
            if sim % 1000 == 0 and sim > 0:
                print(f"Completed {sim}/{num_sims} simulations...")
            
            
            _, table = self.simulate_season(teams)
            
        
            for pos, team_data in enumerate(table, 1):
                team = team_data['team']
                points = team_data['points']
                

                position_counts[team][pos] += 1
                
                
                points_totals[team].append(points)
                
                
                if pos == 1:
                    champion_counts[team] += 1
                if pos <= 4:
                    top4_counts[team] += 1
                if pos <= 6:
                    top6_counts[team] += 1
                if pos >= 18:
                    relegation_counts[team] += 1
        
        print(f"Completed all {num_sims} simulations!")
        
    
        self.display_simulation_results(position_counts, points_totals, champion_counts, 
                                      top4_counts, top6_counts, relegation_counts, num_sims)
        
        return position_counts, points_totals
    
    def display_simulation_results(self, position_counts, points_totals, champion_counts, 
                                 top4_counts, top6_counts, relegation_counts, num_sims):
        
        
        print(f"\n{'='*80}")
        print(f"PREMIER LEAGUE 2025-26 SIMULATION RESULTS ({num_sims:,} simulations)")
        print(f"{'='*80}")
        
    
        print("\n🏆 CHAMPIONSHIP PROBABILITIES:")
        print("-" * 50)
        sorted_champions = sorted(champion_counts.items(), key=lambda x: x[1], reverse=True)
        for team, count in sorted_champions:
            if count > 0:
                percentage = (count / num_sims) * 100
                print(f"{team:<20}: {percentage:6.2f}% ({count:,} times)")
        
    
        print("\n🔵 CHAMPIONS LEAGUE (TOP 4) PROBABILITIES:")
        print("-" * 50)
        sorted_top4 = sorted(top4_counts.items(), key=lambda x: x[1], reverse=True)
        for team, count in sorted_top4:
            if count > 0:
                percentage = (count / num_sims) * 100
                print(f"{team:<20}: {percentage:6.2f}% ({count:,} times)")
        
        
        print("\n🇪🇺 EUROPEAN COMPETITION (TOP 6) PROBABILITIES:")
        print("-" * 50)
        sorted_top6 = sorted(top6_counts.items(), key=lambda x: x[1], reverse=True)
        for team, count in sorted_top6:
            if count > 0:
                percentage = (count / num_sims) * 100
                print(f"{team:<20}: {percentage:6.2f}% ({count:,} times)")
        
    
        print("\n⬇️  RELEGATION PROBABILITIES:")
        print("-" * 50)
        sorted_relegation = sorted(relegation_counts.items(), key=lambda x: x[1], reverse=True)
        for team, count in sorted_relegation:
            if count > 0:
                percentage = (count / num_sims) * 100
                print(f"{team:<20}: {percentage:6.2f}% ({count:,} times)")
        
        
        print("\n📊 EXPECTED FINAL POSITIONS & POINTS:")
        print("-" * 80)
        print(f"{'Team':<20} {'Avg Pos':<8} {'Most Likely':<12} {'Avg Points':<12} {'Points Range':<15}")
        print("-" * 80)
        
        team_expectations = []
        for team in points_totals.keys():
            
            total_positions = 0
            total_sims = 0
            most_common_pos = 0
            max_count = 0
            
            for pos, count in position_counts[team].items():
                total_positions += pos * count
                total_sims += count
                if count > max_count:
                    max_count = count
                    most_common_pos = pos
            
            avg_position = total_positions / total_sims if total_sims > 0 else 20
            avg_points = np.mean(points_totals[team])
            min_points = min(points_totals[team])
            max_points = max(points_totals[team])
            
            team_expectations.append({
                'team': team,
                'avg_pos': avg_position,
                'most_common_pos': most_common_pos,
                'avg_points': avg_points,
                'min_points': min_points,
                'max_points': max_points
            })
        
    
        team_expectations.sort(key=lambda x: x['avg_pos'])
        
        for team_data in team_expectations:
            print(f"{team_data['team']:<20} "
                  f"{team_data['avg_pos']:<8.1f} "
                  f"{team_data['most_common_pos']:<12} "
                  f"{team_data['avg_points']:<12.1f} "
                  f"{team_data['min_points']}-{team_data['max_points']}")

    def display_table(self, table):
        """Print a league table with qualification and relegation markers.

        Args:
            table: Ordered list of per-team dicts as returned by
                ``simulate_season`` (first entry is first place).
        """
        rule = "=" * 80
        print("\n" + rule)
        print("PREMIER LEAGUE 2025-26 PREDICTED TABLE (SINGLE SIMULATION)")
        print(rule)
        print(f"{'Pos':<4} {'Team':<20} {'P':<3} {'W':<3} {'D':<3} {'L':<3} {'GF':<4} {'GA':<4} {'GD':<4} {'Pts':<4}")
        print("-" * 80)

        single_slot_markers = {5: " (EL)", 6: " (ECL)"}

        for position, row in enumerate(table, start=1):
            # Top four, then two single-place slots, then the bottom zone.
            if position <= 4:
                marker = " (CL)"
            elif position >= 18:
                marker = " (REL)"
            else:
                marker = single_slot_markers.get(position, "")

            print(f"{position:<4} {row['team']:<20} {row['matches']:<3} {row['wins']:<3} "
                  f"{row['draws']:<3} {row['losses']:<3} {row['goals_for']:<4} "
                  f"{row['goals_against']:<4} {row['goal_difference']:+4} "
                  f"{row['points']:<4}{marker}")


if __name__ == "__main__":
    # Build the predictor and derive every statistic from the CSV.
    predictor = CSVBasedPLPredictor('PremierLeague.csv')
    predictor.load_data()
    predictor.calculate_team_statistics()
    predictor.calculate_head_to_head()

    # Swap relegated clubs for promoted ones and give the newcomers stats.
    teams_2025_26 = predictor.update_teams_for_2025_26_season()
    predictor.analyze_promoted_teams_history()

    print(f"\n 2025-26 PREMIER LEAGUE TEAMS ({len(teams_2025_26)} teams):")
    print("=" * 60)

    promoted_teams = ['Leeds United', 'Burnley', 'Sunderland']
    continuing_teams = [team for team in teams_2025_26 if team not in promoted_teams]

    print(f"\n📋 CONTINUING TEAMS ({len(continuing_teams)}):")
    for i, team in enumerate(sorted(continuing_teams), start=1):
        print(f"{i:2d}. {team}")

    # Promoted clubs continue the numbering after the continuing clubs.
    print(f"\n⬆️  PROMOTED TEAMS ({len(promoted_teams)}):")
    for i, team in enumerate(promoted_teams, start=len(continuing_teams) + 1):
        print(f"{i:2d}. {team} (PROMOTED)")

    print(f"\n❌ RELEGATED TEAMS (NOT IN 2025-26):")
    print("18. Leicester City (relegated)")
    print("19. Ipswich Town (relegated)")
    print("20. Southampton (relegated)")

    # Fit the classifier, then show one example forecast.
    predictor.train_model()

    result = predictor.predict_match('Arsenal', 'Leeds United')
    print(f"\n🔮 SAMPLE PREDICTION - Arsenal vs Leeds United:")
    print(f"Prediction: {result['prediction']}")
    print(f"Confidence: {result['confidence']:.2f}")
    print(f"Probabilities - Home: {result['home_win_prob']:.2f}, Draw: {result['draw_prob']:.2f}, Away: {result['away_win_prob']:.2f}")

    # One full simulated season and its table.
    season_results, final_table = predictor.simulate_season(teams_2025_26)
    predictor.display_table(final_table)

    print("\n" + "=" * 60)
    print("⚠️  2025-26 SEASON SIMULATION COMPLETE ⚠️")
    print("✅ Includes: Leeds United, Burnley, Sunderland (promoted)")
    print("❌ Excludes: Leicester City, Ipswich Town, Southampton (relegated)")
    print("=" * 60)

    # Optional Monte Carlo run over many seasons.
    choice = input("\nRun multiple simulations for 2025-26 season? (y/n): ").lower().strip()
    if choice in ('y', 'yes'):
        num_sims = input("How many simulations? (default 10,000): ").strip()
        # Anything other than plain digits falls back to the default.
        if num_sims.isdigit():
            num_sims = int(num_sims)
        else:
            num_sims = 10000

        # Very large runs need an explicit confirmation.
        if num_sims >= 50000:
            print(f"⚠️  {num_sims:,} simulations will take a while...")
            confirm = input("Continue? (y/n): ").lower().strip()
            if confirm not in ('y', 'yes'):
                num_sims = 10000
                print("Defaulting to 10,000 simulations")

        predictor.run_multiple_simulations(num_sims, teams_2025_26)

Loading data from CSV...
Loaded 12160 matches
Date range: 1993-08-14 00:00:00 to 2025-05-25 00:00:00
Unique teams: 51
Calculating team statistics...
Calculating head-to-head statistics...
Updating for 2025-26 season:
Relegated (REMOVING): Leicester City, Ipswich Town, Southampton
Promoted (ADDING): Leeds United, Burnley, Sunderland

Final 2025-26 Premier League teams (20):
Continuing teams: 17
Promoted teams: 3

PROMOTED TEAMS - HISTORICAL PL PERFORMANCE ANALYSIS

📊 LEEDS UNITED:
----------------------------------------
❌ No historical PL data found for Leeds United
📋 Using default promoted team statistics

📊 BURNLEY:
----------------------------------------
✅ Found historical data: 76 matches
Record: 12W-23D-41L
Goals: 75 for, 131 against (-56)
Points per game: 0.78
Goals per game: 0.99
Goals against per game: 1.72

📊 SUNDERLAND:
----------------------------------------
✅ Found historical data: 76 matches
Record: 15W-18D-43L
Goals: 77 for, 131 against (-54)
Points per game: 0.83
Goals