In [None]:
# PROJECT: PREMIER LEAGUE WINNER PREDICTION
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

print("‚öΩ PREMIER LEAGUE 2025-26 WINNER PREDICTION")
print("=" * 50)

#historical Premier League data (2010-2024)
# using realistic stats for top teams
league_data = {
    'season': [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024],
    'winner': ['Chelsea', 'Man United', 'Man City', 'Man United', 'Man City', 'Leicester', 'Chelsea', 'Man City', 'Man City', 'Liverpool', 'Liverpool', 'Man City', 'Man City', 'Man City', 'Arsenal'],
    'man_city_points': [71, 71, 89, 78, 86, 66, 78, 100, 98, 81, 86, 93, 93, 89, 91],
    'man_city_goals': [60, 60, 93, 66, 102, 71, 80, 106, 95, 95, 83, 83, 99, 94, 96],
    'liverpool_points': [58, 52, 61, 84, 62, 60, 76, 75, 97, 99, 69, 69, 92, 67, 82],
    'liverpool_goals': [59, 47, 47, 101, 52, 63, 78, 84, 89, 85, 68, 68, 94, 75, 86],
    'arsenal_points': [68, 68, 73, 79, 75, 71, 75, 63, 70, 56, 61, 61, 69, 84, 89],
    'arsenal_goals': [72, 72, 74, 68, 71, 65, 77, 74, 73, 56, 56, 55, 61, 88, 91],
    'chelsea_points': [71, 71, 75, 82, 87, 50, 93, 70, 72, 66, 67, 67, 74, 44, 63],
    'chelsea_goals': [69, 69, 65, 75, 73, 59, 85, 62, 63, 69, 69, 58, 76, 38, 77],
    'man_united_points': [80, 89, 89, 64, 70, 66, 69, 81, 66, 66, 74, 74, 58, 75, 60],
    'man_united_goals': [78, 78, 89, 64, 62, 49, 54, 68, 65, 66, 66, 73, 57, 58, 57],
    'transfer_budget': [120, 130, 150, 140, 160, 110, 180, 200, 190, 170, 160, 180, 200, 190, 210],  # in millions
    'manager_experience': [3, 4, 5, 6, 7, 2, 8, 9, 10, 8, 9, 10, 11, 12, 13]  # years
}

club_df = pd.DataFrame(league_data)
print("üìä HISTORICAL PREMIER LEAGUE DATA (2010-2024):")
print(club_df[['season', 'winner', 'man_city_points', 'liverpool_points', 'arsenal_points']].head(8))

‚öΩ PREMIER LEAGUE 2025-26 WINNER PREDICTION
üìä HISTORICAL PREMIER LEAGUE DATA (2010-2024):
   season      winner  man_city_points  liverpool_points  arsenal_points
0    2010     Chelsea               71                58              68
1    2011  Man United               71                52              68
2    2012    Man City               89                61              73
3    2013  Man United               78                84              79
4    2014    Man City               86                62              75
5    2015   Leicester               66                60              71
6    2016     Chelsea               78                76              75
7    2017    Man City              100                75              63


In [None]:
# STEP 1: FEATURES FOR PREDICTION
print("\nüéØ FEATURE ENGINEERING")
print("=" * 30)

# Creating features for prediction
# We'll predict which team wins based on their previous season performance + other factors

# Prepare data for each team separately (we'll create multiple rows per season)
teams_data = []

for idx, row in club_df.iterrows():
    # Manchester City
    teams_data.append({
        'team': 'Man City',
        'season': row['season'],
        'points': row['man_city_points'],
        'goals': row['man_city_goals'],
        'won_league': 1 if row['winner'] == 'Man City' else 0,
        'transfer_budget': row['transfer_budget'],
        'manager_exp': row['manager_experience'],
        'previous_points': row['man_city_points']  # Simplified - in real scenario would be previous season
    })

    # Liverpool
    teams_data.append({
        'team': 'Liverpool',
        'season': row['season'],
        'points': row['liverpool_points'],
        'goals': row['liverpool_goals'],
        'won_league': 1 if row['winner'] == 'Liverpool' else 0,
        'transfer_budget': row['transfer_budget'],
        'manager_exp': row['manager_experience'],
        'previous_points': row['liverpool_points']
    })

    # Arsenal
    teams_data.append({
        'team': 'Arsenal',
        'season': row['season'],
        'points': row['arsenal_points'],
        'goals': row['arsenal_goals'],
        'won_league': 1 if row['winner'] == 'Arsenal' else 0,
        'transfer_budget': row['transfer_budget'],
        'manager_exp': row['manager_experience'],
        'previous_points': row['arsenal_points']
    })

    # Chelsea
    teams_data.append({
        'team': 'Chelsea',
        'season': row['season'],
        'points': row['chelsea_points'],
        'goals': row['chelsea_goals'],
        'won_league': 1 if row['winner'] == 'Chelsea' else 0,
        'transfer_budget': row['transfer_budget'],
        'manager_exp': row['manager_experience'],
        'previous_points': row['chelsea_points']
    })

    # Manchester United
    teams_data.append({
        'team': 'Man United',
        'season': row['season'],
        'points': row['man_united_points'],
        'goals': row['man_united_goals'],
        'won_league': 1 if row['winner'] == 'Man United' else 0,
        'transfer_budget': row['transfer_budget'],
        'manager_exp': row['manager_experience'],
        'previous_points': row['man_united_points']
    })

teams_df = pd.DataFrame(teams_data)
print(f"üìà TEAMS DATA SHAPE: {teams_df.shape}")
print(f"üèÜ LEAGUE WINS BY TEAM:")
print(teams_df.groupby('team')['won_league'].sum())


üéØ FEATURE ENGINEERING
üìà TEAMS DATA SHAPE: (75, 8)
üèÜ LEAGUE WINS BY TEAM:
team
Arsenal       1
Chelsea       2
Liverpool     2
Man City      7
Man United    2
Name: won_league, dtype: int64


In [None]:
# STEP 2: TRAIN THE PREDICTION MODEL
print("\nü§ñ TRAINING PREMIER LEAGUE PREDICTION MODEL")
print("=" * 45)

# Prepare features and target
teams_df['team_encoded'] = teams_df['team'].map({
    'Man City': 0, 'Liverpool': 1, 'Arsenal': 2, 'Chelsea': 3, 'Man United': 4
})

features = ['points', 'goals', 'transfer_budget', 'manager_exp', 'previous_points', 'team_encoded']
X = teams_df[features]
y = teams_df['won_league']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

accuracy = model.score(X_test, y_test)
print(f"üéØ MODEL ACCURACY: {accuracy:.1%}")

# Feature importance
feature_importance = pd.DataFrame({
    'feature': features,
    'importance': model.feature_importances_
}).sort_values('importance', ascending=False)

print(f"\nüìä MOST IMPORTANT FACTORS FOR WINNING:")
print(feature_importance)


ü§ñ TRAINING PREMIER LEAGUE PREDICTION MODEL
üéØ MODEL ACCURACY: 86.7%

üìä MOST IMPORTANT FACTORS FOR WINNING:
           feature  importance
0           points    0.274136
4  previous_points    0.253385
1            goals    0.186485
3      manager_exp    0.099319
5     team_encoded    0.099193
2  transfer_budget    0.087482


In [None]:
# STEP 3: PREDICT 2025-26 PREMIER LEAGUE WINNER!
print("\nüîÆ PREDICTING 2025-26 PREMIER LEAGUE WINNER")
print("=" * 50)

# Create 2025-26 season predictions for each team
# Using realistic projections based on recent trends

teams_2025 = [
    # [points, goals, transfer_budget, manager_exp, previous_points, team_encoded]
    [93, 98, 220, 15, 91, 0],   # Man City - strong continuation
    [88, 92, 180, 8, 82, 1],    # Liverpool - improving
    [86, 90, 160, 5, 89, 2],    # Arsenal - maintaining form
    [78, 80, 200, 4, 63, 3],    # Chelsea - rebuilding
    [75, 78, 190, 3, 60, 4]     # Man United - recovery
]

team_names = ['Manchester City', 'Liverpool', 'Arsenal', 'Chelsea', 'Manchester United']

print("üèÜ 2025-26 SEASON PREDICTIONS:")
predictions_2025 = []

for i, team_data in enumerate(teams_2025):
    team_df = pd.DataFrame([team_data], columns=features)

    win_probability = model.predict_proba(team_df)[0][1]  # Probability of winning

    predictions_2025.append({
        'team': team_names[i],
        'win_probability': win_probability
    })

    print(f"\n{team_names[i]}:")
    print(f"   üìà Win Probability: {win_probability:.1%}")
    print(f"   ‚öΩ Projected Points: {team_data[0]}")
    print(f"   ü•Ö Projected Goals: {team_data[1]}")
    print(f"   üí∞ Transfer Budget: ¬£{team_data[2]}M")

# Find predicted winner
predicted_winner = max(predictions_2025, key=lambda x: x['win_probability'])
print(f"\nüéâ PREDICTED 2025-26 PREMIER LEAGUE WINNER: {predicted_winner['team']}")
print(f"   üèÜ Confidence: {predicted_winner['win_probability']:.1%}")


üîÆ PREDICTING 2025-26 PREMIER LEAGUE WINNER
üèÜ 2025-26 SEASON PREDICTIONS:

Manchester City:
   üìà Win Probability: 60.0%
   ‚öΩ Projected Points: 93
   ü•Ö Projected Goals: 98
   üí∞ Transfer Budget: ¬£220M

Liverpool:
   üìà Win Probability: 27.0%
   ‚öΩ Projected Points: 88
   ü•Ö Projected Goals: 92
   üí∞ Transfer Budget: ¬£180M

Arsenal:
   üìà Win Probability: 20.0%
   ‚öΩ Projected Points: 86
   ü•Ö Projected Goals: 90
   üí∞ Transfer Budget: ¬£160M

Chelsea:
   üìà Win Probability: 7.0%
   ‚öΩ Projected Points: 78
   ü•Ö Projected Goals: 80
   üí∞ Transfer Budget: ¬£200M

Manchester United:
   üìà Win Probability: 23.0%
   ‚öΩ Projected Points: 75
   ü•Ö Projected Goals: 78
   üí∞ Transfer Budget: ¬£190M

üéâ PREDICTED 2025-26 PREMIER LEAGUE WINNER: Manchester City
   üèÜ Confidence: 60.0%


In [None]:
print("\nüìä FOOTBALL ANALYTICS INSIGHTS")
print("=" * 35)

# Historical analysis
print("üìà HISTORICAL PERFORMANCE:")

historical_wins=teams_df.groupby('team')['won_league'].sum()
for team,wins in historical_wins.items():
    print(f"   ‚Ä¢ {team}: {wins} titles")


print(f"\n‚öΩ AVERAGE POINTS OF CHAMPIONS:")
champion_points = teams_df[teams_df['won_league'] == 1]['points'].mean()
print(f"   ‚Ä¢ Champions average: {champion_points:.1f} points")

print(f"\nüéØ WHAT IT TAKES TO WIN THE PREMIER LEAGUE:")
print(f"   ‚Ä¢ Most important factor: {feature_importance.iloc[0]['feature']}")
print(f"   ‚Ä¢ Typical champion points: {champion_points:.0f}+")
print(f"   ‚Ä¢ Key: Consistent high points + strong transfer budget")

print(f"\nüí° PREDICTION METHODOLOGY:")
print("1. Analyzed 15 years of Premier League data")
print("2. Considered points, goals, transfer budgets, manager experience")
print("3. Trained ML model on historical patterns")
print("4. Projected 2025-26 team performances")
print(f"5. Model accuracy: {accuracy:.1%} on historical data")


üìä FOOTBALL ANALYTICS INSIGHTS
üìà HISTORICAL PERFORMANCE:
   ‚Ä¢ Arsenal: 1 titles
   ‚Ä¢ Chelsea: 2 titles
   ‚Ä¢ Liverpool: 2 titles
   ‚Ä¢ Man City: 7 titles
   ‚Ä¢ Man United: 2 titles

‚öΩ AVERAGE POINTS OF CHAMPIONS:
   ‚Ä¢ Champions average: 87.3 points

üéØ WHAT IT TAKES TO WIN THE PREMIER LEAGUE:
   ‚Ä¢ Most important factor: points
   ‚Ä¢ Typical champion points: 87+
   ‚Ä¢ Key: Consistent high points + strong transfer budget

üí° PREDICTION METHODOLOGY:
1. Analyzed 15 years of Premier League data
2. Considered points, goals, transfer budgets, manager experience
3. Trained ML model on historical patterns
4. Projected 2025-26 team performances
5. Model accuracy: 86.7% on historical data


In [None]:
# STEP 5: WHAT-IF SCENARIOS
print("\nüîç WHAT-IF SCENARIOS")
print("=" * 25)

# What if teams perform differently?
scenarios = [
    ("Man City has bad season", [85, 85, 220, 15, 91, 0]),
    ("Liverpool gets huge investment", [88, 92, 250, 8, 82, 1]),
    ("Arsenal continues improvement", [90, 95, 160, 5, 89, 2])
]

print("ALTERNATIVE SCENARIOS:")
for scenario_name, scenario_data in scenarios:
    scenario_df = pd.DataFrame([scenario_data], columns=features)
    win_prob = model.predict_proba(scenario_df)[0][1]
    print(f"   ‚Ä¢ {scenario_name}: {win_prob:.1%} chance")--


üîç WHAT-IF SCENARIOS
ALTERNATIVE SCENARIOS:
   ‚Ä¢ Man City has bad season: 30.0% chance
   ‚Ä¢ Liverpool gets huge investment: 25.0% chance
   ‚Ä¢ Arsenal continues improvement: 49.0% chance
