# Import Libraries and Load Models

In [18]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import warnings
import os
warnings.filterwarnings('ignore')

print("Loading models and preprocessing objects...")

# Directory holding every pickled artifact. Defaults to the original absolute
# location; set the UCL_MODEL_DIR environment variable to run from elsewhere.
MODEL_DIR = os.environ.get(
    'UCL_MODEL_DIR', '/Users/nazmul/Desktop/Project/ucl/models'
)


def _load_pickle(filename):
    """Unpickle and return the object stored in ``MODEL_DIR/filename``.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserializing,
    so MODEL_DIR must only ever contain files from a trusted source.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    with open(os.path.join(MODEL_DIR, filename), 'rb') as f:
        return pickle.load(f)


# Load models
rf_model = _load_pickle('random_forest_model.pkl')
knn_model = _load_pickle('knn_model.pkl')
xgb_model = _load_pickle('xgboost_model.pkl')

# Load preprocessing objects
scaler = _load_pickle('scaler.pkl')
team_encoder = _load_pickle('team_encoder.pkl')
league_encoder = _load_pickle('league_encoder.pkl')
# Mapping read below for the keys 'rf_weight', 'knn_weight' and 'xgb_weight'.
ensemble_config = _load_pickle('ensemble_config.pkl')

# Store models in dictionary for easy access (keys are the names accepted by
# predict_probabilities).
best_models = {
    'RandomForest': rf_model,
    'KNN': knn_model,
    'XGBoost': xgb_model
}

print(" All models and objects loaded successfully!")
print("\nEnsemble Weights:")
print(f"   Random Forest: {ensemble_config['rf_weight']:.4f}")
print(f"   KNN: {ensemble_config['knn_weight']:.4f}")
print(f"   XGBoost: {ensemble_config['xgb_weight']:.4f}")


Loading models and preprocessing objects...
 All models and objects loaded successfully!

Ensemble Weights:
   Random Forest: 0.3350
   KNN: 0.3313
   XGBoost: 0.3337


# Helper Functions

In [19]:

def engineer_features(df):
    """Return a copy of ``df`` extended with a league label and rate features.

    The input must provide the columns ``team``, ``match_played``, ``wins``,
    ``draws``, ``losts``, ``goals_scored``, ``goals_conceded``, ``gd`` and
    ``group_point``. The caller's frame is never modified.
    """
    league_rosters = {
        "LaLiga": ["Barcelona", "Real Madrid", "Atletico Madrid", "Valencia",
                   "Real Sociedad", "Villarreal", "Sevilla"],
        "Premier": ["Manchester United", "Chelsea", "Liverpool", "Tottenham Hotspur",
                    "Manchester City", "Arsenal"],
        "Bundesliga": ["Bayern Munich", "Borussia Dortmund", "Bayer Leverkusen",
                       "Borussia Monchengladbach", "RB Leipzig"],
        "Ligue1": ["Paris Saint-Germain", "Monaco", "Marseille", "Lyon", "Lille"],
        "SerieA": ["Juventus", "Lazio", "Napoli", "Inter Milan", "Atalanta", "AC Milan"],
        "Misc": ["Ajax", "Porto", "Benfica", "Sporting CP", "PSV Eindhoven", "Copenhagen"]
    }

    out = df.copy()

    # Every club starts as 'Misc'; clubs found in a roster are relabelled.
    out['league'] = 'Misc'
    for league_name, clubs in league_rosters.items():
        is_member = out['team'].isin(clubs)
        out.loc[is_member, 'league'] = league_name

    played = out['match_played']
    conceded = out['goals_conceded']

    # Plain per-match rates.
    out['win_ratio'] = out['wins'].div(played)
    out['loss_ratio'] = out['losts'].div(played)
    out['draw_ratio'] = out['draws'].div(played)
    out['goals_scored_per_match'] = out['goals_scored'].div(played)
    out['goals_conceded_per_match'] = conceded.div(played)

    # The +1 avoids a zero denominator when the underlying count is 0.
    out['win_to_loss_ratio'] = out['wins'].div(out['losts'] + 1)
    out['goal_efficiency'] = out['wins'].div(out['goals_scored'] + 1)
    out['defensive_strength'] = played.div(conceded + 1)

    out['points_per_match'] = out['group_point'].div(played)
    out['goal_difference_per_match'] = out['gd'].div(played)
    out['clean_sheet_potential'] = (played - conceded).div(played)

    # Weighted blend of three of the rates computed above.
    out['dominance_score'] = (
        out['win_ratio'] * 0.4
        + out['goal_difference_per_match'] * 0.3
        + out['points_per_match'] * 0.3
    )

    return out

def _first_index_lookup(labels):
    """Map each label to the position of its first occurrence in ``labels``."""
    lookup = {}
    for code, label in enumerate(labels):
        lookup.setdefault(label, code)
    return lookup


def prepare_features(df):
    """Build the scaled feature matrix passed to the models.

    Runs ``engineer_features`` on ``df``, encodes ``team`` and ``league`` as
    their positions in ``team_encoder.classes_`` / ``league_encoder.classes_``,
    selects the 22 feature columns in a fixed order and applies ``scaler``.

    Args:
        df: DataFrame with the raw columns required by ``engineer_features``.

    Returns:
        Whatever ``scaler.transform`` returns for the selected feature values,
        one row per row of ``df``.
    """
    df = engineer_features(df)

    # Encode categorical variables with one dict lookup per row instead of two
    # linear scans of the class list. A label missing from the encoder gets
    # the code len(classes_), i.e. one past the last known class.
    # NOTE(review): that fallback code is presumably never seen in training;
    # confirm the models behave sensibly for unseen teams/leagues.
    team_codes = _first_index_lookup(team_encoder.classes_)
    unseen_team_code = len(team_encoder.classes_)
    df['team_encoded'] = df['team'].apply(
        lambda label: team_codes.get(label, unseen_team_code)
    )

    league_codes = _first_index_lookup(league_encoder.classes_)
    unseen_league_code = len(league_encoder.classes_)
    df['league_encoded'] = df['league'].apply(
        lambda label: league_codes.get(label, unseen_league_code)
    )

    # Select features for prediction. The order is significant: the scaler
    # receives a bare array, so columns are matched by position only.
    feature_cols = [
        'match_played', 'wins', 'draws', 'losts', 'goals_scored',
        'goals_conceded', 'gd', 'group_point', 'team_encoded',
        'league_encoded', 'win_ratio', 'loss_ratio', 'draw_ratio',
        'goals_scored_per_match', 'goals_conceded_per_match',
        'win_to_loss_ratio', 'goal_efficiency', 'defensive_strength',
        'points_per_match', 'goal_difference_per_match',
        'clean_sheet_potential', 'dominance_score'
    ]

    X = df[feature_cols].values
    X_scaled = scaler.transform(X)

    return X_scaled

def predict_probabilities(input_df, model_name):
    """Return column 1 of ``predict_proba`` for the model named ``model_name``.

    ``model_name`` must be a key of ``best_models``; an unknown name raises
    ``KeyError``. ``input_df`` is handed to the model unchanged (callers in
    this file pass the scaled matrix from ``prepare_features``).
    """
    class_probabilities = best_models[model_name].predict_proba(input_df)
    positive_class = class_probabilities[:, 1]
    return positive_class

def ensemble_predict(X_scaled):
    """Score ``X_scaled`` with all three models and blend the results.

    Column 1 of each model's ``predict_proba`` output is combined using the
    weights stored in ``ensemble_config``; the blended scores are then passed
    through a softmax taken across all rows.

    Returns:
        dict with ``rf_proba``, ``knn_proba`` and ``xgb_proba`` (per-model
        probabilities times 100) and ``ensemble_proba`` (softmax of the
        blended scores times 100, so it sums to 100 over the rows).
    """
    # NOTE(review): the per-model columns are raw probabilities while the
    # ensemble column is normalized across rows, so the two are on different
    # scales. A softmax over inputs confined to [0, 1] also compresses the
    # spread between rows - confirm this is intended.
    rf_scores = rf_model.predict_proba(X_scaled)[:, 1]
    knn_scores = knn_model.predict_proba(X_scaled)[:, 1]
    xgb_scores = xgb_model.predict_proba(X_scaled)[:, 1]

    weights = ensemble_config
    blended = (
        weights['rf_weight'] * rf_scores +
        weights['knn_weight'] * knn_scores +
        weights['xgb_weight'] * xgb_scores
    )

    # Shift by the maximum before exponentiating (the usual stable softmax).
    shifted = blended - np.max(blended)
    exponentials = np.exp(shifted)
    normalized = exponentials / exponentials.sum()

    report = {}
    report['rf_proba'] = rf_scores * 100
    report['knn_proba'] = knn_scores * 100
    report['xgb_proba'] = xgb_scores * 100
    report['ensemble_proba'] = normalized * 100
    return report


# 2020-21 Season Prediction

In [20]:
# 2020-21 season: build the input table, score it with each model and with
# the ensemble, then report the top-ranked club next to the actual winner.
print("\n" + "=" * 70)
print("2020-21 UCL SEASON — ROUND OF 16 PREDICTIONS")
print("=" * 70)

# Per-club records fed to the models (6 matches each; presumably group-stage
# figures - verify against the data source).
cl_2020_21_r16_teams = [
    {'Club': 'Bayern Munich', 'matches_played': 6, 'wins': 5, 'Draw': 1, 'losses': 0, 'goals_scored': 18, 'goals_conceded': 5},
    {'Club': 'Manchester City', 'matches_played': 6, 'wins': 5, 'Draw': 1, 'losses': 0, 'goals_scored': 13, 'goals_conceded': 1},
    {'Club': 'Chelsea', 'matches_played': 6, 'wins': 4, 'Draw': 2, 'losses': 0, 'goals_scored': 14, 'goals_conceded': 2},
    {'Club': 'Liverpool', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 10, 'goals_conceded': 3},
    {'Club': 'Borussia Dortmund', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 12, 'goals_conceded': 5},
    {'Club': 'Porto', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 10, 'goals_conceded': 3},
    {'Club': 'Sevilla', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 9, 'goals_conceded': 8},
    {'Club': 'Juventus', 'matches_played': 6, 'wins': 5, 'Draw': 0, 'losses': 1, 'goals_scored': 14, 'goals_conceded': 4},
    {'Club': 'Barcelona', 'matches_played': 6, 'wins': 5, 'Draw': 0, 'losses': 1, 'goals_scored': 16, 'goals_conceded': 5},
    {'Club': 'Paris Saint-Germain', 'matches_played': 6, 'wins': 4, 'Draw': 0, 'losses': 2, 'goals_scored': 13, 'goals_conceded': 6},
    {'Club': 'RB Leipzig', 'matches_played': 6, 'wins': 4, 'Draw': 0, 'losses': 2, 'goals_scored': 11, 'goals_conceded': 12},
    {'Club': 'Atalanta', 'matches_played': 6, 'wins': 3, 'Draw': 2, 'losses': 1, 'goals_scored': 10, 'goals_conceded': 8},
    {'Club': 'Real Madrid', 'matches_played': 6, 'wins': 3, 'Draw': 1, 'losses': 2, 'goals_scored': 11, 'goals_conceded': 9},
    {'Club': 'Lazio', 'matches_played': 6, 'wins': 2, 'Draw': 4, 'losses': 0, 'goals_scored': 11, 'goals_conceded': 7},
    {'Club': 'Atletico Madrid', 'matches_played': 6, 'wins': 2, 'Draw': 3, 'losses': 1, 'goals_scored': 7, 'goals_conceded': 8},
    {'Club': 'Borussia Monchengladbach', 'matches_played': 6, 'wins': 2, 'Draw': 2, 'losses': 2, 'goals_scored': 16, 'goals_conceded': 9},
]

# Create and prepare DataFrame: rename to the column names that
# engineer_features reads ('team', 'draws', 'match_played', 'losts').
df_cl_2020_21 = pd.DataFrame(cl_2020_21_r16_teams)
df_cl_2020_21 = df_cl_2020_21.rename(columns={
    'Club': 'team',
    'Draw': 'draws',
    'matches_played': 'match_played',
    'losses': 'losts'
})
# Derived inputs: goal difference, and points at 3 per win plus 1 per draw.
df_cl_2020_21["gd"] = df_cl_2020_21["goals_scored"] - df_cl_2020_21["goals_conceded"]
df_cl_2020_21["group_point"] = df_cl_2020_21["wins"] * 3 + df_cl_2020_21["draws"]

print("\n2020-21 Round of 16 Feature Table Ready:")
print(df_cl_2020_21[['team', 'match_played', 'wins', 'draws', 'losts', 'goals_scored', 'goals_conceded', 'gd', 'group_point']].head())

# Prepare features
df_features_2021 = df_cl_2020_21.copy()
X_2021 = prepare_features(df_features_2021)

# Predict with RandomForest
print("\n📊 Predicting with RandomForest...")
model_name = "RandomForest"
probs_rf = predict_probabilities(X_2021, model_name)
results_2021_rf = pd.DataFrame({
    "team": df_cl_2020_21["team"],
    "win_probability": probs_rf * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2020-21 ROUND OF 16 — RANDOM FOREST PROBABILITIES =====")
print(results_2021_rf.to_string(index=False))

# Predict with KNN
print("\n📊 Predicting with KNN...")
model_name = "KNN"
probs_knn = predict_probabilities(X_2021, model_name)
results_2021_knn = pd.DataFrame({
    "team": df_cl_2020_21["team"],
    "win_probability": probs_knn * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2020-21 ROUND OF 16 — KNN PROBABILITIES =====")
print(results_2021_knn.to_string(index=False))

# Predict with XGBoost
print("\n📊 Predicting with XGBoost...")
model_name = "XGBoost"
probs_xgb = predict_probabilities(X_2021, model_name)
results_2021_xgb = pd.DataFrame({
    "team": df_cl_2020_21["team"],
    "win_probability": probs_xgb * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2020-21 ROUND OF 16 — XGBOOST PROBABILITIES =====")
print(results_2021_xgb.to_string(index=False))

# Ensemble Prediction. The table is sorted by the ensemble column and
# re-indexed, so row 0 is the ensemble's top-ranked club.
print("\n📊 Predicting with Ensemble...")
predictions_2021 = ensemble_predict(X_2021)
results_2021_ensemble = pd.DataFrame({
    "team": df_cl_2020_21["team"],
    "RandomForest (%)": predictions_2021['rf_proba'],
    "KNN (%)": predictions_2021['knn_proba'],
    "XGBoost (%)": predictions_2021['xgb_proba'],
    "Ensemble (%)": predictions_2021['ensemble_proba']
}).sort_values(by="Ensemble (%)", ascending=False).reset_index(drop=True)

print("\n===== UCL 2020-21 ROUND OF 16 — ENSEMBLE PROBABILITIES =====")
print(results_2021_ensemble.to_string(index=False))

print(f"\n🏆 Predicted Winner: {results_2021_ensemble.iloc[0]['team']}")
print(f"   Ensemble Probability: {results_2021_ensemble.iloc[0]['Ensemble (%)']:.2f}%")
print("⚽ Actual Winner: 🏆Chelsea🏆")


2020-21 UCL SEASON — ROUND OF 16 PREDICTIONS

2020-21 Round of 16 Feature Table Ready:
                team  match_played  wins  draws  losts  goals_scored  \
0      Bayern Munich             6     5      1      0            18   
1    Manchester City             6     5      1      0            13   
2            Chelsea             6     4      2      0            14   
3          Liverpool             6     4      1      1            10   
4  Borussia Dortmund             6     4      1      1            12   

   goals_conceded  gd  group_point  
0               5  13           16  
1               1  12           16  
2               2  12           14  
3               3   7           13  
4               5   7           13  

📊 Predicting with RandomForest...

===== UCL 2020-21 ROUND OF 16 — RANDOM FOREST PROBABILITIES =====
                    team  win_probability
Borussia Monchengladbach        36.666667
           Bayern Munich        35.666667
                   Laz

<div style="text-align:center;">
    <img src="2021_winner.jpg" alt="Past Winners" width="1000"/>
</div>

# 2021-22 Season Prediction

In [22]:
# 2021-22 season: build the input table, score it with each model and with
# the ensemble, then report the top-ranked club next to the actual winner.
print("\n" + "=" * 70)
print("2021-22 UCL SEASON — ROUND OF 16 PREDICTIONS")
print("=" * 70)

# Per-club records fed to the models (6 matches each; presumably group-stage
# figures - verify against the data source).
cl_2021_22_r16_teams = [
    {'Club': 'Bayern Munich', 'matches_played': 6, 'wins': 6, 'Draw': 0, 'losses': 0, 'goals_scored': 22, 'goals_conceded': 3},
    {'Club': 'Liverpool', 'matches_played': 6, 'wins': 6, 'Draw': 0, 'losses': 0, 'goals_scored': 17, 'goals_conceded': 6},
    {'Club': 'Ajax', 'matches_played': 6, 'wins': 6, 'Draw': 0, 'losses': 0, 'goals_scored': 20, 'goals_conceded': 5},
    {'Club': 'Manchester City', 'matches_played': 6, 'wins': 4, 'Draw': 2, 'losses': 0, 'goals_scored': 18, 'goals_conceded': 4},
    {'Club': 'Real Madrid', 'matches_played': 6, 'wins': 5, 'Draw': 0, 'losses': 1, 'goals_scored': 14, 'goals_conceded': 7},
    {'Club': 'Chelsea', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 12, 'goals_conceded': 4},
    {'Club': 'Juventus', 'matches_played': 6, 'wins': 5, 'Draw': 0, 'losses': 1, 'goals_scored': 12, 'goals_conceded': 6},
    {'Club': 'Manchester United', 'matches_played': 6, 'wins': 3, 'Draw': 2, 'losses': 1, 'goals_scored': 11, 'goals_conceded': 8},
    {'Club': 'Paris Saint-Germain', 'matches_played': 6, 'wins': 3, 'Draw': 2, 'losses': 1, 'goals_scored': 17, 'goals_conceded': 9},
    {'Club': 'Inter Milan', 'matches_played': 6, 'wins': 3, 'Draw': 1, 'losses': 2, 'goals_scored': 7, 'goals_conceded': 7},
    {'Club': 'Villarreal', 'matches_played': 6, 'wins': 3, 'Draw': 1, 'losses': 2, 'goals_scored': 12, 'goals_conceded': 10},
    {'Club': 'Borussia Dortmund', 'matches_played': 6, 'wins': 3, 'Draw': 0, 'losses': 3, 'goals_scored': 11, 'goals_conceded': 11},
    {'Club': 'Atletico Madrid', 'matches_played': 6, 'wins': 2, 'Draw': 2, 'losses': 2, 'goals_scored': 7, 'goals_conceded': 8},
    {'Club': 'RB Leipzig', 'matches_played': 6, 'wins': 2, 'Draw': 1, 'losses': 3, 'goals_scored': 10, 'goals_conceded': 10},
    {'Club': 'Sporting CP', 'matches_played': 6, 'wins': 2, 'Draw': 2, 'losses': 2, 'goals_scored': 9, 'goals_conceded': 11},
    {'Club': 'Benfica', 'matches_played': 6, 'wins': 2, 'Draw': 0, 'losses': 4, 'goals_scored': 7, 'goals_conceded': 11},
]

# Create and prepare DataFrame: rename to the column names that
# engineer_features reads ('team', 'draws', 'match_played', 'losts').
df_cl_2021_22 = pd.DataFrame(cl_2021_22_r16_teams)
df_cl_2021_22 = df_cl_2021_22.rename(columns={
    'Club': 'team',
    'Draw': 'draws',
    'matches_played': 'match_played',
    'losses': 'losts'
})
# Derived inputs: goal difference, and points at 3 per win plus 1 per draw.
df_cl_2021_22["gd"] = df_cl_2021_22["goals_scored"] - df_cl_2021_22["goals_conceded"]
df_cl_2021_22["group_point"] = df_cl_2021_22["wins"] * 3 + df_cl_2021_22["draws"]

print("\n2021-22 Round of 16 Feature Table Ready:")
print(df_cl_2021_22[['team', 'match_played', 'wins', 'draws', 'losts', 'goals_scored', 'goals_conceded', 'gd', 'group_point']].head())

# Prepare features
df_features_2022 = df_cl_2021_22.copy()
X_2022 = prepare_features(df_features_2022)

# Predict with all models
print("\n📊 Predicting with RandomForest...")
probs_rf = predict_probabilities(X_2022, "RandomForest")
results_2022_rf = pd.DataFrame({
    "team": df_cl_2021_22["team"],
    "win_probability": probs_rf * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2021-22 ROUND OF 16 — RANDOM FOREST PROBABILITIES =====")
print(results_2022_rf.to_string(index=False))

print("\n📊 Predicting with KNN...")
probs_knn = predict_probabilities(X_2022, "KNN")
results_2022_knn = pd.DataFrame({
    "team": df_cl_2021_22["team"],
    "win_probability": probs_knn * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2021-22 ROUND OF 16 — KNN PROBABILITIES =====")
print(results_2022_knn.to_string(index=False))

print("\n📊 Predicting with XGBoost...")
probs_xgb = predict_probabilities(X_2022, "XGBoost")
results_2022_xgb = pd.DataFrame({
    "team": df_cl_2021_22["team"],
    "win_probability": probs_xgb * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2021-22 ROUND OF 16 — XGBOOST PROBABILITIES =====")
print(results_2022_xgb.to_string(index=False))

# Ensemble Prediction. The table is sorted by the ensemble column and
# re-indexed, so row 0 is the ensemble's top-ranked club.
print("\n📊 Predicting with Ensemble...")
predictions_2022 = ensemble_predict(X_2022)
results_2022_ensemble = pd.DataFrame({
    "team": df_cl_2021_22["team"],
    "RandomForest (%)": predictions_2022['rf_proba'],
    "KNN (%)": predictions_2022['knn_proba'],
    "XGBoost (%)": predictions_2022['xgb_proba'],
    "Ensemble (%)": predictions_2022['ensemble_proba']
}).sort_values(by="Ensemble (%)", ascending=False).reset_index(drop=True)

print("\n===== UCL 2021-22 ROUND OF 16 — ENSEMBLE PROBABILITIES =====")
print(results_2022_ensemble.to_string(index=False))

print(f"\n🏆 Predicted Winner: {results_2022_ensemble.iloc[0]['team']}")
print(f"   Ensemble Probability: {results_2022_ensemble.iloc[0]['Ensemble (%)']:.2f}%")
print("⚽ Actual Winner: 🏆Real Madrid🏆")




2021-22 UCL SEASON — ROUND OF 16 PREDICTIONS

2021-22 Round of 16 Feature Table Ready:
              team  match_played  wins  draws  losts  goals_scored  \
0    Bayern Munich             6     6      0      0            22   
1        Liverpool             6     6      0      0            17   
2             Ajax             6     6      0      0            20   
3  Manchester City             6     4      2      0            18   
4      Real Madrid             6     5      0      1            14   

   goals_conceded  gd  group_point  
0               3  19           18  
1               6  11           18  
2               5  15           18  
3               4  14           14  
4               7   7           15  

📊 Predicting with RandomForest...

===== UCL 2021-22 ROUND OF 16 — RANDOM FOREST PROBABILITIES =====
               team  win_probability
      Bayern Munich        38.333333
  Borussia Dortmund        33.000000
Paris Saint-Germain        32.666667
  Manchester

<div style="text-align:center;">
    <img src="2022_winner.webp" alt="Past Winners" width="1000"/>
</div>

# 2022-23 Season Prediction

In [23]:

# 2022-23 season: build the input table, score it with each model and with
# the ensemble, then report the top-ranked club next to the actual winner.
print("\n" + "=" * 70)
print("2022-23 UCL SEASON — ROUND OF 16 PREDICTIONS")
print("=" * 70)

# Per-club records fed to the models (6 matches each; presumably group-stage
# figures - verify against the data source).
cl_2022_23_r16_teams = [
    {'Club': 'Bayern Munich', 'matches_played': 6, 'wins': 6, 'Draw': 0, 'losses': 0, 'goals_scored': 18, 'goals_conceded': 2},
    {'Club': 'Napoli', 'matches_played': 6, 'wins': 5, 'Draw': 0, 'losses': 1, 'goals_scored': 20, 'goals_conceded': 6},
    {'Club': 'Liverpool', 'matches_played': 6, 'wins': 5, 'Draw': 0, 'losses': 1, 'goals_scored': 17, 'goals_conceded': 6},
    {'Club': 'Benfica', 'matches_played': 6, 'wins': 4, 'Draw': 2, 'losses': 0, 'goals_scored': 16, 'goals_conceded': 7},
    {'Club': 'Paris Saint-Germain', 'matches_played': 6, 'wins': 4, 'Draw': 2, 'losses': 0, 'goals_scored': 16, 'goals_conceded': 5},
    {'Club': 'Real Madrid', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 15, 'goals_conceded': 6},
    {'Club': 'Chelsea', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 10, 'goals_conceded': 4},
    {'Club': 'RB Leipzig', 'matches_played': 6, 'wins': 4, 'Draw': 0, 'losses': 2, 'goals_scored': 13, 'goals_conceded': 9},
    {'Club': 'Porto', 'matches_played': 6, 'wins': 4, 'Draw': 0, 'losses': 2, 'goals_scored': 12, 'goals_conceded': 8},
    {'Club': 'Manchester City', 'matches_played': 6, 'wins': 3, 'Draw': 2, 'losses': 1, 'goals_scored': 14, 'goals_conceded': 4},
    {'Club': 'Inter Milan', 'matches_played': 6, 'wins': 3, 'Draw': 2, 'losses': 1, 'goals_scored': 10, 'goals_conceded': 7},
    {'Club': 'Tottenham Hotspur', 'matches_played': 6, 'wins': 3, 'Draw': 2, 'losses': 1, 'goals_scored': 8, 'goals_conceded': 6},
    {'Club': 'Borussia Dortmund', 'matches_played': 6, 'wins': 3, 'Draw': 1, 'losses': 2, 'goals_scored': 11, 'goals_conceded': 5},
    {'Club': 'AC Milan', 'matches_played': 6, 'wins': 3, 'Draw': 1, 'losses': 2, 'goals_scored': 12, 'goals_conceded': 10},
    {'Club': 'Juventus', 'matches_played': 6, 'wins': 5, 'Draw': 0, 'losses': 1, 'goals_scored': 9, 'goals_conceded': 2},
    {'Club': 'Atletico Madrid', 'matches_played': 6, 'wins': 2, 'Draw': 0, 'losses': 4, 'goals_scored': 5, 'goals_conceded': 11},
]

# Create and prepare DataFrame: rename to the column names that
# engineer_features reads ('team', 'draws', 'match_played', 'losts').
df_cl_2022_23 = pd.DataFrame(cl_2022_23_r16_teams)
df_cl_2022_23 = df_cl_2022_23.rename(columns={
    'Club': 'team',
    'Draw': 'draws',
    'matches_played': 'match_played',
    'losses': 'losts'
})
# Derived inputs: goal difference, and points at 3 per win plus 1 per draw.
df_cl_2022_23["gd"] = df_cl_2022_23["goals_scored"] - df_cl_2022_23["goals_conceded"]
df_cl_2022_23["group_point"] = df_cl_2022_23["wins"] * 3 + df_cl_2022_23["draws"]

print("\n2022-23 Round of 16 Feature Table Ready:")
print(df_cl_2022_23[['team', 'match_played', 'wins', 'draws', 'losts', 'goals_scored', 'goals_conceded', 'gd', 'group_point']].head())

# Prepare features
df_features_2023 = df_cl_2022_23.copy()
X_2023 = prepare_features(df_features_2023)

# Predict with all models
print("\n📊 Predicting with RandomForest...")
probs_rf = predict_probabilities(X_2023, "RandomForest")
results_2023_rf = pd.DataFrame({
    "team": df_cl_2022_23["team"],
    "win_probability": probs_rf * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2022-23 ROUND OF 16 — RANDOM FOREST PROBABILITIES =====")
print(results_2023_rf.to_string(index=False))

print("\n📊 Predicting with KNN...")
probs_knn = predict_probabilities(X_2023, "KNN")
results_2023_knn = pd.DataFrame({
    "team": df_cl_2022_23["team"],
    "win_probability": probs_knn * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2022-23 ROUND OF 16 — KNN PROBABILITIES =====")
print(results_2023_knn.to_string(index=False))

print("\n📊 Predicting with XGBoost...")
probs_xgb = predict_probabilities(X_2023, "XGBoost")
results_2023_xgb = pd.DataFrame({
    "team": df_cl_2022_23["team"],
    "win_probability": probs_xgb * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2022-23 ROUND OF 16 — XGBOOST PROBABILITIES =====")
print(results_2023_xgb.to_string(index=False))

# Ensemble Prediction. The table is sorted by the ensemble column and
# re-indexed, so row 0 is the ensemble's top-ranked club.
print("\n📊 Predicting with Ensemble...")
predictions_2023 = ensemble_predict(X_2023)
results_2023_ensemble = pd.DataFrame({
    "team": df_cl_2022_23["team"],
    "RandomForest (%)": predictions_2023['rf_proba'],
    "KNN (%)": predictions_2023['knn_proba'],
    "XGBoost (%)": predictions_2023['xgb_proba'],
    "Ensemble (%)": predictions_2023['ensemble_proba']
}).sort_values(by="Ensemble (%)", ascending=False).reset_index(drop=True)

print("\n===== UCL 2022-23 ROUND OF 16 — ENSEMBLE PROBABILITIES =====")
print(results_2023_ensemble.to_string(index=False))

print(f"\n🏆 Predicted Winner: {results_2023_ensemble.iloc[0]['team']}")
print(f"   Ensemble Probability: {results_2023_ensemble.iloc[0]['Ensemble (%)']:.2f}%")
print("⚽ Actual Winner: 🏆Manchester City🏆")


2022-23 UCL SEASON — ROUND OF 16 PREDICTIONS

2022-23 Round of 16 Feature Table Ready:
                  team  match_played  wins  draws  losts  goals_scored  \
0        Bayern Munich             6     6      0      0            18   
1               Napoli             6     5      0      1            20   
2            Liverpool             6     5      0      1            17   
3              Benfica             6     4      2      0            16   
4  Paris Saint-Germain             6     4      2      0            16   

   goals_conceded  gd  group_point  
0               2  16           18  
1               6  14           15  
2               6  11           15  
3               7   9           14  
4               5  11           14  

📊 Predicting with RandomForest...

===== UCL 2022-23 ROUND OF 16 — RANDOM FOREST PROBABILITIES =====
               team  win_probability
      Bayern Munich        41.666667
  Borussia Dortmund        39.000000
         RB Leipzig      

<div style="text-align:center;">
    <img src="2023_winner.jpeg" alt="Past Winners" width="1000"/>
</div>

# 2023-24 Season Prediction

In [24]:
# 2023-24 season: build the input table, score it with each model and with
# the ensemble, then report the top-ranked club next to the actual winner.
print("\n" + "=" * 70)
print("2023-24 UCL SEASON — ROUND OF 16 PREDICTIONS")
print("=" * 70)

# Per-club records fed to the models (6 matches each; the variable name
# indicates group-stage figures - verify against the data source).
cl_2023_24_r16_teams_group_stats = [
    {'Club': 'Real Madrid', 'matches_played': 6, 'wins': 6, 'Draw': 0, 'losses': 0, 'goals_scored': 16, 'goals_conceded': 3},
    {'Club': 'Manchester City', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 18, 'goals_conceded': 7},
    {'Club': 'Bayern Munich', 'matches_played': 6, 'wins': 5, 'Draw': 0, 'losses': 1, 'goals_scored': 13, 'goals_conceded': 6},
    {'Club': 'Atletico Madrid', 'matches_played': 6, 'wins': 4, 'Draw': 0, 'losses': 2, 'goals_scored': 15, 'goals_conceded': 7},
    {'Club': 'Arsenal', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 16, 'goals_conceded': 4},
    {'Club': 'Real Sociedad', 'matches_played': 6, 'wins': 1, 'Draw': 3, 'losses': 2, 'goals_scored': 6, 'goals_conceded': 8},
    {'Club': 'Inter Milan', 'matches_played': 6, 'wins': 5, 'Draw': 1, 'losses': 0, 'goals_scored': 14, 'goals_conceded': 2},
    {'Club': 'Barcelona', 'matches_played': 6, 'wins': 4, 'Draw': 0, 'losses': 2, 'goals_scored': 12, 'goals_conceded': 6},
    {'Club': 'Porto', 'matches_played': 6, 'wins': 3, 'Draw': 1, 'losses': 2, 'goals_scored': 11, 'goals_conceded': 10},
    {'Club': 'RB Leipzig', 'matches_played': 6, 'wins': 3, 'Draw': 1, 'losses': 2, 'goals_scored': 8, 'goals_conceded': 6},
    {'Club': 'Borussia Dortmund', 'matches_played': 6, 'wins': 3, 'Draw': 2, 'losses': 1, 'goals_scored': 10, 'goals_conceded': 5},
    {'Club': 'Napoli', 'matches_played': 6, 'wins': 3, 'Draw': 0, 'losses': 3, 'goals_scored': 11, 'goals_conceded': 9},
    {'Club': 'Lazio', 'matches_played': 6, 'wins': 3, 'Draw': 0, 'losses': 3, 'goals_scored': 7, 'goals_conceded': 8},
    {'Club': 'PSV Eindhoven', 'matches_played': 6, 'wins': 2, 'Draw': 3, 'losses': 1, 'goals_scored': 8, 'goals_conceded': 10},
    {'Club': 'Paris Saint-Germain', 'matches_played': 6, 'wins': 4, 'Draw': 1, 'losses': 1, 'goals_scored': 10, 'goals_conceded': 5},
    {'Club': 'Copenhagen', 'matches_played': 6, 'wins': 2, 'Draw': 2, 'losses': 2, 'goals_scored': 8, 'goals_conceded': 8}
]

# Create and prepare DataFrame: rename to the column names that
# engineer_features reads ('team', 'draws', 'match_played', 'losts').
df_cl_2023_24 = pd.DataFrame(cl_2023_24_r16_teams_group_stats)
df_cl_2023_24 = df_cl_2023_24.rename(columns={
    'Club': 'team',
    'Draw': 'draws',
    'matches_played': 'match_played',
    'losses': 'losts'
})
# Derived inputs: goal difference, and points at 3 per win plus 1 per draw.
df_cl_2023_24["gd"] = df_cl_2023_24["goals_scored"] - df_cl_2023_24["goals_conceded"]
df_cl_2023_24["group_point"] = df_cl_2023_24["wins"] * 3 + df_cl_2023_24["draws"]

# Unlike the earlier seasons, this cell prints the whole table (no .head()).
print("\n2023-24 Round of 16 Feature Table Ready:")
print(df_cl_2023_24[['team', 'match_played', 'wins', 'draws', 'losts', 'goals_scored', 'goals_conceded', 'gd', 'group_point']])

# Prepare features
df_features_2024 = df_cl_2023_24.copy()
X_2024 = prepare_features(df_features_2024)

# Predict with RandomForest
print("\n📊 Predicting with RandomForest...")
model_name = "RandomForest"
probs_rf = predict_probabilities(X_2024, model_name)
results_2024_rf = pd.DataFrame({
    "team": df_cl_2023_24["team"],
    "win_probability": probs_rf * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2023-24 ROUND OF 16 — RANDOM FOREST PROBABILITIES =====")
print(results_2024_rf.to_string(index=False))

# Predict with KNN
print("\n📊 Predicting with KNN...")
model_name = "KNN"
probs_knn = predict_probabilities(X_2024, model_name)
results_2024_knn = pd.DataFrame({
    "team": df_cl_2023_24["team"],
    "win_probability": probs_knn * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2023-24 ROUND OF 16 — KNN PROBABILITIES =====")
print(results_2024_knn.to_string(index=False))

# Predict with XGBoost
print("\n📊 Predicting with XGBoost...")
model_name = "XGBoost"
probs_xgb = predict_probabilities(X_2024, model_name)
results_2024_xgb = pd.DataFrame({
    "team": df_cl_2023_24["team"],
    "win_probability": probs_xgb * 100
}).sort_values(by="win_probability", ascending=False)

print("\n===== UCL 2023-24 ROUND OF 16 — XGBOOST PROBABILITIES =====")
print(results_2024_xgb.to_string(index=False))

# Ensemble Prediction. The table is sorted by the ensemble column and
# re-indexed, so row 0 is the ensemble's top-ranked club.
print("\n📊 Predicting with Ensemble...")
predictions_2024 = ensemble_predict(X_2024)
results_2024_ensemble = pd.DataFrame({
    "team": df_cl_2023_24["team"],
    "RandomForest (%)": predictions_2024['rf_proba'],
    "KNN (%)": predictions_2024['knn_proba'],
    "XGBoost (%)": predictions_2024['xgb_proba'],
    "Ensemble (%)": predictions_2024['ensemble_proba']
}).sort_values(by="Ensemble (%)", ascending=False).reset_index(drop=True)

print("\n===== UCL 2023-24 ROUND OF 16 — ENSEMBLE PROBABILITIES =====")
print(results_2024_ensemble.to_string(index=False))

print(f"\n🏆 Predicted Winner: {results_2024_ensemble.iloc[0]['team']}")
print(f"   Ensemble Probability: {results_2024_ensemble.iloc[0]['Ensemble (%)']:.2f}%")
print("⚽ Actual Winner: 🏆Real Madrid🏆")



2023-24 UCL SEASON — ROUND OF 16 PREDICTIONS

2023-24 Round of 16 Feature Table Ready:
                   team  match_played  wins  draws  losts  goals_scored  \
0           Real Madrid             6     6      0      0            16   
1       Manchester City             6     4      1      1            18   
2         Bayern Munich             6     5      0      1            13   
3       Atletico Madrid             6     4      0      2            15   
4               Arsenal             6     4      1      1            16   
5         Real Sociedad             6     1      3      2             6   
6           Inter Milan             6     5      1      0            14   
7             Barcelona             6     4      0      2            12   
8                 Porto             6     3      1      2            11   
9            RB Leipzig             6     3      1      2             8   
10    Borussia Dortmund             6     3      2      1            10   
11        

<div style="text-align:center;">
    <img src="2024_winner.avif" alt="Past Winners" width="1000"/>
</div>

# Summary Comparison

In [25]:

# Compare the ensemble's top pick with the actual champion for every season,
# report the hit rate, and write all ensemble tables plus the summary to CSV.
print("\n" + "=" * 70)
print("OVERALL SUMMARY — ALL SEASONS (2020-21 TO 2023-24)")
print("=" * 70)

# Season label -> (ranked ensemble table, actual champion). Each table was
# sorted by "Ensemble (%)" descending and re-indexed when it was built, so
# row 0 is that season's predicted winner.
season_outcomes = {
    '2020-21': (results_2021_ensemble, 'Chelsea'),
    '2021-22': (results_2022_ensemble, 'Real Madrid'),
    '2022-23': (results_2023_ensemble, 'Manchester City'),
    '2023-24': (results_2024_ensemble, 'Real Madrid'),
}

summary_data = {
    season: {
        'Predicted': ranked.iloc[0]['team'],
        'Actual': champion,
        'Probability': f"{ranked.iloc[0]['Ensemble (%)']:.2f}%",
    }
    for season, (ranked, champion) in season_outcomes.items()
}

# Transpose so that seasons become rows and the three fields become columns.
summary_df = pd.DataFrame(summary_data).T
print("\n" + summary_df.to_string())

# Calculate accuracy: a season counts as correct only on an exact name match.
correct = sum(row['Predicted'] == row['Actual'] for row in summary_data.values())
total = len(summary_data)
accuracy = (correct / total) * 100

print(f"\n🎯 Overall Accuracy: {correct}/{total} ({accuracy:.1f}%)")

# Save results. Create the same directory the CSVs are written to; creating
# a relative 'predictions' folder only works when the notebook happens to be
# started from the project root.
PREDICTIONS_DIR = '/Users/nazmul/Desktop/Project/ucl/predictions'
os.makedirs(PREDICTIONS_DIR, exist_ok=True)
results_2021_ensemble.to_csv(os.path.join(PREDICTIONS_DIR, '2021_ensemble_predictions.csv'), index=False)
results_2022_ensemble.to_csv(os.path.join(PREDICTIONS_DIR, '2022_ensemble_predictions.csv'), index=False)
results_2023_ensemble.to_csv(os.path.join(PREDICTIONS_DIR, '2023_ensemble_predictions.csv'), index=False)
results_2024_ensemble.to_csv(os.path.join(PREDICTIONS_DIR, '2024_ensemble_predictions.csv'), index=False)
# The index (season labels) is kept for the summary file.
summary_df.to_csv(os.path.join(PREDICTIONS_DIR, 'summary_all_seasons.csv'))

print("\n===All results saved to 'predictions/' directory===")
print("=" * 70)



OVERALL SUMMARY — ALL SEASONS (2020-21 TO 2023-24)

                 Predicted           Actual Probability
2020-21      Bayern Munich          Chelsea       8.36%
2021-22      Bayern Munich      Real Madrid       8.89%
2022-23      Bayern Munich  Manchester City       8.25%
2023-24  Borussia Dortmund      Real Madrid       8.15%

🎯 Overall Accuracy: 0/4 (0.0%)

===All results saved to 'predictions/' directory===
