In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Folder that holds the dataset CSVs. Defined once so the notebook can be pointed
# at another copy of the data by editing a single line instead of every read_csv call.
DATA_DIR = Path(r'C:\Users\Shakil\Downloads\Compressed\UEFA Champions League Historical Dataset 1955-2023')

# All-time table: the 'team' column and the per-team totals are read from it later on.
ucl_alltime = pd.read_csv(DATA_DIR / 'ucl_alltime.csv')

# Finals table: the 'Winners' and 'Runners-up' columns are read from it later on.
ucl_finals = pd.read_csv(DATA_DIR / 'ucl_finals.csv')

In [8]:
# Keep only the all-time rows whose 'team' value matches each finalist.
real_madrid_alltime = ucl_alltime.loc[ucl_alltime['team'].eq('Real Madrid')]
borussia_alltime = ucl_alltime.loc[ucl_alltime['team'].eq('Borussia Dortmund')]
# NOTE(review): the displayed 'goals' value ('1076:55:00') looks like a mis-parsed
# "for:against" string -- confirm against the raw CSV before using that column.
real_madrid_alltime

Unnamed: 0,team,matches,win,draw,loss,goals,goal difference,points,winners
0,Real Madrid,486,291,85,110,1076:55:00,533,533,14


In [9]:
# Helper that derives per-match rate features from a team's all-time record

def calculate_features(team_data):
    """Derive per-match rates from a team's all-time record.

    Only the first row of ``team_data`` is used.

    Parameters
    ----------
    team_data : pandas.DataFrame
        Frame with the columns 'matches', 'win', 'draw', 'loss',
        'goal difference' and 'points'.

    Returns
    -------
    tuple
        ``(win_rate, draw_rate, loss_rate, goal_diff_per_match,
        points_per_match)``, each being the corresponding column value
        divided by 'matches'.

    Raises
    ------
    IndexError
        If ``team_data`` has no rows (for example, the team filter matched
        nothing).
    ValueError
        If the team has zero recorded matches, which would make every rate
        undefined.
    """
    # An empty selection used to surface as an opaque numpy
    # "index 0 is out of bounds" error; fail with a message that names the cause.
    if len(team_data) == 0:
        raise IndexError(
            'calculate_features: team_data has no rows; '
            'check the team name used to filter the table'
        )

    matches = team_data['matches'].values[0]
    # Dividing numpy integers by zero yields inf/NaN with only a warning, which
    # would silently poison every downstream feature.
    if matches == 0:
        raise ValueError('calculate_features: team has zero matches; rates are undefined')

    # Order matters: callers index the returned tuple positionally.
    per_match_columns = ('win', 'draw', 'loss', 'goal difference', 'points')
    return tuple(team_data[column].values[0] / matches for column in per_match_columns)

In [10]:
# Run the same feature extraction over both finalists' all-time rows.
real_madrid_features, borussia_features = map(
    calculate_features, (real_madrid_alltime, borussia_alltime)
)

In [11]:
# Tally final appearances per club from ucl_finals: one Series counting how often
# each club appears under 'Winners', one counting appearances under 'Runners-up'.
final_wins, final_losses = (
    ucl_finals[column].value_counts() for column in ('Winners', 'Runners-up')
)

In [12]:
# Look up each finalist in both tallies; a club missing from a tally counts as 0.
real_madrid_final_wins, real_madrid_final_losses = (
    tally.get('Real Madrid', 0) for tally in (final_wins, final_losses)
)
borussia_final_wins, borussia_final_losses = (
    tally.get('Borussia Dortmund', 0) for tally in (final_wins, final_losses)
)

In [14]:
# Column-oriented record for the two finalists. Each list is ordered
# [Real Madrid, Borussia Dortmund]; the rate names follow the order of the
# tuple returned by calculate_features.
data = {
    'team': ['Real Madrid', 'Borussia Dortmund'],
    **{
        feature_name: [real_madrid_value, borussia_value]
        for feature_name, real_madrid_value, borussia_value in zip(
            ('win_rate', 'draw_rate', 'loss_rate', 'goal_diff_per_match', 'points_per_match'),
            real_madrid_features,
            borussia_features,
        )
    },
    'final_wins': [real_madrid_final_wins, borussia_final_wins],
    'final_losses': [real_madrid_final_losses, borussia_final_losses],
}

In [15]:
# Assemble the two-row feature table (one row per finalist), keeping the
# column order of the `data` dict.
features_df = pd.DataFrame(data, columns=list(data))
features_df

Unnamed: 0,team,win_rate,draw_rate,loss_rate,goal_diff_per_match,points_per_match,final_wins,final_losses
0,Real Madrid,0.598765,0.174897,0.226337,1.096708,1.096708,14,3
1,Borussia Dortmund,0.471591,0.204545,0.323864,0.426136,0.426136,1,1


In [16]:
from sklearn.linear_model import LogisticRegression
import numpy as np

# Feature matrix: the seven numeric columns of features_df (everything except 'team').
# NOTE(review): in the displayed features_df, goal_diff_per_match and points_per_match
# are identical for both teams -- verify the 'goal difference' / 'points' source columns.
x = features_df.loc[:, [
    'win_rate', 'draw_rate', 'loss_rate', 'goal_diff_per_match',
    'points_per_match', 'final_wins', 'final_losses',
]]

# Hand-assigned labels, in row order: 1 for Real Madrid (higher historical
# performance), 0 for Borussia Dortmund.
y = np.asarray([1, 0])


In [17]:
# Fit a default-configured logistic regression on the two labelled rows.
model = LogisticRegression().fit(x, y)

# Hard 0/1 label for each finalist (Real Madrid vs. Borussia Dortmund).
# NOTE(review): the model is asked to predict the same two rows it was fit on,
# so this reflects the hand-assigned labels rather than an independent forecast.
prediction = model.predict(x)

In [18]:
# Rank the finalists by the model's probability for the positive class (label 1)
# instead of by the hard 0/1 labels. argmax over labels returns index 0 whenever
# both rows receive the same label, which would silently name the first team
# (Real Madrid) regardless of what the model actually says.
positive_class_column = list(model.classes_).index(1)
positive_class_scores = model.predict_proba(x)[:, positive_class_column]

predicted_winner_index = int(positive_class_scores.argmax())
predicted_winner = features_df.iloc[predicted_winner_index]['team']
predicted_winner

'Real Madrid'

In [20]:
# One row per team (row 0 = Real Madrid, row 1 = Borussia Dortmund),
# one column per class.
probabilities = model.predict_proba(x)

# Take column 1 for each of the two rows. These are two separate per-row
# predictions, so they are not constrained to sum to exactly 1.
real_madrid_probability, borussia_probability = probabilities[:2, 1]

real_madrid_probability, borussia_probability

(0.9626268184063416, 0.03735363769765617)

In [21]:
# Rescale both probabilities from the 0-1 range to percentages.
Real_Madrid_percentage, Borussia_Dortmund_percentage = (
    real_madrid_probability * 100,
    borussia_probability * 100,
)
Real_Madrid_percentage, Borussia_Dortmund_percentage

(96.26268184063416, 3.735363769765617)

In [22]:
# Report both percentages rounded to two decimals, one team per line.
print(
    f'Real Madrid win probability: {Real_Madrid_percentage:.2f}%\n'
    f'Borussia Dortmund win probability: {Borussia_Dortmund_percentage:.2f}%'
)

Real Madrid win probability: 96.26%
Borussia Dortmund win probability: 3.74%
