# Will PSG win the Champions League final?

### Analysis and comparison of PSG's first and second half of the 24/25 season (21/05/25).

### (pytorch)

In [1]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import pandas as pd
from io import StringIO

### Let's scrabble for data on the Internet. We want to find data from PSG's matches this season.

In [26]:
# Options to make the browser "invisible" (headless mode)
options = Options()
options.add_argument("--headless")  # mode sans interface
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")

# Launch the browser with the appropriate driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# For Edge browser (currently active):
# driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()),options=options)

# Go to the main page
# url = "https://fbref.com/en/squads/e2d8892c/Paris-Saint-Germain-Stats"  # all tab

# only Ligue 1
# url = "https://fbref.com/en/squads/e2d8892c/2024-2025/matchlogs/c13/schedule/Paris-Saint-Germain-Scores-and-Fixtures-Ligue-1"  

# all competition tab
url = "https://fbref.com/en/squads/e2d8892c/2024-2025/matchlogs/all_comps/schedule/Paris-Saint-Germain-Scores-and-Fixtures-All-Competitions " 
driver.get(url)

# Wait for the page to fully load (you can adjust the sleep time if needed)
time.sleep(5)

# Get the page's HTML content
html = driver.page_source
soup = BeautifulSoup(html, "html.parser")

# Close the browser session
driver.quit()

# You can now parse the content with BeautifulSoup as usual
# print(soup.prettify())  # Uncomment this to view the full HTML structure


#### On this page there are a lot of tables, we are looking for one in particular that covers the 24/25 PSG season.

In [27]:
tables = soup.find_all("table")
print(f"Number of tab found: {len(tables)}")

for i, table in enumerate(tables):
    caption = table.find("caption")
    title = caption.text if caption else f"Tableau {i+1} without caption"
    print(title)
    print("-----")

Number of tab found: 1
Scores & Fixtures 2024-2025 Paris Saint-Germain: All Competitions Table
-----


#### Once we have found the table we want, we filter it to keep the following parameters:  Date | Composition | Round | Venue | Result | GF | GA | Opponent | xG | xGA | Poss

In [34]:
tb_saison = tables[0]  # numéro commence à 0
# tb_saison = tables[9]  # numéro commence à 0

caption = table.find("caption")
# print(caption.text) 

# Convert the HTML table to a DataFrame, avoiding warnings
saison_df = pd.read_html(StringIO(str(tables[0])))[0]
# saison_df = pd.read_html(StringIO(str(tables[9])))[0]
# saison_df.dropna(inplace=True)



saison_filtrer = saison_df.drop(columns = ["Time","Day","Captain","Formation","Referee","Opp Formation","Attendance","Match Report","Notes"])

saison_filtrer.loc[24, "GF"] = 1 # penalty score removed
saison_filtrer.loc[24, "GA"] = 1 # penalty score removed
saison_filtrer.loc[45, "GF"] = 1 # penalty score removed
saison_filtrer.loc[45, "GA"] = 0 # penalty score removed
saison_filtrer.drop([10,21,32,43,54,61,62], inplace=True)

saison_filtrer

colonnes = ["GF", "GA", "xG","xGA","Poss"]  # remplace par tes colonnes
saison_filtrer[colonnes] = saison_filtrer[colonnes].astype(float)

print(saison_filtrer.head())



         Date          Comp         Round Venue Result   GF   GA     Opponent  \
0  2024-08-16       Ligue 1   Matchweek 1  Away      W  4.0  1.0     Le Havre   
1  2024-08-23       Ligue 1   Matchweek 2  Home      W  6.0  0.0  Montpellier   
2  2024-09-01       Ligue 1   Matchweek 3  Away      W  3.0  1.0        Lille   
3  2024-09-14       Ligue 1   Matchweek 4  Home      W  3.0  1.0        Brest   
4  2024-09-18  Champions Lg  League phase  Home      W  1.0  0.0    es Girona   

    xG  xGA  Poss  
0  2.9  0.2  76.0  
1  3.8  0.6  68.0  
2  2.6  1.4  58.0  
3  2.8  1.0  67.0  
4  2.1  0.2  64.0  


#### We create on df for the first and second half of the season (separated by the winter break)

In [37]:
#First part of the saison
saison_filtrer["Date"] = pd.to_datetime(saison_filtrer["Date"], format="%Y-%m-%d", errors='coerce')
date_limite = pd.to_datetime("2025-01-01")
first_prt_saison = saison_filtrer[saison_filtrer["Date"] < date_limite].copy()
first_prt_saison.dropna(inplace=True)
first_prt_saison.head()

#Second part of the saison
saison_filtrer["Date"] = pd.to_datetime(saison_filtrer["Date"], format="%Y-%m-%d", errors='coerce')
date_limite = pd.to_datetime("2025-01-01")
second_prt_saison = saison_filtrer[saison_filtrer["Date"] > date_limite].copy()
second_prt_saison.dropna(inplace=True)
second_prt_saison.head()

Unnamed: 0,Date,Comp,Round,Venue,Result,GF,GA,Opponent,xG,xGA,Poss
26,2025-01-12,Ligue 1,Matchweek 17,Home,W,2.0,1.0,Saint-Étienne,4.1,0.4,59.0
28,2025-01-18,Ligue 1,Matchweek 18,Away,W,2.0,1.0,Lens,1.2,0.7,66.0
29,2025-01-22,Champions Lg,League phase,Home,W,4.0,2.0,eng Manchester City,2.8,1.7,63.0
30,2025-01-25,Ligue 1,Matchweek 19,Home,D,1.0,1.0,Reims,1.2,0.9,78.0
31,2025-01-29,Champions Lg,League phase,Away,W,4.0,1.0,de Stuttgart,2.4,0.8,58.0


### We've put together a first analysis to sum up each part of PSG's season in a nutshell.

In [38]:
total_matches = first_prt_saison.shape[0]
wins1, draw1, loss1 = first_prt_saison["Result"].value_counts()
ration1 = round((wins1/total_matches)*100,2)
print(f"PSG achieved {wins1} wins out of {total_matches} matches with {draw1} draws and {loss1} losses in the first half of the season.")

total_matches = second_prt_saison.shape[0]
wins2, draw2, loss2 = second_prt_saison["Result"].value_counts()
ration2 = round((wins2/total_matches)*100,2)
print(f"PSG achieved {wins2} wins out of {total_matches} matches with {draw2} draws and {loss2} losses in the second half of the season.")
print("--------------------------------------------------------------------------------------------------------------------------\n")

 
print(f"PSG had a better second half of the season, winning {ration2}% of their matches \ncompared to {ration1}% in the first half, despite playing more matches.")


PSG achieved 14 wins out of 22 matches with 5 draws and 3 losses in the first half of the season.
PSG achieved 22 wins out of 28 matches with 4 draws and 2 losses in the second half of the season.
--------------------------------------------------------------------------------------------------------------------------

PSG had a better second half of the season, winning 78.57% of their matches 
compared to 63.64% in the first half, despite playing more matches.


In [40]:
def moyenne(df,prt_saison):
    moy_b = round(df["GF"].mean(),2)
    moy_c = round(df["GA"].mean(),2)
    moy_xg = round(df["xG"].mean(),2)
    moy_xga = round(df["xGA"].mean(),2)
    moy_poss = round(df["Poss"].mean(),2)
    nb_g = df["GF"].sum()
    nb_a = df["GA"].sum()
    if prt_saison == 1:
        print("For the first half of the season:")
    elif prt_saison == 2:
        print("For the second half of the season:")

    print(f"PSG has an average of {moy_b} goals scored ({nb_g}) and {moy_c} goals conceded ({nb_a}) with an overall possession of {moy_poss}%  | ({moy_xg}: xG, {moy_xga}: xGA).")


moyenne(first_prt_saison,1)
print("--------------------------------------------------------------------------------------------------------------------------\n")
moyenne(second_prt_saison,2)

For the first half of the season:
PSG has an average of 2.27 goals scored (50.0) and 0.91 goals conceded (20.0) with an overall possession of 67.14%  | (2.4: xG, 0.91: xGA).
--------------------------------------------------------------------------------------------------------------------------

For the second half of the season:
PSG has an average of 2.68 goals scored (75.0) and 1.07 goals conceded (30.0) with an overall possession of 65.07%  | (2.45: xG, 1.04: xGA).




# Prediction on the outcome of a match.
### Will PSG win their Champions League final with the qualities Luis Enrique has given his team?

In [41]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader

In [42]:
# Assume your DataFrame is called `saison_filtrer`

# Map match results to numeric values: Win=2, Draw=1, Loss=0
# Map match venue: Away=0, Home=1
result_map = {"W": 2, "D": 1, "L": 0}
venue_map = {"Away": 0, "Home": 1}

pred_resultat = saison_filtrer.copy()
pred_resultat = pred_resultat.drop(columns = ["GF", "GA"])
pred_resultat["Cible"] = pred_resultat["Result"].map(result_map)
pred_resultat["Venue"] = pred_resultat["Venue"].map(venue_map)


features = ["Venue", "xG", "xGA", "Poss"]
target = "Cible"

pred_resultat.dropna(subset=features + [target], inplace=True)

X = pred_resultat[features].values.astype("float32")
y = pred_resultat[target].values.astype("int64")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train)
y_train_tensor = torch.tensor(y_train)
X_test_tensor = torch.tensor(X_test)
y_test_tensor = torch.tensor(y_test)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)


class SimpleClassifier(nn.Module):
    def __init__(self, input_size, hidden_size=16, output_size=3):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        return self.model(x)

# model = SimpleClassifier(input_size=6)  # 6 features 
model = SimpleClassifier(input_size=4)  # car tu as 4 colonnes en entrée

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)



for epoch in range(20):
    model.train()
    total_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/20, Loss: {total_loss:.4f}")


model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    predicted = torch.argmax(outputs, dim=1)
    accuracy = (predicted == y_test_tensor).float().mean().item()
    print(f"Accuracy: {accuracy * 100:.2f}%")




Epoch 1/20, Loss: 9.7253
Epoch 2/20, Loss: 7.0340
Epoch 3/20, Loss: 4.9740
Epoch 4/20, Loss: 3.7660
Epoch 5/20, Loss: 4.3378
Epoch 6/20, Loss: 3.9328
Epoch 7/20, Loss: 3.6080
Epoch 8/20, Loss: 3.5004
Epoch 9/20, Loss: 3.7240
Epoch 10/20, Loss: 3.7887
Epoch 11/20, Loss: 3.3675
Epoch 12/20, Loss: 3.6669
Epoch 13/20, Loss: 3.2896
Epoch 14/20, Loss: 3.4775
Epoch 15/20, Loss: 3.3129
Epoch 16/20, Loss: 3.2362
Epoch 17/20, Loss: 3.3571
Epoch 18/20, Loss: 3.2348
Epoch 19/20, Loss: 3.2389
Epoch 20/20, Loss: 3.2239
Accuracy: 70.00%


#### Teston our prediction model, by entering values in the following parameters we could determine whether PSG would win their match.

In [43]:
# print("Exemple : match à domicile, prévision de 1 buts marqués, 4 encaissé, 2.4 xG, 2.9 xGA, 50% possession")
print("Example: home match, forecast 2.4 xG, 2.9 xGA, 50% possession")

venue = 0.0       # Home
gf = 1.0          # prévision de goal scored
ga = 4.0          # prévision de goal took
xg = 0.4          # Xg
xga = 6.9         # Xga 
poss = 30.0       # Possession

# new_match = np.array([[venue, gf, ga, xg, xga, poss]], dtype=np.float32)
new_match = np.array([[venue, xg, xga, poss]], dtype=np.float32)

new_match_tensor = torch.tensor(new_match)

with torch.no_grad():
    output = model(new_match_tensor)
    predicted_class = torch.argmax(output, dim=1).item()

label_map = {0: "Loss", 1: "Draw", 2: "Win"}
print(f"Predicted result:{label_map[predicted_class]}")


Example: home match, forecast 2.4 xG, 2.9 xGA, 50% possession
Predicted result:Win


## Today we know that PSG are European champions !!!!!

#### Now that the match is over and we have the parameters (home match, xG, xGA, possession), let's try out our model and see what results it gives for the final.

In [44]:
venue = 1.0       # away
xg = 3.1          # Xg
xga = 0.5         # Xga 
poss = 59.30       # Possession

new_match = np.array([[venue, xg, xga, poss]], dtype=np.float32)

new_match_tensor = torch.tensor(new_match)

with torch.no_grad():
    output = model(new_match_tensor)
    predicted_class = torch.argmax(output, dim=1).item()

label_map = {0: "", 1: "Draw", 2: "Win"}
print(f"Résultat prédit : {label_map[predicted_class]}")

Résultat prédit : Win
