# KPI Stats

In [27]:
import pandas as pd

# Read the per-action exports for both leagues; the paths are relative, so
# they resolve against the notebook's working directory.
damallsvenskan_csv_path = 'csv/damallsvenskan_all.csv'
allsvenskan_csv_path = 'csv/allsvenskan_all.csv'

allsvenskan = pd.read_csv(allsvenskan_csv_path)
damallsvenskan = pd.read_csv(damallsvenskan_csv_path)

## Pass Accuracy

In [28]:
# Split each league's rows by the two pass outcomes found in the 'action' column.
allsvenskan_passes_acc = allsvenskan.loc[allsvenskan['action'].eq('Pass accurate')]
allsvenskan_passes_inacc = allsvenskan.loc[allsvenskan['action'].eq('Pass inaccurate')]
damallsvenskan_passes_acc = damallsvenskan.loc[damallsvenskan['action'].eq('Pass accurate')]
damallsvenskan_passes_inacc = damallsvenskan.loc[damallsvenskan['action'].eq('Pass inaccurate')]

# Pass accuracy = accurate / (accurate + inaccurate), reported as a fraction.
n_acc_men, n_inacc_men = len(allsvenskan_passes_acc), len(allsvenskan_passes_inacc)
n_acc_women, n_inacc_women = len(damallsvenskan_passes_acc), len(damallsvenskan_passes_inacc)

allsvenskan_pass_acc = n_acc_men / (n_acc_men + n_inacc_men)
damallsvenskan_pass_acc = n_acc_women / (n_acc_women + n_inacc_women)

print('Pass Accuracy - Damallsvenskan:', damallsvenskan_pass_acc)
print('Pass Accuracy - Allsvenskan:', allsvenskan_pass_acc)

Pass Accuracy - Damallsvenskan: 0.7757819044742096
Pass Accuracy - Allsvenskan: 0.8385978228992358


## Ball Possession

In [29]:
def calculate_average_possession(df):
    """Return the mean duration of a possession spell in an event log.

    The frame is walked in row order. The team on the first row is treated as
    being in possession; possession changes hands at the next row where a
    *different* team records a ``"Pass accurate"`` action. A spell's duration
    is the difference between the ``start_time`` of the row that ends it and
    the ``start_time`` of the row that opened it. Non-positive durations are
    discarded, and the spell still open at the end of the frame is not counted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``team``, ``action`` and ``start_time``.

    Returns
    -------
    float or int
        Mean of the retained durations, in the units of ``start_time``;
        ``0`` when no duration was retained (including an empty frame).
    """
    possessions = []
    current_team = None
    start_time = 0

    # Iterate only the three columns that are needed. DataFrame.iterrows()
    # materialises a full Series for every row, which is far slower on a large
    # event log and yields exactly the same values here.
    for team, action, event_time in zip(df["team"], df["action"], df["start_time"]):
        if current_team is None:
            # First usable row: it opens the first spell.
            current_team = team
            start_time = event_time
        elif team != current_team and action == "Pass accurate":
            time_of_possession = event_time - start_time
            # NOTE(review): presumably non-positive values appear when
            # start_time restarts (e.g. a new match or half) — confirm.
            if time_of_possession > 0:
                possessions.append(time_of_possession)
            current_team = team
            start_time = event_time

    return sum(possessions) / len(possessions) if possessions else 0

# Apply the possession metric to each league's full event log.
average_possession_allsvenskan = calculate_average_possession(allsvenskan)
average_possession_damallsvenskan = calculate_average_possession(damallsvenskan)

print(f"Average Possession Time - Damallsvenskan: {average_possession_damallsvenskan}")
print(f"Average Possession Time - Allsvenskan: {average_possession_allsvenskan}")

Average Possession Time - Damallsvenskan: 35.62468803816295
Average Possession Time - Allsvenskan: 42.35783846427036


## Pass Length

In [30]:
def calculate_pass_lenth(df):
    """Return the mean straight-line distance between pass origin and target.

    For every row the Euclidean distance between ``(xpos, ypos)`` and
    ``(xdest, ydest)`` is computed; the result is the arithmetic mean over all
    rows of ``df``. The caller is responsible for passing only pass rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the numeric columns ``xpos``, ``ypos``, ``xdest`` and
        ``ydest``.

    Returns
    -------
    float or int
        Mean distance in the units of the coordinate columns. ``0`` for an
        empty frame (matching the other KPI helpers in this notebook, instead
        of raising ``ZeroDivisionError``). ``nan`` if any coordinate is
        missing, as the row-by-row accumulation did before.
    """
    totalpasses = len(df)
    if totalpasses == 0:
        return 0

    # Column arithmetic replaces the per-row Python loop.
    dx = df["xpos"] - df["xdest"]
    dy = df["ypos"] - df["ydest"]
    lengths = (dx ** 2 + dy ** 2) ** 0.5

    # skipna=False: a missing coordinate must still surface as NaN rather than
    # being silently dropped from the numerator while counted in the denominator.
    return lengths.sum(skipna=False) / totalpasses

# Average length is measured on the accurate passes of each league only.
pass_length_allsvenskan = calculate_pass_lenth(allsvenskan_passes_acc)
pass_length_damallsvenskan = calculate_pass_lenth(damallsvenskan_passes_acc)

print(f"Average Pass Length - Damallsvenskan: {pass_length_damallsvenskan}")
print(f"Average Pass Length - Allsvenskan: {pass_length_allsvenskan}")

Average Pass Length - Damallsvenskan: 22.58338441114367
Average Pass Length - Allsvenskan: 23.57975138047499


## Passing Chains

In [31]:
def calculate_passing_chains(df):
    """Return the mean number of rows per possession chain in an event log.

    The frame is walked in row order. The first row opens a chain of length 1.
    A chain is closed (and a new one of length 1 opened) at the next row where
    a *different* team records a ``"Pass accurate"`` action; every other row
    extends the current chain by one, whatever its team or action. The chain
    still open at the end of the frame is not counted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``team`` and ``action``.

    Returns
    -------
    float or int
        Mean length of the closed chains; ``0`` when no chain was closed
        (including an empty frame).
    """
    chains = []
    current_team = None
    current_chain = 0

    # Iterate only the two columns that are needed. DataFrame.iterrows()
    # materialises a full Series for every row, which is far slower on a large
    # event log and yields exactly the same values here.
    for team, action in zip(df["team"], df["action"]):
        if current_team is None:
            current_team = team
            current_chain = 1
        elif team != current_team and action == "Pass accurate":
            chains.append(current_chain)
            current_team = team
            current_chain = 1
        else:
            current_chain += 1

    # Guard against ZeroDivisionError when no chain was ever closed.
    return sum(chains) / len(chains) if chains else 0

# Apply the chain metric to each league's full event log.
average_chain_allsvenskan = calculate_passing_chains(allsvenskan)
average_chain_damallsvenskan = calculate_passing_chains(damallsvenskan)

print(f"Average Passing Chains - Damallsvenskan: {average_chain_damallsvenskan}")
print(f"Average Passing Chains - Allsvenskan: {average_chain_allsvenskan}")

Average Passing Chains - Damallsvenskan: 16.843849760119667
Average Passing Chains - Allsvenskan: 18.775687694790395


## Fouls

In [32]:
# Denominators used to turn raw event counts into averages.
# NOTE(review): presumably one term per season, each the number of team
# appearances (2 x matches played); the lone 364 would then be a season with
# more teams rather than a typo for 264 — confirm against the data source.
damallsvenskan_factor = 4 * 264 + 364
allsvenskan_factor = 5 * 480

# Count the rows tagged 'Foul', then scale by the league's denominator.
foul_rows_damallsvenskan = damallsvenskan.loc[damallsvenskan['action'] == 'Foul']
foul_rows_allsvenskan = allsvenskan.loc[allsvenskan['action'] == 'Foul']

fouls_damallsvenskan = len(foul_rows_damallsvenskan) / damallsvenskan_factor
fouls_allsvenskan = len(foul_rows_allsvenskan) / allsvenskan_factor

print(f"Average Fouls - Damallsvenskan: {fouls_damallsvenskan}")
print(f"Average Fouls - Allsvenskan: {fouls_allsvenskan}")

Average Fouls - Damallsvenskan: 7.856338028169014
Average Fouls - Allsvenskan: 11.397083333333333


## Cards

In [34]:
def _action_rate(events, action_name, factor):
    """Number of rows whose 'action' equals ``action_name``, divided by ``factor``."""
    return events[events['action'] == action_name].shape[0] / factor


# Card averages use the same per-league denominators as the foul averages.
yellow_cards_damallsvenskan = _action_rate(damallsvenskan, 'Yellow card', damallsvenskan_factor)
red_cards_damallsvenskan = _action_rate(damallsvenskan, 'Red card', damallsvenskan_factor)
yellow_cards_allsvenskan = _action_rate(allsvenskan, 'Yellow card', allsvenskan_factor)
red_cards_allsvenskan = _action_rate(allsvenskan, 'Red card', allsvenskan_factor)

print(f"Average Yellow Cards - Damallsvenskan: {yellow_cards_damallsvenskan}")
print(f"Average Red Cards - Damallsvenskan: {red_cards_damallsvenskan}")
print(f"Average Yellow Cards - Allsvenskan: {yellow_cards_allsvenskan}")
print(f"Average Red Cards - Allsvenskan: {red_cards_allsvenskan}")

Average Yellow Cards - Damallsvenskan: 0.7795774647887324
Average Red Cards - Damallsvenskan: 0.02676056338028169
Average Yellow Cards - Allsvenskan: 1.73375
Average Red Cards - Allsvenskan: 0.057916666666666665


In [36]:
# Ratio of the combined (yellow + red) card average to the foul average.
# NOTE(review): the value is a fraction (about 0.10), not scaled to percent
# despite the label, and it treats every card as the result of a foul —
# confirm that both are intended.
cards_damallsvenskan = yellow_cards_damallsvenskan + red_cards_damallsvenskan
cards_allsvenskan = yellow_cards_allsvenskan + red_cards_allsvenskan

percentage_cards_damallsvenskan = cards_damallsvenskan / fouls_damallsvenskan
percentage_cards_allsvenskan = cards_allsvenskan / fouls_allsvenskan

print(f"Percentage of Fouls that Lead to Cards - Damallsvenskan: {percentage_cards_damallsvenskan}")
print(f"Percentage of Fouls that Lead to Cards - Allsvenskan: {percentage_cards_allsvenskan}")

Percentage of Fouls that Lead to Cards - Damallsvenskan: 0.10263535317318034
Percentage of Fouls that Lead to Cards - Allsvenskan: 0.1572039630022301


## Dataframe

In [37]:
# Collect every KPI into one frame with a single row per league.
# Each pair is ordered (Damallsvenskan, Allsvenskan) to match the League column.
kpi_pairs = {
    "Pass Accuracy": (damallsvenskan_pass_acc, allsvenskan_pass_acc),
    "Average Possession Time": (average_possession_damallsvenskan, average_possession_allsvenskan),
    "Average Pass Length": (pass_length_damallsvenskan, pass_length_allsvenskan),
    "Average Passing Chains": (average_chain_damallsvenskan, average_chain_allsvenskan),
    "Average Fouls": (fouls_damallsvenskan, fouls_allsvenskan),
    "Average Yellow Cards": (yellow_cards_damallsvenskan, yellow_cards_allsvenskan),
    "Average Red Cards": (red_cards_damallsvenskan, red_cards_allsvenskan),
    "Percentage of Fouls that Lead to Cards": (percentage_cards_damallsvenskan, percentage_cards_allsvenskan),
}

data = {"League": ["Damallsvenskan", "Allsvenskan"]}
data.update({kpi_name: list(pair) for kpi_name, pair in kpi_pairs.items()})

df = pd.DataFrame(data)

df.head()

Unnamed: 0,League,Pass Accuracy,Average Possession Time,Average Pass Length,Average Passing Chains,Average Fouls,Average Yellow Cards,Average Red Cards,Percentage of Fouls that Lead to Cards
0,Damallsvenskan,0.775782,35.624688,22.583384,16.84385,7.856338,0.779577,0.026761,0.102635
1,Allsvenskan,0.838598,42.357838,23.579751,18.775688,11.397083,1.73375,0.057917,0.157204


## Statistics

In [39]:
# Per-league descriptive statistics for every KPI column.
# NOTE(review): df has exactly one row per league, so mean and median simply
# repeat the KPI value and std is NaN (as the recorded output shows); a
# meaningful spread would need per-match or per-season rows.
summary_functions = ['mean', 'median', 'std']
desc_stats = df.groupby('League').agg(summary_functions)
desc_stats

Unnamed: 0_level_0,Pass Accuracy,Pass Accuracy,Pass Accuracy,Average Possession Time,Average Possession Time,Average Possession Time,Average Pass Length,Average Pass Length,Average Pass Length,Average Passing Chains,...,Average Fouls,Average Yellow Cards,Average Yellow Cards,Average Yellow Cards,Average Red Cards,Average Red Cards,Average Red Cards,Percentage of Fouls that Lead to Cards,Percentage of Fouls that Lead to Cards,Percentage of Fouls that Lead to Cards
Unnamed: 0_level_1,mean,median,std,mean,median,std,mean,median,std,mean,...,std,mean,median,std,mean,median,std,mean,median,std
League,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Allsvenskan,0.838598,0.838598,,42.357838,42.357838,,23.579751,23.579751,,18.775688,...,,1.73375,1.73375,,0.057917,0.057917,,0.157204,0.157204,
Damallsvenskan,0.775782,0.775782,,35.624688,35.624688,,22.583384,22.583384,,16.84385,...,,0.779577,0.779577,,0.026761,0.026761,,0.102635,0.102635,


In [41]:
from scipy.stats import ttest_ind
from statsmodels.stats.anova import anova_lm
from statsmodels.formula.api import ols

# T-test example for PassAccuracy.
# NOTE(review): df holds a single row per league, so each sample below has one
# observation and no variance can be estimated — the recorded p-value is nan.
# The comparison would need one KPI value per match (or per season) per league.
is_men = df['League'] == 'Allsvenskan'
is_women = df['League'] == 'Damallsvenskan'
men_pass_accuracy = df.loc[is_men, 'Pass Accuracy']
women_pass_accuracy = df.loc[is_women, 'Pass Accuracy']

t_stat, p_value = ttest_ind(men_pass_accuracy, women_pass_accuracy)

print(f"T-test for PassAccuracy: p-value = {p_value}")

T-test for PassAccuracy: p-value = nan


  svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / df


In [42]:
import numpy as np
def cohens_d(group1, group2):
    """Return Cohen's d for two independent samples.

    d = (mean(group1) - mean(group2)) / pooled_std, where the pooled standard
    deviation combines the two *sample* variances (ddof=1) weighted by their
    degrees of freedom.

    Parameters
    ----------
    group1, group2 : array-like of numbers
        pandas Series, NumPy arrays or plain sequences. Missing values (NaN)
        are dropped before anything is computed, so the sample sizes, means
        and variances all refer to the same observations.

    Returns
    -------
    float
        The effect size. ``nan`` when either group has fewer than two
        observations (the sample variance is undefined). When the pooled
        standard deviation is zero the result is ``nan`` for equal means and
        a signed ``inf`` otherwise.
    """
    sample1 = np.asarray(group1, dtype=float).ravel()
    sample2 = np.asarray(group2, dtype=float).ravel()
    sample1 = sample1[~np.isnan(sample1)]
    sample2 = sample2[~np.isnan(sample2)]

    n1, n2 = sample1.size, sample2.size
    if n1 < 2 or n2 < 2:
        return float("nan")

    diff = sample1.mean() - sample2.mean()

    # ddof=1 is explicit: ndarray.var() defaults to the population variance,
    # whereas the pooled formula below (and Series.var()) use the sample one.
    var1, var2 = sample1.var(ddof=1), sample2.var(ddof=1)

    # Pooled standard deviation.
    pooled_std = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))

    # Let a zero pooled_std produce inf/nan without emitting a RuntimeWarning.
    with np.errstate(divide="ignore", invalid="ignore"):
        return float(diff / pooled_std)

# Effect size of the pass-accuracy gap between the two leagues.
# NOTE(review): both inputs hold a single value (one row per league in df),
# so no variance can be estimated and the recorded result is nan.
d = cohens_d(group1=men_pass_accuracy, group2=women_pass_accuracy)
print(f"Cohen's d for PassAccuracy: {d}")

Cohen's d for PassAccuracy: nan


In [45]:
# Pairwise Pearson correlations between the KPI columns.
# NOTE(review): df has only two rows, and two points always lie on a straight
# line, so every coefficient is forced to +1 or -1 (all 1.0 in the recorded
# output); this matrix carries no information until df has more rows.
kpi_columns = [
    'Pass Accuracy',
    'Average Possession Time',
    'Average Pass Length',
    'Average Passing Chains',
    'Average Fouls',
    'Average Yellow Cards',
    'Average Red Cards',
    'Percentage of Fouls that Lead to Cards',
]
correlation_matrix = df[kpi_columns].corr()
correlation_matrix

Unnamed: 0,Pass Accuracy,Average Possession Time,Average Pass Length,Average Passing Chains,Average Fouls,Average Yellow Cards,Average Red Cards,Percentage of Fouls that Lead to Cards
Pass Accuracy,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Average Possession Time,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Average Pass Length,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Average Passing Chains,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Average Fouls,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Average Yellow Cards,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Average Red Cards,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Percentage of Fouls that Lead to Cards,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
