In [53]:
import ast
import os

import matplotlib.pyplot as plt
import pandas as pd


def _parse_literal(text):
    """Parse a string holding a Python literal (e.g. a dict repr) without executing code."""
    # ast.literal_eval only accepts literals, so a crafted CSV cell cannot run
    # arbitrary expressions the way eval() would.
    return ast.literal_eval(text)


def _points(scored, conceded):
    """Return the points earned by a side: 3 if it outscored the other, 1 if level, else 0."""
    if scored > conceded:
        return 3
    if scored == conceded:
        return 1
    return 0


def diff_points(leagues):
    """Build per-season home/away point differences and a per-match table.

    For every name in ``leagues`` the file ``data_leagues/<name>.csv`` is read.
    The columns ``year``, ``forecast``, ``goals`` and ``xG`` are used; the last
    three hold Python-literal dict strings with keys ``w``/``d``/``l``
    (forecast) and ``h``/``a`` (goals, xG).
    NOTE(review): ``w``/``d``/``l`` are presumably win/draw/loss probabilities
    seen from the home side -- confirm against the data source.

    Parameters
    ----------
    leagues : iterable of str
        League names, each matching a CSV file under ``data_leagues``.

    Returns
    -------
    df_leagues : pandas.DataFrame
        Indexed by ``(league, year)`` with columns ``diff_points`` (home minus
        away points summed over the season) and ``diff_points_xp`` (same for
        the expected points derived from ``forecast``).
    df_results : pandas.DataFrame
        One row per match with columns ``league``, ``year``, ``results``,
        ``results_away``, ``xP_home``, ``xP_away``, ``xG_home``, ``xG_away``.

    Raises
    ------
    ValueError
        If a ``forecast``/``goals``/``xG`` cell is not a plain Python literal.
    """
    match_columns = ['league', 'year', 'results', 'results_away',
                     'xP_home', 'xP_away', 'xG_home', 'xG_away']
    season_frames = []
    match_frames = []

    for league in leagues:
        data = pd.read_csv(os.path.join('data_leagues', league + '.csv'))

        # Parse each literal column exactly once.
        forecast = data['forecast'].map(_parse_literal)
        goals = data['goals'].map(_parse_literal)
        expected_goals = data['xG'].map(_parse_literal)

        matches = pd.DataFrame({
            'league': league,
            'year': data['year'],
            'results': goals.map(lambda g: _points(float(g['h']), float(g['a']))),
            'results_away': goals.map(lambda g: _points(float(g['a']), float(g['h']))),
            # Expected points: 3 * P(own win) + 1 * P(draw).
            'xP_home': forecast.map(lambda f: 3 * float(f['w']) + 1 * float(f['d'])),
            'xP_away': forecast.map(lambda f: 3 * float(f['l']) + 1 * float(f['d'])),
            'xG_home': expected_goals.map(lambda x: float(x['h'])),
            'xG_away': expected_goals.map(lambda x: float(x['a'])),
        })
        match_frames.append(matches)

        season_totals = matches.groupby('year')[
            ['results', 'results_away', 'xP_home', 'xP_away']
        ].sum()
        season = pd.DataFrame({
            'diff_points': season_totals['results'] - season_totals['results_away'],
            'diff_points_xp': season_totals['xP_home'] - season_totals['xP_away'],
        }).reset_index()
        season.insert(0, 'league', league)
        season_frames.append(season)

    if not season_frames:
        # No league requested: return empty frames with the documented layout
        # instead of failing on a missing 'league' column.
        empty_seasons = pd.DataFrame(
            columns=['league', 'year', 'diff_points', 'diff_points_xp']
        ).set_index(['league', 'year'])
        return empty_seasons, pd.DataFrame(columns=match_columns)

    # Concatenate once at the end rather than growing the frames inside the loop.
    df_leagues = pd.concat(season_frames, ignore_index=True).set_index(['league', 'year'])
    df_results = pd.concat(match_frames, ignore_index=True)
    return df_leagues, df_results

def style_formatter(leagues):
    """Return a pandas Styler showing season point differences as in-cell bars.

    Calls ``diff_points(leagues)`` and styles its per-season frame: numbers are
    shown without decimals, column headers are upper-cased, and the
    ``diff_points`` / ``diff_points_xp`` columns get bars aligned on zero.

    Parameters
    ----------
    leagues : iterable of str
        League names forwarded to ``diff_points``.

    Returns
    -------
    pandas.io.formats.style.Styler
    """
    df_leagues, _ = diff_points(leagues)
    bar_columns = ["diff_points", "diff_points_xp"]

    # Both columns share one bar scale. Take it from both of them, otherwise
    # any diff_points_xp value outside the diff_points range is clipped.
    scale_min = df_leagues[bar_columns].min().min()
    scale_max = df_leagues[bar_columns].max().max()

    return (
        df_leagues.style
        .format(precision=0, thousands=".", decimal=",")
        .format_index(str.upper, axis=1)
        .bar(
            subset=bar_columns,
            color=['#b0351a', '#069215'],
            align=0,
            vmin=scale_min,
            vmax=scale_max,
            height=60,
            width=80,
            props="width: 200px; border-right: 1px solid black;",
        )
    )

# League identifiers; diff_points() reads data_leagues/<name>.csv for each one.
leagues = [
    'La_liga',
    'EPL',
    'Bundesliga',
    'Serie_A',
    'RFPL',
    'Ligue_1',
]

#style_formatter(leagues)

In [24]:
import pandas as pd
import os
import json
import scipy.stats as stats

import numpy as np
from scipy.stats import ttest_ind

def cohen_d(group1, group2):
    """Return Cohen's d between two samples, using the pooled standard deviation.

    The value is ``(mean(group1) - mean(group2)) / s_pooled`` where ``s_pooled``
    combines the two sample standard deviations (``ddof=1``) weighted by their
    degrees of freedom.

    Parameters
    ----------
    group1, group2 : sequence or array-like of numbers

    Returns
    -------
    float
        Positive when ``group1`` has the larger mean.
    """
    size_a = len(group1)
    size_b = len(group2)

    spread_a = np.std(group1, ddof=1)
    spread_b = np.std(group2, ddof=1)

    # Degrees-of-freedom weighted sum of the two sample variances.
    weighted_variance = (size_a - 1) * spread_a**2 + (size_b - 1) * spread_b**2
    pooled_sd = np.sqrt(weighted_variance / (size_a + size_b - 2))

    return (np.mean(group1) - np.mean(group2)) / pooled_sd

def perform_statistical_tests_on_results(df):
    """Compare home and away values per league and season.

    For every ``(league, year)`` group with at least two rows, a Wilcoxon
    signed-rank test (``scipy.stats.wilcoxon``) and Cohen's d (``cohen_d``) are
    computed for three home/away column pairs: ``results``/``results_away``,
    ``xP_home``/``xP_away`` and ``xG_home``/``xG_away``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``league``, ``year`` and the six columns listed above.

    Returns
    -------
    pandas.DataFrame
        One row per tested league/season holding the test statistic, its
        p-value and the effect size for each of the three comparisons.
    """
    # (home column, away column, statistic key, p-value key, effect-size key)
    comparisons = (
        ('results', 'results_away', 'wilco-result', 'wilco-result-pvalue', 'result-cohend'),
        ('xP_home', 'xP_away', 'wilco-xPoints', 'wilco-xPoints-pvalue', 'xPoints-cohend'),
        ('xG_home', 'xG_away', 'wilco-xG', 'wilco-xG-pvalue', 'xG-cohend'),
    )

    records = []
    for league, league_rows in df.groupby('league'):
        # sort=False keeps the seasons in their order of first appearance.
        for year, season_rows in league_rows.groupby('year', sort=False):
            if len(season_rows) < 2:
                continue

            # Run every Wilcoxon test before computing any effect size.
            tests = [
                stats.wilcoxon(season_rows[home], season_rows[away])
                for home, away, *_ in comparisons
            ]

            record = {'league': league, 'year': year}
            for (home, away, stat_key, p_key, d_key), (statistic, p_value) in zip(comparisons, tests):
                record[stat_key] = statistic
                record[p_key] = p_value
                record[d_key] = cohen_d(season_rows[home], season_rows[away])
            records.append(record)

    return pd.DataFrame(records)

def style_formatter_proba(dataframe):
    # Fonction pour la coloration conditionnelle

    def color_wilco(val):
        color = 'red' if val > 0.05 else ''
        return f'color: {color};'
    
    def color_cohend(val):
        color = '#F4C430' if val < 0.2 else ''
        return f'background-color: {color}'

    def zebra_style(row_index):
        return "background-color: #f8f9fa;" if row_index % 2 == 0 else "background-color: white;"


    # Application du style combiné
    return (
        dataframe.style
        .format(precision=2, thousands=".", decimal=",")
        .format_index(str.upper, axis=1)
        # Appliquer une alternance de couleurs sur les lignes (zebra-striping)
        .apply(lambda row: [zebra_style(row.name)] * len(dataframe.columns), axis=1)
        # Texte noir par défaut
        .set_properties(**{"color": "black", "font-family": "Times New Roman", "font-size": "1.3em"})
        .map(color_wilco, subset=["wilco-result-pvalue", "wilco-xPoints-pvalue", "wilco-xG-pvalue"])
        .map(color_cohend, subset=["result-cohend", "xPoints-cohend", "xG-cohend"])
    )

# Run the home-vs-away tests for every league and display the styled table.
leagues = [
    'La_liga',
    'EPL',
    'Bundesliga',
    'Serie_A',
    'RFPL',
    'Ligue_1',
]
# NOTE: despite its name, df_leagues holds the per-match frame here (the second
# value returned by diff_points); the per-season frame is discarded.
_, df_leagues = diff_points(leagues)
results_df = perform_statistical_tests_on_results(df_leagues)
style_formatter_proba(results_df)

#print(results_df)

Unnamed: 0,LEAGUE,YEAR,WILCO-RESULT,WILCO-RESULT-PVALUE,RESULT-COHEND,WILCO-XPOINTS,WILCO-XPOINTS-PVALUE,XPOINTS-COHEND,WILCO-XG,WILCO-XG-PVALUE,XG-COHEND
0,Bundesliga,2.014,"8.887,50",0,51,"15.127,50",0,66,"15.014,00",0,49
1,Bundesliga,2.015,"11.800,00",2,26,"17.547,00",0,46,"17.430,00",0,40
2,Bundesliga,2.016,"9.553,00",0,52,"15.789,50",0,60,"15.813,00",0,47
3,Bundesliga,2.017,"9.408,00",0,42,"14.812,50",0,67,"14.736,00",0,51
4,Bundesliga,2.018,"11.115,00",0,32,"16.635,00",0,52,"16.636,00",0,40
5,Bundesliga,2.019,"13.742,50",60,6,"20.412,50",5,23,"20.551,00",6,17
6,Bundesliga,2.02,"10.848,00",3,25,"20.471,00",5,23,"20.344,00",4,22
7,Bundesliga,2.021,"10.530,00",0,40,"17.146,00",0,48,"17.281,00",0,33
8,Bundesliga,2.022,"9.976,00",0,45,"15.665,50",0,60,"15.673,00",0,46
9,Bundesliga,2.023,"10.283,00",0,33,"16.091,00",0,57,"16.040,00",0,46
