In [1]:
# Number of upcoming (unfinished) fixtures per team averaged into the FDR
# (mean fixture difficulty) column.
nb_fdr = 6
# Number of most recent matches passed to the advanced-stats enrichment helper.
nb_stats = 4

In [2]:
import requests
import pandas as pd
import numpy as np
import fonctions
from pulp import LpProblem, LpVariable, LpMaximize, lpSum, LpBinary

# Show every column when a DataFrame is rendered.
pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

# Download the bootstrap payload; later cells read its "teams",
# "element_types" and "elements" entries.
url = "https://fantasy.premierleague.com/api/bootstrap-static/"
# The timeout keeps the notebook from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)

if response.status_code != 200:
    print("Erreur lors de la requête :", response.status_code)
    # Stop here instead of letting later cells fail with a NameError on `data`.
    response.raise_for_status()  # raises requests.HTTPError for 4xx / 5xx
    raise RuntimeError(f"Unexpected HTTP status {response.status_code} for {url}")

data = response.json()
print("Nombre de joueurs récupérés :", len(data["elements"]))
# Example: show the name of the first player
print("Premier joueur :", data["elements"][0]["web_name"])

Nombre de joueurs récupérés : 742
Premier joueur : Raya


In [3]:
# Download the fixture list used for the per-team difficulty average.
fixtures_url = "https://fantasy.premierleague.com/api/fixtures/"
# The timeout prevents an indefinite hang; raise_for_status() stops the notebook
# on an HTTP error instead of parsing (and counting) an error payload.
fixtures_response = requests.get(fixtures_url, timeout=30)
fixtures_response.raise_for_status()
fixtures = fixtures_response.json()
# NOTE(review): the label says "upcoming matches" but no filtering is applied
# here; a later cell filters on `finished`, so this count presumably includes
# already-played fixtures — confirm the wording.
print("Nombre de matchs à venir :", len(fixtures))

Nombre de matchs à venir : 380


In [4]:
# Team table: keep the strength fields, rename 'name' to 'team_name', add the
# mean of the home/away overall strength, and order the strongest teams first.
df_teams = (
    pd.DataFrame(data['teams'])[[
        "id", "name", "short_name", "strength",
        "strength_overall_home", "strength_overall_away",
        "strength_attack_home", "strength_attack_away",
        "strength_defence_home", "strength_defence_away",
    ]]
    .rename(columns={'name': 'team_name'})
    .assign(
        # Column name (including its spelling) is kept as-is: it is part of the saved table.
        avg_strenght=lambda teams: (teams["strength_overall_home"] + teams["strength_overall_away"]) / 2
    )
    .sort_values(by=["strength", "avg_strenght"], ascending=False)
)

In [5]:
# One row per fixture, enriched with the team table twice: once for the home
# side (team_h) and once for the away side (team_a). pandas only applies the
# suffix to column names that already exist on the left-hand side.
df_fixtures = (
    pd.DataFrame(fixtures)
    .merge(df_teams, left_on='team_h', right_on='id', suffixes=('', '_home'))
    .merge(df_teams, left_on='team_a', right_on='id', suffixes=('', '_away'))
)

In [6]:
# Mean fixture difficulty per team over its next `nb_fdr` unfinished fixtures.
# Teams are visited in df_teams row order so the resulting list lines up
# positionally with the frame when it is assigned as a column.

# Fixtures that have not been played yet (filter computed once for all teams).
pending = df_fixtures[df_fixtures['finished'] == False]

team_fdr = []
for team_id in df_teams['id']:
    # Difficulty as seen by this team when it plays at home ...
    as_home = (
        pending.loc[pending['team_h'] == team_id, ['event', 'team_h_difficulty']]
        .rename(columns={'team_h_difficulty': 'difficulty'})
    )
    # ... and when it plays away.
    as_away = (
        pending.loc[pending['team_a'] == team_id, ['event', 'team_a_difficulty']]
        .rename(columns={'team_a_difficulty': 'difficulty'})
    )
    # Order by gameweek number and keep the first `nb_fdr` rows.
    upcoming = (
        pd.concat([as_home, as_away], ignore_index=True)
        .sort_values('event')
        .head(nb_fdr)
    )
    team_fdr.append(upcoming['difficulty'].mean())

# The column keeps the name 'fdr_next_6' whatever the value of nb_fdr.
df_teams['fdr_next_6'] = team_fdr

In [7]:
# Display the teams table, which now carries the 'fdr_next_6' column.
df_teams

Unnamed: 0,id,team_name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,avg_strenght,fdr_next_6
11,12,Liverpool,LIV,5,1335,1355,1290,1330,1380,1380,1345.0,3.333333
0,1,Arsenal,ARS,4,1320,1325,1350,1350,1290,1300,1322.5,2.5
12,13,Man City,MCI,4,1275,1315,1250,1250,1300,1380,1295.0,3.333333
6,7,Chelsea,CHE,4,1185,1245,1150,1190,1220,1300,1215.0,2.666667
14,15,Newcastle,NEW,4,1185,1245,1130,1170,1240,1320,1215.0,3.0
1,2,Aston Villa,AVL,3,1125,1250,1110,1200,1140,1300,1187.5,3.166667
15,16,Nott'm Forest,NFO,3,1165,1205,1150,1230,1180,1180,1185.0,3.333333
3,4,Bournemouth,BOU,3,1150,1180,1100,1160,1200,1200,1165.0,3.166667
5,6,Brighton,BHA,3,1150,1175,1090,1140,1210,1210,1162.5,2.833333
4,5,Brentford,BRE,3,1120,1185,1080,1080,1160,1290,1152.5,3.166667


In [8]:
# Lookup table from position id to its short label, indexed by the id.
df_positions = (
    pd.DataFrame(data['element_types'])[["id", "singular_name_short"]]
    .set_index('id')
)
df_positions

Unnamed: 0_level_0,singular_name_short
id,Unnamed: 1_level_1
1,GKP
2,DEF
3,MID
4,FWD


In [9]:
# Build the raw player table, then keep only the fields used downstream.
df_players = pd.DataFrame(data['elements'])
df_players = df_players[[
    # identity, team / position ids, price and headline numbers
    'id', 'first_name', 'web_name', 'team', 'element_type', 'now_cost', 'total_points',
    'ict_index', 'selected_by_percent', 'selected_rank', 'form',
    # transfer activity and playing time
    'transfers_in', 'transfers_in_event', 'transfers_out', 'transfers_out_event', 'minutes',
    # season totals, actual and expected
    'assists', 'goals_scored', 'expected_goals', 'expected_assists', 'expected_goal_involvements',
    # per-90 figures
    'expected_goals_per_90', 'saves_per_90',
    'expected_assists_per_90', 'expected_goal_involvements_per_90',
    'expected_goals_conceded_per_90', 'goals_conceded_per_90',
    'defensive_contribution_per_90', 'clean_sheets_per_90',
]]

In [10]:
# Cast the numeric fields to float in a single pass.
# 'clean_sheets_per_90' and 'selected_by_percent' are not part of this cast.
float_columns = [
    'ict_index', 'total_points', 'minutes', 'goals_scored', 'form', 'assists',
    'expected_goals', 'expected_assists', 'expected_goal_involvements',
    'expected_goals_per_90', 'saves_per_90', 'expected_assists_per_90',
    'expected_goal_involvements_per_90', 'expected_goals_conceded_per_90',
    'goals_conceded_per_90', 'defensive_contribution_per_90', 'now_cost',
]
df_players = df_players.astype({column: float for column in float_columns})

# Attach the team name and short name; 'team' holds the team id.
team_labels = df_teams[['id', 'team_name', 'short_name']]
df_players = df_players.merge(team_labels, left_on='team', right_on='id', suffixes=('', '_team'))
# Attach the position label; 'element_type' holds the position id.
df_players = df_players.merge(df_positions, left_on='element_type', right_on='id', suffixes=('', '_pos'))

# Derived columns. Where they land in the frame does not matter: the explicit
# column selection further down fixes the final order.
# Points per unit of price and per minute played.
df_players['points_per_cost'] = df_players['total_points'] / df_players['now_cost']
df_players['points_per_minutes'] = df_players['total_points'] / df_players['minutes']
# Goal involvements (GI) = assists + goals.
df_players['goal_involvements'] = df_players['assists'] + df_players['goals_scored']
# Ratio of actual to expected goal involvements.
df_players['GI_on_xGI'] = df_players['goal_involvements'] / df_players['expected_goal_involvements']

# Best value first.
df_players = df_players.sort_values(
    by=["points_per_cost", "total_points", "ict_index"],
    ascending=False,
)

# Parse the ownership percentage, accepting a comma as decimal separator.
percent_text = df_players['selected_by_percent'].astype(str).str.replace(',', '.')
df_players['selected_by_percent'] = percent_text.astype(float)

# Bring in the team's averaged fixture difficulty from df_teams.
df_players = df_players.merge(
    df_teams[['team_name', 'fdr_next_6']],
    on='team_name',
    how='left'
)

# Keep the columns of interest, in display order.
df_players = df_players[[
    'id', 'first_name', 'web_name', 'short_name', 'team_name', 'singular_name_short',
    'now_cost', 'total_points', 'points_per_cost', 'points_per_minutes', 'fdr_next_6',
    'ict_index', 'selected_by_percent', 'selected_rank', 'form', 'minutes',
    'transfers_in', 'transfers_in_event', 'transfers_out', 'transfers_out_event',
    'assists', 'goals_scored', 'goal_involvements',
    'expected_goals', 'expected_assists', 'expected_goal_involvements', "GI_on_xGI",
    'expected_goals_per_90', 'saves_per_90', 'expected_assists_per_90',
    'expected_goal_involvements_per_90', 'expected_goals_conceded_per_90', 'goals_conceded_per_90',
    'defensive_contribution_per_90', 'clean_sheets_per_90',
]]

# Friendlier column names. 'name' is not among the columns selected above, so
# that entry has no effect.
df_players = df_players.rename(columns={
    'name': 'team_name',
    'short_name': 'team_short',
    'singular_name_short': 'position',
    'now_cost': 'price'
})

# Make sure the value ratio is float.
df_players['points_per_cost'] = df_players['points_per_cost'].astype(float)

# Coerce these columns to numeric and replace missing values with 0.
zero_filled = ['points_per_cost', 'ict_index', 'points_per_minutes', 'fdr_next_6']
df_players[zero_filled] = (
    df_players[zero_filled]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)

# Enrich with advanced stats over the last `nb_stats` matches.
# NOTE(review): the helper lives in the project module `fonctions`; it
# presumably adds the 'total_points_last' and 'xGI_last' columns read just
# below — confirm against its implementation.
df_players = fonctions.enrich_players_with_last_n_matches_data(df_players, n_last_matches=nb_stats)

# Recent points per unit of recent xGI; 0 wherever xGI_last is not strictly
# positive (zero, negative or missing). Computed column-wise instead of with a
# row-by-row apply: same values, always a float column, and it also works when
# the frame has no rows.
xgi_last = df_players['xGI_last']
df_players['total_points_last_per_xGI_last'] = (
    (df_players['total_points_last'] / xgi_last).where(xgi_last > 0, 0)
)

# Replace the infinities left by earlier divisions by zero with 0.
df_players = df_players.replace([np.inf, -np.inf], 0)

# Index the table by player id.
df_players = df_players.set_index("id")

In [11]:
# Preview the first 20 midfielders in the current row order.
df_players.loc[df_players['position'].eq('MID')].head(20)

Unnamed: 0_level_0,first_name,web_name,team_short,team_name,position,price,total_points,points_per_cost,points_per_minutes,fdr_next_6,ict_index,selected_by_percent,selected_rank,form,minutes,transfers_in,transfers_in_event,transfers_out,transfers_out_event,assists,goals_scored,goal_involvements,expected_goals,expected_assists,expected_goal_involvements,GI_on_xGI,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,defensive_contribution_per_90,clean_sheets_per_90,minutes_last,xGI_last,xG_last,ict_last,total_points_last,total_points_last_per_xGI_last
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
200,Jaidon,Anthony,BUR,Burnley,MID,57.0,40.0,0.701754,0.075614,3.0,48.2,6.3,61,4.8,529.0,804655,1661,114871,54,1.0,4.0,5.0,2.16,0.61,2.77,1.805054,0.37,0.0,0.1,0.47,1.83,1.7,8.17,0.34,66.25,0.1975,0.1425,5.975,4.75,24.050633
303,James,Garner,EVE,Everton,MID,50.0,32.0,0.64,0.059259,3.0,34.4,1.0,184,4.0,540.0,132229,189,38411,29,1.0,1.0,2.0,0.43,0.53,0.96,2.083333,0.07,0.0,0.09,0.16,1.37,1.0,12.17,0.33,67.5,0.1975,0.09,5.4,4.0,20.253165
82,Antoine,Semenyo,BOU,Bournemouth,MID,78.0,49.0,0.628205,0.085814,3.166667,47.8,55.8,3,6.5,571.0,5957202,2309,514433,205,2.0,4.0,6.0,2.99,0.38,3.37,1.780415,0.47,0.0,0.06,0.53,0.87,1.1,7.88,0.47,76.0,0.355,0.2975,5.3,6.5,18.309859
390,Ryan,Gravenberch,LIV,Liverpool,MID,56.0,35.0,0.625,0.077778,3.333333,30.0,5.6,68,6.5,450.0,697328,1365,100297,141,2.0,2.0,4.0,0.37,0.36,0.73,5.479452,0.07,0.0,0.07,0.14,1.03,1.0,9.6,0.4,67.5,0.1675,0.0825,5.575,6.5,38.80597
660,Anton,Stach,LEE,Leeds,MID,50.0,31.0,0.62,0.057407,2.666667,36.1,3.2,105,4.5,540.0,441169,433,107490,77,2.0,1.0,3.0,0.61,0.56,1.17,2.564103,0.1,0.0,0.09,0.19,1.22,1.5,8.33,0.33,67.5,0.085,0.04,3.75,4.5,52.941176
205,Josh,Cullen,BUR,Burnley,MID,50.0,31.0,0.62,0.062,3.0,29.0,1.9,137,2.8,500.0,312386,75,141302,66,1.0,1.0,2.0,0.08,0.57,0.65,3.076923,0.01,0.0,0.1,0.11,1.85,1.8,11.88,0.36,57.5,0.0125,0.0,2.1,2.75,220.0
241,Moisés,Caicedo,CHE,Chelsea,MID,57.0,35.0,0.614035,0.064815,2.666667,28.4,11.7,34,4.5,540.0,1079008,401,454168,367,0.0,2.0,2.0,0.47,0.22,0.69,2.898551,0.08,0.0,0.04,0.12,1.33,1.33,14.17,0.33,67.5,0.065,0.0325,3.525,4.5,69.230769
237,Enzo,Enzo,CHE,Chelsea,MID,67.0,39.0,0.58209,0.073864,2.666667,43.8,14.1,25,3.5,528.0,1652239,1311,596306,286,2.0,3.0,5.0,3.41,0.48,3.89,1.285347,0.58,0.0,0.08,0.66,1.35,1.36,5.8,0.34,67.5,0.4275,0.3975,5.25,3.5,8.187135
668,Granit,Xhaka,SUN,Sunderland,MID,50.0,29.0,0.58,0.053704,3.0,30.5,3.8,97,4.5,540.0,278308,513,174399,60,3.0,0.0,3.0,0.12,0.63,0.75,4.0,0.02,0.0,0.1,0.12,1.15,0.67,12.67,0.5,67.5,0.125,0.0175,4.05,4.5,36.0
493,Sean,Longstaff,LEE,Leeds,MID,49.0,26.0,0.530612,0.067532,2.666667,28.1,0.5,241,4.8,385.0,54930,125,18333,12,1.0,1.0,2.0,0.21,0.28,0.49,4.081633,0.05,0.0,0.07,0.12,1.09,1.17,10.75,0.23,65.5,0.12,0.0525,6.3,4.75,39.583333


In [12]:
# Persist the three tables as parquet files in the working directory.
# NOTE(review): df_players is written before the fillna(0) in the next cell,
# so the saved file may still contain NaN values — confirm this is intended.
for frame, target in (
    (df_players, 'df_players.parquet'),
    (df_positions, 'df_positions.parquet'),
    (df_teams, 'df_teams.parquet'),
):
    frame.to_parquet(target)

In [13]:
# Replace every remaining missing value in the player table with 0.
df_players = df_players.fillna(0)

In [14]:
# Print the column dtypes and non-null counts of the player table.
df_players.info()

<class 'pandas.core.frame.DataFrame'>
Index: 742 entries, 683 to 229
Data columns (total 40 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   first_name                         742 non-null    object 
 1   web_name                           742 non-null    object 
 2   team_short                         742 non-null    object 
 3   team_name                          742 non-null    object 
 4   position                           742 non-null    object 
 5   price                              742 non-null    float64
 6   total_points                       742 non-null    float64
 7   points_per_cost                    742 non-null    float64
 8   points_per_minutes                 742 non-null    float64
 9   fdr_next_6                         742 non-null    float64
 10  ict_index                          742 non-null    float64
 11  selected_by_percent                742 non-null    float64
 1