In [221]:
import requests
import pandas as pd
import numpy as np
from pulp import LpProblem, LpVariable, LpMaximize, lpSum, LpBinary

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

url = "https://fantasy.premierleague.com/api/bootstrap-static/"
response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    print("Nombre de joueurs récupérés :", len(data["elements"]))
    # Exemple : afficher le nom du premier joueur
    print("Premier joueur :", data["elements"][0]["web_name"])
else:
    print("Erreur lors de la requête :", response.status_code)

Nombre de joueurs récupérés : 736
Premier joueur : Raya


In [222]:
df_teams = pd.DataFrame(data['teams'])[["id", "name", "short_name", "strength", "strength_overall_home", "strength_overall_away", "strength_attack_home", "strength_attack_away", "strength_defence_home", "strength_defence_away"]]

In [223]:
df_teams["avg_strenght"] = (df_teams["strength_overall_home"] + df_teams["strength_overall_away"]) / 2
df_teams.sort_values(by=["strength", "avg_strenght"] , inplace=True, ascending=False)

In [224]:
df_teams

Unnamed: 0,id,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,avg_strenght
11,12,Liverpool,LIV,5,1335,1355,1290,1330,1380,1380,1345.0
0,1,Arsenal,ARS,4,1320,1325,1350,1350,1290,1300,1322.5
12,13,Man City,MCI,4,1275,1315,1250,1250,1300,1380,1295.0
6,7,Chelsea,CHE,4,1185,1245,1150,1190,1220,1300,1215.0
14,15,Newcastle,NEW,4,1185,1245,1130,1170,1240,1320,1215.0
1,2,Aston Villa,AVL,3,1125,1250,1110,1200,1140,1300,1187.5
15,16,Nott'm Forest,NFO,3,1165,1205,1150,1230,1180,1180,1185.0
3,4,Bournemouth,BOU,3,1150,1180,1100,1160,1200,1200,1165.0
5,6,Brighton,BHA,3,1150,1175,1090,1140,1210,1210,1162.5
4,5,Brentford,BRE,3,1120,1185,1080,1080,1160,1290,1152.5


In [225]:
df_positions = pd.DataFrame(data['element_types'])[["id", "singular_name_short"]]

In [226]:
df_positions.set_index('id', inplace=True)

In [227]:
df_positions

Unnamed: 0_level_0,singular_name_short
id,Unnamed: 1_level_1
1,GKP
2,DEF
3,MID
4,FWD


In [228]:
# Selection des colonnes pertinentes 
df_players = pd.DataFrame(data['elements'])[['id', 'first_name', 'web_name', 'team', 'element_type', 'now_cost', 'total_points',
'ict_index', 'selected_by_percent', 'selected_rank', 'value_form', 'transfers_in', 'transfers_in_event', 'transfers_out', 'transfers_out_event','minutes', 'expected_goals_per_90', 'saves_per_90',
'expected_assists_per_90', 'expected_goal_involvements_per_90', 'expected_goals_conceded_per_90', 'goals_conceded_per_90',
'defensive_contribution_per_90']]

In [229]:
# Joindre le nom de l'équipe sur la colonne 'team' (qui contient l'ID)
df_players = df_players.merge(df_teams[['id','name', 'short_name']], left_on='team', right_on='id', suffixes=('', '_team'))
# Joindre la position du joueur sur la colonne 'element_type' (ID)
df_players = df_players.merge(df_positions, left_on='element_type', right_on='id', suffixes=('', '_pos'))
# Ajout d'une colonne points total / prix
df_players.insert(loc=6, column="points_per_cost", value=df_players['total_points'] / df_players['now_cost'])
# Tri
df_players.sort_values(by=["points_per_cost", "total_points", "ict_index"] , inplace=True, ascending=False)
# Conversion
df_players['selected_by_percent'] = df_players['selected_by_percent'].astype(str).str.replace(',', '.').astype(float)

In [230]:
# Sélectionner les colonnes intéressantes pour plus de lisibilité
df_players = df_players[[
'id', 'first_name', 'web_name', 'short_name', 'name', 'singular_name_short', 'now_cost', 'total_points', 'points_per_cost',
'ict_index', 'selected_by_percent', 'selected_rank', 'value_form', 'minutes', 'transfers_in', 'transfers_in_event', 'transfers_out', 'transfers_out_event', 'expected_goals_per_90', 
'saves_per_90', 'expected_assists_per_90', 'expected_goal_involvements_per_90', 'expected_goals_conceded_per_90', 'goals_conceded_per_90',
'defensive_contribution_per_90'    
]]

# Renommer pour clarté
df_players = df_players.rename(columns={
    'name': 'team_name',
    'short_name': 'team_short',
    'singular_name_short': 'position',
    'now_cost': 'price'
})

In [231]:
df_players.set_index("id", inplace=True)

In [232]:
df_players.columns

Index(['first_name', 'web_name', 'team_short', 'team_name', 'position',
       'price', 'total_points', 'points_per_cost', 'ict_index',
       'selected_by_percent', 'selected_rank', 'value_form', 'minutes',
       'transfers_in', 'transfers_in_event', 'transfers_out',
       'transfers_out_event', 'expected_goals_per_90', 'saves_per_90',
       'expected_assists_per_90', 'expected_goal_involvements_per_90',
       'expected_goals_conceded_per_90', 'goals_conceded_per_90',
       'defensive_contribution_per_90'],
      dtype='object')

In [233]:
# Supposons que df_players est votre DataFrame avec les colonnes indiquées,
# notamment 'position', 'price', 'points_per_cost' et 'minutes'

budget = 993  # budget total (ex en 0.1M £)

# Reset index pour avoir un index simple 0..n-1
df_players = df_players.reset_index(drop=True)

prob = LpProblem("FPL_Team_Optimizer", LpMaximize)

# Variables de décision avec indices correspondant à df_players
player_vars = [LpVariable(f"player_{i}", cat=LpBinary) for i in range(len(df_players))]

# Fonction objectif
prob += lpSum([df_players.loc[i, 'points_per_cost'] * player_vars[i] for i in range(len(df_players))])

# Contraintes d'effectif par position
prob += lpSum(player_vars) == 15
prob += lpSum([player_vars[i] for i in range(len(df_players)) if df_players.loc[i, 'position'] == 'GKP']) == 2
prob += lpSum([player_vars[i] for i in range(len(df_players)) if df_players.loc[i, 'position'] == 'DEF']) == 5
prob += lpSum([player_vars[i] for i in range(len(df_players)) if df_players.loc[i, 'position'] == 'MID']) == 5
prob += lpSum([player_vars[i] for i in range(len(df_players)) if df_players.loc[i, 'position'] == 'FWD']) == 3

# Contraintes budget
prob += lpSum([df_players.loc[i, 'price'] * player_vars[i] for i in range(len(df_players))]) <= budget

# Contrainte minimum minutes (exemple)
prob += lpSum([df_players.loc[i, 'minutes'] * player_vars[i] for i in range(len(df_players))]) >= 1000

# Résoudre
prob.solve()

# Sélection des joueurs choisis
selected_players = [df_players.loc[i] for i in range(len(df_players)) if player_vars[i].varValue == 1]

selected_df = pd.DataFrame(selected_players)
print(selected_df[['web_name', 'position', 'price', 'points_per_cost', 'minutes']])

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /usr/local/python/3.12.1/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/a2dcfab5e8a24d38978f7829f587a6d9-pulp.mps -max -timeMode elapsed -branch -printingOptions all -solution /tmp/a2dcfab5e8a24d38978f7829f587a6d9-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 12 COLUMNS
At line 4427 RHS
At line 4435 BOUNDS
At line 5172 ENDATA
Problem MODEL has 7 rows, 736 columns and 2578 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 6.04687 - 0.00 seconds
Cgl0004I processed model has 7 rows, 398 columns (398 integer (356 of which binary)) and 1549 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 3.33067e-16
Cbc0038I Solution found of -6.04687
Cbc0038I Cleaned solution of -6.04687
Cbc0038I Before mini branch and bound, 398 integers at bound fixed and 0 continuou

In [234]:
selected_df.sort_values(by = ['position'])

Unnamed: 0,first_name,web_name,team_short,team_name,position,price,total_points,points_per_cost,ict_index,selected_by_percent,selected_rank,value_form,minutes,transfers_in,transfers_in_event,transfers_out,transfers_out_event,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,defensive_contribution_per_90
0,Marcos,Senesi,BOU,Bournemouth,DEF,46,25,0.543478,12.5,5.0,82,1.8,270,474584,351132,61719,22346,0.05,0.0,0.03,0.08,0.95,1.33,14.33
1,Trevoh,Chalobah,CHE,Chelsea,DEF,51,27,0.529412,16.1,6.5,65,1.8,248,570584,272676,80804,26182,0.19,0.0,0.09,0.28,0.71,0.36,9.07
2,Marc,Guéhi,CRY,Crystal Palace,DEF,46,23,0.5,11.6,16.0,28,1.7,270,584597,339646,194321,48534,0.05,0.0,0.01,0.06,1.22,0.33,6.67
3,Riccardo,Calafiori,ARS,Arsenal,DEF,57,28,0.491228,17.7,11.3,36,1.6,224,983727,278857,129330,34801,0.57,0.0,0.01,0.58,0.63,0.4,4.82
4,Jurriën,J.Timber,ARS,Arsenal,DEF,56,26,0.464286,23.7,9.0,47,1.6,172,717585,193313,133465,35534,0.47,0.0,0.05,0.52,0.71,0.52,8.37
22,João Pedro,João Pedro,CHE,Chelsea,FWD,77,26,0.337662,28.2,63.8,1,1.1,252,1611717,452121,548232,43439,0.48,0.0,0.05,0.53,0.84,0.36,5.71
43,Richarlison,Richarlison,TOT,Spurs,FWD,67,19,0.283582,23.9,13.8,31,0.9,238,1271387,152793,300114,131699,0.33,0.0,0.05,0.38,1.46,0.38,6.43
61,Wilson,Isidor,SUN,Sunderland,FWD,55,14,0.254545,12.9,0.5,242,0.9,66,43227,26611,20579,6060,0.38,0.0,0.0,0.38,1.54,2.73,5.45
7,Guglielmo,Vicario,TOT,Spurs,GKP,50,21,0.42,9.8,8.4,51,1.4,270,454004,150402,141709,39973,0.0,4.33,0.0,0.0,1.36,0.33,0.0
13,Robin,Roefs,SUN,Sunderland,GKP,45,17,0.377778,8.1,1.3,161,1.3,270,115557,25174,40717,8036,0.0,2.33,0.0,0.0,0.92,1.0,0.0


In [235]:
selected_df['price'].sum()

np.int64(847)

In [236]:
selected_df['selected_by_percent'].sum()

np.float64(165.29999999999998)

In [237]:
# Supposons que df_players est votre DataFrame préparé
# Il doit contenir les colonnes : 'position', 'price', 'points_per_cost', 'minutes', 'selected_by_percent'

# Exemple initialisation ou import réel:
# df_players = pd.read_csv('votre_fichier.csv')
# df_players['selected_by_percent'] = df_players['selected_by_percent'].astype(float)

budget = 993  # Exemple budget total (en 0.1M £)
alpha = -0.05    # Pondération ownership (à ajuster: plus grand = plus pénalisant)

df_players = df_players.reset_index(drop=True)  # Indices alignés

# Créer problème d'optimisation
prob = LpProblem("FPL_Team_Optimizer", LpMaximize)

# Variables binaires pour chaque joueur
player_vars = [LpVariable(f"player_{i}", cat=LpBinary) for i in range(len(df_players))]

# Fonction objectif : maximiser points_per_cost tout en minimisant selected_by_percent
prob += lpSum([
    df_players.loc[i, 'points_per_cost'] * player_vars[i] - 
    alpha * df_players.loc[i, 'selected_by_percent'] * player_vars[i]
    for i in range(len(df_players))
])

# Contraintes d’effectif
prob += lpSum(player_vars) == 15
prob += lpSum([player_vars[i] for i in range(len(df_players)) if df_players.loc[i, 'position'] == 'GKP']) == 2
prob += lpSum([player_vars[i] for i in range(len(df_players)) if df_players.loc[i, 'position'] == 'DEF']) == 5
prob += lpSum([player_vars[i] for i in range(len(df_players)) if df_players.loc[i, 'position'] == 'MID']) == 5
prob += lpSum([player_vars[i] for i in range(len(df_players)) if df_players.loc[i, 'position'] == 'FWD']) == 3

# Contraintes budget
prob += lpSum([df_players.loc[i, 'price'] * player_vars[i] for i in range(len(df_players))]) <= budget

# Contrainte minimum minutes (exemple)
prob += lpSum([df_players.loc[i, 'minutes'] * player_vars[i] for i in range(len(df_players))]) >= 1000

# Résoudre
prob.solve()

# Extraire liste des joueurs sélectionnés
selected_players = [df_players.loc[i] for i in range(len(df_players)) if player_vars[i].varValue == 1]
selected_df = pd.DataFrame(selected_players)

# Afficher résultats clés
print(selected_df[['web_name', 'position', 'price', 'points_per_cost', 'minutes', 'selected_by_percent']])


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /usr/local/python/3.12.1/lib/python3.12/site-packages/pulp/apis/../solverdir/cbc/linux/i64/cbc /tmp/aa4c4b331e3e4e138b132d2f03d98236-pulp.mps -max -timeMode elapsed -branch -printingOptions all -solution /tmp/aa4c4b331e3e4e138b132d2f03d98236-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 12 COLUMNS
At line 4662 RHS
At line 4670 BOUNDS
At line 5407 ENDATA
Problem MODEL has 7 rows, 736 columns and 2578 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 27.0993 - 0.00 seconds
Cgl0003I 0 fixed, 1 tightened bounds, 0 strengthened rows, 0 substitutions
Cgl0004I processed model has 7 rows, 512 columns (512 integer (474 of which binary)) and 1906 elements
Cbc0038I Initial state - 2 integers unsatisfied sum - 0.4
Cbc0038I Solution found of -27.0107
Cbc0038I Cleaned solution of -27.0107
Cbc0038I Before

In [238]:
selected_df.sort_values(by = ['position'])

Unnamed: 0,first_name,web_name,team_short,team_name,position,price,total_points,points_per_cost,ict_index,selected_by_percent,selected_rank,value_form,minutes,transfers_in,transfers_in_event,transfers_out,transfers_out_event,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,defensive_contribution_per_90
2,Marc,Guéhi,CRY,Crystal Palace,DEF,46,23,0.5,11.6,16.0,28,1.7,270,584597,339646,194321,48534,0.05,0.0,0.01,0.06,1.22,0.33,6.67
31,Marc,Cucurella,CHE,Chelsea,DEF,61,19,0.311475,14.0,28.2,10,1.0,248,1085056,171244,254186,76968,0.04,0.0,0.16,0.2,0.71,0.36,6.53
44,Micky,Van de Ven,TOT,Spurs,DEF,46,13,0.282609,6.6,28.7,9,0.9,270,611305,90920,337881,102281,0.2,0.0,0.01,0.21,1.36,0.33,6.33
76,Virgil,Virgil,LIV,Liverpool,DEF,60,14,0.233333,15.1,26.0,11,0.8,270,474685,188808,379081,66906,0.06,0.0,0.01,0.07,1.04,1.33,14.67
98,Maxime,Estève,BUR,Burnley,DEF,40,8,0.2,7.8,17.8,22,0.7,262,228361,33852,295773,97038,0.02,0.0,0.0,0.02,2.27,2.06,7.9
22,João Pedro,João Pedro,CHE,Chelsea,FWD,77,26,0.337662,28.2,63.8,1,1.1,252,1611717,452121,548232,43439,0.48,0.0,0.05,0.53,0.84,0.36,5.71
63,Hugo,Ekitiké,LIV,Liverpool,FWD,87,22,0.252874,20.2,29.5,8,0.8,228,1706496,365691,449600,187264,0.53,0.0,0.03,0.56,1.05,0.79,3.16
83,Chris,Wood,NFO,Nott'm Forest,FWD,77,17,0.220779,17.1,25.0,13,0.7,237,1742051,78071,587234,312355,0.6,0.0,0.03,0.63,1.21,0.76,1.14
21,Robert,Sánchez,CHE,Chelsea,GKP,50,17,0.34,5.5,24.4,15,1.1,270,366837,72334,345155,89698,0.0,2.67,0.0,0.0,0.8,0.33,0.0
66,Martin,Dúbravka,BUR,Burnley,GKP,40,10,0.25,7.2,33.9,3,0.8,270,370971,77141,184397,55096,0.0,3.0,0.0,0.0,2.24,2.0,0.0


In [239]:
selected_df['price'].sum()

np.int64(993)

In [240]:
selected_df['selected_by_percent'].sum()

np.float64(457.2)