In [13]:
import requests, json, pandas as pd

# Pulling data from Premier League

pd.options.display.max_rows = 200
fpl_api_url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
response = requests.get(fpl_api_url).json()
fpl_data = json.dumps(response)

# The data above has many information, but we are only interested with the players('elements'). So we are going
# to separate it into the player table, with few columns which we are interested with 
# and sort
players_info = pd.json_normalize(json.loads(fpl_data), 'elements', sep='_')

columns_to_be_used = [
    'first_name', 
    'second_name',
    'web_name',  # name that appears on the team sheet
    'now_cost', # current price, multiply with 0.1 to get value that is displayed in the app e.g 76 means price is 7.6m
    'total_points', # 2024/25 season cumulative points
    'ict_index',  # influence, creativity, threat, the higher the better
    'team',       # player club
    'points_per_game',
    'starts',      # no of times player made into the starting eleven
    'element_type',  # player position (1 = GK, 2 = DEF, 3 = MID, 4 = FWD)
    'value_season'   # points produced in relation to player cost, in other words return on investment (ROI). The higher the better.
]

players_info_with_selected_columns = players_info[columns_to_be_used]
players_info_with_selected_columns[['value_season', 'ict_index', 'points_per_game']] = players_info_with_selected_columns[['value_season','ict_index', 'points_per_game']].apply(pd.to_numeric) 
players_table = players_info_with_selected_columns.sort_values(by='value_season', ascending=False) # sorting by value season
players_table.head(100)  # change the number to change number of rows displayed

# take a moment and study the data below, you might notice something helpful

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_info_with_selected_columns[['value_season', 'ict_index', 'points_per_game']] = players_info_with_selected_columns[['value_season','ict_index', 'points_per_game']].apply(pd.to_numeric)


Unnamed: 0,first_name,second_name,web_name,now_cost,total_points,ict_index,team,points_per_game,starts,element_type,value_season
116,Bryan,Mbeumo,Mbeumo,76,105,132.5,4,6.6,16,3,13.8
302,Alex,Iwobi,Iwobi,57,78,116.4,9,4.9,16,3,13.7
541,Matz,Sels,Sels,47,64,36.0,16,4.0,16,1,13.6
544,Chris,Wood,Wood,66,90,91.6,16,5.6,15,4,13.6
279,Jordan,Pickford,Pickford,49,66,38.6,8,4.4,15,1,13.5
282,Ashley,Young,Young,47,62,55.9,8,4.8,13,2,13.2
471,André,Onana,Onana,52,67,37.8,14,4.2,16,1,12.9
548,Nikola,Milenković,Milenković,46,59,49.7,16,3.9,15,2,12.8
670,Matheus,Santos Carneiro Da Cunha,Cunha,71,89,134.3,20,5.6,15,4,12.5
237,Dean,Henderson,Henderson,44,55,42.3,7,3.4,16,1,12.5


In [2]:
# Interesting results above, huh?
# let's sort by total points
players_table_sorted_by_total_points = players_info_with_selected_columns.sort_values(by='total_points', ascending=False)
players_table_sorted_by_total_points.head(100)  # change the number to change the number of rows displayed

Unnamed: 0,first_name,second_name,web_name,now_cost,total_points,ict_index,team,points_per_game,starts,element_type,value_season
405,Mohamed,Salah,M.Salah,134,156,188.1,12,10.4,15,3,11.6
213,Cole,Palmer,Palmer,112,128,178.3,6,8.0,16,3,11.4
13,Bukayo,Saka,Saka,106,106,174.9,1,7.1,15,3,10.0
116,Bryan,Mbeumo,Mbeumo,76,105,132.5,4,6.6,16,3,13.8
432,Erling,Haaland,Haaland,149,100,160.4,13,6.2,16,4,6.7
607,James,Maddison,Maddison,76,91,130.6,18,5.7,13,3,12.0
544,Chris,Wood,Wood,66,90,91.6,16,5.6,15,4,13.6
211,Nicolas,Jackson,N.Jackson,82,89,119.3,6,5.9,15,4,10.9
670,Matheus,Santos Carneiro Da Cunha,Cunha,71,89,134.3,20,5.6,15,4,12.5
454,Bruno,Borges Fernandes,B.Fernandes,84,81,118.2,14,5.1,16,3,9.6


In [3]:
# let's sort by ICT index
players_table_sorted_by_ict_index = players_info_with_selected_columns.sort_values(by='ict_index', ascending=False)
players_table_sorted_by_ict_index.head(100)    # change the number to change the number of rows displayed

Unnamed: 0,first_name,second_name,web_name,now_cost,total_points,ict_index,team,points_per_game,starts,element_type,value_season
405,Mohamed,Salah,M.Salah,134,156,188.1,12,10.4,15,3,11.6
213,Cole,Palmer,Palmer,112,128,178.3,6,8.0,16,3,11.4
13,Bukayo,Saka,Saka,106,106,174.9,1,7.1,15,3,10.0
432,Erling,Haaland,Haaland,149,100,160.4,13,6.2,16,4,6.7
635,Jarrod,Bowen,Bowen,74,75,137.4,19,4.7,16,3,10.1
670,Matheus,Santos Carneiro Da Cunha,Cunha,71,89,134.3,20,5.6,15,4,12.5
116,Bryan,Mbeumo,Mbeumo,76,105,132.5,4,6.6,16,3,13.8
607,James,Maddison,Maddison,76,91,130.6,18,5.7,13,3,12.0
605,Dejan,Kulusevski,Kulusevski,63,65,124.5,18,4.1,14,3,10.3
211,Nicolas,Jackson,N.Jackson,82,89,119.3,6,5.9,15,4,10.9


In [18]:
from pulp import LpProblem, LpVariable, LpMaximize, lpSum, LpStatus
model = LpProblem(name="FantasyFootbal", sense=LpMaximize)

players_json = players_info_with_selected_columns.to_dict(orient='records')

players_vars = {i: LpVariable(name=f"player_{i}", cat="Binary") for i in range(len(players_json))}

# objective function
# motivation: Our objective function is to get a squad with many points
#             high ICT index means they are likely to perform more and bring points.
#             value season means we can get many points with small amount of money. There are
#             many unknown player with smaller selection % by managers and have many points. These
#             players might provide edge against other managers.
#             Starts: We want players who are starters
#Limitations: You are likely to drop points in the benches because total points include points
#             accumulated by bench players. This is not holy grail. It is aimed to reduce headache during
#             selection. Feel free to adjust your selection.
#Weights:     These determines which is the most important thing to consider when choosing our players.
#             Total weight should be equal to 1 (0.6 + 0.2 + 0.1 + 0.1). Feel free to tweak the weights to get different result.
#             
model += lpSum(
    players_vars[i] * (
        players_json[i]['total_points'] * 0.5 +  # change values here to get different results
        players_json[i]['ict_index'] * 0.3 +     # change values here to get different results
        players_json[i]['value_season'] * 0.1 + # change values here to get different results
        players_json[i]['starts'] * 0.1         # change values here to get different results
    )
    for i in range(len(players_json))
)

#budget constraint
budget = 1000
model += lpSum(players_vars[i] * players_json[i]['now_cost'] for i in range(len(players_json))) <= budget  # players cost should exceed the budget

# team composition
model += lpSum(players_vars[i] for i in range(len(players_json)) if players_json[i]['element_type'] == 1) == 2 # two goalkeepers
model += lpSum(players_vars[i] for i in range(len(players_json)) if players_json[i]['element_type'] == 2) == 5 # 5 defenders
model += lpSum(players_vars[i] for i in range(len(players_json)) if players_json[i]['element_type'] == 3) == 5 # 5 midfielders
model += lpSum(players_vars[i] for i in range(len(players_json)) if players_json[i]['element_type'] == 4) == 3 # 3 forwards

# team size
model += lpSum(players_vars.values()) == 15

# max 3 players per club
for team_id in range(1, 21):
    model += lpSum(players_vars[i] for i in range(len(players_json)) if players_json[i]['team'] == team_id) <= 3

# solve the problem
status = model.solve()
print(f"Problem Status: {LpStatus[status]}")

# cleaning our data for display
selected_players = [players_json[i] for i in range(len(players_json)) if players_vars[i].value() == 1]

element_type_to_position = {
    1: 'GK',
    2: 'DEF',
    3: 'MID',
    4: 'FWD'
}

teams_info = pd.json_normalize(json.loads(fpl_data), 'teams', sep='_')
team_mapper = teams_info.set_index("id")["name"].to_dict()


selected_players_table = pd.DataFrame(selected_players)

selected_players_table['element_type'] = selected_players_table['element_type'].map(element_type_to_position)
selected_players_table['team'] = selected_players_table['team'].map(team_mapper)

# organizing columns
selected_players_table["Player Name"] = selected_players_table["web_name"]
selected_players_table["Price"] = selected_players_table["now_cost"]
selected_players_table["Total points"] = selected_players_table["total_points"]
selected_players_table["ICT"] = selected_players_table["ict_index"]
selected_players_table["Starts"] = selected_players_table["starts"]
selected_players_table["ROI"] = selected_players_table["value_season"]
selected_players_table["Position"] = selected_players_table["element_type"]

columns = [
    "Player Name",
    "Price",
    "Total points",
    "ICT",
    "Starts",
    "ROI",
    "Position",
    "team",
    "element_type"
]

total_row = {
    "Player Name" : "Total",
    "Price" : selected_players_table["Price"].sum(),
    "Total points" : selected_players_table["Total points"].sum(),
    "ICT": '',
    "Starts": '',
    "ROI": '',
    "Position": '',
    "team": ''
}

selected_players_table = pd.concat([selected_players_table, pd.DataFrame([total_row])], ignore_index=True)

selected_players_table[columns].sort_values(by='element_type').drop(columns=['element_type']).reset_index(drop=True)

Problem Status: Optimal


Unnamed: 0,Player Name,Price,Total points,ICT,Starts,ROI,Position,team
0,Muñoz,47,51,78.5,16.0,10.9,DEF,Crystal Palace
1,Robinson,48,55,82.1,16.0,11.5,DEF,Fulham
2,Gvardiol,61,64,97.3,14.0,10.5,DEF,Man City
3,Hall,46,52,76.9,13.0,11.3,DEF,Newcastle
4,Pedro Porro,56,61,93.1,14.0,10.9,DEF,Spurs
5,Welbeck,58,69,83.7,13.0,11.9,FWD,Brighton
6,Wood,66,90,91.6,15.0,13.6,FWD,Nott'm Forest
7,Cunha,71,89,134.3,15.0,12.5,FWD,Wolves
8,Flekken,45,49,64.4,16.0,10.9,GK,Brentford
9,Sels,47,64,36.0,16.0,13.6,GK,Nott'm Forest
