# Current Data

In [1]:
import requests
import pandas as pd
import json
import plotly.express as px

## Todo list
- Only data from current season for now, model is limited at the start of a new season -> a new model has to be developed that uses weighted historical data
- Just use points per minute against cost for now
- Ignores current match fixture difficulty for now
- Evaluates players that have more than 0% chance to play for now -> there should be a classification model to determine who coaches will pick in the starting line up
- Investigate data anomalies that can skew calculations
- Come up with accuracy prediction evaluations each week
- Consider set piece takers

In [2]:
GAMEWEEK = 28

# Have to run this before they update the data each week
url = "https://fantasy.premierleague.com/api/bootstrap-static/"
response = requests.get(url)
general_data = response.json()
players = general_data['elements']
players = [x for x in players if x['chance_of_playing_next_round'] == 100]
# players = [x for x in players if x['chance_of_playing_this_round'] != 0 and x['chance_of_playing_this_round'] != None]
players = [x['id'] for x in players]
print(f"{len(players)} players have a chance to play in week {GAMEWEEK}")
print("")

url = "https://fantasy.premierleague.com/api/fixtures/"
response = requests.get(url)
fixture_data = response.json()
fixture_data = [x for x in fixture_data if x['event'] == GAMEWEEK]
print("1 is easy fixture, 5 is difficult fixture")
for i in range(len(fixture_data)):
    fixture_data[i]['team_a'] = general_data['teams'][fixture_data[i]['team_a']-1]['name']
    fixture_data[i]['team_h'] = general_data['teams'][fixture_data[i]['team_h']-1]['name']
    print(f"{fixture_data[i]['team_h']}, {fixture_data[i]['team_h_difficulty']} vs {fixture_data[i]['team_a']}, {fixture_data[i]['team_a_difficulty']}, {fixture_data[i]['kickoff_time']}")

historical = pd.DataFrame()
for i in range(GAMEWEEK-1):
    url = f"https://fantasy.premierleague.com/api/event/{i+1}/live/"
    response = requests.get(url)
    live_data = response.json()
    live_data = live_data['elements']
    live_df = pd.DataFrame(live_data)
    live_df = live_df[['stats']]
    live_df = pd.json_normalize(live_df['stats']).reset_index()
    live_df = live_df[live_df['index'].isin(players)]
    historical = pd.concat([historical, live_df], ignore_index=True)

print(historical.head())

300 players have a chance to play in week 28

1 is easy fixture, 5 is difficult fixture
Man Utd, 2 vs Everton, 4, 2024-03-09T12:30:00Z
Bournemouth, 2 vs Sheffield Utd, 2, 2024-03-09T15:00:00Z
Crystal Palace, 2 vs Luton, 2, 2024-03-09T15:00:00Z
Wolves, 2 vs Fulham, 2, 2024-03-09T15:00:00Z
Arsenal, 2 vs Brentford, 5, 2024-03-09T17:30:00Z
Aston Villa, 3 vs Spurs, 4, 2024-03-10T13:00:00Z
Brighton, 2 vs Nott'm Forest, 3, 2024-03-10T14:00:00Z
West Ham, 2 vs Burnley, 3, 2024-03-10T14:00:00Z
Liverpool, 5 vs Man City, 5, 2024-03-10T15:45:00Z
Chelsea, 3 vs Newcastle, 3, 2024-03-11T20:00:00Z
Bournemouth, 2 vs Luton, 2, 2024-03-13T19:30:00Z


KeyboardInterrupt: 

In [None]:
print(historical.columns)

Index(['index', 'minutes', 'goals_scored', 'assists', 'clean_sheets',
       'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed',
       'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence',
       'creativity', 'threat', 'ict_index', 'starts', 'expected_goals',
       'expected_assists', 'expected_goal_involvements',
       'expected_goals_conceded', 'total_points', 'in_dreamteam'],
      dtype='object')


In [None]:
eval = historical.groupby(['index'])[['minutes', 'total_points']].sum()
# select players who at least have a historical record
eval = eval[eval['minutes'] > 0]
# select players who at least are positive overall
eval = eval[eval['total_points'] > 0]
print(f"We will pick from a pool of {len(eval)}")

# for each index, get the player name
for i in eval.index:
    # search the id in general data and pull the name
    for j in range(len(general_data['elements'])):
        if general_data['elements'][j]['id'] == i:
            eval.loc[i, 'ppg'] = general_data['elements'][j]['points_per_game']
            eval.loc[i, 'cost'] = general_data['elements'][j]['now_cost']
            eval.loc[i, 'first_name'] = general_data['elements'][j]['first_name']
            eval.loc[i, 'second_name'] = general_data['elements'][j]['second_name']
            eval.loc[i, 'team'] = general_data['teams'][general_data['elements'][j]['team']-1]['name']
            eval.loc[i, 'position'] = general_data['elements'][j]['element_type']
            eval.loc[i, 'news'] = general_data['elements'][j]['news']
            eval.loc[i, 'news_added'] = general_data['elements'][j]['news_added']
            eval.loc[i, 'expected_points'] = general_data['elements'][j]['ep_next']
            break

eval['name'] = eval['first_name'] + " " + eval['second_name']
eval['ppm'] = eval['total_points'] / eval['minutes']
eval['ppmcost'] = eval['ppm'] / eval['cost'] * 1000
eval['ppg'] = eval['ppg'].astype(float)
eval = eval.sort_values(by='ppmcost', ascending=False)
eval['position'] = eval['position'].map({1: 'GK', 2: 'DEF', 3: 'MID', 4: 'FWD'})
eval = eval[['name', 'team', 'position', 'cost', 'expected_points','ppg', 'ppm', 'ppmcost', 'minutes', 'total_points', 'news', 'news_added']]

print(eval.describe())

We will pick from a pool of 200
             cost         ppg         ppm     ppmcost      minutes  \
count  200.000000  200.000000  200.000000  200.000000   200.000000   
mean    51.385000    2.461000    0.059693    1.200705  1042.240000   
std     13.012818    1.364142    0.085004    1.728465   745.589398   
min     38.000000    0.000000    0.002075    0.051867     1.000000   
25%     44.000000    1.400000    0.030193    0.601504   382.250000   
50%     49.000000    2.500000    0.042197    0.845286   981.000000   
75%     54.000000    3.300000    0.059389    1.190360  1707.000000   
max    144.000000    8.200000    1.000000   20.000000  2430.000000   

       total_points  
count    200.000000  
mean      43.310000  
std       35.181424  
min        1.000000  
25%       14.000000  
50%       34.000000  
75%       66.000000  
max      167.000000  


In [None]:
# print top expected points
print(eval.sort_values(by='expected_points', ascending=False).head(20))

                                  name           team position   cost  \
index                                                                   
77                Norberto Murara Neto    Bournemouth       GK   47.0   
279             Rodrigo Muniz Carvalho         Fulham      FWD   45.0   
43        Douglas Luiz Soares de Paulo    Aston Villa      MID   57.0   
526                       Jarrod Bowen       West Ham      MID   79.0   
20                      William Saliba        Arsenal      DEF   58.0   
5         Gabriel dos Santos Magalhães        Arsenal      DEF   53.0   
355                     Erling Haaland       Man City      FWD  144.0   
134                        Pascal Groß       Brighton      MID   66.0   
352          Ederson Santana de Moraes       Man City       GK   55.0   
365                  Rodrigo Hernandez       Man City      MID   56.0   
26                    Leandro Trossard        Arsenal      MID   65.0   
406    Bruno Guimarães Rodriguez Moura      Newcast

In [None]:
# Manual Fixes

# eval = eval[eval['team'] != 'Liverpool']
# eval = eval[eval['team'] != 'Man City']
# eval = eval[eval['team'] != 'Man Utd']
# eval = eval[eval['team'] != 'Chelsea']
# eval = eval[eval['team'] != 'Arsernal']
# eval = eval[eval['team'] != 'Crystal Palace']
# eval = eval[eval['team'] != 'Wolves']
# eval = eval[eval['team'] != 'Bournemouth']
# eval = eval[eval['team'] != 'Brighton']
# eval = eval[eval['team'] != 'Newcastle']
# eval = eval[eval['team'] != 'Sheffield Utd']
# eval = eval[eval['team'] != 'Wolves']

In [None]:
# select players who play alot for more consistency, try to aim for 50 players to select from
# create a deep copy of eval
ppmcost = eval.copy(deep=True)
min_thres = 1450
cost_thres = 0.5
ppmcost = ppmcost[ppmcost['minutes'] > min_thres]
ppmcost = ppmcost[ppmcost['ppmcost'] > cost_thres]
print(f"{len(ppmcost)} players have played more than {min_thres} minutes and {cost_thres} and selected for ppmcost algo")

fig = px.scatter(ppmcost, x='minutes', y='ppmcost', hover_data=[
                 'name', 'team', 'position'], title='Minutes vs PPMCost', color='position')
fig.show()

# We can only max 3 players from each team
team_count = {'Spurs': 0,
              "Nott'm Forest": 0,
              'Liverpool': 0,
              'Bournemouth': 0,
              'Aston Villa': 0,
              'Man City': 0,
              'Arsenal': 0,
              'Crystal Palace': 0,
              'Man Utd': 0,
              'Fulham': 0,
              'Luton': 0,
              'Everton': 0,
              'Brentford': 0,
              'Newcastle': 0,
              'Wolves': 0,
              'West Ham': 0,
              'Chelsea': 0,
              'Sheffield Utd': 0,
              'Brighton': 0,
              'Burnley': 0, }

team_count = dict(sorted(team_count.items()))

# ppmcost algo
# select 11 players from eval
# first select the top GK
# then select the top FWD
# then select the top 3 DEF
# then select 6 more players up to 2 more DEF, 5 MID and 2 more FWD

# select the top GK
gk = ppmcost[ppmcost['position'] == 'GK'].sort_values(
    by='ppmcost', ascending=False).head(1)
ppmcost = ppmcost[~ppmcost.index.isin(gk.index)]
gk_count = 1
team_count[gk['team'].values[0]] += 1

# select the top FWD
fwd = ppmcost[ppmcost['position'] == 'FWD'].sort_values(
    by='ppmcost', ascending=False).head(1)
fwd_count = 1
team_count[fwd['team'].values[0]] += 1
ppmcost = ppmcost[~ppmcost.index.isin(fwd.index)]

selected_11 = pd.concat([gk, fwd])

# select the top 3 DEF
defe = ppmcost[ppmcost['position'] == 'DEF'].sort_values(
    by='ppmcost', ascending=False)
def_count = 0
for i in defe.index:
    # check if any team has more than 3 players
    if team_count[defe.loc[i, 'team']] > 3:
        continue
    else:
        selected_11 = pd.concat(
            [selected_11, defe.loc[i, :].to_frame().T])
        def_count += 1
        team_count[defe.loc[i, 'team']] += 1
        ppmcost = ppmcost[~ppmcost.index.isin([i])]
        if def_count == 3:
            break
        else:
            continue

mid_count = 0

def select_6():
    global ppmcost, gk_count, def_count, fwd_count, mid_count, team_count
    ppmcost = ppmcost.sort_values(by='ppmcost', ascending=False)
    selected_6 = pd.DataFrame()
    for i in ppmcost.index:
        if ppmcost.loc[i, 'position'] == 'DEF' and def_count < 5:
            # concat the player row to the selected_6 dataframe
            selected_6 = pd.concat(
                [selected_6, ppmcost.loc[i, :].to_frame().T])
            def_count += 1
            team_count[ppmcost.loc[i, 'team']] += 1
        elif ppmcost.loc[i, 'position'] == 'MID' and mid_count < 5:
            selected_6 = pd.concat(
                [selected_6, ppmcost.loc[i, :].to_frame().T])
            mid_count += 1
            team_count[ppmcost.loc[i, 'team']] += 1
        elif ppmcost.loc[i, 'position'] == 'FWD' and fwd_count < 3:
            selected_6 = pd.concat(
                [selected_6, ppmcost.loc[i, :].to_frame().T])
            fwd_count += 1
            team_count[ppmcost.loc[i, 'team']] += 1
        elif ppmcost.loc[i, 'position'] == 'GK' and gk_count < 1:
            selected_6 = pd.concat(
                [selected_6, ppmcost.loc[i, :].to_frame().T])
            gk_count += 1
            team_count[ppmcost.loc[i, 'team']] += 1
        else:
            continue
        if gk_count + def_count + fwd_count + mid_count == 11:
            print(f"Final Selection: {gk_count} GK, {
                  def_count} DEF, {fwd_count} FWD, {mid_count} MID")
            print("")
            return selected_6


selected_6 = select_6()
selected_11 = pd.concat([selected_6, selected_11])

# Assume 4m from bench of 4 players
BUDGET = 840
print(team_count)
print("")
print(f"Current budget spent: {selected_11['cost'].sum()}, remaining budget: {
      BUDGET - selected_11['cost'].sum()}")
print("")
print(selected_11)

57 players have played more than 1450 minutes and 0.5 and selected for ppmcost algo


Final Selection: 1 GK, 5 DEF, 2 FWD, 3 MID

{'Arsenal': 2, 'Aston Villa': 1, 'Bournemouth': 1, 'Brentford': 0, 'Brighton': 0, 'Burnley': 0, 'Chelsea': 0, 'Crystal Palace': 0, 'Everton': 0, 'Fulham': 1, 'Liverpool': 1, 'Luton': 1, 'Man City': 2, 'Man Utd': 0, 'Newcastle': 0, "Nott'm Forest": 0, 'Sheffield Utd': 0, 'Spurs': 1, 'West Ham': 1, 'Wolves': 0}

Current budget spent: 548.0, remaining budget: 292.0

                             name         team position  cost expected_points  \
515                  Oliver Skipp        Spurs      MID  45.0             0.0   
342                    Nathan Aké     Man City      DEF  51.0             4.7   
5    Gabriel dos Santos Magalhães      Arsenal      DEF  53.0             7.5   
364               Kalvin Phillips     West Ham      MID  47.0             0.5   
662                  Adama Traoré       Fulham      MID  49.0             2.8   
340                Cauley Woodrow        Luton      FWD  43.0             0.0   
352     Ederson Santana

In [None]:
# If there is still budget, excecute the maximise pointsperminute algo

# Maximise points per minute
# Sort Eval by ppm
# Substitute the lowest ppmcost player with the highest ppm player if it already does not exist
# If the player already exists, then substitute with the next highest ppm player
ppm = eval.copy(deep=True)
min_thres = 1450
ppm = ppm[ppm['minutes'] > min_thres]
ppm = ppm.sort_values(by=['ppm','ppmcost'], ascending=False)
ppm = ppm[~ppm.index.isin(selected_11.index)]
print(f"{len(ppm)} players have played more than {min_thres} minutes and selected for ppm algo")

def substitute_for_points():
    global ppm, selected_11, team_count, gk_count, def_count, fwd_count, mid_count
    selected_11 = selected_11.sort_values(by=['ppm', 'ppmcost'], ascending=True)
    # print(selected_11)

    # Substitute the lowest ppm player
    for i in selected_11.index:
        if selected_11.loc[i, 'position'] == 'GK':
            gk_count -= 1
        elif selected_11.loc[i, 'position'] == 'DEF':
            def_count -= 1
        elif selected_11.loc[i, 'position'] == 'MID':
            mid_count -= 1
        elif selected_11.loc[i, 'position'] == 'FWD':
            fwd_count -= 1
        # remove the count from team_count
        team_count[selected_11.loc[i, 'team']] -= 1
        # print(f"Substituting: {selected_11.loc[i, ['name','ppm']]}")

        # Find the highest possible ppm player
        for j in ppm.index:
            sub = False
            if selected_11.loc[i, 'ppm'] > ppm.loc[j, 'ppm']:
                pass
            elif team_count[ppm.loc[j, 'team']] > 2:
                pass
            elif ppm.loc[j, 'cost'] + selected_11['cost'].sum() > BUDGET+20:
                pass
            elif ppm.loc[j, 'position'] == 'GK' and gk_count > 0:
                pass
            elif ppm.loc[j, 'position'] == 'DEF' and def_count > 4:
                pass
            elif ppm.loc[j, 'position'] == 'MID' and mid_count > 4:
                pass
            elif ppm.loc[j, 'position'] == 'FWD' and fwd_count > 2:
                pass
            else:
                # remove the player from selected_11
                sub = True
                print(f"Substituted: {selected_11.loc[i,['name','ppm']].values} for  {ppm.loc[j, ['name','ppm']].values}")
                selected_11 = selected_11[~selected_11.index.isin([i])]
                selected_11 = pd.concat([selected_11, ppm.loc[j, :].to_frame().T])
                selected_11 = selected_11.sort_values(by=['ppm', 'ppmcost'], ascending=True)
                team_count[ppm.loc[j, 'team']] += 1
                if ppm.loc[j, 'position'] == 'GK':
                    gk_count += 1
                elif ppm.loc[j, 'position'] == 'DEF':
                    def_count += 1
                elif ppm.loc[j, 'position'] == 'MID':
                    mid_count += 1
                elif ppm.loc[j, 'position'] == 'FWD':
                    fwd_count += 1
                ppm = ppm[~ppm.index.isin([j])]
                break
        if sub == False:
            if selected_11.loc[i, 'position'] == 'GK':
                gk_count += 1
            elif selected_11.loc[i, 'position'] == 'DEF':
                def_count += 1
            elif selected_11.loc[i, 'position'] == 'MID':
                mid_count += 1
            elif selected_11.loc[i, 'position'] == 'FWD':
                fwd_count += 1
            team_count[selected_11.loc[i, 'team']] += 1
        if selected_11['cost'].sum() > BUDGET:
            print("budget reached")
            break
     
    return selected_11

selected_11 = substitute_for_points()
print(f"Final Selection: {gk_count} GK, {def_count} DEF, {fwd_count} FWD, {mid_count} MID")
print(f"Current budget spent: {selected_11['cost'].sum()}, remaining budget: {BUDGET - selected_11['cost'].sum()}")
print(team_count)
print("")
print(selected_11)
                

54 players have played more than 1450 minutes and selected for ppm algo
Substituted: ['Cauley Woodrow' 0.043478260869565216] for  ['Mohamed Salah' 0.049611476389719064]
Final Selection: 1 GK, 5 DEF, 1 FWD, 4 MID
Current budget spent: 635.0, remaining budget: 205.0
{'Arsenal': 2, 'Aston Villa': 1, 'Bournemouth': 1, 'Brentford': 0, 'Brighton': 0, 'Burnley': 0, 'Chelsea': 0, 'Crystal Palace': 0, 'Everton': 0, 'Fulham': 1, 'Liverpool': 2, 'Luton': 0, 'Man City': 2, 'Man Utd': 0, 'Newcastle': 0, "Nott'm Forest": 0, 'Sheffield Utd': 0, 'Spurs': 1, 'West Ham': 1, 'Wolves': 0}

                             name         team position   cost  \
308                 Mohamed Salah    Liverpool      MID  130.0   
662                  Adama Traoré       Fulham      MID   49.0   
364               Kalvin Phillips     West Ham      MID   47.0   
84                     Adam Smith  Bournemouth      DEF   44.0   
13                  Eddie Nketiah      Arsenal      FWD   51.0   
42                    Lucas

In [None]:
# If there is still budget, excecute the maximise pointspergame algo

# Maximise points per game
# Sort Eval by ppg
# Substitute the lowest ppg player with the highest ppg player if it already does not exist
ppg = eval.copy(deep=True)
minutes_threshold = 1450
ppg = ppg[ppg['minutes'] > minutes_threshold]
ppg = ppg.sort_values(by='ppg', ascending=False)
ppg = ppg[~ppg.index.isin(selected_11.index)]
print(f"{len(ppg)} players have played more than {minutes_threshold} minutes and selected for ppg algo")

def substitute_for_points():
    global ppg, selected_11, team_count, gk_count, def_count, fwd_count, mid_count
    selected_11 = selected_11.sort_values(by=['ppg', 'ppm', 'ppmcost'], ascending=True)
    # print(selected_11)

    # Substitute the lowest ppg player
    for i in selected_11.index:
        if selected_11.loc[i, 'position'] == 'GK':
            gk_count -= 1
        elif selected_11.loc[i, 'position'] == 'DEF':
            def_count -= 1
        elif selected_11.loc[i, 'position'] == 'MID':
            mid_count -= 1
        elif selected_11.loc[i, 'position'] == 'FWD':
            fwd_count -= 1
        team_count[selected_11.loc[i, 'team']] -= 1

        # Find the highest possible ppg player
        for j in ppg.index:
            sub = False
            if selected_11.loc[i, 'ppg'] > ppg.loc[j, 'ppg']:
                pass
            elif team_count[ppg.loc[j, 'team']] > 2:
                pass
            elif ppg.loc[j, 'cost'] + selected_11['cost'].sum() > BUDGET+20:
                pass
            elif ppg.loc[j, 'position'] == 'GK' and gk_count > 0:
                pass
            elif ppg.loc[j, 'position'] == 'DEF' and def_count > 4:
                pass
            elif ppg.loc[j, 'position'] == 'MID' and mid_count > 4:
                pass
            elif ppg.loc[j, 'position'] == 'FWD' and fwd_count > 2:
                pass
            else:
                # remove the player from selected_11
                sub = True
                print(f"Substituted: {selected_11.loc[i,['name','ppg']].values} for  {ppg.loc[j, ['name','ppg']].values}")
                selected_11 = selected_11[~selected_11.index.isin([i])]
                selected_11 = pd.concat([selected_11, ppg.loc[j, :].to_frame().T])
                selected_11 = selected_11.sort_values(by=['ppg', 'ppm', 'ppmcost'], ascending=True)
                team_count[ppg.loc[j, 'team']] += 1
                if ppg.loc[j, 'position'] == 'GK':
                    gk_count += 1
                elif ppg.loc[j, 'position'] == 'DEF':
                    def_count += 1
                elif ppg.loc[j, 'position'] == 'MID':
                    mid_count += 1
                elif ppg.loc[j, 'position'] == 'FWD':
                    fwd_count += 1
                ppg = ppg[~ppg.index.isin([j])]
                break
        if sub == False:
            if selected_11.loc[i, 'position'] == 'GK':
                gk_count += 1
            elif selected_11.loc[i, 'position'] == 'DEF':
                def_count += 1
            elif selected_11.loc[i, 'position'] == 'MID':
                mid_count += 1
            elif selected_11.loc[i, 'position'] == 'FWD':
                fwd_count += 1
            team_count[selected_11.loc[i, 'team']] += 1
        if selected_11['cost'].sum() > BUDGET:
            print("budget reached")
            break
     
    return selected_11

selected_11 = substitute_for_points()
print(f"Final Selection: {gk_count} GK, {def_count} DEF, {fwd_count} FWD, {mid_count} MID")
print(f"Current budget spent: {selected_11['cost'].sum()}, remaining budget: {BUDGET - selected_11['cost'].sum()}")
print(team_count)
print("")
print(selected_11)

54 players have played more than 1450 minutes and selected for ppg algo
Substituted: ['Milos Kerkez' 2.1] for  ['Mohamed Salah' 8.0]
Substituted: ['Ryan Christie' 2.2] for  ['Dominic Solanke' 5.0]
Substituted: ['Adam Smith' 2.6] for  ['Raheem Sterling' 4.2]
Substituted: ['John Stones' 3.2] for  ['Vitalii Mykolenko' 3.8]
Substituted: ['Ederson Santana de Moraes' 3.3] for  ['Manuel Akanji' 3.6]
Substituted: ['Bruno Guimarães Rodriguez Moura' 3.4] for  ['Abdoulaye Doucouré' 4.1]
Substituted: ['Leandro Trossard' 3.7] for  ['Kevin De Bruyne' 4.1]
Final Selection: 0 GK, 4 DEF, 2 FWD, 5 MID
Current budget spent: 751.0, remaining budget: 89.0
{'Arsenal': 1, 'Aston Villa': 0, 'Bournemouth': 1, 'Brentford': 0, 'Brighton': 0, 'Burnley': 0, 'Chelsea': 1, 'Crystal Palace': 0, 'Everton': 2, 'Fulham': 1, 'Liverpool': 2, 'Luton': 0, 'Man City': 3, 'Man Utd': 0, 'Newcastle': 0, "Nott'm Forest": 0, 'Sheffield Utd': 0, 'Spurs': 0, 'West Ham': 0, 'Wolves': 0}

    minutes total_points  ppg   cost first_na

In [None]:
xp = eval.copy(deep=True)
minutes_threshold = 1450
xp = xp[xp['minutes'] > minutes_threshold]
xp = xp.sort_values(by='expected_points', ascending=False)
xp = xp[~xp.index.isin(selected_11.index)]
print(f"{len(xp)} players have played more than {minutes_threshold} minutes and selected for expected points algo")


def substitute_for_points():
    global xp, selected_11, team_count, gk_count, def_count, fwd_count, mid_count
    selected_11 = selected_11.sort_values(by=['expected_points', 'ppg', 'ppm', 'ppmcost'], ascending=True)
    # print(selected_11)

    # Substitute the lowest xp player
    for i in selected_11.index:
        if selected_11.loc[i, 'position'] == 'GK':
            gk_count -= 1
        elif selected_11.loc[i, 'position'] == 'DEF':
            def_count -= 1
        elif selected_11.loc[i, 'position'] == 'MID':
            mid_count -= 1
        elif selected_11.loc[i, 'position'] == 'FWD':
            fwd_count -= 1
        team_count[selected_11.loc[i, 'team']] -= 1

        # Find the highest possible xp player
        for j in xp.index:
            sub = False
            if selected_11.loc[i, 'expected_points'] > xp.loc[j, 'expected_points']:
                pass
            elif team_count[xp.loc[j, 'team']] > 2:
                pass
            elif xp.loc[j, 'cost'] + selected_11['cost'].sum() > BUDGET+20:
                pass
            elif xp.loc[j, 'position'] == 'GK' and gk_count > 0:
                pass
            elif xp.loc[j, 'position'] == 'DEF' and def_count > 4:
                pass
            elif xp.loc[j, 'position'] == 'MID' and mid_count > 4:
                pass
            elif xp.loc[j, 'position'] == 'FWD' and fwd_count > 2:
                pass
            else:
                # remove the player from selected_11
                sub = True
                print(f"Substituted: {selected_11.loc[i,['name','expected_points']].values} for  {xp.loc[j, ['name','expected_points']].values}")
                selected_11 = selected_11[~selected_11.index.isin([i])]
                selected_11 = pd.concat([selected_11, xp.loc[j, :].to_frame().T])
                selected_11 = selected_11.sort_values(by=['expected_points', 'ppm', 'ppmcost'], ascending=True)
                team_count[xp.loc[j, 'team']] += 1
                if xp.loc[j, 'position'] == 'GK':
                    gk_count += 1
                elif xp.loc[j, 'position'] == 'DEF':
                    def_count += 1
                elif xp.loc[j, 'position'] == 'MID':
                    mid_count += 1
                elif xp.loc[j, 'position'] == 'FWD':
                    fwd_count += 1
                xp = xp[~xp.index.isin([j])]
                break
        if sub == False:
            if selected_11.loc[i, 'position'] == 'GK':
                gk_count += 1
            elif selected_11.loc[i, 'position'] == 'DEF':
                def_count += 1
            elif selected_11.loc[i, 'position'] == 'MID':
                mid_count += 1
            elif selected_11.loc[i, 'position'] == 'FWD':
                fwd_count += 1
            team_count[selected_11.loc[i, 'team']] += 1
        if selected_11['cost'].sum() > BUDGET:
            print("budget reached")
            break
     
    return selected_11

selected_11 = substitute_for_points()
print(f"Final Selection: {gk_count} GK, {def_count} DEF, {fwd_count} FWD, {mid_count} MID")
print(f"Current budget spent: {selected_11['cost'].sum()}, remaining budget: {BUDGET - selected_11['cost'].sum()}")
print(team_count)
print("")
print(selected_11)


54 players have played more than 1450 minutes and selected for expected points algo
Substituted: ['Vitalii Mykolenko' '1.5'] for  ['Milos Kerkez' '5.4']
Substituted: ['Abdoulaye Doucouré' '1.8'] for  ['Leandro Trossard' '6.2']
Substituted: ['Dominic Solanke' '11.0'] for  ['Adam Smith' '5.0']
Substituted: ['Kevin De Bruyne' '2.1'] for  ['Ederson Santana de Moraes' '6.3']
Substituted: ['Mohamed Salah' '2.3'] for  ['Bruno Guimarães Rodriguez Moura' '6.2']
Substituted: ['Raheem Sterling' '3.3'] for  ['Ryan Christie' '4.0']
Substituted: ['Manuel Akanji' '4.5'] for  ['John Stones' '4.5']
Final Selection: 1 GK, 5 DEF, 1 FWD, 4 MID
Current budget spent: 593.0, remaining budget: 247.0
{'Arsenal': 2, 'Aston Villa': 0, 'Bournemouth': 3, 'Brentford': 0, 'Brighton': 0, 'Burnley': 0, 'Chelsea': 0, 'Crystal Palace': 0, 'Everton': 0, 'Fulham': 1, 'Liverpool': 1, 'Luton': 0, 'Man City': 3, 'Man Utd': 0, 'Newcastle': 1, "Nott'm Forest": 0, 'Sheffield Utd': 0, 'Spurs': 0, 'West Ham': 0, 'Wolves': 0}

   

Total Model Team Points: 34

- Rodrigo: 2
- Akanji: 2
- Aké: 2
- Mykolenko: 1
- Luis Díaz: 2
- Abdoulaye: 1
- De Bruyne: 6
- Gabriel: 1
- Sterling: 1
- Solanke: 16 (C)
- Salah: 0

In [None]:
general_data['elements']

[{'chance_of_playing_next_round': 0,
  'chance_of_playing_this_round': 0,
  'code': 232223,
  'cost_change_event': 0,
  'cost_change_event_fall': 0,
  'cost_change_start': -1,
  'cost_change_start_fall': 1,
  'dreamteam_count': 0,
  'element_type': 4,
  'ep_next': '0.0',
  'ep_this': '0.0',
  'event_points': 0,
  'first_name': 'Folarin',
  'form': '0.0',
  'id': 1,
  'in_dreamteam': False,
  'news': 'Transferred to Monaco',
  'news_added': '2023-08-31T08:55:15.272751Z',
  'now_cost': 44,
  'photo': '232223.jpg',
  'points_per_game': '0.0',
  'second_name': 'Balogun',
  'selected_by_percent': '0.2',
  'special': False,
  'squad_number': None,
  'status': 'u',
  'team': 1,
  'team_code': 3,
  'total_points': 0,
  'transfers_in': 10024,
  'transfers_in_event': 0,
  'transfers_out': 63211,
  'transfers_out_event': 81,
  'value_form': '0.0',
  'value_season': '0.0',
  'web_name': 'Balogun',
  'minutes': 0,
  'goals_scored': 0,
  'assists': 0,
  'clean_sheets': 0,
  'goals_conceded': 0,
  'o