In [1]:
# Relevant module imports and installs
import pandas as pd
!pip install pulp brotli fuzzywuzzy
import pulp as plp
import sys 
import os
from collections import defaultdict
from fuzzywuzzy import process
import time




[notice] A new release of pip is available: 23.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Season label for this solve (not referenced by any other cell visible here).
solve_season = '2024-25'
# Gameweek number passed to append_stat_projections when projections are built.
solve_gameweek = 18
# When True, the next cell first tries to read 'gameweek_projections.csv'
# and only generates fresh projections if that file is missing.
load_projections_from_file = False

In [3]:
# Make the directory two levels above the working directory importable, so the
# project-level `projections` module can be found from this notebook.
module_path = os.path.abspath(os.path.join('..', '..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# This import has to come after the sys.path change above.
from projections import generate_projections, generate_stat_projections, append_stat_projections

# Optionally try the cached CSV first; `None` means "still needs generating".
projections_data = None
if load_projections_from_file:
    try:
        projections_data = pd.read_csv('gameweek_projections.csv')
        print('Loaded projections from CSV file.')
    except FileNotFoundError:
        print('Tried to load from CSV file, but it does not exist. Generating new projections...')

# Single generation path, shared by "generation requested" and "CSV missing",
# so both cases run the same code and both confirm when generation finished.
if projections_data is None:
    point_projections = generate_projections()
    stat_projections = generate_stat_projections()
    projections_data = append_stat_projections(point_projections, stat_projections, solve_gameweek)
    print('Generated new projections.')

Generated new projections.


### Player Manipulation

### Player Forcing/Banning

In [4]:
def fuzzy_ban_players(df, ban_ids):
    """Interactively pick players to ban, using fuzzy name search.

    The user is prompted for a name until an empty line is entered. For each
    search the best fuzzy matches are listed and the user picks one by number
    (or types 'skip' to search again).

    Args:
        df: DataFrame with at least 'Name' and 'ID' columns.
        ban_ids: list that the chosen players' DataFrame index labels are
            appended to. It is modified in place.

    Returns:
        The same ``ban_ids`` list, with the newly chosen index labels added.
    """
    # Search a {index label: name} mapping rather than a bare list of names.
    # fuzzywuzzy then returns (name, score, index label) triples, so two
    # players who share a display name stay distinct. Looking the row up again
    # by name would always select the first of them.
    names_by_index = df['Name'].dropna().to_dict()

    while True:
        search_name = input("Enter player name to ban (or press enter to finish): ").strip()

        if not search_name:
            break

        # Up to 10 best matches scoring at least 50
        matches = process.extractBests(search_name, names_by_index, score_cutoff=50, limit=10)

        if not matches:
            print("No matches found. Please try again.")
            continue

        # Display matches
        print("Matches found:")
        for idx, (name, score, player_index) in enumerate(matches, 1):
            player_id = df.loc[player_index, 'ID']
            print(f"{idx}. {name} (ID: {player_id}, Index: {player_index}, Score: {score})")

        # Ask user to select a match
        while True:
            choice = input("Enter the number of the player to ban (or 'skip' to search again): ").strip()
            if choice.lower() == 'skip':
                break

            try:
                choice_idx = int(choice) - 1
            except ValueError:
                print("Invalid input. Please enter a number or 'skip'.")
                continue

            if not 0 <= choice_idx < len(matches):
                print("Invalid choice. Please try again.")
                continue

            selected_name, _, selected_index = matches[choice_idx]
            selected_id = df.loc[selected_index, 'ID']
            # Picking the same player twice must not add a second entry.
            if selected_index not in ban_ids:
                ban_ids.append(selected_index)
            print(f"Banned: {selected_name} (ID: {selected_id}, Index: {selected_index})")
            break

    return ban_ids

# Collect the banned players interactively, starting from an empty list.
ban_ids = fuzzy_ban_players(projections_data, [])
print("Final ban list (indices):", ban_ids)

Final ban list (indices): []


In [5]:
def fuzzy_force_players(df, force_ids):
    """Interactively pick players to force into the lineup, using fuzzy name search.

    The user is prompted for a name until an empty line is entered. For each
    search the best fuzzy matches are listed and the user picks one by number
    (or types 'skip' to search again).

    Args:
        df: DataFrame with at least 'Name' and 'ID' columns.
        force_ids: list that the chosen players' DataFrame index labels are
            appended to. It is modified in place.

    Returns:
        The same ``force_ids`` list, with the newly chosen index labels added.
    """
    # Search a {index label: name} mapping rather than a bare list of names.
    # fuzzywuzzy then returns (name, score, index label) triples, so two
    # players who share a display name stay distinct. Looking the row up again
    # by name would always select the first of them.
    names_by_index = df['Name'].dropna().to_dict()

    while True:
        search_name = input("Enter player name to force (or press enter to finish): ").strip()

        if not search_name:
            break

        # Up to 10 best matches scoring at least 50
        matches = process.extractBests(search_name, names_by_index, score_cutoff=50, limit=10)

        if not matches:
            print("No matches found. Please try again.")
            continue

        # Display matches
        print("Matches found:")
        for idx, (name, score, player_index) in enumerate(matches, 1):
            player_id = df.loc[player_index, 'ID']
            print(f"{idx}. {name} (ID: {player_id}, Index: {player_index}, Score: {score})")

        # Ask user to select a match
        while True:
            choice = input("Enter the number of the player to force (or 'skip' to search again): ").strip()
            if choice.lower() == 'skip':
                break

            try:
                choice_idx = int(choice) - 1
            except ValueError:
                print("Invalid input. Please enter a number or 'skip'.")
                continue

            if not 0 <= choice_idx < len(matches):
                print("Invalid choice. Please try again.")
                continue

            selected_name, _, selected_index = matches[choice_idx]
            selected_id = df.loc[selected_index, 'ID']
            # Picking the same player twice must not add a second entry.
            if selected_index not in force_ids:
                force_ids.append(selected_index)
            print(f"Forced: {selected_name} (ID: {selected_id}, Index: {selected_index})")
            break

    return force_ids

# Collect the forced players interactively, starting from an empty list.
force_ids = fuzzy_force_players(projections_data, [])
print("Final force list (indices):", force_ids)

Final force list (indices): []


# 2024/25 GW18 Challenge: Out of the Box - Goals 'outside the box' are worth 7 extra points

In [6]:
def read_understat_data(filename='understat_shot_summary.csv'):
    """Read the understat shot summary CSV.

    Args:
        filename: path of the CSV file to read.

    Returns:
        The file contents as a DataFrame, or None (after printing a hint)
        when the file does not exist.
    """
    try:
        return pd.read_csv(filename)
    except FileNotFoundError:
        print('Must run understat.ipynb in the root directory.')
    return None

def calculate_outside_xg(understat_data):
    """Add the share of each player's xG that comes from outside the box.

    Adds a 'proportion_xg_out_box' column equal to
    xg_out_box / (xg_in_box + xg_out_box).

    A player whose total xG is exactly zero gets a proportion of 0 instead of
    the NaN that 0/0 would produce, so a downstream multiplication by this
    column cannot turn a valid projection into NaN. Rows whose inputs are
    themselves missing keep a NaN proportion.

    Args:
        understat_data: DataFrame with 'xg_in_box' and 'xg_out_box' columns.
            It is modified in place.

    Returns:
        The same DataFrame, with the new column added.
    """
    total_xg = understat_data['xg_in_box'] + understat_data['xg_out_box']
    proportion = understat_data['xg_out_box'] / total_xg
    # Only the 0/0 case is replaced; a NaN total compares unequal to 0 and is kept.
    understat_data['proportion_xg_out_box'] = proportion.where(total_xg != 0, 0.0)
    return understat_data

def create_player_list(understat_data):
    """Return the distinct values of the 'player' column, in order of first appearance."""
    return understat_data['player'].unique()

def get_understat_player_data(player_name, player_list, understat_data):
    """Return the understat rows for the best fuzzy match of ``player_name``.

    Args:
        player_name: name to look up.
        player_list: candidate names to match against.
        understat_data: DataFrame with a 'player' column.

    Returns:
        The rows of ``understat_data`` whose 'player' equals the best match,
        or None when no candidate reaches the score cutoff of 50.
    """
    try:
        best_match = process.extractOne(player_name, player_list, score_cutoff=50)
        # extractOne gives None when nothing reaches the cutoff; subscripting
        # None raises the TypeError handled below.
        matched_name = best_match[0]
    except TypeError:
        return None

    is_matched_player = understat_data['player'] == matched_name
    return understat_data[is_matched_player]

def update_projected_points(projections_data, player_list, understat_data, bonus_per_goal=7):
    """Add the outside-the-box goal bonus to each matched player's projection.

    For every row of ``projections_data`` the player's name is looked up with
    ``get_understat_player_data``. For a matched player:

    * 'Outside_Box_Goals' = 'Projected_Goals' * the first matching row's
      'proportion_xg_out_box';
    * 'Predicted_Points' += ``bonus_per_goal`` * 'Outside_Box_Goals',
      rounded to 2 decimal places.

    If the bonus cannot be computed (NaN proportion or NaN projected goals) it
    is treated as 0, so the player keeps their original projection rather than
    having it wiped. A projection that is itself NaN is still replaced by 0.
    Unmatched players are left untouched.

    Note: this is not idempotent. Calling it twice on the same frame adds the
    bonus twice.

    Args:
        projections_data: DataFrame with 'Name', 'Projected_Goals' and
            'Predicted_Points' columns. It is modified in place.
        player_list: candidate understat player names for the fuzzy match.
        understat_data: DataFrame with 'player' and 'proportion_xg_out_box'
            columns.
        bonus_per_goal: extra points awarded per outside-the-box goal.

    Returns:
        The same ``projections_data`` frame, updated.
    """
    total_players = len(projections_data)

    for progress, (label, player) in enumerate(projections_data.iterrows(), 1):
        print(f"\rProcessing player {progress}/{total_players}", end="", flush=True)

        player_data = get_understat_player_data(player['Name'], player_list, understat_data)
        if player_data is None or len(player_data) == 0:
            continue

        xg_out_box_proportion = player_data['proportion_xg_out_box'].values[0]
        outside_box_goals = player['Projected_Goals'] * xg_out_box_proportion
        if pd.isna(outside_box_goals):
            # No usable shot data: no bonus, but the base projection is kept.
            outside_box_goals = 0.0
        projections_data.loc[label, 'Outside_Box_Goals'] = outside_box_goals

        # New predicted points = original + bonus for expected outside-box goals
        predicted_points = player['Predicted_Points'] + bonus_per_goal * outside_box_goals

        # Round to 2dp and replace a missing projection with 0
        if pd.isna(predicted_points):
            predicted_points = 0
        else:
            predicted_points = round(predicted_points, 2)
        projections_data.loc[label, 'Predicted_Points'] = predicted_points

    print()  # Finish the carriage-return progress line
    return projections_data

# read_understat_data() returns None when the CSV is missing. Stop here with a
# clear message instead of failing later with an opaque TypeError on None.
understat_data = read_understat_data()
if understat_data is None:
    raise FileNotFoundError(
        "understat_shot_summary.csv was not found - run understat.ipynb in the root directory first."
    )

understat_data = calculate_outside_xg(understat_data)
player_list = create_player_list(understat_data)
projections_data = update_projected_points(projections_data, player_list, understat_data)

Processing player 386/386


### Optimisation

In [7]:
# Get the number of players and their list of ids
player_ids = projections_data['ID'].tolist()
player_count = len(player_ids)

# Read the columns the model needs into plain lists once. The decision
# variables live in lists, so every lookup below is by row position and does
# not depend on the DataFrame index being 0..n-1. A missing projection counts
# as 0 so that a NaN coefficient can never enter the objective.
predicted_points = [
    0.0 if pd.isna(points) else points
    for points in projections_data['Predicted_Points'].tolist()
]
player_positions = projections_data['Position'].tolist()

# ban_ids / force_ids hold DataFrame index labels (as collected by the fuzzy
# pickers); translate them to row positions before indexing the variable lists.
row_of_label = {label: row for row, label in enumerate(projections_data.index)}

# Set up the problem
model = plp.LpProblem("fpl-challenge", plp.LpMaximize)

# Decision variables: 1 if the player is in the lineup, else 0
lineup = [plp.LpVariable(f"lineup_{player_id}", cat="Binary") for player_id in player_ids]

# Captain variables: 1 if the player is the captain, else 0
captain = [plp.LpVariable(f"captain_{player_id}", cat="Binary") for player_id in player_ids]

# Objective: total predicted points of the lineup, with the captain's points
# counted a second time (i.e. doubled)
model += plp.lpSum(
    [(lineup[i] + captain[i]) * predicted_points[i] for i in range(player_count)]
)

# Constraints

# Total number of players = 9
squad_size = 9
model += plp.lpSum(lineup) == squad_size

# Players to be EXCLUDED from the lineup
for banned_label in ban_ids:
    model += lineup[row_of_label[banned_label]] == 0

# Players that MUST be in the lineup
for forced_label in force_ids:
    model += lineup[row_of_label[forced_label]] == 1

# Exactly one captain
model += plp.lpSum(captain) == 1

# Captain must be in the lineup
for lineup_var, captain_var in zip(lineup, captain):
    model += captain_var <= lineup_var

# Allowed number of players per position: (minimum, maximum)
position_limits = {
    'Goalkeeper': (1, 1),
    'Defender': (3, 4),
    'Midfielder': (2, 4),
    'Forward': (1, 3),
}
for position_name, (min_players, max_players) in position_limits.items():
    players_in_position = [
        lineup_var
        for lineup_var, player_position in zip(lineup, player_positions)
        if player_position == position_name
    ]
    if min_players == max_players:
        model += plp.lpSum(players_in_position) == min_players
    else:
        model += plp.lpSum(players_in_position) >= min_players
        model += plp.lpSum(players_in_position) <= max_players

# Solve the problem
model.solve()

# Function to print players by position
def print_players_by_position(players_dict):
    """Print the lineup grouped by position, followed by points and cost totals.

    Args:
        players_dict: mapping of position name to a list of player dicts with
            'Name', 'Team', 'Cost', 'Predicted_Points' and 'Captain' keys.
            Positions missing from the mapping are skipped.

    The captain is marked with " (C)" and shown with doubled points; the
    doubled value is also what goes into the points total.
    """
    points_sum = 0
    cost_sum = 0
    for position in ('Goalkeeper', 'Defender', 'Midfielder', 'Forward'):
        if position not in players_dict:
            continue
        print(f"\n{position}:")
        for entry in players_dict[position]:
            if entry['Captain']:
                marker, multiplier = " (C)", 2
            else:
                marker, multiplier = "", 1
            points = entry['Predicted_Points'] * multiplier
            print(f"  {entry['Name']}{marker} - {entry['Team']} - Cost: {entry['Cost']}m - Predicted Points: {points}")
            points_sum += points
            cost_sum += entry['Cost']
    print(f"\nTotal Predicted Points: {round(points_sum, 2)}")
    print(f"Total Cost: {round(cost_sum, 2)}m")

# Print the results
print("Status:", plp.LpStatus[model.status])

# Group the chosen players by position. Solver values are floats (or None if
# no solution was found), so a variable counts as "on" when its value is above
# 0.5 rather than exactly equal to 1. Rows are read by position (.iloc)
# because `lineup` and `captain` are plain lists ordered like the DataFrame rows.
selected_players = defaultdict(list)
for i in range(player_count):
    lineup_value = lineup[i].value()
    if lineup_value is None or lineup_value < 0.5:
        continue

    captain_value = captain[i].value()
    player = projections_data.iloc[i]
    selected_players[player['Position']].append({
        'Name': player['Name'],
        'Team': player['Team'],
        'Cost': player['Cost'],
        'Predicted_Points': player['Predicted_Points'],
        'Captain': captain_value is not None and captain_value > 0.5
    })

def solution_to_txt(players_dict, filename="solution.txt", encoding="utf-8"):
    """Write the lineup to a text file, grouped by position, with totals.

    The first line is a timestamp. After it come the positions present in
    ``players_dict`` (captain marked " (C)" with doubled points), then the
    total predicted points and total cost.

    Args:
        players_dict: mapping of position name to a list of player dicts with
            'Name', 'Team', 'Cost', 'Predicted_Points' and 'Captain' keys.
        filename: path of the file to write (overwritten if it exists).
        encoding: text encoding used for the file.
    """
    points_sum = 0
    cost_sum = 0
    with open(filename, 'w', encoding=encoding) as out:
        stamp = time.strftime("%Y-%m-%d - %H:%M:%S")
        out.write(f'Current Date & Time: {stamp}\n')
        for position in ('Goalkeeper', 'Defender', 'Midfielder', 'Forward'):
            if position not in players_dict:
                continue
            out.write(f"\n{position}:\n")
            for entry in players_dict[position]:
                if entry['Captain']:
                    marker, multiplier = " (C)", 2
                else:
                    marker, multiplier = "", 1
                points = entry['Predicted_Points'] * multiplier
                out.write(f"  {entry['Name']}{marker} - {entry['Team']} - Cost: {entry['Cost']}m - Predicted Points: {points}\n")
                points_sum += points
                cost_sum += entry['Cost']
        out.write(f"\nTotal Predicted Points: {round(points_sum, 2)}\n")
        out.write(f"Total Cost: {round(cost_sum, 2)}m\n")
        
# Always write the chosen lineup to a text file.
solution_to_txt(selected_players, "optimal_solution.txt")
# Toggle for echoing the lineup in the notebook output; the next cell reads
# the same flag before printing the top scorers.
print_solution = False
if print_solution:
    print("\nOptimal Lineup:")
    print_players_by_position(selected_players)

Status: Optimal

Optimal Lineup:

Goalkeeper:
  A.Becker - Liverpool - Cost: 5.4m - Predicted Points: 4.74

Defender:
  Gabriel - Arsenal - Cost: 6.3m - Predicted Points: 5.8
  Alexander-Arnold - Liverpool - Cost: 7.1m - Predicted Points: 6.33
  Robertson - Liverpool - Cost: 5.9m - Predicted Points: 5.47

Midfielder:
  Palmer - Chelsea - Cost: 11.3m - Predicted Points: 8.04
  M.Salah (C) - Liverpool - Cost: 13.5m - Predicted Points: 18.84

Forward:
  Haaland - Man City - Cost: 14.8m - Predicted Points: 7.63
  Isak - Newcastle - Cost: 9.0m - Predicted Points: 6.75
  Cunha - Wolves - Cost: 7.1m - Predicted Points: 6.13

Total Predicted Points: 69.73
Total Cost: 80.4m


In [8]:
def print_top_scorers_by_position(projections_data):
    """Show the ten highest 'Predicted_Points' rows for each position.

    Args:
        projections_data: DataFrame with 'Position' and 'Predicted_Points'
            columns.

    For each of Goalkeeper, Defender, Midfielder and Forward a heading is
    printed and the corresponding rows are shown with ``display``.
    """
    for position in ('Goalkeeper', 'Defender', 'Midfielder', 'Forward'):
        in_position = projections_data['Position'] == position
        best_ten = projections_data[in_position].nlargest(10, 'Predicted_Points')
        print(f'Top {position}s: ')
        display(best_ten)

# Optionally show the top ten projected scorers per position.
if print_solution:
    print_top_scorers_by_position(projections_data)

# Save the projections, without the index, to the file that the loading cell
# reads when load_projections_from_file is True.
# NOTE(review): at this point Predicted_Points already includes the
# outside-the-box bonus added by update_projected_points, and that cell runs
# again after a reload - so loading this file would apply the bonus a second
# time. Confirm whether the un-boosted projections should be saved instead.
projections_data.to_csv('gameweek_projections.csv', index=False)

Top Goalkeepers: 


Unnamed: 0,ID,Name,Team,Region,Position,Cost,Predicted_Points,xMins,Opponent,Score,Assist,Goal_Involvement,Clean_Sheet,Projected_Goals,Outside_Box_Goals
166,310,A.Becker,Liverpool,30,Goalkeeper,5.4,4.74,90,Leicester (H),0.0,0.0,0.0,0.556,0.0,0.0
9,15,Raya,Arsenal,200,Goalkeeper,5.6,4.53,90,Ipswich (H),0.0,0.0,0.0,0.579,0.0,0.0
101,185,Sánchez,Chelsea,200,Goalkeeper,4.8,4.08,90,Fulham (H),0.0,0.0,0.0,0.4,0.0,0.0
84,152,Arrizabalaga,Bournemouth,200,Goalkeeper,4.5,3.77,90,Crystal Palace (H),0.0,0.0,0.0,0.308,0.0,0.0
81,146,Verbruggen,Brighton,152,Goalkeeper,4.5,3.66,90,Brentford (H),0.0,0.0,0.0,0.308,0.0,0.0
250,443,Sels,Nott'm Forest,21,Goalkeeper,4.7,3.66,90,Spurs (H),0.0,0.0,0.0,0.231,0.0,0.0
273,488,Forster,Spurs,241,Goalkeeper,4.3,3.66,90,Nott'm Forest (A),0.0,0.0,0.0,0.182,0.0,0.0
8,14,Ramsdale,Southampton,241,Goalkeeper,4.4,3.65,90,West Ham (H),0.0,0.0,0.0,0.222,0.0,0.0
29,47,Martinez,Aston Villa,10,Goalkeeper,5.0,3.64,90,Newcastle (A),0.0,0.0,0.0,0.182,0.0,0.0
225,396,Dúbravka,Newcastle,194,Goalkeeper,4.2,3.63,90,Aston Villa (H),0.0,0.0,0.0,0.294,0.0,0.0


Top Defenders: 


Unnamed: 0,ID,Name,Team,Region,Position,Cost,Predicted_Points,xMins,Opponent,Score,Assist,Goal_Involvement,Clean_Sheet,Projected_Goals,Outside_Box_Goals
167,311,Alexander-Arnold,Liverpool,241,Defender,7.1,6.33,71,Leicester (H),0.146,0.293,0.396,0.549,0.169,0.154304
1,3,Gabriel,Arsenal,30,Defender,6.3,5.8,90,Ipswich (H),0.056,0.073,0.125,0.579,0.08,0.000451
183,335,Robertson,Liverpool,243,Defender,5.9,5.47,79,Leicester (H),0.111,0.237,0.322,0.556,0.123,0.010603
196,350,Gvardiol,Man City,97,Defender,6.0,4.96,81,Everton (H),0.11,0.099,0.198,0.5,0.122,0.005455
172,322,Gomez,Liverpool,241,Defender,4.9,4.94,90,Leicester (H),0.156,0.093,0.234,0.556,0.18,0.014595
187,339,Virgil,Liverpool,152,Defender,6.5,4.94,90,Leicester (H),0.167,0.059,0.216,0.556,0.193,0.0
12,18,Saliba,Arsenal,73,Defender,6.3,4.7,90,Ipswich (H),0.096,0.05,0.141,0.579,0.107,0.0
318,578,Calafiori,Arsenal,106,Defender,5.8,4.62,78,Ipswich (H),0.0,0.0,0.0,0.0,0.0,0.0
3,6,J.Timber,Arsenal,152,Defender,5.7,4.6,89,Ipswich (H),0.094,0.118,0.201,0.579,0.106,0.00731
94,171,Gusto,Chelsea,73,Defender,4.9,4.39,79,Fulham (H),0.084,0.154,0.225,0.4,0.095,0.020213


Top Midfielders: 


Unnamed: 0,ID,Name,Team,Region,Position,Cost,Predicted_Points,xMins,Opponent,Score,Assist,Goal_Involvement,Clean_Sheet,Projected_Goals,Outside_Box_Goals
178,328,M.Salah,Liverpool,63,Midfielder,13.5,9.42,89,Leicester (H),0.69,0.407,0.816,0.556,1.238,0.05483
100,182,Palmer,Chelsea,241,Midfielder,11.3,8.04,89,Fulham (H),0.455,0.294,0.615,0.4,0.639,0.073029
194,348,Foden,Man City,241,Midfielder,9.1,6.08,82,Everton (H),0.368,0.255,0.529,0.5,0.486,0.152954
7,13,Ødegaard,Arsenal,161,Midfielder,8.4,5.71,78,Ipswich (H),0.36,0.3,0.552,0.579,0.477,0.022714
177,327,Luis Díaz,Liverpool,48,Midfielder,7.5,5.58,66,Leicester (H),0.448,0.252,0.587,0.535,0.664,0.060834
281,503,Son,Spurs,114,Midfielder,9.8,5.51,70,Nott'm Forest (A),0.366,0.176,0.477,0.18,0.495,0.042672
207,366,B.Fernandes,Man Utd,173,Midfielder,8.4,5.48,87,Wolves (A),0.287,0.267,0.477,0.333,0.351,0.05616
59,99,Mbeumo,Brentford,38,Midfielder,7.6,5.29,90,Brighton (A),0.283,0.165,0.401,0.143,0.344,0.047424
289,514,Bowen,West Ham,241,Midfielder,7.5,5.28,90,Southampton (A),0.306,0.226,0.462,0.267,0.383,0.051682
48,78,Semenyo,Bournemouth,81,Midfielder,5.6,5.22,89,Crystal Palace (H),0.281,0.159,0.396,0.308,0.357,0.039496


Top Forwards: 


Unnamed: 0,ID,Name,Team,Region,Position,Cost,Predicted_Points,xMins,Opponent,Score,Assist,Goal_Involvement,Clean_Sheet,Projected_Goals,Outside_Box_Goals
197,351,Haaland,Man City,161,Forward,14.8,7.63,90,Everton (H),0.629,0.152,0.686,0.5,1.044,0.06162
229,401,Isak,Newcastle,206,Forward,9.0,6.75,83,Aston Villa (H),0.434,0.137,0.511,0.294,0.624,0.026143
302,541,Cunha,Wolves,30,Forward,7.1,6.13,90,Man Utd (H),0.276,0.176,0.403,0.2,0.337,0.048381
252,447,Wood,Nott'm Forest,154,Forward,6.6,5.59,82,Spurs (H),0.375,0.112,0.445,0.231,0.507,0.025768
2,4,Havertz,Arsenal,80,Forward,7.9,5.55,80,Ipswich (H),0.434,0.174,0.533,0.579,0.604,0.042424
98,180,N.Jackson,Chelsea,189,Forward,8.3,5.15,79,Fulham (H),0.394,0.144,0.481,0.39,0.548,0.009458
71,129,João Pedro,Brighton,30,Forward,5.8,5.11,80,Brentford (H),0.127,0.073,0.191,0.308,0.154,0.007
110,207,Mateta,Crystal Palace,73,Forward,7.2,4.85,88,Bournemouth (A),0.258,0.11,0.34,0.198,0.318,0.01241
333,617,Evanilson,Bournemouth,30,Forward,6.0,4.76,81,Crystal Palace (H),0.322,0.106,0.393,0.292,0.415,0.02552
0,2,G.Jesus,Arsenal,30,Forward,6.8,4.75,79,Ipswich (H),0.0,0.0,0.0,0.565,0.0,0.0
