In [1]:
# Relevant module imports and installs
import pandas as pd
!pip install pulp brotli fuzzywuzzy
import pulp as plp
import sys 
import os
from collections import defaultdict
from fuzzywuzzy import process




[notice] A new release of pip is available: 23.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
# Get the absolute path to the directory containing the Python file
module_path = os.path.abspath(os.path.join('..', '..'))

# Add the directory to sys.path
if module_path not in sys.path:
    sys.path.append(module_path)

# Now you can import the module
from projections import generate_projections, generate_stat_projections, append_stat_projections

point_projections = generate_projections()
stat_projections = generate_stat_projections()

projections_data = append_stat_projections(point_projections, stat_projections, 8)

In [77]:
from fbref import fbref_main

team_fbref_stats, player_fbref_stats = fbref_main()

# Replace where Team = 'Tottenham' with 'Spurs in the team_fbref_stats DataFrame
team_fbref_stats.loc[team_fbref_stats['Team'] == 'Tottenham', 'Team'] = 'Spurs'

Getting tooltip info...
Getting team tables...
    premier_league_rank_x             Team  premier_league_matches_played  \
0                       1        Liverpool                              7   
1                       2  Manchester City                              7   
2                       3          Arsenal                              7   
3                       4          Chelsea                              7   
4                       5      Aston Villa                              7   
5                       6         Brighton                              7   
6                       7    Newcastle Utd                              7   
7                       8           Fulham                              7   
8                       9        Tottenham                              7   
9                      10  Nott'ham Forest                              7   
10                     11        Brentford                              7   
11                     12    

In [78]:
def fuzzy_match_name(df, df_type, search_name):
    if df_type.lower() == 'team':
        # Fuzzy match the 'Team' column, returning the best match
        matches = process.extractOne(search_name, df['Team'].tolist(), score_cutoff=50)
        if matches:
            return matches[0]
        else:
            return None
    elif df_type.lower() == 'player':
        # Fuzzy match the 'Name' column, returning the best match
        matches = process.extractOne(search_name, df['Player'].tolist(), score_cutoff=50)
        if matches:
            return matches[0]
        else:
            return None
    else:
        return None
    
def get_team_games_played(team_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if team_name == None:
        print('MAJOR ERROR: Missing team name')
        return 0

    total_team_games_played = team_fbref_stats[team_fbref_stats['Team'] == team_name]['premier_league_matches_played'].values[0]
    return total_team_games_played

def get_team_total_tackles(team_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if team_name == None:
        print('MAJOR ERROR: Missing team name')
        return 0

    total_team_tackles = team_fbref_stats[team_fbref_stats['Team'] == team_name]['squad_defensive_actions_Tackles_Tkl'].values[0]
    return total_team_tackles

def get_team_total_tackles_against(team_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if team_name == None:
        print('MAJOR ERROR: Missing team name')
        return 0

    total_team_tackles_against = team_fbref_stats[team_fbref_stats['Team'] == team_name]['squad_defensive_actions_Tackles_Tkl_AGAINST'].values[0]
    return total_team_tackles_against

def get_team_total_interceptions(team_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if team_name == None:
        print('MAJOR ERROR: Missing team name')
        return 0

    total_team_interceptions = team_fbref_stats[team_fbref_stats['Team'] == team_name]['squad_defensive_actions_interceptions'].values[0]
    return total_team_interceptions

def get_team_total_interceptions_against(team_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if team_name == None:
        print('MAJOR ERROR: Missing team name')

    total_team_interceptions_against = team_fbref_stats[team_fbref_stats['Team'] == team_name]['squad_defensive_actions_interceptions_AGAINST'].values[0]
    return total_team_interceptions_against

def get_team_total_recoveries(team_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)
    
    if team_name == None:
        print('MAJOR ERROR: Missing team name')

    total_team_recoveries = team_fbref_stats[team_fbref_stats['Team'] == team_name]['squad_miscellaneous_stats_Performance_Recov'].values[0]
    return total_team_recoveries

def get_team_total_recoveries_against(team_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if team_name == None:
        print('MAJOR ERROR: Missing team name')

    total_team_recoveries_against = team_fbref_stats[team_fbref_stats['Team'] == team_name]['squad_miscellaneous_stats_Performance_Recov_AGAINST'].values[0]
    return total_team_recoveries_against

def get_player_total_tackles(player_name):
    player_name = fuzzy_match_name(player_fbref_stats, 'player', player_name)

    if player_name == None:
        return 0
    
    total_player_tackles = player_fbref_stats[player_fbref_stats['Player'] == player_name]['defensive_actions_Tackles_Tkl'].values[0]
    return total_player_tackles

def get_player_total_interceptions(player_name):
    player_name = fuzzy_match_name(player_fbref_stats, 'player', player_name)

    if player_name == None:
        return 0
    
    total_player_interceptions = player_fbref_stats[player_fbref_stats['Player'] == player_name]['defensive_actions_interceptions'].values[0]
    return total_player_interceptions

def get_player_total_recoveries(player_name):
    player_name = fuzzy_match_name(player_fbref_stats, 'player', player_name)

    if player_name == None:
        return 0
    
    total_player_recoveries = player_fbref_stats[player_fbref_stats['Player'] == player_name]['miscellaneous_stats_Performance_Recov'].values[0]
    return total_player_recoveries

def strip_opponent_name(opponent_name):
    return opponent_name.split(' ')[0]

def predict_team_tackles_in_game(team_name, opponent_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)
    opponent_name = fuzzy_match_name(team_fbref_stats, 'team', opponent_name)

    if team_name == None or opponent_name == None:
        print('MAJOR ERROR: Missing team or opponent name')
        return None

    team_games_played = get_team_games_played(team_name)
    opponent_games_played = get_team_games_played(opponent_name)

    team_tackles_per_game = get_team_total_tackles(team_name) / team_games_played
    opponent_tackles_against_per_game = get_team_total_tackles_against(opponent_name) / opponent_games_played
    
    projected_team_tackles = (team_tackles_per_game + opponent_tackles_against_per_game) / 2
    return projected_team_tackles

def predict_team_interceptions_in_game(team_name, opponent_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)
    opponent_name = fuzzy_match_name(team_fbref_stats, 'team', opponent_name)

    if team_name == None or opponent_name == None:
        print('MAJOR ERROR: Missing team or opponent name')
        return None

    team_games_played = get_team_games_played(team_name)
    opponent_games_played = get_team_games_played(opponent_name)

    team_interceptions_per_game = get_team_total_interceptions(team_name) / team_games_played
    opponent_interceptions_against_per_game = get_team_total_interceptions_against(opponent_name) / opponent_games_played
    
    projected_team_interceptions = (team_interceptions_per_game + opponent_interceptions_against_per_game) / 2
    return projected_team_interceptions

def predict_team_recoveries_in_game(team_name, opponent_name):
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)
    opponent_name = fuzzy_match_name(team_fbref_stats, 'team', opponent_name)

    if team_name == None or opponent_name == None:
        print('MAJOR ERROR: Missing team or opponent name')
        return None

    team_games_played = get_team_games_played(team_name)
    opponent_games_played = get_team_games_played(opponent_name)

    team_recoveries_per_game = get_team_total_recoveries(team_name) / team_games_played
    opponent_recoveries_against_per_game = get_team_total_recoveries_against(opponent_name) / opponent_games_played
    
    projected_team_recoveries = (team_recoveries_per_game + opponent_recoveries_against_per_game) / 2
    return projected_team_recoveries

def predict_player_tackles_in_game_90(player_name, team_name, opponent_name):
    player_name = fuzzy_match_name(player_fbref_stats, 'player', player_name)
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if player_name == None:
        return 0
    elif team_name == None:
        print('MAJOR ERROR: Missing team name:', team_name)
        return None

    player_team_tackle_proportion = get_player_total_tackles(player_name) / get_team_total_tackles(team_name)
    projected_player_tackles = player_team_tackle_proportion * predict_team_tackles_in_game(team_name, opponent_name)
    return round(projected_player_tackles, 3)

def predict_player_interceptions_in_game_90(player_name, team_name, opponent_name):
    player_name = fuzzy_match_name(player_fbref_stats, 'player', player_name)
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if player_name == None:
        return 0
    elif team_name == None:
        print('MAJOR ERROR: Missing team name:', team_name)
        return None

    player_team_interception_proportion = get_player_total_interceptions(player_name) / get_team_total_interceptions(team_name)
    projected_player_interceptions = player_team_interception_proportion * predict_team_interceptions_in_game(team_name, opponent_name)
    return round (projected_player_interceptions, 3)

def predict_player_recoveries_in_game_90(player_name, team_name, opponent_name):
    player_name = fuzzy_match_name(player_fbref_stats, 'player', player_name)
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if player_name == None:
        return 0
    elif team_name == None:
        print('MAJOR ERROR: Missing team name:', team_name)

    player_team_recoveries_proportion = get_player_total_recoveries(player_name) / get_team_total_recoveries(team_name)
    projected_player_recoveries = player_team_recoveries_proportion * predict_team_recoveries_in_game(team_name, opponent_name)
    return round(projected_player_recoveries, 3)

def calculate_new_xpts(xpts, projected_tackles, projected_interceptions, projected_recoveries):
    new_xpts = xpts + (projected_tackles * 1) + (projected_interceptions * 1) + (projected_recoveries * (1/3))
    return round(new_xpts, 2)

In [79]:
# Create Projected_Tackles, Projected_Interceptions, Projected_Recoveries for columns and default to 0.00
projections_data['Projected_Tackles'] = 0.000
projections_data['Projected_Interceptions'] = 0.000
projections_data['Projected_Recoveries'] = 0.000

# For each row in projections_data, append a column for Projected_Tackles, Projected_Interceptions, Projected_Recoveries
for index, row in projections_data.iterrows():
    print(f'Appending data for: {row["Name"]} in team {row["Team"]}', end='\r')
    player_minute_proportion = row['xMins'] / 90

    projected_tackles = player_minute_proportion * predict_player_tackles_in_game_90(row['Name'], row['Team'], row['Opponent'])
    projected_interceptions = player_minute_proportion * predict_player_interceptions_in_game_90(row['Name'], row['Team'], row['Opponent'])
    projected_recoveries = player_minute_proportion * predict_player_recoveries_in_game_90(row['Name'], row['Team'], row['Opponent'])

    projections_data.loc[index, 'Projected_Tackles'] = projected_tackles
    projections_data.loc[index, 'Projected_Interceptions'] = projected_interceptions
    projections_data.loc[index, 'Projected_Recoveries'] = projected_recoveries

    updated_predicted_points = calculate_new_xpts(row['Predicted_Points'], projected_tackles, projected_interceptions, projected_recoveries)
    projections_data.loc[index, 'Predicted_Points'] = updated_predicted_points

projections_data

Appending data for: Kporha in team Crystal Palacelealace

Unnamed: 0,ID,Name,Team,Region,Position,Cost,Predicted_Points,xMins,Opponent,Score,Assist,Goal_Involvement,Clean_Sheet,Projected_Goals,Projected_Tackles,Projected_Interceptions,Projected_Recoveries
0,2,G.Jesus,Arsenal,30,Forward,6.8,2.50,34,Bournemouth (A),0.009,0.005,0.013,0.010,0.012,0.164333,0.103889,0.578000
1,3,Gabriel,Arsenal,30,Defender,6.2,6.92,90,Bournemouth (A),0.126,0.077,0.193,0.357,0.143,0.871000,0.550000,1.870000
2,4,Havertz,Arsenal,80,Forward,8.3,6.51,68,Bournemouth (A),0.342,0.226,0.491,0.357,0.471,0.767644,0.415556,2.439689
3,6,J.Timber,Arsenal,152,Defender,5.5,5.22,63,Bournemouth (A),0.072,0.088,0.153,0.357,0.081,1.320900,0.289100,2.498300
4,7,Jorginho,Arsenal,106,Midfielder,4.8,0.02,1,Bournemouth (A),0.032,0.027,0.058,0.105,0.037,0.003222,0.000000,0.020778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,662,Nichols,Arsenal,241,Defender,4.0,0.00,0,Bournemouth (A),0.000,0.000,0.000,0.000,0.000,0.000000,0.000000,0.000000
363,663,Samuels,Brighton,241,Defender,4.0,0.00,0,Newcastle (A),0.000,0.000,0.000,0.000,0.000,0.000000,0.000000,0.000000
364,664,Pond,Wolves,241,Defender,4.0,0.00,0,Man City (H),0.000,0.000,0.000,0.000,0.000,0.000000,0.000000,0.000000
365,665,Butler-Oyedeji,Arsenal,241,Forward,4.5,0.00,0,Bournemouth (A),0.000,0.000,0.000,0.000,0.000,0.000000,0.000000,0.000000


### Player Manipulation

### Player Force/Banning

In [80]:
def fuzzy_ban_players(df, ban_ids):
    while True:
        search_name = input("Enter player name to ban (or press enter to finish): ").strip()
        
        if search_name.lower() == '':
            break
        
        # Perform fuzzy matching with a lower score cutoff and no limit
        matches = process.extractBests(search_name, df['Name'].tolist(), score_cutoff=50, limit=10)
        
        if not matches:
            print("No matches found. Please try again.")
            continue
        
        # Display matches
        print("Matches found:")
        for idx, (name, score) in enumerate(matches, 1):
            player_index = df[df['Name'] == name].index[0]
            player_id = df.loc[player_index, 'ID']
            print(f"{idx}. {name} (ID: {player_id}, Index: {player_index}, Score: {score})")
        
        # Ask user to select a match
        while True:
            choice = input("Enter the number of the player to ban (or 'skip' to search again): ")
            if choice.lower() == 'skip':
                break
            try:
                choice_idx = int(choice) - 1
                if 0 <= choice_idx < len(matches):
                    selected_name = matches[choice_idx][0]
                    selected_index = df[df['Name'] == selected_name].index[0]
                    selected_id = df.loc[selected_index, 'ID']
                    ban_ids.append(selected_index)
                    print(f"Banned: {selected_name} (ID: {selected_id}, Index: {selected_index})")
                    break
                else:
                    print("Invalid choice. Please try again.")
            except ValueError:
                print("Invalid input. Please enter a number or 'skip'.")
    
    return ban_ids

ban_ids = []
ban_ids = fuzzy_ban_players(projections_data, ban_ids)
print("Final ban list (indices):", ban_ids)

Final ban list (indices): []


In [81]:
def fuzzy_force_players(df, force_ids):
    while True:
        search_name = input("Enter player name to force (or press enter to finish): ").strip()
        
        if search_name.lower() == '':
            break
        
        # Perform fuzzy matching with a lower score cutoff and no limit
        matches = process.extractBests(search_name, df['Name'].tolist(), score_cutoff=50, limit=10)
        
        if not matches:
            print("No matches found. Please try again.")
            continue
        
        # Display matches
        print("Matches found:")
        for idx, (name, score) in enumerate(matches, 1):
            player_index = df[df['Name'] == name].index[0]
            player_id = df.loc[player_index, 'ID']
            print(f"{idx}. {name} (ID: {player_id}, Index: {player_index}, Score: {score})")
        
        # Ask user to select a match
        while True:
            choice = input("Enter the number of the player to force (or 'skip' to search again): ")
            if choice.lower() == 'skip':
                break
            try:
                choice_idx = int(choice) - 1
                if 0 <= choice_idx < len(matches):
                    selected_name = matches[choice_idx][0]
                    selected_index = df[df['Name'] == selected_name].index[0]
                    selected_id = df.loc[selected_index, 'ID']
                    force_ids.append(selected_index)  # Add to force_ids instead of ban_ids
                    print(f"Forced: {selected_name} (ID: {selected_id}, Index: {selected_index})")
                    break
                else:
                    print("Invalid choice. Please try again.")
            except ValueError:
                print("Invalid input. Please enter a number or 'skip'.")
    
    return force_ids

force_ids = []
force_ids = fuzzy_force_players(projections_data, force_ids)
print("Final force list (indices):", force_ids)

Final force list (indices): []


# 2024/25 GW8 Challenge: Ball Winners - Interceptions and tackles are worth 1 point. 3 recoveries are worth 1 point.

### Optimisation

In [82]:
# Get the number of players and their list of ids
player_ids = projections_data['ID'].tolist()
player_count = len(player_ids)

# Set up the problem
model = plp.LpProblem("fpl-gw3-challenge", plp.LpMaximize)

# Define the decision variables
lineup = [
    plp.LpVariable(f"lineup_{i}", lowBound=0, upBound=1, cat="Integer")
    for i in player_ids
]

# Define captain variables
captain = [
    plp.LpVariable(f"captain_{i}", lowBound=0, upBound=1, cat="Integer")
    for i in player_ids
]

# Set the objective function (the number of points scored by the team, with captain's points doubled)
model += plp.lpSum([lineup[i] * projections_data.loc[i, 'Predicted_Points'] for i in range(player_count)]) + \
         plp.lpSum([captain[i] * projections_data.loc[i, 'Predicted_Points'] for i in range(player_count)])

# Constraints

# Total number of players = 5
model += plp.lpSum(lineup) == 5

# List players by index to be EXCLUDED from the lineup
for id in ban_ids:
    model += lineup[id] == 0

for id in force_ids:
    model += lineup[id] == 1

# Exactly one captain
model += plp.lpSum(captain) == 1

# Captain must be in the lineup
for i in range(player_count):
    model += captain[i] <= lineup[i]

# Exactly 1 Goalkeeper
model += plp.lpSum([lineup[i] for i in range(player_count) if projections_data.loc[i, 'Position'] == 'Goalkeeper']) == 1

# At least 1 Defender
model += plp.lpSum([lineup[i] for i in range(player_count) if projections_data.loc[i, 'Position'] == 'Defender']) >= 1

# At least 1 Midfielder
model += plp.lpSum([lineup[i] for i in range(player_count) if projections_data.loc[i, 'Position'] == 'Midfielder']) >= 1

# At least 1 Forward
model += plp.lpSum([lineup[i] for i in range(player_count) if projections_data.loc[i, 'Position'] == 'Forward']) >= 1

# No budget constraint
# model += plp.lpSum([lineup[i] * projections_data.loc[i, 'Cost'] for i in range(player_count)]) <= 30

# Solve the problem
model.solve()

# Function to print players by position
def print_players_by_position(players_dict):
    total_points = 0
    total_cost = 0
    for position in ['Goalkeeper', 'Defender', 'Midfielder', 'Forward']:
        if position in players_dict:
            print(f"\n{position}:")
            for player in players_dict[position]:
                captain_str = " (C)" if player['Captain'] else ""
                points = player['Predicted_Points'] * (2 if player['Captain'] else 1)
                print(f"  {player['Name']}{captain_str} - {player['Team']} - Cost: {player['Cost']}m - Predicted Points: {points}")
                total_points += points
                total_cost += player['Cost']
    print(f"\nTotal Predicted Points: {round(total_points, 2)}")
    print(f"Total Cost: {round(total_cost, 2)}m")

# Print the results
print("Status:", plp.LpStatus[model.status])
selected_players = defaultdict(list)
for i in range(player_count):
    if lineup[i].value() == 1:
        player = projections_data.loc[i]
        selected_players[player['Position']].append({
            'Name': player['Name'],
            'Team': player['Team'],
            'Cost': player['Cost'],
            'Predicted_Points': player['Predicted_Points'],
            'Captain': captain[i].value() == 1
        })

print_solution = True
if print_solution:
    print("\nOptimal Lineup:")
    print_players_by_position(selected_players)

Status: Optimal

Optimal Lineup:

Goalkeeper:
  Raya - Arsenal - Cost: 5.6m - Predicted Points: 8.65

Defender:
  Dalot (C) - Man Utd - Cost: 5.1m - Predicted Points: 21.48

Midfielder:
  Saka - Arsenal - Cost: 10.1m - Predicted Points: 10.5
  Bruno G. - Newcastle - Cost: 6.3m - Predicted Points: 10.52

Forward:
  Haaland - Man City - Cost: 15.4m - Predicted Points: 8.74

Total Predicted Points: 59.89
Total Cost: 42.5m
