In [17]:
# Relevant module imports and installs
import pandas as pd
!pip install pulp brotli fuzzywuzzy
import pulp as plp
import sys 
import os
from collections import defaultdict
from fuzzywuzzy import process




[notice] A new release of pip is available: 23.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





In [18]:
# Season label for this run (no other cell visible in this notebook reads it).
solve_season = '2024-25'
# Gameweek handed to append_stat_projections() when projections are built.
# NOTE(review): a later markdown heading refers to "GW11" - confirm this value is the intended gameweek.
solve_gameweek = 10
# False: regenerate projections and overwrite gameweek_projections.csv.
# True: read gameweek_projections.csv, regenerating only if the file is missing.
load_projections_from_file = False

In [19]:
# Make the directory two levels above the current working directory importable,
# so the project-local `projections` module can be found.
module_path = os.path.abspath(os.path.join('..', '..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# This import has to follow the sys.path tweak above.
from projections import generate_projections, generate_stat_projections, append_stat_projections

projections_csv_path = 'gameweek_projections.csv'

# Try the cached CSV first when requested; `None` means "still needs generating".
projections_data = None
if load_projections_from_file:
    try:
        projections_data = pd.read_csv(projections_csv_path)
        print('Loaded projections from CSV file.')
    except FileNotFoundError:
        print('Tried to load from CSV file, but it does not exist. Generating new projections...')

# Single generation path, shared by the "always regenerate" mode and the
# missing-file fallback. The result is always written to disk so that a later
# run with load_projections_from_file=True finds the cache.
if projections_data is None:
    point_projections = generate_projections()
    stat_projections = generate_stat_projections()

    projections_data = append_stat_projections(point_projections, stat_projections, solve_gameweek)
    projections_data.to_csv(projections_csv_path, index=False)
    print('Generated new projections.')

Generated new projections.


In [20]:
from fbref import fbref_main

# The FBRef data is cached as two CSV files in the working directory. The cache
# is only used when BOTH files are present; otherwise both are fetched again
# through fbref_main() and rewritten.
fbref_cache_ready = os.path.exists('team_fbref_stats.csv') and os.path.exists('player_fbref_stats.csv')

if fbref_cache_ready:
    print('Up to date FBRef data exists in this directory. Loading data from CSV...')
    team_fbref_stats = pd.read_csv('team_fbref_stats.csv')
    player_fbref_stats = pd.read_csv('player_fbref_stats.csv')
else:
    print('Up to date FBRef data does not exist in this directory. Retrieving new data...')
    team_fbref_stats, player_fbref_stats = fbref_main()
    team_fbref_stats.to_csv('team_fbref_stats.csv', index=False)
    player_fbref_stats.to_csv('player_fbref_stats.csv', index=False)

# Rename a handful of teams in place on each frame.

# projections_data: 'Man Utd' -> 'Manchester United'
man_utd_rows = projections_data['Team'] == 'Man Utd'
projections_data.loc[man_utd_rows, 'Team'] = 'Manchester United'

# team_fbref_stats: 'Tottenham' -> 'Spurs'
tottenham_team_rows = team_fbref_stats['Team'] == 'Tottenham'
team_fbref_stats.loc[tottenham_team_rows, 'Team'] = 'Spurs'

# player_fbref_stats: 'Tottenham' -> 'Spurs'
tottenham_player_rows = player_fbref_stats['Player_Team_Name'] == 'Tottenham'
player_fbref_stats.loc[tottenham_player_rows, 'Player_Team_Name'] = 'Spurs'

Up to date FBRef data exists in this directory. Loading data from CSV...


In [21]:
def fuzzy_match_name(df, df_type, search_name):
    """Return the entry of ``df['Team']`` that best fuzzy-matches ``search_name``.

    Args:
        df: DataFrame with a ``'Team'`` column.
        df_type: Kind of lookup; only ``'team'`` (case-insensitive) is supported.
        search_name: Name to search for.

    Returns:
        The best-matching team name, or ``None`` when ``df_type`` is not
        ``'team'`` or no candidate reaches the score cutoff of 50.
    """
    # Any lookup kind other than 'team' is unsupported and yields None.
    if df_type.lower() != 'team':
        return None

    best = process.extractOne(search_name, df['Team'].tolist(), score_cutoff=50)
    return best[0] if best else None
    
def strip_opponent_name(opponent_name):
    """Return the part of ``opponent_name`` that precedes its first space.

    The whole string is returned when it contains no space.
    """
    first_token, _, _ = opponent_name.partition(' ')
    return first_token
    
def filter_fbref_player_df_by_team(df, fpl_team_name):
    """Restrict ``df`` to the rows of the team that best matches ``fpl_team_name``.

    The team is chosen by fuzzy-matching ``fpl_team_name`` against
    ``df['Player_Team_Name']`` with a score cutoff of 50.

    Returns:
        The matching rows, or ``df`` unchanged when no team reaches the cutoff.
    """
    team_column = df['Player_Team_Name']
    best = process.extractOne(fpl_team_name, team_column.tolist(), score_cutoff=50)
    if not best:
        # No sufficiently close team name: hand back the unfiltered frame.
        return df
    return df[team_column == best[0]]

def fuzzy_match_player(player_fbref_stats, player_name, team_name):
    """Fuzzy-match ``player_name`` against the players of one team.

    The stats frame is first narrowed with ``filter_fbref_player_df_by_team``;
    the name is then matched against the remaining ``'Player'`` values with a
    score cutoff of 50.

    Returns:
        The best-matching player name, or ``None`` when nothing reaches the cutoff.
    """
    squad = filter_fbref_player_df_by_team(player_fbref_stats, team_name)
    best = process.extractOne(player_name, squad['Player'].tolist(), score_cutoff=50)
    return best[0] if best else None

def _get_team_stat(team_name, stat_column):
    """Look up one column of ``team_fbref_stats`` for the fuzzy-matched team.

    Args:
        team_name: Team name to fuzzy-match against ``team_fbref_stats['Team']``.
        stat_column: Column of ``team_fbref_stats`` to read.

    Returns:
        The value from the first row of the matched team, or ``0`` (after
        printing an error) when no team matches.
    """
    matched_team = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if matched_team is None:
        print('MAJOR ERROR: Missing team name')
        return 0

    team_rows = team_fbref_stats['Team'] == matched_team
    return team_fbref_stats.loc[team_rows, stat_column].values[0]


def get_team_games_played(team_name):
    """Return the ``premier_league_matches_played`` value for ``team_name`` (0 if unmatched)."""
    return _get_team_stat(team_name, 'premier_league_matches_played')


def get_team_total_fouls_won(team_name):
    """Return the ``squad_miscellaneous_stats_Performance_Fld`` value for ``team_name`` (0 if unmatched)."""
    return _get_team_stat(team_name, 'squad_miscellaneous_stats_Performance_Fld')


def get_team_total_shots_on_target(team_name):
    """Return the ``squad_shooting_Standard_SoT`` value for ``team_name`` (0 if unmatched)."""
    return _get_team_stat(team_name, 'squad_shooting_Standard_SoT')


def get_team_total_shots_on_target_against(team_name):
    """Return the shots-on-target-against value for ``team_name`` (0 if unmatched).

    NOTE(review): this reads ``squad_shooting_Standard_SoT``, the same column
    as ``get_team_total_shots_on_target``, so it currently returns shots on
    target FOR the team. That looks like a copy-paste slip; the correct
    "against"/opponent column name is not visible here - confirm against the
    FBRef export and replace the column.
    """
    return _get_team_stat(team_name, 'squad_shooting_Standard_SoT')

def get_player_total_shots_on_target(player_name, team_name):
    """Return the ``shooting_Standard_SoT`` value for a player of a given team.

    The player is fuzzy-matched within the team, and the stat is then read
    from that same team's rows. Reading from the whole ``player_fbref_stats``
    frame (as before) could pick up a same-named player from a different team,
    because only the first matching row is used.

    Args:
        player_name: Player name to fuzzy-match.
        team_name: Team used to narrow the candidate players.

    Returns:
        The player's shots-on-target value, or ``0`` when no player matches.
    """
    matched_player = fuzzy_match_player(player_fbref_stats, player_name, team_name)

    if matched_player is None:
        return 0

    # Look the player up inside the same team-filtered frame the match came from.
    squad = filter_fbref_player_df_by_team(player_fbref_stats, team_name)
    shots = squad.loc[squad['Player'] == matched_player, 'shooting_Standard_SoT']
    if shots.empty:
        return 0
    return shots.values[0]

def predict_team_shots_on_target_in_game(team_name, opponent_name):
    """Project a team's shots on target for one game against an opponent.

    The projection is the mean of the team's shots on target per game and the
    opponent's shots-on-target-against per game.

    Args:
        team_name: Attacking team; fuzzy-matched against ``team_fbref_stats``.
        opponent_name: Opposing team; fuzzy-matched the same way.

    Returns:
        The projected shots on target; ``None`` (after printing an error) when
        either team cannot be matched; ``0.0`` when either side has no games
        played, since there is then no per-game rate to average.
    """
    team_name = fuzzy_match_name(team_fbref_stats, 'team', team_name)
    opponent_name = fuzzy_match_name(team_fbref_stats, 'team', opponent_name)

    if team_name is None or opponent_name is None:
        print('MAJOR ERROR: Missing team or opponent name')
        return None

    team_games_played = get_team_games_played(team_name)
    opponent_games_played = get_team_games_played(opponent_name)

    # Guard the per-game divisions below against a zero denominator.
    if team_games_played == 0 or opponent_games_played == 0:
        return 0.0

    team_shots_on_target_per_game = get_team_total_shots_on_target(team_name) / team_games_played
    opponent_shots_on_target_against_per_game = (
        get_team_total_shots_on_target_against(opponent_name) / opponent_games_played
    )

    return (team_shots_on_target_per_game + opponent_shots_on_target_against_per_game) / 2

def predict_player_shots_on_target_in_game_90(player_name, team_name, opponent_name):
    """Project a player's shots on target for one game.

    The player's share of the team's total shots on target is applied to the
    team's projected shots on target against ``opponent_name``.

    Args:
        player_name: Player to project; fuzzy-matched within ``team_name``.
        team_name: The player's team; fuzzy-matched against ``team_fbref_stats``.
        opponent_name: Opposing team for the game.

    Returns:
        The projection rounded to 3 decimals; ``0`` when the player cannot be
        matched or the team has no shots on target recorded; ``None`` when the
        team cannot be matched or no team-level projection is available.
    """
    matched_player = fuzzy_match_player(player_fbref_stats, player_name, team_name)
    matched_team = fuzzy_match_name(team_fbref_stats, 'team', team_name)

    if matched_player is None:
        return 0
    if matched_team is None:
        # Report the name that was asked for; the matched value is None here.
        print('MAJOR ERROR: Missing team name:', team_name)
        return None

    team_total_shots_on_target = get_team_total_shots_on_target(matched_team)
    if team_total_shots_on_target == 0:
        # With no team shots on target there is no share to compute.
        return 0

    team_projection = predict_team_shots_on_target_in_game(matched_team, opponent_name)
    if team_projection is None:
        # Propagate "no projection" instead of failing on None arithmetic.
        return None

    player_share = get_player_total_shots_on_target(matched_player, matched_team) / team_total_shots_on_target
    return round(player_share * team_projection, 3)

def calculate_new_xpts(xpts, projected_shots_on_target, points_per_shot_on_target=3):
    """Add the shots-on-target bonus to a points projection.

    Args:
        xpts: Projected points before the bonus.
        projected_shots_on_target: Projected number of shots on target.
        points_per_shot_on_target: Points per shot on target. Defaults to 3,
            the value this notebook's challenge uses.

    Returns:
        The adjusted projection, rounded to 2 decimals.
    """
    bonus = projected_shots_on_target * points_per_shot_on_target
    return round(xpts + bonus, 2)

In [22]:
# Keep the untouched projection in its own column, so re-running this cell
# recomputes Predicted_Points from the original value instead of stacking the
# shots-on-target bonus on an already-adjusted number.
if 'Base_Predicted_Points' not in projections_data.columns:
    projections_data['Base_Predicted_Points'] = projections_data['Predicted_Points']

# Create projected shots on target columns and default to 0.00
projections_data['Projected_Shots_On_Target'] = 0.000

for index, row in projections_data.iterrows():
    # The progress line is redrawn in place with '\r'; padding to a fixed width
    # ensures a shorter message fully overwrites the previous, longer one.
    progress_message = f'Appending data for: {row["Name"]} in team {row["Team"]}'
    print(progress_message.ljust(100), end='\r', flush=True)

    player_minute_proportion = row['xMins'] / 90

    shots_on_target_per_90 = predict_player_shots_on_target_in_game_90(row['Name'], row['Team'], row['Opponent'])
    if shots_on_target_per_90 is None:
        # The predictor returns None when a team cannot be matched; treat that
        # as "no bonus" so the multiplication below cannot fail.
        shots_on_target_per_90 = 0

    projected_shots_on_target = player_minute_proportion * shots_on_target_per_90
    projections_data.loc[index, 'Projected_Shots_On_Target'] = projected_shots_on_target

    updated_predicted_points = calculate_new_xpts(row['Base_Predicted_Points'], projected_shots_on_target)
    projections_data.loc[index, 'Predicted_Points'] = updated_predicted_points

Appending data for: Kporha in team Crystal Palacelitedced

### Player Manipulation

### Player Force/Banning

In [23]:
def fuzzy_ban_players(df, ban_ids):
    """Interactively collect the DataFrame index labels of players to ban.

    The user is prompted for a name, shown up to 10 fuzzy matches (score
    cutoff 50) and asked to pick one. This repeats until an empty name is
    entered.

    Args:
        df: DataFrame with ``'Name'`` and ``'ID'`` columns.
        ban_ids: List to extend. Despite the name it holds index labels of
            ``df``, not values of the ``'ID'`` column.

    Returns:
        The same ``ban_ids`` list, with each chosen index label added once.
    """
    # Key the candidates by index label so every match carries its own row.
    # Resolving the row from the name afterwards would always land on the
    # first of several players who share a name.
    name_by_index = df['Name'].to_dict()

    while True:
        search_name = input("Enter player name to ban (or press enter to finish): ").strip()

        if search_name == '':
            break

        # With dict choices each match is a (name, score, index_label) tuple.
        matches = process.extractBests(search_name, name_by_index, score_cutoff=50, limit=10)

        if not matches:
            print("No matches found. Please try again.")
            continue

        # Display matches
        print("Matches found:")
        for idx, (name, score, player_index) in enumerate(matches, 1):
            player_id = df.loc[player_index, 'ID']
            print(f"{idx}. {name} (ID: {player_id}, Index: {player_index}, Score: {score})")

        # Ask user to select a match
        while True:
            choice = input("Enter the number of the player to ban (or 'skip' to search again): ").strip()
            if choice.lower() == 'skip':
                break
            try:
                choice_idx = int(choice) - 1
            except ValueError:
                print("Invalid input. Please enter a number or 'skip'.")
                continue

            if not 0 <= choice_idx < len(matches):
                print("Invalid choice. Please try again.")
                continue

            selected_name, _, selected_index = matches[choice_idx]
            selected_id = df.loc[selected_index, 'ID']
            # Avoid listing the same player twice.
            if selected_index not in ban_ids:
                ban_ids.append(selected_index)
            print(f"Banned: {selected_name} (ID: {selected_id}, Index: {selected_index})")
            break

    return ban_ids

# Start from an empty list and let the interactive prompt fill it.
# Note: the entries are DataFrame index labels, not 'ID' values.
ban_ids = fuzzy_ban_players(projections_data, [])
print("Final ban list (indices):", ban_ids)

Final ban list (indices): []


In [24]:
def fuzzy_force_players(df, force_ids):
    """Interactively collect the DataFrame index labels of players to force in.

    The user is prompted for a name, shown up to 10 fuzzy matches (score
    cutoff 50) and asked to pick one. This repeats until an empty name is
    entered.

    Args:
        df: DataFrame with ``'Name'`` and ``'ID'`` columns.
        force_ids: List to extend. Despite the name it holds index labels of
            ``df``, not values of the ``'ID'`` column.

    Returns:
        The same ``force_ids`` list, with each chosen index label added once.
    """
    # Key the candidates by index label so every match carries its own row.
    # Resolving the row from the name afterwards would always land on the
    # first of several players who share a name.
    name_by_index = df['Name'].to_dict()

    while True:
        search_name = input("Enter player name to force (or press enter to finish): ").strip()

        if search_name == '':
            break

        # With dict choices each match is a (name, score, index_label) tuple.
        matches = process.extractBests(search_name, name_by_index, score_cutoff=50, limit=10)

        if not matches:
            print("No matches found. Please try again.")
            continue

        # Display matches
        print("Matches found:")
        for idx, (name, score, player_index) in enumerate(matches, 1):
            player_id = df.loc[player_index, 'ID']
            print(f"{idx}. {name} (ID: {player_id}, Index: {player_index}, Score: {score})")

        # Ask user to select a match
        while True:
            choice = input("Enter the number of the player to force (or 'skip' to search again): ").strip()
            if choice.lower() == 'skip':
                break
            try:
                choice_idx = int(choice) - 1
            except ValueError:
                print("Invalid input. Please enter a number or 'skip'.")
                continue

            if not 0 <= choice_idx < len(matches):
                print("Invalid choice. Please try again.")
                continue

            selected_name, _, selected_index = matches[choice_idx]
            selected_id = df.loc[selected_index, 'ID']
            # Avoid listing the same player twice.
            if selected_index not in force_ids:
                force_ids.append(selected_index)
            print(f"Forced: {selected_name} (ID: {selected_id}, Index: {selected_index})")
            break

    return force_ids

# Start from an empty list and let the interactive prompt fill it.
# Note: the entries are DataFrame index labels, not 'ID' values.
force_ids = fuzzy_force_players(projections_data, [])
print("Final force list (indices):", force_ids)

Final force list (indices): []


# 2024/25 GW11 Challenge: Accuracy - Shots on target are worth 3 points.

### Optimisation

In [27]:
# Get the number of players and their list of ids
player_ids = projections_data['ID'].tolist()
player_count = len(player_ids)

# Read the columns the model needs once, as plain Python lists, instead of
# doing a .loc lookup per player inside every comprehension below.
# lineup[k] / captain[k] refer to the k-th row of projections_data.
predicted_points = projections_data['Predicted_Points'].tolist()
player_positions = projections_data['Position'].tolist()

# Set up the problem
model = plp.LpProblem("fpl-challenge", plp.LpMaximize)

# Decision variables: 1 if the player is in the lineup, else 0
lineup = [plp.LpVariable(f"lineup_{i}", cat="Binary") for i in player_ids]

# Captain variables: 1 if the player is the captain, else 0
captain = [plp.LpVariable(f"captain_{i}", cat="Binary") for i in player_ids]

# Objective: total predicted points of the lineup, with the captain's points
# counted a second time (i.e. doubled).
model += (
    plp.lpSum([var * pts for var, pts in zip(lineup, predicted_points)])
    + plp.lpSum([var * pts for var, pts in zip(captain, predicted_points)])
)

# Constraints

# Total number of players = 5
model += plp.lpSum(lineup) == 5

# A player who is both banned and forced makes the model infeasible; say so up
# front rather than leaving only an "Infeasible" status to debug.
conflicting_indices = set(ban_ids) & set(force_ids)
if conflicting_indices:
    print(f"Warning: indices {sorted(conflicting_indices)} are both banned and forced; the model will be infeasible.")

# ban_ids / force_ids hold DataFrame index labels, used here as positions in
# `lineup`. NOTE(review): this assumes projections_data has the default
# 0..n-1 index - confirm.
for banned_index in ban_ids:
    model += lineup[banned_index] == 0

for forced_index in force_ids:
    model += lineup[forced_index] == 1

# Exactly one captain
model += plp.lpSum(captain) == 1

# Captain must be in the lineup
for captain_var, lineup_var in zip(captain, lineup):
    model += captain_var <= lineup_var


def _lineup_count(position):
    """Return the expression counting selected players at ``position``."""
    return plp.lpSum([var for var, pos in zip(lineup, player_positions) if pos == position])


# Exactly 1 Goalkeeper
model += _lineup_count('Goalkeeper') == 1

# At least 1 Defender, 1 Midfielder and 1 Forward
for outfield_position in ('Defender', 'Midfielder', 'Forward'):
    model += _lineup_count(outfield_position) >= 1

# No budget constraint
# model += plp.lpSum([lineup[i] * projections_data.loc[i, 'Cost'] for i in range(player_count)]) <= 30

# Solve the problem
model.solve()

# Function to print players by position
def print_players_by_position(players_dict):
    """Print the selected players grouped by position, followed by totals.

    Args:
        players_dict: Mapping of position name to a list of player dicts with
            the keys ``'Name'``, ``'Team'``, ``'Cost'``, ``'Predicted_Points'``
            and ``'Captain'``.

    Positions are printed in the order Goalkeeper, Defender, Midfielder,
    Forward; positions missing from the mapping are skipped. A captain's
    predicted points are doubled, both on their line and in the total.
    """
    points_sum = 0
    cost_sum = 0

    for position in ('Goalkeeper', 'Defender', 'Midfielder', 'Forward'):
        if position not in players_dict:
            continue

        print(f"\n{position}:")
        for entry in players_dict[position]:
            if entry['Captain']:
                marker, multiplier = " (C)", 2
            else:
                marker, multiplier = "", 1

            points = entry['Predicted_Points'] * multiplier
            print(f"  {entry['Name']}{marker} - {entry['Team']} - Cost: {entry['Cost']}m - Predicted Points: {points}")

            points_sum += points
            cost_sum += entry['Cost']

    print(f"\nTotal Predicted Points: {round(points_sum, 2)}")
    print(f"Total Cost: {round(cost_sum, 2)}m")

# Print the results
print("Status:", plp.LpStatus[model.status])
selected_players = defaultdict(list)


def _is_switched_on(variable):
    """Return True when a 0/1 variable's solved value is (approximately) 1.

    Solver values are floats, so an exact ``== 1`` test can miss a selected
    player (e.g. 0.9999999). A variable without a value counts as off.
    """
    value = variable.value()
    return value is not None and value > 0.5


# Only read variable values when the solver actually found an optimum.
if model.status == plp.LpStatusOptimal:
    for i in range(player_count):
        if _is_switched_on(lineup[i]):
            player = projections_data.loc[i]
            selected_players[player['Position']].append({
                'Name': player['Name'],
                'Team': player['Team'],
                'Cost': player['Cost'],
                'Predicted_Points': player['Predicted_Points'],
                'Captain': _is_switched_on(captain[i])
            })
else:
    print("No optimal solution found; no lineup was extracted.")

# Flip to True to print the lineup here and the top scorers in the next cell.
print_solution = False
if print_solution:
    print("\nOptimal Lineup:")
    print_players_by_position(selected_players)

Status: Optimal


In [28]:
def print_top_scorers_by_position(projections_data, top_n=10):
    """Display the highest-projected players for each position.

    Args:
        projections_data: DataFrame with ``'Position'`` and
            ``'Predicted_Points'`` columns.
        top_n: Number of players to show per position. Defaults to 10.

    For each of Goalkeeper, Defender, Midfielder and Forward, a heading is
    printed and the ``top_n`` rows with the largest ``'Predicted_Points'`` are
    passed to ``display``.
    """
    for position in ['Goalkeeper', 'Defender', 'Midfielder', 'Forward']:
        position_rows = projections_data[projections_data['Position'] == position]
        top_scorers = position_rows.nlargest(top_n, 'Predicted_Points')
        print(f'Top {position}s: ')
        display(top_scorers)

# Show the per-position leaderboards only when print_solution (set in the
# optimisation cell) is enabled.
if print_solution:
    print_top_scorers_by_position(projections_data)