In [3]:
# Relevant module imports and installs
!pip install pulp pandas brotli fuzzywuzzy
import pandas as pd
import pulp as plp
import sys 
import os
from collections import defaultdict
from fuzzywuzzy import process
import json


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
# Absolute path of the directory two levels above the working directory
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))

# Make that directory importable, taking care not to register it twice
if not (module_path in sys.path):
    sys.path.append(module_path)

# Data retrieval helpers (project module, importable once the path above is registered)
from challenge_data import (
    get_bootstrap_static_data,
    get_gameweek_data,
    update_with_gameweek_cost,
)

# Fetch the actual GW5 data, then pass it through the 2024-25 / gameweek 5 cost update
bootstrap_data = get_bootstrap_static_data()
gw5_raw = get_gameweek_data(5, bootstrap_data)
gw5_data = update_with_gameweek_cost(gw5_raw, '2024-25', 5)

# Add regions
def create_region_conversion(df):
    """Create or extend ``region_conversion.json`` so it covers every region in ``df``.

    The JSON file (in the current working directory) maps a region code to
    ``{'country': ..., 'continent': ...}``. For each region found in
    ``df['Region']`` that the file does not yet cover, up to ten player names
    from that region are printed as a hint and the user is prompted for the
    country and continent. The mapping is always written back, so the file is
    guaranteed to exist when this function returns.

    Regions are matched against existing keys on their
    ``str(value).strip().lower()`` form, which is the same normalisation
    ``add_region_data`` applies when it looks regions up.

    Args:
        df: DataFrame with at least the columns ``'Region'`` and ``'Name'``.

    Returns:
        None. The result is the JSON file on disk.
    """
    try:
        # Context manager so the handle is closed even if parsing fails
        with open('region_conversion.json') as f:
            region_dic = json.load(f)
    except FileNotFoundError:
        region_dic = {}

    # JSON object keys are always strings. Comparing raw (possibly numeric)
    # region values against them would flag every region as missing on every
    # run, and would store a second, non-string key next to its string twin.
    known = {str(key).strip().lower() for key in region_dic}

    # First-appearance order keeps the prompts deterministic between runs
    for region in df['Region'].drop_duplicates():
        normalized = str(region).strip().lower()
        if normalized in known:
            continue

        print(region, ':', df[df['Region'] == region]['Name'].head(10))
        country = input('Enter country: ')
        continent = input('Enter continent: ')
        # Store under the string form so the in-memory key matches what JSON persists
        region_dic[str(region)] = {'country': country, 'continent': continent}
        known.add(normalized)

    # Save the dictionary to a JSON file
    with open('region_conversion.json', 'w') as f:
        json.dump(region_dic, f, indent=4)

def add_region_data(df):
    """Return a copy of ``df`` with ``Region`` replaced by ``Country`` and ``Continent``.

    The region mapping is first created or updated via
    ``create_region_conversion`` (which may prompt for unknown regions), then
    read back from ``region_conversion.json``. Regions are matched on their
    ``str(value).strip().lower()`` form; a region with no entry yields empty
    strings for both new columns.

    The caller's DataFrame is left untouched: the new columns are added to a
    fresh frame rather than written into ``df``.

    Args:
        df: DataFrame with at least the columns ``'Region'`` and ``'Team'``
            (``create_region_conversion`` additionally reads ``'Name'``).

    Returns:
        A new DataFrame without ``'Region'``, with ``'Country'`` and
        ``'Continent'`` placed immediately after ``'Team'``.

    Raises:
        ValueError: if ``df`` has no ``'Team'`` column.
    """
    # First, create or update the region conversion
    create_region_conversion(df)

    # Load the region conversion data
    with open('region_conversion.json', 'r') as f:
        region_dic = json.load(f)

    # Normalize the keys in region_dic
    region_dic = {str(k).strip().lower(): v for k, v in region_dic.items()}

    def lookup(field):
        # One shared lookup for both columns; unknown regions fall back to ''
        return df['Region'].apply(
            lambda x: region_dic.get(str(x).strip().lower(), {}).get(field, '')
        )

    # assign() builds a new frame, so the caller's DataFrame is not mutated
    enriched = df.assign(
        Country=lookup('country'),
        Continent=lookup('continent'),
    ).drop('Region', axis=1)

    # Reorder the columns so Country/Continent sit right after Team
    columns = enriched.columns.tolist()
    team_index = columns.index('Team')
    columns.remove('Country')
    columns.remove('Continent')
    new_order = columns[:team_index+1] + ['Country', 'Continent'] + columns[team_index+1:]

    return enriched[new_order]

# Swap the raw Region column for Country / Continent
gw5_data = add_region_data(gw5_data)

# Restrict the pool to players from the Americas, then renumber rows from 0
# so later cells can address players by position
americas = ['South America', 'North America']
from_americas = gw5_data['Continent'].isin(americas)
gw5_data = gw5_data.loc[from_americas].reset_index(drop=True)

{'14', '238', '90', '103', '177', '44'}
14 : 554    Kalajdžić
Name: Name, dtype: object
238 : 288    Daka
Name: Name, dtype: object
90 : 217    Beto
Name: Name, dtype: object
103 : 259    Al-Hamadi
Name: Name, dtype: object
177 : 485    Dragusin
Name: Name, dtype: object
44 : 583    Brereton Díaz
Name: Name, dtype: object


In [5]:
# Player ids label the decision variables; rows are addressed by position 0..n-1
player_ids = gw5_data['ID'].tolist()
player_count = len(player_ids)

# Maximisation problem: best possible lineup for the gameweek, in hindsight
model = plp.LpProblem("fpl-gw5-challenge-hindsight", plp.LpMaximize)

# lineup[k] is 1 when the k-th player is picked, 0 otherwise
lineup = [
    plp.LpVariable(f"lineup_{pid}", lowBound=0, upBound=1, cat="Integer")
    for pid in player_ids
]

# captain[k] is 1 when the k-th player is the captain, 0 otherwise
captain = [
    plp.LpVariable(f"captain_{pid}", lowBound=0, upBound=1, cat="Integer")
    for pid in player_ids
]

# Objective: every picked player's points, plus the captain's points a second
# time (which is what doubles the captain's score)
lineup_points = plp.lpSum(
    [lineup[row] * gw5_data.loc[row, 'Points'] for row in range(player_count)]
)
captain_bonus = plp.lpSum(
    [captain[row] * gw5_data.loc[row, 'Points'] for row in range(player_count)]
)
model += lineup_points + captain_bonus

# --- Constraints ---

# Five players in total
model += plp.lpSum(lineup) == 5

# One captain, no more and no fewer
model += plp.lpSum(captain) == 1

# A player can only be captain if they are in the lineup
for picked, armband in zip(lineup, captain):
    model += armband <= picked


def picks_in_position(position):
    """Sum of the lineup variables of all players listed at `position`."""
    return plp.lpSum(
        [lineup[row] for row in range(player_count) if gw5_data.loc[row, 'Position'] == position]
    )


# Exactly 1 Goalkeeper
model += picks_in_position('Goalkeeper') == 1

# At least 1 each of Defender, Midfielder and Forward (added in that order)
for outfield_position in ('Defender', 'Midfielder', 'Forward'):
    model += picks_in_position(outfield_position) >= 1

# Hand the model to CBC with solver logging switched off
model.solve(plp.PULP_CBC_CMD(msg=False))

# Function to print players by position
def print_players_by_position(players_dict):
    """Print the lineup grouped by position, followed by the total points.

    Positions are listed in the fixed order Goalkeeper, Defender, Midfielder,
    Forward; positions missing from ``players_dict`` are skipped. The captain
    is tagged with " (C)" and their points are counted twice, both on their
    own line and in the total.

    Args:
        players_dict: mapping of position name to a list of player dicts, each
            with the keys 'Name', 'Team', 'Points' and 'Captain'.
    """
    display_order = ('Goalkeeper', 'Defender', 'Midfielder', 'Forward')
    running_total = 0

    for role in display_order:
        if role not in players_dict:
            continue

        print(f"\n{role}:")
        for entry in players_dict[role]:
            # The captain gets a marker and a doubled score
            if entry['Captain']:
                marker, multiplier = " (C)", 2
            else:
                marker, multiplier = "", 1
            scored = entry['Points'] * multiplier
            print(f"  {entry['Name']}{marker} - {entry['Team']} - Points: {scored}")
            running_total += scored

    print(f"\nTotal Points: {running_total}")

def _is_switched_on(var):
    """True when a 0/1 decision variable was set to 1 by the solver.

    Solver values are floats and may come back as e.g. 0.9999999 rather than
    exactly 1, so compare against 0.5 instead of testing equality. A variable
    without a value (no solution available) counts as not selected.
    """
    value = var.value()
    return value is not None and value > 0.5


# Print the results
print("Status:", plp.LpStatus[model.status])

print("\nOptimal Lineup:")
# Group the picked players by position for display
selected_players = defaultdict(list)
for i in range(player_count):
    if _is_switched_on(lineup[i]):
        player = gw5_data.loc[i]
        selected_players[player['Position']].append({
            'Name': player['Name'],
            'Team': player['Team'],
            'Points': player['Points'],
            'Captain': _is_switched_on(captain[i])
        })

print_players_by_position(selected_players)


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/91/3jnk9jzn3kx1mwsfzhddwbww0000gn/T/05498271c4544384ba20f486ed6079a7-pulp.mps -max -timeMode elapsed -branch -printingOptions all -solution /var/folders/91/3jnk9jzn3kx1mwsfzhddwbww0000gn/T/05498271c4544384ba20f486ed6079a7-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 79 COLUMNS
At line 774 RHS
At line 849 BOUNDS
At line 986 ENDATA
Problem MODEL has 74 rows, 136 columns and 340 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 57 - 0.00 seconds
Cgl0004I processed model has 73 rows, 135 columns (135 integer (135 of which binary)) and 332 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I Solution found