In [440]:
import pulp
import pandas as pd
import json
import joblib
from rapidfuzz import fuzz, process
import warnings
# Show every column when a DataFrame is rendered instead of truncating wide frames.
pd.set_option('display.max_columns', None)
# NOTE(review): this silences *all* warnings for the rest of the session, which can hide
# real problems (for example pandas assignment/copy warnings) — consider a narrower filter.
warnings.filterwarnings("ignore")

In [441]:
# Gameweek to process; it is interpolated into the input file paths and the printed header.
# The functions below use it as a default argument (`gw=gw`), which Python evaluates once
# when the `def` runs, so re-run the defining cells after changing this value.
gw = 2

In [442]:
def fpl_selection(gw=gw):
    """Build the per-player candidate table (predicted points + price) for one gameweek.

    Steps, all driven by files under ``../../data`` and ``../../model``:

    1. Load the gameweek feature CSV and min-max scale every column listed in
       ``min_max_values.json``.
    2. Encode ``name``, ``position``, ``team`` and ``opponent_team`` with the stored
       JSON dictionaries and predict with the model saved in ``lgbm_model.joblib``.
    3. Map the prediction back onto the ``total_points`` scale and round to 2 decimals.
    4. Decode name/position/team and look up each player's price in the gameweek
       price CSV by fuzzy name match within the same team and position.

    Args:
        gw: Gameweek number used in the input file paths. The default is the value
            the module-level ``gw`` had when this function was defined.

    Returns:
        A new DataFrame with the columns ``name``, ``position``, ``team``,
        ``predicted_total_points`` and ``price``. ``price`` is missing when no price
        row matched; values absent from the encoding dictionaries come back as NaN
        because they pass through ``Series.map``.

    Raises:
        ZeroDivisionError: If the stored min and max of a scaled column are equal.
    """

    def load_json(path):
        # Small reader so every JSON artefact is opened and closed the same way.
        with open(path, 'r') as json_file:
            return json.load(json_file)

    df = pd.read_csv(f'../../data/fpl/gw-{gw}/fpl_2425_updated_features.csv')

    # Per-column min/max values used for min-max scaling.
    min_max_dict = load_json('../../data/json/min_max_values.json')

    for column, bounds in min_max_dict.items():
        min_val, max_val = bounds['min'], bounds['max']
        if max_val == min_val:
            # Fail loudly and name the column instead of producing inf/NaN features.
            raise ZeroDivisionError(
                f"cannot min-max scale column {column!r}: min and max are both {min_val!r}"
            )
        # Vectorised (value - min) / (max - min) over the whole column.
        df[column] = (df[column] - min_val) / (max_val - min_val)

    # The target column is not a model input.
    df = df.drop(columns=['total_points'])

    player_dict = load_json('../../data/json/encoded_player_names.json')
    position_dict = load_json('../../data/json/encoded_position.json')
    team_dict = load_json('../../data/json/encoded_team_name.json')

    # Values missing from a dictionary become NaN here.
    df['name'] = df['name'].map(player_dict)
    df['position'] = df['position'].map(position_dict)
    df['team'] = df['team'].map(team_dict)
    df['opponent_team'] = df['opponent_team'].map(team_dict)

    # SECURITY: joblib.load unpickles arbitrary Python objects; only load model files
    # that come from a trusted source.
    model_lgbm = joblib.load('../../model/lgbm_model.joblib')

    # The right-hand side is evaluated first, so the model sees only feature columns.
    df['predicted_total_points'] = model_lgbm.predict(df)

    # Undo the min-max scaling of the target. Only the prediction is mapped back:
    # the scaled feature columns are not part of the returned frame.
    target_min = min_max_dict['total_points']['min']
    target_max = min_max_dict['total_points']['max']
    df['predicted_total_points'] = (
        df['predicted_total_points'] * (target_max - target_min) + target_min
    ).round(2)

    inverse_player_dict = {v: k for k, v in player_dict.items()}
    inverse_position_dict = {v: k for k, v in position_dict.items()}
    inverse_team_dict = {v: k for k, v in team_dict.items()}

    # Replace the encoded identifier columns with their original values.
    df['name'] = df['name'].map(inverse_player_dict)
    df['position'] = df['position'].map(inverse_position_dict)
    df['team'] = df['team'].map(inverse_team_dict)

    fpl_price_df_gw = pd.read_csv(f'../../data/fpl/price/fpl-price-gw-{gw}.csv')

    # Short team names used in the price file -> long names used in the feature data.
    # Names that are not listed here pass through ``replace`` unchanged.
    # NOTE(review): 'West Bromwich United' is presumably meant to be
    # 'West Bromwich Albion' — confirm against encoded_team_name.json before changing.
    team_map = {
        'Arsenal':'Arsenal',
        'Aston Villa':'Aston Villa',
        'Brentford':'Brentford',
        'Bournemouth':'Bournemouth',
        'Brighton':'Brighton & Hove Albion',
        'Burnley':'Burnley',
        'Chelsea':'Chelsea',
        'Cardiff':'Cardiff City',
        'Crystal Palace':'Crystal Palace',
        'Everton':'Everton',
        'Hull':'Hull City',
        'Huddersfield':'Huddersfield Town',
        'Leeds':'Leeds United',
        'Leicester':'Leicester City',
        'Ipswich': 'Ipswich Town',
        'Liverpool':'Liverpool',
        'Luton':'Luton Town',
        'Man City':'Manchester City',
        'Man Utd':'Manchester United',
        'Middlesbrough':'Middlesbrough',
        'Newcastle':'Newcastle United',
        'Norwich':'Norwich City',
        "Nott'm Forest":'Nottingham Forest',
        'Southampton':'Southampton',
        'Swansea':'Swansea City',
        'Stoke':'Stoke City',
        'Sunderland':'Sunderland',
        'Sheffield Utd':'Sheffield United',
        'Spurs':'Tottenham Hotspur',
        'Watford':'Watford',
        'West Ham':'West Ham United',
        'West Brom':'West Bromwich United',
        'Wolves':'Wolverhampton Wanderers'
    }

    fpl_price_df_gw['team'] = fpl_price_df_gw['team'].replace(team_map)

    # Explicit copy: a price column is added below, and writing into a column
    # selection of ``df`` is the pattern pandas flags with SettingWithCopyWarning.
    pulp_df = df[['name', 'position', 'team', 'predicted_total_points']].copy()

    def find_best_match(row, players_df):
        # Restrict the candidates to the same team and position as the player.
        subset = players_df[
            (players_df['team'] == row['team']) &
            (players_df['position'] == row['position'])
        ]
        # When at least one candidate exists, pick the closest name.
        if not subset.empty:
            best_match = process.extractOne(row['name'], subset['name'], scorer=fuzz.token_sort_ratio)
            if best_match and best_match[1] > 30:  # Minimum similarity score for a match
                return subset.loc[subset['name'] == best_match[0], 'price'].values[0]
        return None  # No acceptable match

    # Attach the matched price to every player.
    pulp_df['price'] = pulp_df.apply(find_best_match, axis=1, players_df=fpl_price_df_gw)

    return pulp_df

In [443]:
# Build the candidate table (name, position, team, predicted points, price) for the chosen gameweek.
pulp_df = fpl_selection(gw=gw)



In [444]:
def find_condition(row, gw=gw, price_info=None):
    """Look up the ``condition`` value of one player for gameweek ``gw``.

    The player's name is fuzzy-matched against every name in the gameweek's price
    data (score cutoff 50). The best-scoring name is accepted only if a row with
    that name also has the same ``team`` and ``position`` as ``row``.

    NOTE(review): because the name match is global and the team/position filter is
    applied afterwards, a better-scoring name at another club makes the lookup fall
    back to the default — confirm this is intended.

    Args:
        row: Mapping/Series with ``name``, ``team`` and ``position``.
        gw: Gameweek used to filter the combined price file when ``price_info`` is
            not supplied. Defaults to the module-level ``gw`` at definition time.
        price_info: Optional DataFrame already restricted to the wanted gameweek,
            with ``name``, ``team``, ``position`` and ``condition`` columns. Passing
            it avoids re-reading the CSV on every call; when omitted, the file is
            read and filtered on ``GW == gw``.

    Returns:
        The matched row's ``condition`` value, or ``1`` when no acceptable match
        exists (presumably "fully available" — verify against the data source).
    """
    if price_info is None:
        price_info = pd.read_csv('../../data/fpl/combined-fpl-price.csv')
        price_info = price_info[price_info['GW'] == gw]

    # Best name match with a similarity score of at least 50.
    match = process.extractOne(row['name'], price_info['name'], score_cutoff=50)

    if match:
        matched_name = match[0]
        # Keep the match only if it is the same team and position.
        matched_row = price_info[(price_info['name'] == matched_name) &
                                 (price_info['team'] == row['team']) &
                                 (price_info['position'] == row['position'])]

        if not matched_row.empty:
            return matched_row['condition'].values[0]
    return 1

# Read and filter the price/condition data once instead of once per player row.
condition_price_info = pd.read_csv('../../data/fpl/combined-fpl-price.csv')
condition_price_info = condition_price_info[condition_price_info['GW'] == gw]

# Add the condition column, then drop players with any missing field
# (for example a missing name or price).
pulp_df = pulp_df.assign(
    condition=pulp_df.apply(find_condition, axis=1, price_info=condition_price_info)
).dropna()

In [445]:
import pulp

def pulp_model(df):
    """Select 15 players that maximise total predicted points with a binary LP.

    Constraints (unchanged from the original model):
      * exactly 15 players, all with ``condition >= 0.75``;
      * at most 3 players per team;
      * exactly 2 ``GK``; at least 4 each of ``DEF``, ``MID`` and ``FWD``;
      * total ``price`` at most 100.

    NOTE(review): the problem name and the printed header say "Starting Eleven",
    but 15 players are selected. The official FPL squad shape is, as far as I know,
    2 GK / 5 DEF / 5 MID / 3 FWD — confirm whether the position minimums are intended.

    Args:
        df: DataFrame with a unique index and the columns ``name``, ``team``,
            ``position``, ``price``, ``predicted_total_points`` and ``condition``.

    Returns:
        The selected rows with columns ``name``, ``team``,
        ``predicted_total_points``, ``price`` and ``position``, ordered
        GK, DEF, MID, FWD.

    Raises:
        ValueError: If the solver does not report an optimal solution (for example
            when the constraints are infeasible for ``df``).

    Side effects:
        Prints a header line that uses the module-level ``gw``.
    """
    problem = pulp.LpProblem("Best_Starting_Eleven", pulp.LpMaximize)

    # One binary decision variable per candidate row, keyed by index label.
    player_vars = pulp.LpVariable.dicts("Players", df.index, cat='Binary')

    def picked(mask):
        # Sum of the decision variables of the rows selected by a boolean mask.
        return pulp.lpSum([player_vars[i] for i in df.index[mask]])

    # Objective: maximise the total predicted points of the selected players.
    problem += pulp.lpSum(
        [player_vars[i] * points for i, points in zip(df.index, df['predicted_total_points'])]
    ), "Total_Points"

    # Constraint 1: exactly 15 players.
    problem += pulp.lpSum([player_vars[i] for i in df.index]) == 15, "Total_15_Players"

    # Constraint 2: all 15 picks must come from players with condition >= 0.75.
    problem += picked(df['condition'] >= 0.75) == 15, "Condition_Above_0.75"

    # Constraint 3: at most 3 players from the same team.
    for team in df['team'].unique():
        problem += picked(df['team'] == team) <= 3, f"Max_3_Players_from_{team}"

    # Constraint 4: at least 3 DEF, MID and FWD.
    # NOTE(review): implied by constraint 6 (min 4); kept so the model is unchanged.
    for position in ['DEF', 'MID', 'FWD']:
        problem += picked(df['position'] == position) >= 3, f"Min_3_{position}"

    # Constraint 5: exactly 2 goalkeepers (the label previously said 3).
    problem += picked(df['position'] == 'GK') == 2, "Exactly_2_GK"

    # Constraint 6: at least 4 players in each of DEF, MID and FWD.
    for position in ['DEF', 'MID', 'FWD']:
        problem += picked(df['position'] == position) >= 4, f"Min_4_{position}_Total"

    # Constraint 7: total price of the selected players must be <= 100.
    problem += pulp.lpSum(
        [player_vars[i] * price for i, price in zip(df.index, df['price'])]
    ) <= 100, "Total_Price_Limit"

    problem.solve()

    # Without this check an infeasible model would still return an arbitrary table.
    if problem.status != pulp.LpStatusOptimal:
        raise ValueError(
            f"Squad selection did not reach an optimal solution: {pulp.LpStatus[problem.status]}"
        )

    # Solvers report floats, so test against 0.5 rather than exact equality with 1;
    # `or 0` treats a variable without a value as not selected.
    chosen = [i for i in df.index if (player_vars[i].varValue or 0) > 0.5]
    selected_players = df.loc[chosen]

    # Display order of the positions.
    position_order = {'GK': 1, 'DEF': 2, 'MID': 3, 'FWD': 4}

    # Sort through a temporary helper column and drop it afterwards; `assign`
    # returns a new frame instead of writing into the selection.
    sorted_players = (
        selected_players
        .assign(position_order=selected_players['position'].map(position_order))
        .sort_values(by='position_order')
        .drop(columns='position_order')
    )

    print(f"Prediction Selected Starting Eleven - Gameweek {gw}")
    sorted_players = sorted_players[['name', 'team', 'predicted_total_points', 'price', 'position']]
    return sorted_players

In [447]:
# Solve the squad-selection model on the candidate table and display the chosen players.
df = pulp_model(df=pulp_df)
df

Prediction Selected Starting Eleven - Gameweek 2


Unnamed: 0,name,team,predicted_total_points,price,position
416,David Raya,Arsenal,8.28,5.5,GK
610,Alisson Becker,Liverpool,5.44,5.5,GK
217,Antonee Robinson,Fulham,9.07,4.6,DEF
236,Cristian Romero,Tottenham Hotspur,11.58,5.1,DEF
248,Ola Aina,Nottingham Forest,8.81,4.4,DEF
305,Maximilian Kilman,West Ham United,10.19,4.5,DEF
191,Heung-Min Son,Tottenham Hotspur,11.37,7.5,MID
328,Morgan Gibbs-White,Nottingham Forest,10.85,6.5,MID
398,Thomas Partey,Arsenal,11.01,5.0,MID
538,Noni Madueke,Chelsea,16.23,6.5,MID
