In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from RF_Goalkeeper import create_goalkeeper_features, train_and_evaluate_rf
from RF_Defender import create_defender_features, train_and_evaluate_rf
from RF_Midfielder import create_midfielder_features, train_and_evaluate_rf
from RF_Forward import create_forward_features, train_and_evaluate_rf

from ortools.linear_solver import pywraplp

In [3]:
# Path template for the per-gameweek CSV files; '{}' is replaced by the gameweek number
base_path = '../Data/2023-24/gws/gw{}.csv'

# Number of gameweeks to load; adjust to match the files available on disk
num_gameweeks = 25

# Read every gameweek file and tag its rows with the gameweek number
gw_data_frames = {}
for gw in range(1, num_gameweeks + 1):
    df = pd.read_csv(base_path.format(gw))
    df['gw'] = gw
    gw_data_frames[gw] = df

# Stack all gameweeks into a single frame with a fresh 0..n-1 index
combined_df = pd.concat(list(gw_data_frames.values()), ignore_index=True)

In [4]:
# Gameweek whose actual points are used as the training target
training_target_gw = 25

# Training features for goalkeepers, built with the gameweek before the target as cut-off
# NOTE(review): whether the cut-off argument is inclusive is decided inside
# RF_Goalkeeper.create_goalkeeper_features — confirm there
goalkeeper_features_up_to_24 = create_goalkeeper_features(gw_data_frames, training_target_gw - 1)

# Rows of the target gameweek that belong to goalkeepers
target_gw_data = gw_data_frames[training_target_gw]
target_gw_goalkeepers = target_gw_data.loc[target_gw_data['position'] == 'GK']

# Training target: actual points in the target gameweek, indexed by player name
y_train = target_gw_goalkeepers[['name', 'total_points']].set_index('name').sort_index()

# Keep only goalkeepers present in both the feature table and the target
common_indices_train = goalkeeper_features_up_to_24.index.intersection(y_train.index)
X_train_filtered = goalkeeper_features_up_to_24.loc[common_indices_train]
y_train_filtered = y_train.loc[common_indices_train]

# De-duplicate BOTH sides by player name so that X and y stay row-aligned
# NOTE(review): keep='first' retains a single row per repeated name; if repeated names are
# double-gameweek fixtures the points should presumably be summed instead — confirm
X_train_filtered = X_train_filtered[~X_train_filtered.index.duplicated(keep='first')]
y_train_no_duplicates = y_train_filtered[~y_train_filtered.index.duplicated(keep='first')]
assert X_train_filtered.index.equals(y_train_no_duplicates.index), "features and target are misaligned"

# Fit the Random Forest on (features with earlier cut-off) -> (points in the target gameweek)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_filtered, y_train_no_duplicates.values.ravel())

# Prediction features use the target gameweek itself as cut-off; every goalkeeper in the
# feature table gets a prediction (no filtering against a target is needed here)
# NOTE(review): the Series below is labelled "GW 25", yet GW 25 points were the training
# target above — confirm which gameweek these predictions actually refer to
X_pred_25 = create_goalkeeper_features(gw_data_frames, training_target_gw)

# Predict points for every goalkeeper in the prediction feature table
predictions_25 = rf_model.predict(X_pred_25)

# Wrap the predictions in a Series indexed like the feature table
goalkeeper_predictions = pd.Series(predictions_25, index=X_pred_25.index, name='Predicted Points for GW 25')

# Show the predictions
print(goalkeeper_predictions)

Aaron Ramsdale                    0.00
Adam Davies                       0.00
Adrián San Miguel del Castillo    0.00
Alfie Whiteman                    0.00
Alisson Ramses Becker             1.37
                                  ... 
Vincent Angelini                  0.00
Wayne Hennessey                   0.00
Wes Foderingham                   1.76
Zack Steffen                      0.00
Đorđe Petrović                    2.73
Name: Predicted Points for GW 25, Length: 95, dtype: float64


In [5]:
# Gameweek whose actual points are used as the training target
training_target_gw = 25

# Training features for defenders, built with the gameweek before the target as cut-off
# NOTE(review): whether the cut-off argument is inclusive is decided inside
# RF_Defender.create_defender_features — confirm there
defender_features_train = create_defender_features(gw_data_frames, training_target_gw - 1)

# Rows of the target gameweek that belong to defenders
target_gw_data = gw_data_frames[training_target_gw]
target_gw_defenders = target_gw_data.loc[target_gw_data['position'] == 'DEF']

# Training target: actual points in the target gameweek, indexed by player name
y_train = target_gw_defenders[['name', 'total_points']].set_index('name').sort_index()

# Keep only defenders present in both the feature table and the target
common_indices_train = defender_features_train.index.intersection(y_train.index)
X_train_filtered = defender_features_train.loc[common_indices_train]
y_train_filtered = y_train.loc[common_indices_train]

# De-duplicate BOTH sides by player name so that X and y stay row-aligned
# NOTE(review): keep='first' retains a single row per repeated name; if repeated names are
# double-gameweek fixtures the points should presumably be summed instead — confirm
X_train_filtered = X_train_filtered[~X_train_filtered.index.duplicated(keep='first')]
y_train_no_duplicates = y_train_filtered[~y_train_filtered.index.duplicated(keep='first')]
assert X_train_filtered.index.equals(y_train_no_duplicates.index), "features and target are misaligned"

# Fit the Random Forest on (features with earlier cut-off) -> (points in the target gameweek)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_filtered, y_train_no_duplicates.values.ravel())

# Prediction features use the target gameweek itself as cut-off; every defender in the
# feature table gets a prediction (no filtering against a target is needed here)
# NOTE(review): the Series below is labelled "GW 25", yet GW 25 points were the training
# target above — confirm which gameweek these predictions actually refer to
X_pred_25 = create_defender_features(gw_data_frames, training_target_gw)

# Predict points for every defender in the prediction feature table
predictions_25 = rf_model.predict(X_pred_25)

# Wrap the predictions in a Series indexed like the feature table
defender_predictions = pd.Series(predictions_25, index=X_pred_25.index, name='Predicted Points for GW 25')

# Show the predictions
print(defender_predictions)

Aaron Cresswell              0.690000
Aaron Hickey                 0.310000
Aaron Wan-Bissaka            0.100000
Abdul Rahman Baba            0.013081
Adam Smith                   1.040000
                               ...   
Willy Boly                   0.160000
Willy Kambwala               0.360000
Yasser Larouci               0.360000
Yerson Mosquera              0.013081
Álvaro Fernández Carreras    0.013081
Name: Predicted Points for GW 25, Length: 264, dtype: float64


In [6]:
# Gameweek whose actual points are used as the training target
training_target_gw = 25

# Training features for midfielders, built with the gameweek before the target as cut-off
# NOTE(review): whether the cut-off argument is inclusive is decided inside
# RF_Midfielder.create_midfielder_features — confirm there
midfielder_features_train = create_midfielder_features(gw_data_frames, training_target_gw - 1)

# Rows of the target gameweek that belong to midfielders
target_gw_data = gw_data_frames[training_target_gw]
target_gw_midfielders = target_gw_data.loc[target_gw_data['position'] == 'MID']

# Training target: actual points in the target gameweek, indexed by player name
y_train = target_gw_midfielders[['name', 'total_points']].set_index('name').sort_index()

# Keep only midfielders present in both the feature table and the target
common_indices_train = midfielder_features_train.index.intersection(y_train.index)
X_train_filtered = midfielder_features_train.loc[common_indices_train]
y_train_filtered = y_train.loc[common_indices_train]

# De-duplicate BOTH sides by player name so that X and y stay row-aligned
# NOTE(review): keep='first' retains a single row per repeated name; if repeated names are
# double-gameweek fixtures the points should presumably be summed instead — confirm
X_train_filtered = X_train_filtered[~X_train_filtered.index.duplicated(keep='first')]
y_train_no_duplicates = y_train_filtered[~y_train_filtered.index.duplicated(keep='first')]
assert X_train_filtered.index.equals(y_train_no_duplicates.index), "features and target are misaligned"

# Fit the Random Forest on (features with earlier cut-off) -> (points in the target gameweek)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_filtered, y_train_no_duplicates.values.ravel())

# Prediction features use the target gameweek itself as cut-off; every midfielder in the
# feature table gets a prediction (no filtering against a target is needed here)
# NOTE(review): the Series below is labelled "GW 25", yet GW 25 points were the training
# target above — confirm which gameweek these predictions actually refer to
X_pred_25 = create_midfielder_features(gw_data_frames, training_target_gw)

# Predict points for every midfielder in the prediction feature table
predictions_25 = rf_model.predict(X_pred_25)

# Wrap the predictions in a Series indexed like the feature table
midfielder_predictions = pd.Series(predictions_25, index=X_pred_25.index, name='Predicted Points for GW 25')

# Show the predictions
print(midfielder_predictions)

Aaron Ramsey                      1.020000
Abdoulaye Doucouré                2.820000
Adam Lallana                      0.120000
Adam Wharton                      3.150000
Adama Traoré                      0.190000
                                    ...   
Yegor Yarmolyuk                   0.320000
Youri Tielemans                   5.000000
Yves Bissouma                     1.840000
Zack Nelson                       0.008185
Ângelo Gabriel Borges Damaceno    0.008185
Name: Predicted Points for GW 25, Length: 359, dtype: float64


In [7]:
# Gameweek whose actual points are used as the training target
training_target_gw = 25

# Training features for forwards, built with the gameweek before the target as cut-off
# NOTE(review): whether the cut-off argument is inclusive is decided inside
# RF_Forward.create_forward_features — confirm there
forward_features_train = create_forward_features(gw_data_frames, training_target_gw - 1)

# Rows of the target gameweek that belong to forwards
target_gw_data = gw_data_frames[training_target_gw]
target_gw_forwards = target_gw_data.loc[target_gw_data['position'] == 'FWD']

# Training target: actual points in the target gameweek, indexed by player name
y_train = target_gw_forwards[['name', 'total_points']].set_index('name').sort_index()

# Keep only forwards present in both the feature table and the target
common_indices_train = forward_features_train.index.intersection(y_train.index)
X_train_filtered = forward_features_train.loc[common_indices_train]
y_train_filtered = y_train.loc[common_indices_train]

# De-duplicate BOTH sides by player name so that X and y stay row-aligned
# NOTE(review): keep='first' retains a single row per repeated name; if repeated names are
# double-gameweek fixtures the points should presumably be summed instead — confirm
X_train_filtered = X_train_filtered[~X_train_filtered.index.duplicated(keep='first')]
y_train_no_duplicates = y_train_filtered[~y_train_filtered.index.duplicated(keep='first')]
assert X_train_filtered.index.equals(y_train_no_duplicates.index), "features and target are misaligned"

# Fit the Random Forest on (features with earlier cut-off) -> (points in the target gameweek)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_filtered, y_train_no_duplicates.values.ravel())

# Prediction features use the target gameweek itself as cut-off (driven by
# training_target_gw rather than a hard-coded literal); every forward in the feature
# table gets a prediction
# NOTE(review): the Series below is labelled "GW 25", yet GW 25 points were the training
# target above — confirm which gameweek these predictions actually refer to
X_pred_25 = create_forward_features(gw_data_frames, training_target_gw)

# Predict points for every forward in the prediction feature table
predictions_25 = rf_model.predict(X_pred_25)

# Wrap the predictions in a Series indexed like the feature table
forward_predictions = pd.Series(predictions_25, index=X_pred_25.index, name='Predicted Points for GW 25')

# Show the predictions
print(forward_predictions)

Aaron Connolly              0.00
Ademola Ola-Adebomi         0.00
Admiral Muskwe              0.00
Alejo Véliz                 0.00
Aleksandar Mitrović         0.00
                            ... 
William Osula               0.90
Wout Weghorst               0.00
Yoane Wissa                 1.98
Youssef Ramalho Chermiti    0.01
Zeki Amdouni                1.18
Name: Predicted Points for GW 25, Length: 108, dtype: float64


In [8]:
# Stack the four per-position prediction Series end to end into a single Series
all_predictions = pd.concat(
    [
        goalkeeper_predictions,
        defender_predictions,
        midfielder_predictions,
        forward_predictions,
    ],
    axis=0,
)

In [9]:
# Inspect the stacked Series (goalkeepers first, forwards last, following the concat order)
all_predictions

Aaron Ramsdale                    0.00
Adam Davies                       0.00
Adrián San Miguel del Castillo    0.00
Alfie Whiteman                    0.00
Alisson Ramses Becker             1.37
                                  ... 
William Osula                     0.90
Wout Weghorst                     0.00
Yoane Wissa                       1.98
Youssef Ramalho Chermiti          0.01
Zeki Amdouni                      1.18
Name: Predicted Points for GW 25, Length: 826, dtype: float64

In [10]:
# Turn the Series into a two-column DataFrame: the former index becomes the
# player-name column and the values become the predicted-points column
all_predictions_df = (
    all_predictions
    .reset_index()
    .set_axis(['Player Name', 'Predicted Points for GW 25'], axis=1)
)

In [11]:
# Inspect the predictions table (one row per prediction, fresh integer index)
all_predictions_df

Unnamed: 0,Player Name,Predicted Points for GW 25
0,Aaron Ramsdale,0.00
1,Adam Davies,0.00
2,Adrián San Miguel del Castillo,0.00
3,Alfie Whiteman,0.00
4,Alisson Ramses Becker,1.37
...,...,...
821,William Osula,0.90
822,Wout Weghorst,0.00
823,Yoane Wissa,1.98
824,Youssef Ramalho Chermiti,0.01


In [12]:
# Gameweek whose rows supply each player's value, position and team
current_gw = 25
# Same DataFrame object as stored in gw_data_frames (no copy is made here)
current_gw_data = gw_data_frames[current_gw]

In [13]:
# Inspect the raw rows of the selected gameweek
current_gw_data 

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,...,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,gw
0,Femi Seriki,DEF,Sheffield Utd,0.0,0,0,0,0,0.0,653,...,0,0.0,0,-327,0,327,39,True,0,25
1,Josh Brooking,DEF,Chelsea,-1.0,0,0,0,0,0.0,723,...,1,0.0,0,3,93,90,40,False,0,25
2,Radek Vítek,GK,Man Utd,0.0,0,0,0,0,0.0,669,...,1,0.0,0,-92,0,92,40,False,0,25
3,Jack Hinshelwood,MID,Brighton,0.7,0,0,0,0,0.0,621,...,0,0.0,0,-3692,266,3958,45,False,0,25
4,Jadon Sancho,MID,Man Utd,0.0,0,0,0,0,0.0,397,...,1,0.0,0,-129,0,129,67,False,0,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
977,Jacob Brown,FWD,Luton,-1.1,0,0,0,0,0.0,631,...,4,0.0,0,-33,551,584,49,False,0,25
978,Vicente Guaita,GK,Crystal Palace,0.0,0,0,0,0,0.0,227,...,1,0.0,0,-208,0,208,44,False,0,25
979,Braian Ojeda Rodríguez,MID,Nott'm Forest,0.0,0,0,0,0,0.0,459,...,2,0.0,0,-1,0,1,45,True,0,25
980,Mads Bech Sørensen,DEF,Brentford,0.0,0,0,0,0,0.0,93,...,1,0.0,0,-78,0,78,40,True,0,25


In [14]:
# Keep the first row for every player name; the set_index/reset_index round trip
# renumbers the rows from 0 and places 'name' as the leading column
current_gw_data = (
    current_gw_data
    .drop_duplicates(subset='name', keep='first')
    .set_index('name')
    .reset_index()
)

current_gw_data

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,...,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,gw
0,Femi Seriki,DEF,Sheffield Utd,0.0,0,0,0,0,0.0,653,...,0,0.0,0,-327,0,327,39,True,0,25
1,Josh Brooking,DEF,Chelsea,-1.0,0,0,0,0,0.0,723,...,1,0.0,0,3,93,90,40,False,0,25
2,Radek Vítek,GK,Man Utd,0.0,0,0,0,0,0.0,669,...,1,0.0,0,-92,0,92,40,False,0,25
3,Jack Hinshelwood,MID,Brighton,0.7,0,0,0,0,0.0,621,...,0,0.0,0,-3692,266,3958,45,False,0,25
4,Jadon Sancho,MID,Man Utd,0.0,0,0,0,0,0.0,397,...,1,0.0,0,-129,0,129,67,False,0,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
818,Kyle Walker,DEF,Man City,8.0,1,2,23,0,15.0,369,...,1,6.0,7,-73863,65483,139346,55,True,0,25
819,Jacob Brown,FWD,Luton,-1.1,0,0,0,0,0.0,631,...,1,0.0,0,-33,551,584,49,True,0,25
820,Vicente Guaita,GK,Crystal Palace,0.0,0,0,0,0,0.0,227,...,1,0.0,0,-208,0,208,44,False,0,25
821,Braian Ojeda Rodríguez,MID,Nott'm Forest,0.0,0,0,0,0,0.0,459,...,2,0.0,0,-1,0,1,45,True,0,25


In [15]:
# Name-indexed view of the gameweek rows, built once and shared by all three lookups
gw_rows_by_name = current_gw_data.set_index('name')

# Copy value, position and team onto the predictions table, matched by player name;
# players without a row in the gameweek data receive NaN
for target_column, source_column in (('Value', 'value'), ('Position', 'position'), ('Team', 'team')):
    all_predictions_df[target_column] = all_predictions_df['Player Name'].map(gw_rows_by_name[source_column])

In [16]:
# Inspect the predictions table after adding the Value, Position and Team columns
all_predictions_df

Unnamed: 0,Player Name,Predicted Points for GW 25,Value,Position,Team
0,Aaron Ramsdale,0.00,45.0,GK,Arsenal
1,Adam Davies,0.00,40.0,GK,Sheffield Utd
2,Adrián San Miguel del Castillo,0.00,39.0,GK,Liverpool
3,Alfie Whiteman,0.00,39.0,GK,Spurs
4,Alisson Ramses Becker,1.37,58.0,GK,Liverpool
...,...,...,...,...,...
821,William Osula,0.90,43.0,FWD,Sheffield Utd
822,Wout Weghorst,0.00,55.0,FWD,Burnley
823,Yoane Wissa,1.98,56.0,FWD,Brentford
824,Youssef Ramalho Chermiti,0.01,48.0,FWD,Everton


In [17]:
# Persist the enriched predictions to dataset.csv in the working directory, without the integer index
all_predictions_df.to_csv("dataset.csv", index = False)

In [18]:
# Alias, not a copy: df and all_predictions_df refer to the same DataFrame object
df = all_predictions_df

In [21]:
# Alias, not a copy: the column added below also appears on all_predictions_df
df = all_predictions_df

# Squad quota per position
positions = {
    'GK': 2,
    'DEF': 5,
    'MID': 5,
    'FWD': 3
}

# Predicted points per unit of price
df['Value for Money'] = df['Predicted Points for GW 25'] / df['Value']

# Drop the bottom 5% of players by value for money; rows whose ratio is NaN
# (e.g. no Value was found for the player) fail the comparison and are dropped too
threshold = df['Value for Money'].quantile(0.05)
df_filtered = df[df['Value for Money'] >= threshold]

# Per position, keep at most (quota x multiplier) players ranked by value for money.
# With a multiplier of 100 the cap is 200-500 players per position.
top_n_multiplier = 100

# Build every per-position slice first and concatenate once, instead of growing a
# DataFrame by repeated concat onto an initially empty frame
df_reduced = pd.concat(
    [
        df_filtered[df_filtered['Position'] == position].nlargest(
            required_number * top_n_multiplier, 'Value for Money'
        )
        for position, required_number in positions.items()
    ]
)

len(df_reduced)

819

In [22]:
df = df_reduced

# Create the SCIP-backed MIP solver; a falsy result means the backend is unavailable
solver = pywraplp.Solver.CreateSolver('SCIP')
if not solver:
    # Raise instead of calling exit(1), which would shut down the notebook kernel
    raise RuntimeError('SCIP solver not available.')

# SCIP-specific tuning parameters, passed through verbatim
# NOTE(review): the trailing "# Adjust ..." text is inside the string handed to the solver
# and the call's return value is not checked — confirm the parameters are accepted
solver.SetSolverSpecificParametersAsString("""
    heuristics/rens/freq=10
    constraints/setppc/upgrade=FALSE
    separating/maxrounds=0
    presolving/maxrestarts=0
    parallel/maxnthreads=4  # Adjust based on your CPU capabilities
""")

# One row per player name (first occurrence wins), indexed by name, so that every
# attribute lookup below is a direct access instead of a scan of the whole frame
players = df.drop_duplicates(subset='Player Name', keep='first').set_index('Player Name')
points_by_name = players['Predicted Points for GW 25'].to_dict()
value_by_name = players['Value'].to_dict()

# Decision variables: 1 if the player is selected, 0 otherwise
player_vars = {name: solver.IntVar(0, 1, f"var_{name}") for name in players.index}

# Objective: maximize total predicted points
objective = solver.Objective()
for name, var in player_vars.items():
    objective.SetCoefficient(var, points_by_name[name])
objective.SetMaximization()

# Constraints
# Budget constraint (Value is in the same units as the 1000 cap)
solver.Add(solver.Sum([value_by_name[name] * var for name, var in player_vars.items()]) <= 1000)

# Squad size
solver.Add(solver.Sum(player_vars.values()) == 15)

# Position constraints: exact quota per position
positions = {'GK': 2, 'DEF': 5, 'MID': 5, 'FWD': 3}
for position, required_count in positions.items():
    names_in_position = players[players['Position'] == position].index
    solver.Add(solver.Sum([player_vars[name] for name in names_in_position]) == required_count)

# Team constraint (max 3 players from the same team)
for team, team_players in players.groupby('Team', sort=False):
    solver.Add(solver.Sum([player_vars[name] for name in team_players.index]) <= 3)

# Defined before solving so later cells never see a stale list from a previous run
selected_players = []

# Solve the problem
status = solver.Solve()

# Output
if status == pywraplp.Solver.OPTIMAL:
    print('Solution:')
    total_points = 0
    total_value = 0

    for name, var in player_vars.items():
        # Solution values are floats; compare against 0.5 rather than testing == 1 exactly
        if var.solution_value() > 0.5:
            print(f"{name}: Selected")
            total_points += points_by_name[name]
            total_value += value_by_name[name]
            selected_players.append(name)
    print(f"Total Predicted Points: {total_points}")
    print(f"Total Value: {total_value}")
else:
    print('The problem does not have an optimal solution.')

Solution:
David Raya Martin: Selected
Ederson Santana de Moraes: Selected
Gabriel dos Santos Magalhães: Selected
Ashley Young: Selected
Antonee Robinson: Selected
Lewis Dunk: Selected
Rúben Gato Alves Dias: Selected
Rodrigo Hernandez: Selected
Callum Hudson-Odoi: Selected
Anthony Gordon: Selected
Bukayo Saka: Selected
Pascal Groß: Selected
Dominic Solanke: Selected
Ollie Watkins: Selected
Danny Welbeck: Selected
Total Predicted Points: 96.11
Total Value: 880.0


In [62]:
# Rows of the candidate table (df) for the players chosen by the optimisation;
# selected_players is the list of names filled in by the solver cell
selected_players_data = df.loc[df['Player Name'].isin(selected_players)]
selected_players_data


Unnamed: 0,Player Name,Predicted Points for GW 25,Value,Position,Team,Value for Money
17,David Raya Martin,4.8,50.0,GK,Arsenal,0.096
22,Ederson Santana de Moraes,3.59,55.0,GK,Man City,0.065273
175,Gabriel dos Santos Magalhães,5.59,51.0,DEF,Arsenal,0.109608
116,Ashley Young,4.43,44.0,DEF,Everton,0.100682
114,Antonee Robinson,4.03,44.0,DEF,Fulham,0.091591
245,Lewis Dunk,4.17,50.0,DEF,Brighton,0.0834
317,Rúben Gato Alves Dias,4.38,55.0,DEF,Man City,0.079636
657,Rodrigo Hernandez,8.14,56.0,MID,Man City,0.145357
411,Callum Hudson-Odoi,6.6,47.0,MID,Nott'm Forest,0.140426
385,Anthony Gordon,7.64,61.0,MID,Newcastle,0.125246


In [64]:
points_col = 'Predicted Points for GW 25'

# Mandatory core of the starting eleven: 1 GK, 3 DEF, 2 MID and 1 FWD with the highest predicted points
best_gk = selected_players_data[selected_players_data['Position'] == 'GK'].nlargest(1, points_col)
best_defenders = selected_players_data[selected_players_data['Position'] == 'DEF'].nlargest(3, points_col)
best_midfielders = selected_players_data[selected_players_data['Position'] == 'MID'].nlargest(2, points_col)
best_forward = selected_players_data[selected_players_data['Position'] == 'FWD'].nlargest(1, points_col)
core_starters = pd.concat([best_gk, best_defenders, best_midfielders, best_forward])

# Fill the last 4 slots with the best remaining OUTFIELD players. Goalkeepers are
# excluded so the second squad goalkeeper can never be picked as a second starter.
remaining_outfield = selected_players_data[
    ~selected_players_data['Player Name'].isin(core_starters['Player Name'])
    & (selected_players_data['Position'] != 'GK')
]
best_four = remaining_outfield.nlargest(4, points_col)

best_eleven = pd.concat([core_starters, best_four])

best_eleven["Captain"] = False
best_eleven["Vice Captain"] = False

# Captain: highest predicted points; vice captain: highest among the remaining starters
captain_label = best_eleven[points_col].idxmax()
best_eleven.loc[captain_label, 'Captain'] = True
vice_captain_label = best_eleven.loc[~best_eleven["Captain"], points_col].idxmax()
best_eleven.loc[vice_captain_label, 'Vice Captain'] = True

best_eleven

Unnamed: 0,Player Name,Predicted Points for GW 25,Value,Position,Team,Value for Money,Captain,Vice Captain
17,David Raya Martin,4.8,50.0,GK,Arsenal,0.096,False,False
175,Gabriel dos Santos Magalhães,5.59,51.0,DEF,Arsenal,0.109608,False,False
116,Ashley Young,4.43,44.0,DEF,Everton,0.100682,False,False
317,Rúben Gato Alves Dias,4.38,55.0,DEF,Man City,0.079636,False,False
410,Bukayo Saka,9.62,90.0,MID,Arsenal,0.106889,False,True
657,Rodrigo Hernandez,8.14,56.0,MID,Man City,0.145357,False,False
805,Ollie Watkins,10.51,87.0,FWD,Aston Villa,0.120805,True,False
755,Dominic Solanke,9.29,69.0,FWD,Bournemouth,0.134638,False,False
385,Anthony Gordon,7.64,61.0,MID,Newcastle,0.125246,False,False
644,Pascal Groß,6.67,64.0,MID,Brighton,0.104219,False,False


In [65]:
# Total price of the starting eleven (vectorised pandas sum instead of the Python builtin)
best_eleven["Value"].sum()

684.0

# Weighted Team Selection

In [24]:
df = df_reduced

# Create the SCIP-backed MIP solver; a falsy result means the backend is unavailable
solver = pywraplp.Solver.CreateSolver('SCIP')
if not solver:
    # Raise instead of calling exit(1), which would shut down the notebook kernel
    raise RuntimeError('SCIP solver not available.')

# SCIP-specific tuning parameters, passed through verbatim
# NOTE(review): the trailing "# Adjust ..." text is inside the string handed to the solver
# and the call's return value is not checked — confirm the parameters are accepted
solver.SetSolverSpecificParametersAsString("""
    heuristics/rens/freq=10
    constraints/setppc/upgrade=FALSE
    separating/maxrounds=0
    presolving/maxrestarts=0
    parallel/maxnthreads=4  # Adjust based on your CPU capabilities
""")

# One row per player name (first occurrence wins), indexed by name, so that every
# attribute lookup below is a direct access instead of a scan of the whole frame
players = df.drop_duplicates(subset='Player Name', keep='first').set_index('Player Name')
points_by_name = players['Predicted Points for GW 25'].to_dict()
value_by_name = players['Value'].to_dict()

# Decision variables: 1 if the player is selected, 0 otherwise
player_vars = {name: solver.IntVar(0, 1, f"var_{name}") for name in players.index}

# Objective: maximize total predicted points
objective = solver.Objective()
for name, var in player_vars.items():
    objective.SetCoefficient(var, points_by_name[name])
objective.SetMaximization()

# Constraints
# Budget constraint and team size (11 players)
solver.Add(solver.Sum([value_by_name[name] * var for name, var in player_vars.items()]) <= 1000)
solver.Add(solver.Sum(player_vars.values()) == 11)

# Formation constraints for an eleven:
#   minimum: 1 GK, 3 DEF, 2 MID, 1 FWD
#   maximum: exactly 1 GK may play; DEF/MID/FWD are capped at the squad quotas (5/5/3)
position_constraints = {'GK': 1, 'DEF': 3, 'MID': 2, 'FWD': 1}
position_maximums = {'GK': 1, 'DEF': 5, 'MID': 5, 'FWD': 3}
for position, min_count in position_constraints.items():
    names_in_position = players[players['Position'] == position].index
    position_total = solver.Sum([player_vars[name] for name in names_in_position])
    solver.Add(position_total >= min_count)
    solver.Add(position_total <= position_maximums[position])

# Team constraint (max 3 players from the same team)
for team, team_players in players.groupby('Team', sort=False):
    solver.Add(solver.Sum([player_vars[name] for name in team_players.index]) <= 3)

# Defined before solving so later cells never see a stale list from a previous run
selected_players = []

# Solve the problem
status = solver.Solve()

# Output
if status == pywraplp.Solver.OPTIMAL:
    print('Solution:')
    total_points = 0
    total_value = 0

    for name, var in player_vars.items():
        # Solution values are floats; compare against 0.5 rather than testing == 1 exactly
        if var.solution_value() > 0.5:
            print(f"{name}: Selected")
            total_points += points_by_name[name]
            total_value += value_by_name[name]
            selected_players.append(name)
    print(f"Total Predicted Points: {total_points}")
    print(f"Total Value: {total_value}")
else:
    print('The problem does not have an optimal solution.')

Solution:
David Raya Martin: Selected
Gabriel dos Santos Magalhães: Selected
Ashley Young: Selected
Rúben Gato Alves Dias: Selected
Rodrigo Hernandez: Selected
Anthony Gordon: Selected
Bukayo Saka: Selected
Pascal Groß: Selected
Dominic Solanke: Selected
Ollie Watkins: Selected
Danny Welbeck: Selected
Total Predicted Points: 77.72
Total Value: 684.0
