In [1]:
import pulp
import csv
import pandas as pd
from dfply import *

## Data

Import the Datasets

In [2]:
# path_to_files = "C:\\Users\\Tom\\Documents\\Blog\\2021\\Fantasy Football"

gw_df = pd.read_csv("merged_gw.csv")
gw_df

Unnamed: 0,name,assists,bonus,bps,clean_sheets,creativity,element,fixture,goals_conceded,goals_scored,...,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,GW
0,Aaron_Cresswell_376,0,0,7,0,1.5,376,8,5,0,...,0.0,0.0,0,0,0,0,50,True,0,1
1,Aaron_Lennon_430,0,0,3,0,0.0,430,3,0,0,...,3.0,0.0,1,0,0,0,50,True,0,1
2,Aaron_Mooy_516,0,0,0,0,0.0,516,7,0,0,...,0.0,0.0,0,0,0,0,50,False,0,1
3,Aaron_Ramsdale_494,0,0,11,0,0.0,494,2,1,0,...,1.0,0.0,2,0,0,0,45,True,0,1
4,Aaron_Wan-Bissaka_122,0,2,34,1,16.1,122,9,0,0,...,4.0,2.0,8,0,0,0,55,True,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22555,Youri_Tielemans_448,0,0,11,0,37.4,448,376,1,0,...,0.0,19.0,2,-3103,1905,5008,61,True,0,47
22556,Yves_Bissouma_53,0,0,22,0,1.3,53,372,1,1,...,1.0,7.0,7,77,147,70,50,False,0,47
22557,Zech_Medley_636,0,0,0,0,0.0,636,371,0,0,...,3.0,0.0,0,153,325,172,40,True,0,47
22558,Çaglar_Söyüncü_164,0,0,0,0,0.0,164,376,0,0,...,0.0,0.0,0,-95515,165,95680,49,True,0,47


- Remove all unnecessary columns
- Subset to every player who is there from GW1
- Get the final score of those players


In [3]:
## Remove all unnecessary columns
gw_trim = gw_df[["name", "total_points", "GW"]]

## Dfply tutorial
## https://towardsdatascience.com/dplyr-style-data-manipulation-with-pipes-in-python-380dcb137000
## Dfply docs
## https://github.com/kieferk/dfply

## Subset to every player who is there from GW1
gw_trim = (gw_trim >>
    group_by(X.name) >>
    mutate(firstWeek = X.GW.min()) >>
    ungroup()
       )

gw_trim = gw_trim[gw_trim["firstWeek"] == 1]


## Get the final score of those players
gw_trim = (gw_trim >>
    group_by(X.name) >>
    summarize(finalScore = X.total_points.sum()) >>
    ungroup()
       )


- Add second dataset and combine with price, pos, and team

In [4]:
pl_df = pd.read_csv("players_raw.csv")
pl_trim = pl_df[["first_name",  "second_name", "element_type",  "id",  "team"]]
pl_trim

Unnamed: 0,first_name,second_name,element_type,id,team
0,Shkodran,Mustafi,2,1,1
1,Héctor,Bellerín,2,2,1
2,Sead,Kolasinac,2,3,1
3,Ainsley,Maitland-Niles,2,4,1
4,Sokratis,Papastathopoulos,2,5,1
...,...,...,...,...,...
661,Oskar,Buur,2,587,20
662,Ryan,Giles,2,607,20
663,Bruno André,Cavaco Jordao,3,609,20
664,Daniel,Castelo Podence,3,619,20


In [5]:
pl_trim["id"] = pl_trim.loc[:,"id"].astype(str)
pl_trim = (pl_trim >>
           mutate(name = X.first_name + "_" + X.second_name + "_" + X.id) >>
           drop(X.first_name, X.second_name, X.id)
          )
pl_trim

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pl_trim["id"] = pl_trim.loc[:,"id"].astype(str)


Unnamed: 0,element_type,team,name
0,2,1,Shkodran_Mustafi_1
1,2,1,Héctor_Bellerín_2
2,2,1,Sead_Kolasinac_3
3,2,1,Ainsley_Maitland-Niles_4
4,2,1,Sokratis_Papastathopoulos_5
...,...,...,...
661,2,20,Oskar_Buur_587
662,2,20,Ryan_Giles_607
663,3,20,Bruno André_Cavaco Jordao_609
664,3,20,Daniel_Castelo Podence_619


- Combine the two dfs

In [25]:
combined_df = (gw_trim >> left_join(pl_trim, by = "name"))
combined_df

Unnamed: 0,name,finalScore,element_type,team
0,Aaron_Cresswell_376,79,2,19
1,Aaron_Lennon_430,15,3,5
2,Aaron_Mooy_516,80,3,4
3,Aaron_Ramsdale_494,126,1,3
4,Aaron_Wan-Bissaka_122,127,2,12
...,...,...,...,...
521,Yoshinori_Muto_260,9,4,13
522,Youri_Tielemans_448,117,3,9
523,Yves_Bissouma_53,39,3,4
524,Çaglar_Söyüncü_164,120,2,9


# Prep complete, now for Pulp
----
# Objective Function
Add the score of 11 players to get maximum

# Constraints
- No more than 3 players from one team
- Budget is 100m
- 1 GK
- 3-5 Def
- 2-5 Mid
- 1-3 Str
- Exactly 11 players

In [27]:
x = pulp.LpVariable.dict("player", range(0, len(combined_df)),
                        0,1, cat=pulp.LpInteger)

In [30]:
prob = pulp.LpProblem("FantasyFootball", pulp.LpMaximize)

In [31]:
prob

FantasyFootball:
MAXIMIZE
None
VARIABLES