# Fantasy Premier League Moneyball
        

# The plan:

link to trello board:
https://trello.com/b/FC7BUMHM/fpl-moneyball



#### 1. Pull data from fantasy premier league site to create datasets for:
- Club data 
- Player data
- Player history
- Fixture data
- My team data
- Fixtures / gameweek multiplier
- Previous season points (the previous season's 'points per appearance' has been added as gameweek 0 history)

    
#### 2. Create player score based on:
- v1: total points & total points per million
- v2: predicted points next gameweek / next 3 gameweeks and predicted points per million

    
#### 3. Create Optimal Team function using player score v1 or v2    


#### 4. Figure out a way to make transfers most efficiently to maximise my team score

#### Optimisation constraints:
- My budget (start at £100 million but can go up and down based on player prices changing)
- Fill a team of 15 players consisting of 
        - 2 x GKP
        - 5 x DEF
        - 5 x MID
        - 3 X FWD
- 4 of these players will be subs and do not generate you gamepoints.
- The 'starting 11' Can be of any combination/formation, providing 1 goalkeeper, at least 3 defenders and at least 1 forward are selected at all times.
- Only 1 free transfer can be made each week, further transfers deduct 4 points from your total points


#### We want to Maximise:
- Total points per player
- Player score (points per million x fixture multiplier) x chance to play
- Average points over next 3 gameweeks

#### Chips
- You are given a selection of boosts / advantages (called Chips) which you can play at any time, these are:
        - 2 x Wildcards: Make unlimited free transfers to your team in a gameweek
        - 1 x Free Hit: Same as above, but team reverts back to previous lineup after 1 week
        - Triple Captain: Your captain scores you triple points for the next gameweek
        - Bench Boost: The points from all your subs are included in your total score for the gameweek


### Manual inputs (login details, team id, free transfers available and home/hp)

In [None]:
# --- FPL account credentials (left blank here; fill in before running) ---
login = ""
password = ""

# --- Team and transfer settings ---
my_team_id = "3366741"
free_transfers_available = 1
# Set to True (and fill in new_money_in_bank) once transfers have been made
# after the last deadline; money_in_bank_to_use() then returns the manual figure.
transfers_made_since_last_deadline = False
new_money_in_bank = 0

# Folder holding the local csv inputs; enable the line for the computer in use
upload_path = r'C:\Users\andre\Documents\GitHub\murry_code\Fantasy Premier League\\' # ACER
# upload_path = r'C:\Users\andrew.morris\Documents\GitHub\murry_code\Fantasy Premier League\\' # HP

## Other teams
# my_team_id = '6442816' # riiaah - 
# my_team_id = '6443347' #amao

print("--- Manual inputs set ---")
print("--- all operations complete ---")

### Imports and APIs

In [None]:
#############
## imports ##
#############

import pandas as pd
from datetime import datetime as dt
import requests
import numpy as np
import time
import math 
import itertools
import statistics

################################
### Dataframe display limits ###
################################

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 100)

###########################################################
### Logged-in session for the team specific API pages   ###
###########################################################

session = requests.session()

# Form fields posted to the Premier League login endpoint
# (uses the `login` and `password` values from the manual inputs cell)
login_url = 'https://users.premierleague.com/accounts/login/'
payload = dict(
    password=password,
    login=login,
    redirect_uri='https://fantasy.premierleague.com/a/login',
    app='plfpl-web',
)
# NOTE: the response is not inspected, so a failed login is not reported here
session.post(login_url, data=payload)

print(f'--- Session created for {login}---')

################
### API URLs ###
################


# Main dataset (players, clubs, positions, gameweeks)
main_url = 'https://fantasy.premierleague.com/api/bootstrap-static/'

# My team
team_url = f'https://fantasy.premierleague.com/api/my-team/{my_team_id}/'

# My team details, fetched through the logged-in session
my_entry_ses = session.get(f'https://fantasy.premierleague.com/api/entry/{my_team_id}/')
my_entry_json = my_entry_ses.json()

# The gameweek being planned for is one after the API's 'current_event'
current_gameweek = 1 + my_entry_json['current_event']
print(f'--- Current gameweek set to {current_gameweek} ---')

# Money in bank: taken from the API (value divided by 10) unless transfers have
# been made since the last deadline, in which case 'new_money_in_bank' is used
money_in_bank = my_entry_json['last_deadline_bank'] / 10


def money_in_bank_to_use():
    """Return the manual bank figure if transfers were made since the last deadline, else the API figure."""
    return new_money_in_bank if transfers_made_since_last_deadline == True else money_in_bank


money_in_bank = money_in_bank_to_use()
print(f'--- Money In The Bank: £{money_in_bank} ---')

# Fixtures
fix_url = 'https://fantasy.premierleague.com/api/fixtures/'

print('--- API URL Variables Created ---')
print("--- all operations complete ---")

### Creating Functions

In [None]:
# Write a dataframe out as a timestamped csv file
# Pass the dataframe first, then optionally a filename stem and an output folder
def export_to_csv(dataframe, filename = 'FPL_data', path = 'C:\\Python CSV Output\\'):
    """Save `dataframe` as '<path>FPL_<filename>-<YYYYmmdd-HHMMSS>.csv', printing progress.

    `path` is concatenated as-is, so it must already end with a path separator.
    The file is written without the index and encoded as utf8.
    """
    print('----------------')
    print('Start csv export')
    timestamp = dt.now().strftime("%Y%m%d-%H%M%S")
    stamped_name = f'FPL_{filename}-{timestamp}'
    print(f'- filename: {stamped_name}')
    target = f'{path}{stamped_name}.csv'
    print(f'- save location: {target}')
    dataframe.to_csv(target, index=False, encoding="utf8")
    print('- successfully exported')
    print('-----------------------')

print("--- 'export_to_csv' function created ---")

# Calculate the time it takes to run a cell
# enter <start_time = time.time()> at the beginning of a cell then call this function at the end of a cell
def time_taken(start_time):
    """Print the wall-clock time elapsed since `start_time` as whole minutes and seconds.

    Parameters
    ----------
    start_time : float
        A timestamp previously obtained from `time.time()`.

    The elapsed time is rounded to whole seconds *before* being split into
    minutes and seconds, so the seconds part is always in 0-59 (rounding the
    remainder on its own could print e.g. "1 minutes 60 seconds").
    """
    elapsed_seconds = round(time.time() - start_time)
    minutes, seconds = divmod(elapsed_seconds, 60)
    print("--- Time taken to run: " + str(minutes) + ' minutes ' + str(seconds) + ' seconds ' + " ---")

print("--- 'time_taken' function created ---")

## Build an empty dataframe that has the same columns as the dataframe passed in

def create_empty_df(dataframe_name):
    """Return a new, row-less dataframe carrying the same column labels as `dataframe_name`."""
    return pd.DataFrame(columns=list(dataframe_name.columns))

print("--- 'create_empty_df' function created ---")


## Each player has an api url with their own history
## This function iterates through all the players,
## inserting their element (id) into the url and collecting the 'history' records

def create_player_history_table():
    """Build one dataframe holding the per-gameweek history of every player.

    Reads the player ids from the module-level `full_player_df` (its 'id'
    column), requests each player's element-summary endpoint and stacks the
    'history' records into a single dataframe with a fresh 0..n-1 index.

    Returns
    -------
    pandas.DataFrame
        One row per player per played fixture; an empty dataframe when no
        player has any history yet.

    Raises
    ------
    KeyError
        If a response does not contain a 'history' key.
    """
    print('--- Starting to create player history dataframe ---')

    history_frames = []
    # Iterate over the ids that actually exist instead of assuming they run
    # contiguously from 1 to len(full_player_df); sorted to keep a stable row order.
    for player_id in sorted(full_player_df['id']):
        player_url = 'https://fantasy.premierleague.com/api/element-summary/' + str(player_id) + '/'
        history = requests.get(player_url).json()['history']
        # Players without any history contribute no rows, so skip them
        if history:
            history_frames.append(pd.DataFrame(history))

    if not history_frames:
        return pd.DataFrame()

    # A single concat replaces the repeated DataFrame.append calls
    # (append was removed in pandas 2.0 and copied the frame on every iteration)
    return pd.concat(history_frames, ignore_index=True)

print("--- 'create_player_history_table' function created ---")
print("--- all operations complete ---")

### Creating core data frames from apis and csvs

In [None]:
####################
## CLUB DATAFRAME ##
####################

# Creating a dataframe for the club (team) data
# The full bootstrap-static payload is kept in 'main_json' because later
# sections read its 'elements', 'element_types' and 'events' entries too

main_req = requests.get(main_url)
main_json = main_req.json()
# One row per entry of the payload's 'teams' list
club_df = pd.DataFrame(main_json['teams'])

print("--- 'club_df' dataframe created ---")    

# club_df.head()

###################################
### PREVIOUS SEASON PLAYER DATA ###
###################################

# Previous season player data, loaded from a local csv
# Several uses, feeds the player score used to determine best team / transfers

import_previous_season_path = upload_path
prev_season_filename = "FPL_Players_2019_2020_season_v2.csv"

prev_season_player_df = pd.read_csv(import_previous_season_path + prev_season_filename)

# Keep only players with more than one appearance last season
# NOTE(review): players with exactly one appearance are dropped as well - confirm this is intended
prev_season_player_df = prev_season_player_df.loc[prev_season_player_df['appearances'] > 1]

# Derived per-gameweek / per-appearance figures, plus the constants that let the
# season be treated as a single 'gameweek 0' history row later on
prev_season_player_df = prev_season_player_df.assign(
    points_per_gameweek=lambda d: (d['total_points'] / 38).round(2),
    points_per_appearance=lambda d: d['total_points'] / d['appearances'],
    round=0,
    opponent_team='Previous Season',
    minutes_per_appearance=lambda d: (d['minutes'] / d['appearances']).round(0),
)

print("--- 'prev_season_player_df' dataframe created ---")    

# A single row per player that represents their whole season as if it were one gameweek
# This will be concatenated later with this current season's player history

prev_season_player_history_df = (
    prev_season_player_df[['id',
                           'opponent_team',
                           'points_per_appearance',
                           'round',
                           'minutes_per_appearance']]
    .copy()
    .astype({'minutes_per_appearance': int})
    .rename(columns={"id": "element",
                     "points_per_appearance": "total_points",
                     "minutes_per_appearance": "minutes"})
)

print("--- 'prev_season_player_history_df' dataframe created ---")


  
### GAMEWEEK ZERO DATAFRAME ###

# A view of last season's points per appearance, one row per player
# It is added to the player history as 'gameweek zero'
# and acts like the points a player scored in one gameweek of the current season
# The point is to enrich this season's player data; it matters less as data from this season builds up

_gameweek_zero_source_columns = ['id',
                                 'opponent_team',
                                 'points_per_appearance',
                                 'round',
                                 'minutes_per_appearance']

gameweek_zero_df = prev_season_player_df[_gameweek_zero_source_columns].copy()

# Whole minutes, then the same column names as the player history dataframe
gameweek_zero_df = gameweek_zero_df.astype({'minutes_per_appearance': int})
gameweek_zero_df = gameweek_zero_df.rename(columns={"id": "element",
                                                    "points_per_appearance": "total_points",
                                                    "minutes_per_appearance": "minutes"})


print("--- 'gameweek_zero_df' dataframe created ---")
# gameweek_zero_df.head()

#####################
#### PLAYER DATA ####
#####################

# Creating full_player_df: one row per entry of the bootstrap-static 'elements' list
# Several uses, contains the data on individual players such as points and is used to determine best team / transfers

full_player_df = pd.DataFrame(main_json['elements'])         # Player dataframe
position_types_df = pd.DataFrame(main_json['element_types']) # Position dataframe
events_df = pd.DataFrame(main_json['events'])                # Gameweek dataframe

# Updating and creating some of the fields in full player dataframe:
    # Pulling through position and team short names instead of their ids
full_player_df['position_abv'] = full_player_df.element_type.map(position_types_df.set_index('id').singular_name_short)
full_player_df['team_abv'] = full_player_df.team.map(club_df.set_index('id').short_name)
    # '<team_abv>-<position_abv>' lookup key.
    # A stray unary '+' in front of this expression was removed: applied to a
    # string column it is at best a no-op copy (with a DeprecationWarning on
    # older NumPy) and raises TypeError on recent versions.
full_player_df['team_and_position'] = full_player_df['team_abv'] + '-' + full_player_df['position_abv']

    # Pulling through data related to previous season (0.0 for players not in last season's data)
_prev_season_by_id = prev_season_player_df.set_index('id')
full_player_df['prev_season_points'] = full_player_df.id.map(_prev_season_by_id.total_points).fillna(0.0)
full_player_df['prev_season_points_per_gw'] = full_player_df.id.map(_prev_season_by_id.points_per_gameweek).fillna(0.0)
full_player_df['prev_season_minutes'] = full_player_df.id.map(_prev_season_by_id.minutes).fillna(0.0)

    # Converting selected_by_percent to float data type
full_player_df['selected_by_percent'] = full_player_df.selected_by_percent.astype(float)

    # Creating chance of playing multipliers; a missing chance is treated as 1.0
full_player_df['nxt_round_play_chance'] = (full_player_df['chance_of_playing_next_round']/100.0).fillna(1.0)
full_player_df['this_round_play_chance'] = (full_player_df['chance_of_playing_this_round']/100.0).fillna(1.0)

    # Creating custom points_per_mil
    # NOTE(review): 'now_cost' is presumably in tenths of a million - confirm
full_player_df['cost_in_mil'] = full_player_df['now_cost']/10
full_player_df['points_per_mil'] = round(full_player_df['total_points']/full_player_df['cost_in_mil'],2)
    # Previous season points per million, scaled from 38 gameweeks down to the gameweeks completed so far
full_player_df['prev_season_points_per_mil'] = ((round(full_player_df['prev_season_points']/full_player_df['cost_in_mil'],2))/38)*(current_gameweek-1)
full_player_df['total_points_inc_prv_season'] = full_player_df.total_points + full_player_df.prev_season_points_per_gw

full_player_df['current_gameweek'] = current_gameweek

print("--- 'full_player_df' dataframe created ---")
print("--- all operations complete ---")
# export_to_csv(full_player_df, filename = 'FPL_full_player_df')  

In [None]:
### FIXTURE DATA ###
### Creating a dataframe for the fixtures ###
### Primarily used to create a gameweek difficulty multiplier which is applied to the player score

#########################################
#### full fixture dataframe creation ####
#########################################

## Fixture data from the api url, converted to json then to a dataframe
fix_req = requests.get(fix_url)
fix_json = fix_req.json()
fix_df = pd.DataFrame(fix_json)

# Matches not yet scheduled have a null gameweek: replaced with 99 so the column can be int
fix_df['event'] = fix_df['event'].fillna(99.0).astype(int)

# Club lookup keyed by club id, shared by every mapping below
_club_by_id = club_df.set_index('id')

# Club names and abbreviations for the home and the away team
for _side, _team_col in (('home', 'team_h'), ('away', 'team_a')):
    fix_df[f'{_side}_team_full'] = fix_df[_team_col].map(_club_by_id['name'])
    fix_df[f'{_side}_team_short'] = fix_df[_team_col].map(_club_by_id['short_name'])

# Adding Home/Away strengths then blending the home and away strengths per team,
# As there are no crowds this year the home advantage might be underplayed
# Logic which utilises the home and away scores separately is saved under random at the end of the code

# Keep an eye on this to see if they flip around at any point:
# every '*_home' column is filled from the club's '*_away' strength and vice versa
# (the blended mean below is unaffected by that swap)
_strength_fields = (('overall', 'strength_overall'),
                    ('att', 'strength_attack'),
                    ('def', 'strength_defence'))

for _prefix, _team_col, _venues in (('h', 'team_h', ('home', 'away')),
                                    ('a', 'team_a', ('away', 'home'))):
    for _venue in _venues:
        _source_venue = 'away' if _venue == 'home' else 'home'
        for _short, _club_field in _strength_fields:
            fix_df[f'{_prefix}_team_str_{_short}_{_venue}'] = fix_df[_team_col].map(
                _club_by_id[f'{_club_field}_{_source_venue}'])
    # Blending home and away strength scores for this team
    for _short, _club_field in _strength_fields:
        fix_df[f'{_prefix}_team_str_{_short}'] = fix_df[
            [f'{_prefix}_team_str_{_short}_home', f'{_prefix}_team_str_{_short}_away']
        ].mean(axis=1)

# Creating home/away position specific multipliers
    # Basic logic is:
        # For goalkeepers or defenders - own defence score / opponent attack score
        # For midfielders - own overall score / opponent overall score
        # For forwards - own attack score / opponent defence score
for _label, _own, _opp in (('home', 'h', 'a'), ('away', 'a', 'h')):
    fix_df[f'{_label}_GKP_DEF_multi'] = fix_df[f'{_own}_team_str_def'] / fix_df[f'{_opp}_team_str_att']
    fix_df[f'{_label}_MID_multi'] = fix_df[f'{_own}_team_str_overall'] / fix_df[f'{_opp}_team_str_overall']
    fix_df[f'{_label}_FWD_multi'] = fix_df[f'{_own}_team_str_att'] / fix_df[f'{_opp}_team_str_def']

    # Renaming 'event' to 'gameweek'
fix_df = fix_df.rename(columns={"event": "gameweek"})

print("--- 'fix_df' dataframe created ---")

####################################
### GAMES PLAYED BY EACH CLUB DF ###
####################################

### Counts, per club, the fixtures in gameweeks before the current one

club_games_played_df = fix_df[fix_df.gameweek < current_gameweek]
club_games_played_df = (
    # Stack away and home appearances so every fixture yields one row per club
    pd.concat([club_games_played_df[['gameweek', team_col]].rename(columns={team_col: 'team_id'})
               for team_col in ('team_a', 'team_h')])
    # Attach the club abbreviation, then drop the join key brought in from club_df
    .merge(club_df[['id', 'short_name']], how='left', left_on=['team_id'], right_on=['id'])
    .drop(['id'], axis=1)
    # One row per club with its number of fixtures
    .groupby(['short_name'])
    .size()
    .reset_index(name='games_played')
    .rename(columns={'short_name': 'team_abv'})
)

#######################################################
### SLIM FIXTURES DATAFRAME AND GAMEWEEK MULTIPLIER ###
#######################################################

# Creating a slim version of the fixtures dataframe with a row per position, per team, per fixture
# One dataframe is built per position for the home and the away side; they are concatenated afterwards


def _slim_fixture_frame(position, home_or_away, multiplier_column):
    """Return the slim fixture view for one position on one side of every fixture.

    Takes the team / opponent / difficulty columns for the requested side from
    `fix_df` together with `multiplier_column`, tags each row with `position`
    and `home_or_away`, and renames the columns to the side-independent names
    team_id, team_abv, opponent_abv, match_difficulty and match_multiplier.
    """
    if home_or_away == 'home':
        team_col, team_short, opponent_short, difficulty_col = (
            'team_h', 'home_team_short', 'away_team_short', 'team_h_difficulty')
    else:
        team_col, team_short, opponent_short, difficulty_col = (
            'team_a', 'away_team_short', 'home_team_short', 'team_a_difficulty')

    frame = fix_df[['gameweek', 'kickoff_time', team_col, team_short,
                    opponent_short, difficulty_col, multiplier_column]].copy()
    frame['position_abv'] = position
    frame['home_or_away'] = home_or_away
    return frame.rename(columns={team_col: 'team_id',
                                 team_short: 'team_abv',
                                 opponent_short: 'opponent_abv',
                                 difficulty_col: 'match_difficulty',
                                 multiplier_column: 'match_multiplier'})


#######################
### Playing at home ###
#######################

# Goalkeepers and defenders share the GKP/DEF multiplier
GKP_home_fix_df = _slim_fixture_frame('GKP', 'home', 'home_GKP_DEF_multi')
DEF_home_fix_df = _slim_fixture_frame('DEF', 'home', 'home_GKP_DEF_multi')
MID_home_fix_df = _slim_fixture_frame('MID', 'home', 'home_MID_multi')
FWD_home_fix_df = _slim_fixture_frame('FWD', 'home', 'home_FWD_multi')

####################
### Playing away ###
####################

GKP_away_fix_df = _slim_fixture_frame('GKP', 'away', 'away_GKP_DEF_multi')
DEF_away_fix_df = _slim_fixture_frame('DEF', 'away', 'away_GKP_DEF_multi')
MID_away_fix_df = _slim_fixture_frame('MID', 'away', 'away_MID_multi')
FWD_away_fix_df = _slim_fixture_frame('FWD', 'away', 'away_FWD_multi')

## Stacking the eight per-position home/away dataframes into one
slim_fix_df = pd.concat([
    GKP_home_fix_df, DEF_home_fix_df, MID_home_fix_df, FWD_home_fix_df,
    GKP_away_fix_df, DEF_away_fix_df, MID_away_fix_df, FWD_away_fix_df,
])

# Boolean fields identifying current, historic and future gameweeks,
# plus the 3 and 5 gameweek windows that start at the current gameweek
_gameweek = slim_fix_df['gameweek']
slim_fix_df['is_current_gameweek'] = _gameweek == current_gameweek
slim_fix_df['is_historic_gameweek'] = _gameweek < current_gameweek
slim_fix_df['is_future_gameweek'] = _gameweek > current_gameweek
slim_fix_df['is_in_next_3_gameweeks'] = _gameweek.between(current_gameweek, current_gameweek + 2)
slim_fix_df['is_in_next_5_gameweeks'] = _gameweek.between(current_gameweek, current_gameweek + 4)

# team_abv and position_abv combined into one key used for mapping
slim_fix_df['team_and_position'] = slim_fix_df['team_abv'] + '-' + slim_fix_df['position_abv']

# Ordered by kickoff datetime, with a fresh index
slim_fix_df = slim_fix_df.sort_values(by=['kickoff_time']).reset_index(drop = True)

print("--- 'slim_fix_df' dataframe created ---")

##############################################
### Creating Gameweek Multiplier Dataframe ###
##############################################

# ##########################################
# ### Next 5 Gameweek Average Multiplier ###
# ##########################################

# # Not using this at the moment

# Creating a dataframe of just the next 5 gameweeks
next_5_gameweeks_multi_df = slim_fix_df[slim_fix_df.is_in_next_5_gameweeks]

# Creating a Pivot table of next 5 gameweeks with 'team_and_position' & average 'match_multiplier'
# aggfunc is passed as the string 'mean': passing np.mean triggers a FutureWarning on recent pandas
next_5_gameweeks_multi_pivot = next_5_gameweeks_multi_df.pivot_table(index=['team_and_position'],
                                                          values=['match_multiplier'],
                                                          aggfunc='mean')
# Transforming the pivot table back into a dataframe
next_5_gameweeks_multi_df = pd.DataFrame(next_5_gameweeks_multi_pivot.to_records()).rename(columns={"match_multiplier": "nxt_5_gw_avg_multi"})

print("--- 'next_5_gameweeks_multi_df' dataframe created ---")
# print(next_5_gameweeks_multi_df.head())

##########################################
### Next 3 Gameweek Average Multiplier ###
##########################################

# Creating a dataframe of just the next 3 gameweeks
next_3_gameweeks_multi_df = slim_fix_df[slim_fix_df.is_in_next_3_gameweeks]

# Creating a Pivot table of next 3 gameweeks with 'team_and_position' & average 'match_multiplier'
next_3_gameweeks_multi_pivot = next_3_gameweeks_multi_df.pivot_table(index=['team_and_position'],
                                                          values=['match_multiplier'],
                                                          aggfunc='mean')
# Transforming the pivot table back into a dataframe
next_3_gameweeks_multi_df = pd.DataFrame(next_3_gameweeks_multi_pivot.to_records()).rename(columns={"match_multiplier": "nxt_3_gw_avg_multi"})

print("--- 'next_3_gameweeks_multi_df' dataframe created ---")
# print(next_3_gameweeks_multi_df.head())

####################################

# Creating a dataframe of just the current gameweek with columns 'team_and_position' & 'match_multiplier'
# NOTE(review): a team with two fixtures in the current gameweek gets two rows here and a team
# with none gets no row - confirm downstream code copes with both
gameweek_multiplier_df = slim_fix_df[slim_fix_df.is_current_gameweek].reset_index(drop = True)
gameweek_multiplier_df = gameweek_multiplier_df[['team_and_position', 'match_multiplier']]

# Renaming match multiplier to 'gw_multi'
gameweek_multiplier_df = gameweek_multiplier_df.rename(columns={"match_multiplier": "gw_multi"})

# Pulling the 5 and 3 week average match multipliers into 'gameweek_multiplier_df'
gameweek_multiplier_df['nxt_5_gw_multi'] = gameweek_multiplier_df.team_and_position.map(next_5_gameweeks_multi_df.set_index('team_and_position').nxt_5_gw_avg_multi)
gameweek_multiplier_df['nxt_3_gw_multi'] = gameweek_multiplier_df.team_and_position.map(next_3_gameweeks_multi_df.set_index('team_and_position').nxt_3_gw_avg_multi)

# Taking an average of the current gameweek multiplier and the next 3 week match multiplier
# This way match strength will still be a little weighted towards the next match
# However the upcoming matches are also taken into account to avoid having to transfer players out of the team again in upcoming weeks
gameweek_multiplier_df['nxt_3_wk_blended_match_multi'] = gameweek_multiplier_df[['gw_multi','nxt_3_gw_multi']].mean(axis=1)

# Same blend with the 5 week average; not being used currently
gameweek_multiplier_df['nxt_5_wk_blended_match_multi'] = gameweek_multiplier_df[['gw_multi','nxt_5_gw_multi']].mean(axis=1)

print("--- 'gameweek_multiplier_df' dataframe created ---")
print("--- all operations complete ---")
# gameweek_multiplier_df.head(10)

In [None]:
### Player History Data ###
### Data frame containing a row per player history for each previous gameweek ###
## Primarily use to determine the appearances and average gameweek multiplier for games played
## these will be used to determine points per gameweek and then used to calculate a predicted player score ###

#######################################
### Create Player History Dataframe ###
#######################################

start_time = time.time()

## Creating player_history df using the function created earlier
player_history_df = create_player_history_table()

print("--- 'player_history_df' dataframe created ---")

time_taken(start_time = start_time)

# player_history_df.head(50)

####################################################################
###        Creating a slimline version of player history         ###
### and combining with gameweek multipliers and full player data ###
####################################################################

### Used to provide a score per match, adjusted based on that match difficulty

# Creating a slimline version of the player history dataframe
slim_player_history_df = player_history_df[['element', 'opponent_team', 'total_points', 'round', 'minutes']].copy()

# Enriching this season's player history by adding the previous season average as gameweek 0
slim_player_history_df = pd.concat([gameweek_zero_df, slim_player_history_df])

# Mapping the opponent's short name from club_df; anything that cannot be matched is filled
# with 'PRE' (expected to be the gameweek 0 rows, which stand for the previous season
# rather than a specific opponent)
slim_player_history_df['opponent_abv'] = slim_player_history_df.opponent_team.map(club_df.set_index('id').short_name).fillna('PRE')

# Renaming 'round' to 'gameweek' and 'total_points' to 'match_points'
slim_player_history_df = slim_player_history_df.rename(columns={'round': 'gameweek', 'total_points': 'match_points'})

# Merging with the full player df to pull through the required player fields
slim_player_history_df = slim_player_history_df.merge(full_player_df[['id',
                                                                      'first_name',
                                                                      'second_name',
                                                                      'web_name',
                                                                      'position_abv',
                                                                      'team_abv']],
                                                      how='left', left_on=['element'], right_on=['id'])

# Combining the team and position abbreviations to create a lookup field for the fixture dataframe
slim_player_history_df['team_and_position'] = slim_player_history_df.team_abv + '-' + slim_player_history_df.position_abv

# NOTE(review): if slim_fix_df holds more than one row per (gameweek, team_and_position),
# e.g. a double gameweek, this left merge duplicates the history rows - confirm.
slim_player_history_df = slim_player_history_df.merge(slim_fix_df[['gameweek', 'team_and_position', 'match_multiplier']],
                                                      how='left', on=['gameweek', 'team_and_position'])

# Filling null match multipliers with 1
# (should only be the gameweek 0 records as there is no matching gameweek in slim_fix_df)
slim_player_history_df['match_multiplier'] = slim_player_history_df['match_multiplier'].fillna(1)

# Appearance flag: 1 if any minutes were played that gameweek, otherwise 0
slim_player_history_df['appearance'] = slim_player_history_df.minutes.apply(lambda x: 1 if x > 0 else 0)

# Appearance factor based on minutes played: minutes / 60, capped at 1.0 from 60 minutes onwards.
# (Previously divided by 90 while still switching to 1.0 above 60 minutes, which contradicted
# the documented "/ 60" rule and made the factor jump from 0.67 to 1.0 at the 60 minute mark.)
slim_player_history_df['appearance_factor'] = slim_player_history_df.minutes.apply(lambda mins: min(mins / 60, 1.0))

# Adjusting the match points for match difficulty by dividing by the match multiplier
slim_player_history_df['adjusted_match_points'] = slim_player_history_df['match_points'] / slim_player_history_df['match_multiplier']
slim_player_history_df['adjusted_match_points'] = slim_player_history_df['adjusted_match_points'].astype(float)

## Removing records where they did not play
slim_player_history_df = slim_player_history_df[slim_player_history_df.appearance == 1]

slim_player_history_df = slim_player_history_df[['gameweek',
                                                 'id',
                                                 'first_name',
                                                 'second_name',
                                                 'web_name',
                                                 'team_abv',
                                                 'position_abv',
                                                 'team_and_position',
                                                 'opponent_abv',
                                                 'match_points',
                                                 'match_multiplier',
                                                 'adjusted_match_points',
                                                 'minutes',
                                                 'appearance',
                                                 'appearance_factor']].copy()

print("--- 'slim_player_history_df' dataframe created ---")

# Average adjusted points per appearance, one row per player id
avg_points_per_app_df = slim_player_history_df[['id', 'adjusted_match_points']].astype({'adjusted_match_points': float})
avg_points_per_app_pivot = avg_points_per_app_df.pivot_table(values=['adjusted_match_points'],
                                                             index=['id'],
                                                             aggfunc='mean')
# Flatten the pivot back into a plain dataframe and give the mean its final name
avg_points_per_app_df = pd.DataFrame(avg_points_per_app_pivot.to_records())
avg_points_per_app_df = avg_points_per_app_df.rename(columns={"adjusted_match_points": "avg_points_per_app"})

print("--- 'avg_points_per_app_df' dataframe created ---")

# Appearances per player this season: gameweek 0 (the previous-season row) is excluded
season_appearances_df = slim_player_history_df.loc[slim_player_history_df['gameweek'] != 0].copy()
season_appearances_pivot = season_appearances_df.pivot_table(values=['appearance'],
                                                             index=['id'],
                                                             aggfunc='sum')
season_appearances_df = pd.DataFrame(season_appearances_pivot.to_records())
season_appearances_df = season_appearances_df.rename(columns={"appearance": "season_appearances"})

print("--- 'season_appearances_df' dataframe created ---")
print("--- all operations complete ---")
# slim_player_history_df.head()
# avg_points_per_app_df.head()

### Adding gameweek Multipliers To Players and Calculating Player Score

#### Version 2 of the player score calc:
   - Using the predicted score
   - average points per appearance so far
   - multiplied by average gw multiplier for the next 3 game weeks
   - divided by cost in million
   - Multiply by chance to play next round
   - If the blended multiplier (the average of the current gameweek multiplier and the next 3 gameweeks' average multiplier) is less than 0.97 then player score = 0
   

In [None]:
#############################################################
### Updating slim_player_df with the gameweek multipliers ###
#############################################################

# Slimline copy of the player table holding only the fields needed for scoring
slim_player_df = full_player_df.loc[:, ['first_name', 'second_name', 'web_name', 'id', 'position_abv',
                                        'team_abv', 'team_and_position', 'selected_by_percent', 'current_gameweek',
                                        'this_round_play_chance', 'nxt_round_play_chance', 'status',
                                        'cost_in_mil', 'minutes', 'transfers_in', 'transfers_out', 'total_points',
                                        'total_points_inc_prv_season', 'prev_season_minutes', 'prev_season_points',
                                        'prev_season_points_per_gw', 'prev_season_points_per_mil',
                                        'points_per_mil']].copy()

# Attach the current, next-3 and next-5 gameweek multipliers via the team/position key
slim_player_df = pd.merge(
    slim_player_df,
    gameweek_multiplier_df[['team_and_position',
                            'gw_multi',
                            'nxt_3_gw_multi',
                            'nxt_3_wk_blended_match_multi',
                            'nxt_5_gw_multi',
                            'nxt_5_wk_blended_match_multi']],
    how='left',
    on='team_and_position',
)

# Predicted scores = average points per appearance x fixture multiplier x chance of playing next round,
# for the next gameweek, the next 3 gameweeks and the next 5 gameweeks
slim_player_df['avg_points_per_app'] = slim_player_df['id'].map(
    avg_points_per_app_df.set_index('id')['avg_points_per_app']
)
slim_player_df['nxt_gw_pred_score'] = (
    slim_player_df['avg_points_per_app'].mul(slim_player_df['gw_multi']).mul(slim_player_df['nxt_round_play_chance'])
)
slim_player_df['nxt_3gw_pred_score'] = (
    slim_player_df['avg_points_per_app'].mul(slim_player_df['nxt_3_gw_multi']).mul(slim_player_df['nxt_round_play_chance'])
)
slim_player_df['nxt_5gw_pred_score'] = (
    slim_player_df['avg_points_per_app'].mul(slim_player_df['nxt_5_gw_multi']).mul(slim_player_df['nxt_round_play_chance'])
)

# The same predictions expressed per million of cost
slim_player_df['nxt_gw_pred_score_per_mil'] = slim_player_df['nxt_gw_pred_score'].div(slim_player_df['cost_in_mil'])
slim_player_df['nxt_3gw_pred_score_per_mil'] = slim_player_df['nxt_3gw_pred_score'].div(slim_player_df['cost_in_mil'])
slim_player_df['nxt_5gw_pred_score_per_mil'] = slim_player_df['nxt_5gw_pred_score'].div(slim_player_df['cost_in_mil'])

# Share of the club's games this season in which the player appeared
# (players with no recorded appearances count as 0)
slim_player_df['club_games_played'] = slim_player_df['team_abv'].map(
    club_games_played_df.set_index('team_abv')['games_played']
)
slim_player_df['appearances_this_season'] = slim_player_df['id'].map(
    season_appearances_df.set_index('id')['season_appearances']
).fillna(0.0)
slim_player_df['appearance_percentage'] = slim_player_df['appearances_this_season'].div(slim_player_df['club_games_played'])

##################################
###### Player Score V2 Calc ######
##################################

# Player score v2 = predicted points per million over the next 3 gameweeks, gated by fixtures:
#     gate = 0.0 when the blended multiplier ('nxt_3_wk_blended_match_multi', the mean of the
#            current gameweek multiplier and the next-3-gameweek average) is below 0.97
#     gate = 1.0 otherwise (including when the blended multiplier is missing)
#     score = nxt_3gw_pred_score_per_mil x gate
#           = (avg points per appearance x next-3 gw multiplier x chance to play next round) / cost in million

calc_player_score_v2 = slim_player_df['nxt_3gw_pred_score_per_mil'] * (
    slim_player_df['nxt_3_wk_blended_match_multi'].lt(0.97).map({True: 0.0, False: 1.0})
)

slim_player_df['player_score_v2'] = calc_player_score_v2

########################################
### End of player score calculations ###
########################################

# Best player score first, with a fresh 0..n-1 index
slim_player_df = slim_player_df.sort_values(by=['player_score_v2'], ascending=False)
slim_player_df = slim_player_df.reset_index(drop=True)

print("--- 'slim_player_df' updated ---")
print("--- all operations complete ---")
slim_player_df.head(10)

In [None]:
##########################################################
### TOP POINTS PLAYERS AND TOP PLAYER SCORE DATAFRAMES ###
###           BASED ON PLAYER SCORE CALC V2            ###
##########################################################

# Both frames keep only players with a positive player_score_v2 and share the same columns;
# they differ only in the column they are ranked by.

######################################################################
# Players ordered by predicted points over the next 3 gameweeks      #
######################################################################

points_top_players = (
    slim_player_df.loc[slim_player_df['player_score_v2'] > 0.0,
                       ['cost_in_mil',
                        'team_abv',
                        'position_abv',
                        'web_name',
                        'id',
                        'avg_points_per_app',
                        'gw_multi',
                        'nxt_gw_pred_score',
                        'nxt_3_gw_multi',
                        'nxt_3gw_pred_score',
                        'player_score_v2',
                        'appearance_percentage']]
    .sort_values(by=['nxt_3gw_pred_score'], ascending=False)
    .reset_index(drop=True)
)

print("--- 'points_top_players' dataframe created ---")

###########################################################################################################
# Players ordered by player score (to get the most cost effective players)                                #
###########################################################################################################

player_score_top_players = (
    slim_player_df.loc[slim_player_df['player_score_v2'] > 0.0,
                       ['cost_in_mil',
                        'team_abv',
                        'position_abv',
                        'web_name',
                        'id',
                        'avg_points_per_app',
                        'gw_multi',
                        'nxt_gw_pred_score',
                        'nxt_3_gw_multi',
                        'nxt_3gw_pred_score',
                        'player_score_v2',
                        'appearance_percentage']]
    .sort_values(by=['player_score_v2'], ascending=False)
    .reset_index(drop=True)
)

print("--- 'player_score_top_players' dataframe created ---")
print("--- all operations complete ---")
points_top_players


In [None]:
#########################################################
### Creating optimal team using optimal team function ###
#########################################################

################################################
### Optimal team logic Using Player Score V2 ###
################################################

# Function which produces the optimal team based on:
    # A specified number of star players (picking solely based on top points)
    # Filling the rest of the team based on points per million 
    # The budget you want to spend on your team
    # The minimum cost per player to keep in reserve for every slot still to be filled (most likely your subs)

def get_optimal_team_objects(points_top_players=points_top_players, 
                             player_score_top_players=player_score_top_players,
                             budget = 100, 
                             star_player_limit = 3, 
                             gk = 2, 
                             df = 5, 
                             md = 5, 
                             fwd = 3, 
                             min_player_cost = 4.5,
                             min_appearance_percentage = 0.8):
    """Greedily build a squad from the two ranked candidate frames.

    Phase 1 walks ``points_top_players`` in order and accepts eligible players
    until the squad holds ``star_player_limit`` players. Phase 2 walks
    ``player_score_top_players`` in order and accepts every eligible player
    not already picked.

    A player is eligible when all of the following hold:
      * the remaining budget covers the player's ``cost_in_mil``;
      * the player's position still has an open slot (``gk``/``df``/``md``/``fwd``);
      * fewer than 3 players from the same club have been picked
        (clubs are read from the module-level ``club_df['short_name']``);
      * after buying, at least ``min_player_cost`` per remaining slot is left;
      * ``appearance_percentage`` is at least ``min_appearance_percentage``.

    Parameters
    ----------
    points_top_players, player_score_top_players : DataFrame
        Candidate frames, iterated in their existing row order. NOTE: the
        defaults are bound when this ``def`` statement runs, so re-run this
        cell after rebuilding those frames.
    budget : number
        Total spend allowed, in millions.
    star_player_limit : int
        Squad size at which phase 1 stops picking.
    gk, df, md, fwd : int
        Slots per position (GKP, DEF, MID, FWD).
    min_player_cost : number
        Budget kept in reserve for each slot still to be filled.
    min_appearance_percentage : number
        Minimum ``appearance_percentage`` a player needs to be considered.

    Returns
    -------
    DataFrame
        One row per selected player, in pick order. May hold fewer players
        than requested when the constraints cannot all be met.
    """
    team_columns = ['id',
                    'web_name',
                    'position_abv',
                    'team_abv',
                    'cost_in_mil',
                    'gw_multi',
                    'nxt_gw_pred_score',
                    'nxt_3_gw_multi',
                    'nxt_3gw_pred_score',
                    'appearance_percentage',
                    'player_score_v2']
    positions = {'GKP': gk, 'DEF': df, 'MID': md, 'FWD': fwd}
    club_player_count = {club: 3 for club in club_df['short_name'].unique()}
    # Slots still to fill; derived from the requested squad shape instead of a fixed 15
    # so the budget reserve stays correct when non-default position counts are passed.
    players_left = sum(positions.values())

    # Rows are collected and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0.
    picked_rows = []
    picked_ids = set()

    # (candidate frame, squad size at which to stop drawing from it; None = no cap)
    phases = ((points_top_players, star_player_limit),
              (player_score_top_players, None))

    for candidates, squad_size_cap in phases:
        for _, player in candidates.iterrows():
            if squad_size_cap is not None and len(picked_rows) >= squad_size_cap:
                break
            if player['id'] in picked_ids:
                continue

            cost = player['cost_in_mil']
            position = player['position_abv']
            club = player['team_abv']
            eligible = (budget >= cost
                        and positions[position] > 0
                        and club_player_count[club] > 0
                        and (budget - cost) >= ((players_left - 1) * min_player_cost)
                        and player['appearance_percentage'] >= min_appearance_percentage)
            if not eligible:
                continue

            picked_rows.append({column: player[column] for column in team_columns})
            picked_ids.add(player['id'])
            budget -= cost
            players_left -= 1
            positions[position] -= 1
            club_player_count[club] -= 1

    return pd.DataFrame(picked_rows, columns=team_columns)

# functions to evaluate optimal team 
def valid_team(team):
    """Return a status message saying whether *team* holds at least 15 players."""
    squad_is_complete = len(team) >= 15
    if squad_is_complete:
        return 'Valid Team: True'
    return 'Valid Team: False - Cannot make selection of 15 players with current star_player_limit, budget & min_player_cost'
    
def terrible_subs(team):
    """Return a message based on the lowest ``nxt_3gw_pred_score`` in *team*.

    The "Terrible Subs!" message is returned when that minimum is below 3,
    otherwise the "Great Team!" message.
    """
    weakest_score = team.nxt_3gw_pred_score.min()
    return (
        'Terrible Subs! Some players have less than 3 points on average over next 3 gameweeks'
        if weakest_score < 3
        else 'Great Team! All players expected to score more than 3 points on average over next 3 gameweeks'
    )

# Parameters for this run of the optimal team builder
budget = 95
star_player_limit = 1
min_player_cost = 5
min_appearance_percentage = 0.30

# Build optimal_team_df with the custom parameters above
optimal_team_df = get_optimal_team_objects(
    budget=budget,
    star_player_limit=star_player_limit,
    min_player_cost=min_player_cost,
    min_appearance_percentage=min_appearance_percentage,
)

# Summary: squad totals (predicted points, cost, multiplier) followed by the two health checks
print(f'Optimal Team For Gameweek: {current_gameweek}')
print('--------------------------')
print(f"Total Team Points: {round(optimal_team_df['nxt_3gw_pred_score'].sum(), 2)}")
print(f"Total Team Cost: {round(optimal_team_df['cost_in_mil'].sum(), 2)}")
print(f"Total Team Multiplier: {round(optimal_team_df['nxt_3_gw_multi'].sum(), 2)}")
print(valid_team(optimal_team_df))
print('--------------------------')
print(terrible_subs(optimal_team_df))

print("--- all operations complete ---")
# export_to_csv(dataframe = optimal_team_df, filename = 'Optimal_Team_gw_'+str(current_gameweek))
optimal_team_df.head(15)

In [None]:
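### Create database of optimal teams
### Using all possible combinations of a range of star player limit, budget and min player cost
## Then pivot that table to rank the combinations by total predicted points (next 3 gameweeks) across the whole squad
## (restricting the total to the top 11 / requiring subs of min 3 points is not implemented in this cell)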

import itertools

start_time = time.time()

def create_optimal_team_combos(budget_list, star_player_limit_list, min_player_cost_list, min_appearance_percentage = min_appearance_percentage):
    """Build the optimal team for every parameter combination and stack the results.

    Calls ``get_optimal_team_objects`` once per element of the cartesian
    product ``budget_list x star_player_limit_list x min_player_cost_list``
    and tags each returned squad with the parameters that produced it.

    Parameters
    ----------
    budget_list, star_player_limit_list, min_player_cost_list : iterable
        Values to sweep for the matching ``get_optimal_team_objects`` argument.
    min_appearance_percentage : number
        Passed through unchanged to every call. NOTE: the default is bound to
        the module-level value when this ``def`` statement runs.

    Returns
    -------
    DataFrame
        All squads stacked (each squad keeps its own row index), with the
        extra columns ``budget_limit``, ``star_player_limit``,
        ``min_player_cost``, ``combo_str`` ("budget-stars-mincost") and
        ``combo_list`` (the same three values as a list).
    """
    leading_columns = ['id',
                       'web_name',
                       'position_abv',
                       'team_abv',
                       'cost_in_mil',
                       'gw_multi',
                       'nxt_gw_pred_score',
                       'nxt_3_gw_multi',
                       'nxt_3gw_pred_score',
                       'player_score_v2',
                       'budget_limit',
                       'star_player_limit',
                       'min_player_cost']

    # Collect every squad and concatenate once: re-concatenating the growing frame
    # on each iteration copies all earlier rows again every time.
    squads = []
    for budget_limit, star_limit, min_cost in itertools.product(budget_list, star_player_limit_list, min_player_cost_list):
        squad = get_optimal_team_objects(budget = budget_limit,
                                         star_player_limit = star_limit,
                                         min_player_cost = min_cost,
                                         min_appearance_percentage = min_appearance_percentage)
        squad['budget_limit'] = budget_limit
        squad['star_player_limit'] = star_limit
        squad['min_player_cost'] = min_cost
        squads.append(squad)

    if squads:
        all_optimal_teams_df = pd.concat(squads)
    else:
        # Nothing to sweep: return an empty frame with the expected columns
        all_optimal_teams_df = pd.DataFrame(columns = leading_columns)

    # Keep the established column order: the fixed leading columns first,
    # then any further columns the squads carry (e.g. appearance_percentage).
    trailing_columns = [column for column in all_optimal_teams_df.columns if column not in leading_columns]
    all_optimal_teams_df = all_optimal_teams_df.reindex(columns = leading_columns + trailing_columns)

    all_optimal_teams_df['combo_str'] = all_optimal_teams_df['budget_limit'].astype(str) + '-' + all_optimal_teams_df['star_player_limit'].astype(str) + '-' + all_optimal_teams_df['min_player_cost'].astype(str)
    all_optimal_teams_df['combo_list'] = all_optimal_teams_df[['budget_limit','star_player_limit','min_player_cost']].values.tolist()
    return all_optimal_teams_df

####### need to somehow drop the tables to run this again with new variables ########

# Ranges to sweep for each input of the optimal team builder
budget_list = list(range(98, 94, -1))          # 98, 97, 96, 95
star_player_limit_list = list(range(1, 9))     # 1 to 8
min_player_cost_list = [4.5, 5, 5.5, 6]

all_optimal_teams_df = create_optimal_team_combos(budget_list,
                                                  star_player_limit_list,
                                                  min_player_cost_list)

print("'all_optimal_teams_df' dataframe created")  

# Total predicted points (next 3 gameweeks) per parameter combination
all_optimal_teams_pivot = all_optimal_teams_df.pivot_table(values=['nxt_3gw_pred_score'],
                                                           index=['combo_str'],
                                                           aggfunc='sum')

# Flatten the pivot to a dataframe, rename the total and rank the combinations best-first
total_predicted_points_per_combo = (
    pd.DataFrame(all_optimal_teams_pivot.to_records())
    .rename(columns={"nxt_3gw_pred_score": "total_pred_score"})
    .sort_values(by=['total_pred_score'], ascending=False)
    .reset_index(drop=True)
)

time_taken(start_time = start_time)
print("--- all operations complete ---")
total_predicted_points_per_combo.head(20)

In [None]:
### MY TEAM PICKS DATA ###
### Builds a dataframe of my current team picks ###
### Used to see the value of the current team and highlight:
###     - players to use as captain / subs
###     - when to use wildcards
###     - weak players to transfer out

#############################################
#### my_team_picks_df dataframe creation ####
#############################################

# Fetch my team from the API url, reusing the logged-in session created earlier
my_team_ses = session.get(team_url)

# Response -> json -> one dataframe row per entry under 'picks'
my_team_json = my_team_ses.json()
my_team_picks_df = pd.DataFrame(my_team_json['picks'])

# Profit if sold now (same units as the API prices) and the selling price divided by 10 as 'cost_in_mil'
my_team_picks_df['potential_profit'] = my_team_picks_df['selling_price'] - my_team_picks_df['purchase_price']
my_team_picks_df['cost_in_mil'] = my_team_picks_df['selling_price'] / 10

# Pull the player details, multipliers and predicted scores through from slim_player_df
my_team_picks_df = pd.merge(
    my_team_picks_df,
    slim_player_df[['id',
                    'first_name',
                    'second_name',
                    'web_name',
                    'total_points',
                    'points_per_mil',
                    'position_abv',
                    'team_abv',
                    'prev_season_points_per_gw',
                    'nxt_round_play_chance',
                    'nxt_gw_pred_score',
                    'nxt_3gw_pred_score',
                    'gw_multi',
                    'nxt_5_gw_multi',
                    'nxt_5_wk_blended_match_multi',
                    'nxt_3_gw_multi',
                    'nxt_3_wk_blended_match_multi',
                    'player_score_v2']],
    how='left',
    left_on=['element'],
    right_on=['id'],
)

# Team/position lookup key, then order the picks by next-gameweek predicted score (best first)
my_team_picks_df['team_and_position'] = (
    my_team_picks_df['team_abv'].astype(str) + '-' + my_team_picks_df['position_abv'].astype(str)
)
my_team_picks_df = (
    my_team_picks_df
    .sort_values(by=['nxt_gw_pred_score'], ascending=False)
    .reset_index(drop=True)
)

print("--- 'my_team_picks_df' dataframe created ---") 

# Slim version of my team restricted to fields that optimal_team_df also carries
my_team_df = my_team_picks_df.loc[:, ['id',
                                      'web_name',
                                      'position_abv',
                                      'cost_in_mil',
                                      'gw_multi',
                                      'nxt_gw_pred_score',
                                      'nxt_3_gw_multi',
                                      'nxt_3gw_pred_score',
                                      'team_abv',
                                      'player_score_v2']].copy()

# Creating a function to highlight the captain / vice captain choices
def captain_vice_capatin_selection(team_dataframe):
    """Pick a captain and vice captain from the non-goalkeepers of a team.

    Players are ranked by predicted next-gameweek score and by gameweek
    multiplier (highest first in both cases); the two ranks are averaged and
    the average is ranked again. The two best players by that final rank are
    chosen, with the better score rank breaking ties.

    Returns a tuple ``(names, summary)`` where ``names`` is the list
    ``[captain, vice_captain]`` of ``web_name`` values and ``summary`` is the
    text ``'Captain: <captain> / Vice Captain: <vice_captain>'``.
    Raises IndexError when fewer than two non-goalkeepers are supplied.
    """
    # Goalkeepers are never considered; work on a separate copy of the input
    outfield = team_dataframe.loc[team_dataframe['position_abv'] != 'GKP'].copy()

    ranked = outfield.assign(
        score_rank=outfield['nxt_gw_pred_score'].rank(ascending=False),
        multi_rank=outfield['gw_multi'].rank(ascending=False),
    )
    ranked['avg_rank'] = ranked[['score_rank', 'multi_rank']].mean(axis=1)
    ranked['captain_rank'] = ranked['avg_rank'].rank()

    # Best combined rank first; equal ranks are decided by the higher predicted score
    ordered = ranked.sort_values(by=['captain_rank', 'score_rank'])

    my_capt_list = ordered['web_name'].head(2).tolist()
    summary = 'Captain: ' + my_capt_list[0] + ' / Vice Captain: ' + my_capt_list[1]
    return (my_capt_list, summary)

print("--- 'captain_vice_capatin_selection' function created ---")

# Creating funtion to pickout the top 11 and subs from a team dataframe passed through the function
def create_first_11_and_subs(team_dataframe):
    """Split a squad into a starting eleven and substitutes by next-gameweek score.

    The squad is ranked by ``nxt_gw_pred_score`` (highest first). The best
    goalkeeper is placed first, then the remaining places are filled in rank
    order subject to the formation rules:

      * at most 1 GKP, 5 DEF, 5 MID and 3 FWD;
      * at least 1 GKP, 3 DEF and 1 FWD. A player is skipped when picking
        them would leave too few places to reach these minimums. A minimum
        is relaxed to the number of such players actually in the squad.

    Parameters
    ----------
    team_dataframe : DataFrame
        Squad holding at least the columns returned (see below). It is not
        modified.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``(first_11, subs)``. ``first_11`` lists the goalkeeper followed by
        the other starters in rank order; ``subs`` lists everyone else in
        rank order. Both have a fresh 0..n-1 index and the columns ``id``,
        ``web_name``, ``position_abv``, ``team_abv``, ``cost_in_mil``,
        ``gw_multi``, ``nxt_gw_pred_score``, ``nxt_3_gw_multi``,
        ``nxt_3gw_pred_score`` and ``player_score_v2``.

    Raises
    ------
    KeyError
        If a considered player has a ``position_abv`` other than
        GKP/DEF/MID/FWD, or a required column is missing.
    """
    team_columns = ['id',
                    'web_name',
                    'position_abv',
                    'team_abv',
                    'cost_in_mil',
                    'gw_multi',
                    'nxt_gw_pred_score',
                    'nxt_3_gw_multi',
                    'nxt_3gw_pred_score',
                    'player_score_v2']
    first_11_player_limit = 11
    max_per_position = {'GKP': 1, 'DEF': 5, 'MID': 5, 'FWD': 3}
    min_per_position = {'GKP': 1, 'DEF': 3, 'MID': 0, 'FWD': 1}

    # Sorting by nxt_gw_pred_score; the fresh index doubles as a unique row label
    ranked = team_dataframe.sort_values(by=['nxt_gw_pred_score'], ascending=False).reset_index(drop=True)

    # A minimum can only be honoured if the squad really holds that many players
    available = ranked['position_abv'].value_counts()
    still_required = {position: min(minimum, int(available.get(position, 0)))
                      for position, minimum in min_per_position.items()}
    picked = dict.fromkeys(max_per_position, 0)
    starter_labels = []
    chosen = set()

    def try_pick(label, position):
        """Add the row to the starters when the formation rules allow it."""
        if label in chosen or len(starter_labels) >= first_11_player_limit:
            return
        if picked[position] >= max_per_position[position]:
            return
        # Places left after this pick must still cover every outstanding minimum
        places_after = first_11_player_limit - len(starter_labels) - 1
        required_after = sum(still_required.values()) - (1 if still_required[position] > 0 else 0)
        if required_after > places_after:
            return
        starter_labels.append(label)
        chosen.add(label)
        picked[position] += 1
        if still_required[position] > 0:
            still_required[position] -= 1

    # Goalkeeper first so that they head the starting eleven
    goalkeepers = ranked[ranked['position_abv'] == 'GKP']
    for label, position in goalkeepers['position_abv'].items():
        try_pick(label, position)

    # Then everyone in rank order
    for label, position in ranked['position_abv'].items():
        try_pick(label, position)

    sub_labels = [label for label in ranked.index if label not in chosen]

    # Frames are built in one go: DataFrame.append was removed in pandas 2.0
    my_team_first_11 = ranked.loc[starter_labels, team_columns].reset_index(drop=True)
    my_team_subs = ranked.loc[sub_labels, team_columns].reset_index(drop=True)
    return (my_team_first_11, my_team_subs)

print("--- 'create_first_11_and_subs' function created ---")

def create_first_11_and_subs_for_3_gw_average(team_dataframe):
    """Split a squad into a first 11 and subs using the 3-gameweek prediction.

    Players are ranked by ``nxt_3gw_pred_score`` (highest first). The
    best-ranked goalkeeper is picked first, then the remaining places are
    filled in ranking order, taking at most 1 GKP, 5 DEF, 5 MID and 3 FWD and
    at most 11 players overall. Every player whose id is not in the first 11
    becomes a sub.

    NOTE(review): only upper limits per position are applied here; a minimum
    number of defenders / forwards in the first 11 is not checked - confirm
    whether that is required.

    Args:
        team_dataframe: DataFrame with one row per squad player. It must
            contain the columns id, web_name, position_abv, team_abv,
            cost_in_mil, gw_multi, nxt_gw_pred_score, nxt_3_gw_multi,
            nxt_3gw_pred_score and player_score_v2. It is not modified.

    Returns:
        Tuple ``(my_team_first_11, my_team_subs)`` of DataFrames holding the
        ten columns listed above. The first 11 has the goalkeeper first and
        then the other starters in ranking order; subs are in ranking order.

    Raises:
        KeyError: if a required column is missing, or a player considered for
            selection has a position_abv other than GKP, DEF, MID or FWD.
    """
    output_columns = ['id',
                      'web_name',
                      'position_abv',
                      'team_abv',
                      'cost_in_mil',
                      'gw_multi',
                      'nxt_gw_pred_score',
                      'nxt_3_gw_multi',
                      'nxt_3gw_pred_score',
                      'player_score_v2']

    # Best predicted 3-gameweek score first
    ranked = (team_dataframe
              .sort_values(by=['nxt_3gw_pred_score'], ascending=False)
              .reset_index(drop=True))
    row_positions = ranked['position_abv'].tolist()

    places_left = 11
    slots_left = {'GKP': 1, 'DEF': 5, 'MID': 5, 'FWD': 3}

    # Goalkeepers are considered before everyone else so the single GKP slot
    # always goes to the best-ranked goalkeeper.
    goalkeeper_rows = [row for row, pos in enumerate(row_positions) if pos == 'GKP']
    other_rows = [row for row, pos in enumerate(row_positions) if pos != 'GKP']

    starter_rows = []
    for row in goalkeeper_rows + other_rows:
        position = row_positions[row]
        if places_left > 0 and slots_left[position] > 0:
            starter_rows.append(row)
            places_left -= 1
            slots_left[position] -= 1

    # Build both frames in one step each instead of appending row by row
    # (DataFrame.append was removed in pandas 2.0).
    my_team_first_11 = ranked.iloc[starter_rows][output_columns].reset_index(drop=True)
    starter_ids = set(my_team_first_11['id'])
    my_team_subs = (ranked.loc[~ranked['id'].isin(starter_ids), output_columns]
                    .reset_index(drop=True))
    return (my_team_first_11, my_team_subs)

# Progress message printed once create_first_11_and_subs_for_3_gw_average is defined
print("--- 'create_first_11_and_subs_for_3_gw_average' function created ---")

# Ids of the players currently in my team, taken from the id column of my_team_df.
# Later cells use this list to flag players that are already owned.
my_team_id_list = list(my_team_df.id)
print("--- all operations complete ---")
# Optional debugging output, left disabled:
# export_to_csv(my_team_picks_df)
# my_team_picks_df.head(15)

In [None]:
#########################################
### Comparing my team to optimal team ###
#########################################

def _print_projection_stats(first_11_df, subs_df):
    """Print the predicted points and fixture multipliers for one team.

    Args:
        first_11_df: DataFrame of the starting players.
        subs_df: DataFrame of the substitutes.

    Both frames must have the columns nxt_gw_pred_score, gw_multi,
    nxt_3gw_pred_score and nxt_3_gw_multi. Totals are sums of the predicted
    scores and averages are means of the multipliers, rounded to 2 decimals.
    """
    print('Next Gameweek:')
    print('  - First 11 Total Points:       ' + str(round(sum(first_11_df.nxt_gw_pred_score),2)))
    print('  - First 11 Average GW Multi:   ' + str(round(np.mean(first_11_df.gw_multi),2)))
    print('  - Subs Total Points:           ' + str(round(sum(subs_df.nxt_gw_pred_score),2)))
    print('  - Subs Average GW Multi:       ' + str(round(np.mean(subs_df.gw_multi),2)))

    print('Next 3 Gameweeks:')
    print('  - First 11 Total Points:       ' + str(round(sum(first_11_df.nxt_3gw_pred_score),2)))
    print('  - First 11 Average 3 GW Multi: ' + str(round(np.mean(first_11_df.nxt_3_gw_multi),2)))
    print('  - Subs Total Points:           ' + str(round(sum(subs_df.nxt_3gw_pred_score),2)))
    print('  - Subs Average 3 GW Multi:     ' + str(round(np.mean(subs_df.nxt_3_gw_multi),2)))


# adding a column to optimal_team_df to flag players already in my team (1 = in my team, 0 = not)
optimal_team_df['player_in_my_team'] = optimal_team_df['id'].apply(lambda x: 1 if x in my_team_id_list else 0)

# split both squads into a first 11 and subs
my_team_first_11_df, my_team_subs_df = create_first_11_and_subs(my_team_df)
optimal_team_first_11_df, optimal_team_subs_df = create_first_11_and_subs(optimal_team_df)

#######################################################################
print('My Team Stats')
print('-------------')
print('Budget Available:')
# total budget = value of the current squad plus the cash in the bank
print('  - £' + str(round(sum(my_team_df.cost_in_mil)+money_in_bank,1)) + 'm')
print('  - Team Value: £' + str(round(sum(my_team_df.cost_in_mil),1)) + 'm')
print('  - Cash In The Bank: £' + str(money_in_bank) + 'm')
_print_projection_stats(my_team_first_11_df, my_team_subs_df)

print('Captaincy Suggestion: ')
# element [1] of the helper's return value is concatenated as text below
print('  - ' + captain_vice_capatin_selection(team_dataframe = my_team_first_11_df)[1])

print('Subs:')
print('  - ' + str(list(my_team_subs_df.web_name)))

#######################################################################

print('')
print('')

#######################################################################

print('Optimal Team Stats')
print('------------------')
# header no longer carries a stray '£'; the amount on the next line has its own
print('Team Cost:')
print('  - £' + str(round(sum(optimal_team_df.cost_in_mil),1)) + 'm')
_print_projection_stats(optimal_team_first_11_df, optimal_team_subs_df)

#######################################################################

# optimal_team_df

In [None]:
# Show my_team_first_11_df; the bare expression on the last line is what displays the table
print('My First 11:')
my_team_first_11_df

In [None]:
# Show my_team_subs_df, i.e. the players to put on the bench
print('Set the following as subs:')
my_team_subs_df

In [None]:
# Function to suggest a transfer for a player

def transfer_suggestion(df_to_select_from, replacement_position, replacement_budget, player_team, min_appearance_percent = 0.8, current_team_df = None):
    """Return the candidates that could replace a player being transferred out.

    A candidate is kept when all of the following hold:
      - his club does not already supply 3 or more players to the current
        team (the outgoing player's club is exempt, because selling him frees
        a place at that club);
      - his appearance_percentage is at least ``min_appearance_percent``;
      - he is not already in the current team;
      - his position_abv equals ``replacement_position``;
      - his cost_in_mil is no more than ``replacement_budget``.

    Args:
        df_to_select_from: DataFrame of candidate players with the columns
            id, team_abv, position_abv, cost_in_mil and appearance_percentage
            (e.g. the top-points or top-player-score table). Not modified.
        replacement_position: position_abv of the player being replaced.
        replacement_budget: maximum cost_in_mil of the replacement.
        player_team: team_abv of the player being transferred out.
        min_appearance_percent: lowest accepted appearance_percentage.
        current_team_df: DataFrame of the current squad with the columns id
            and team_abv. Defaults to the notebook-level ``my_team_df``.

    Returns:
        A new DataFrame with the rows of ``df_to_select_from`` that pass every
        filter, keeping the original columns, row order and index labels.
    """
    if current_team_df is None:
        current_team_df = my_team_df

    # Clubs that have hit the player limit (3 or more players in the current team)
    players_per_club = current_team_df.groupby(['team_abv']).size()
    club_limit_set = set(players_per_club[players_per_club >= 3].index)
    # The outgoing player's club gets a free slot once he is sold, so it must
    # not be blocked. discard() is a no-op when that club is not at the limit.
    club_limit_set.discard(player_team)

    owned_ids = set(current_team_df['id'])

    keep = (
        ~df_to_select_from['team_abv'].isin(club_limit_set)
        & (df_to_select_from['appearance_percentage'] >= min_appearance_percent)
        & ~df_to_select_from['id'].isin(owned_ids)
        & (df_to_select_from['position_abv'] == replacement_position)
        & (df_to_select_from['cost_in_mil'] <= replacement_budget)
    )
    return df_to_select_from[keep].copy()

# Manual inputs for the transfer suggestion:
# enter the position_abv of the player being replaced, the budget to spend,
# the team_abv of the player being replaced and a minimum appearance percentage.
# Pick between the top-points table and the top-player-score table as the candidate pool.

# df_to_select_from = player_score_top_players
df_to_select_from = points_top_players
replacement_position = 'MID'
replacement_budget = 5.4
player_team = 'SHU'
min_appearance_percent = 0.25

# Candidates that pass every filter in transfer_suggestion for the inputs above
replacement_player_score_df = transfer_suggestion(df_to_select_from = df_to_select_from,
                                                  replacement_position = replacement_position,
                                                  replacement_budget = replacement_budget,
                                                  player_team = player_team,
                                                  min_appearance_percent = min_appearance_percent
                                                 )
# display the suggested replacements
replacement_player_score_df

### Random Bits / In Development

In [None]:
# Quick look at the first 50 rows of slim_player_df
slim_player_df.head(50)

# Sort players by the selected_by_percent column (ascending).
# `by` must be a column label; passing the column itself makes pandas treat
# every value in it as a label to look up.
# NOTE(review): confirm selected_by_percent is numeric here, otherwise the sort is textual.
new_df = slim_player_df.sort_values(by = 'selected_by_percent')



In [None]:
# Ad-hoc check: rows of player_history_df whose element value is 457
# (presumably a single player's id - confirm), showing up to 500 of them
test_df = player_history_df[player_history_df.element == 457]
test_df.head(500)

In [None]:
### Number of games each club has played so far.
### Every fixture from a gameweek before current_gameweek is counted once for its
### away side and once for its home side, then totalled per club short name.
### (Intended as the basis for a "first team player" minutes threshold of 60 mins per match.)

club_games_played_df = (
    pd.concat([
        fix_df.loc[fix_df.gameweek < current_gameweek, ['gameweek', side]].rename(columns={side: 'team_id'})
        for side in ('team_a', 'team_h')
    ])
    # attach the club short name to every (gameweek, team) row
    .merge(club_df[['id', 'short_name']], how = 'left', left_on = ['team_id'], right_on = ['id'])
    .drop(['id'], axis=1)
    # one row per club with the number of fixtures found for it
    .groupby(['short_name'])
    .size()
    .reset_index(name='counts')
    .rename(columns={'short_name':'team_abv','counts':'games_played'})
)

club_games_played_df


In [None]:
# Preview the first 10 rows of fix_df
fix_df.head(10)

In [None]:
# List the top-level keys available in main_json
main_json.keys()


In [None]:
# gameweek multiplier ranking to highlight when to use a bench boost or triple captain

# Work on a copy so the ranking column below is not silently added to slim_fix_df itself
top_gw_muli = slim_fix_df.copy()

# top_gw_muli = slim_fix_df.sort_values(by='match_multiplier', ascending=False).reset_index(drop = True)
# Percentile rank of every match multiplier: the highest multiplier gets the smallest percentage
top_gw_muli['pct_rank'] = top_gw_muli['match_multiplier'].rank(ascending = False, pct=True) * 100

# top_gw_muli.head(50)

In [None]:
# creating a list of all combinations of each position from the top players
# only looking at players whose predicted score for the next 3 gameweeks is above 3

from itertools import combinations 

start_time = time.time()


def _id_combinations_df(player_ids, group_size, column_name):
    """Return a one-column DataFrame with every combination of player ids.

    Args:
        player_ids: list of player ids to choose from.
        group_size: number of ids in each combination.
        column_name: name of the single column of the result.

    Returns:
        DataFrame with one row per combination; each cell holds a tuple of
        ``group_size`` ids. The frame is empty when there are fewer ids than
        ``group_size``.
    """
    # Building the frame from the full list in one go avoids the row-by-row
    # DataFrame.append, which copies the whole frame on every call and was
    # removed in pandas 2.0.
    groups = list(combinations(player_ids, group_size))
    return pd.DataFrame({column_name: pd.Series(groups, dtype=object)})


points_top_players_over_3_points_df = points_top_players[points_top_players.nxt_3gw_pred_score > 3].copy()

# one table of eligible players per position
top_GKP_df = points_top_players_over_3_points_df[points_top_players_over_3_points_df.position_abv == 'GKP'].copy()
top_DEF_df = points_top_players_over_3_points_df[points_top_players_over_3_points_df.position_abv == 'DEF'].copy()
top_MID_df = points_top_players_over_3_points_df[points_top_players_over_3_points_df.position_abv == 'MID'].copy()
top_FWD_df = points_top_players_over_3_points_df[points_top_players_over_3_points_df.position_abv == 'FWD'].copy()

top_GKP_id_list = list(top_GKP_df.id)
top_DEF_id_list = list(top_DEF_df.id)
top_MID_id_list = list(top_MID_df.id)
top_FWD_id_list = list(top_FWD_df.id)

print('top_GKP_id_list length: ' + str(len(top_GKP_id_list)))
print('top_DEF_id_list length: ' + str(len(top_DEF_id_list)))
print('top_MID_id_list length: ' + str(len(top_MID_id_list)))
print('top_FWD_id_list length: ' + str(len(top_FWD_id_list)))

# All combinations of 2 GKPs from the top players
GKP_combo_df = _id_combinations_df(top_GKP_id_list, 2, 'GKP_id_combinations')
print('GKP Combinations:')
print(len(GKP_combo_df))    

# All combinations of 5 DEFs from the top players
DEF_combo_df = _id_combinations_df(top_DEF_id_list, 5, 'DEF_id_combinations')
print('DEF Combinations:')
print(len(DEF_combo_df))   

# All combinations of 5 MIDs from the top players
MID_combo_df = _id_combinations_df(top_MID_id_list, 5, 'MID_id_combinations')
print('MID Combinations:')
print(len(MID_combo_df))   

# All combinations of 3 FWDs from the top players
FWD_combo_df = _id_combinations_df(top_FWD_id_list, 3, 'FWD_id_combinations')
print('FWD Combinations:')
print(len(FWD_combo_df))   

### time taken to run message ### 
seconds_to_run = (time.time() - start_time)
minutes = str(math.floor(seconds_to_run / 60)) + ' minutes '
seconds = str(round(seconds_to_run % 60)) + ' seconds '
print("--- Time taken to run: " + minutes + seconds + " ---")
#################################

# points_top_players

# Output from the last recorded run (made with the old row-by-row append, hence the long run time)
# top_GKP_id_list length: 7
# top_DEF_id_list length: 28
# top_MID_id_list length: 28
# top_FWD_id_list length: 10
# GKP Combinations:
# 21
# DEF Combinations:
# 98280
# MID Combinations:
# 98280
# FWD Combinations:
# 120
# --- Time taken to run: 20 minutes 49 seconds  ---    


In [None]:
## Testing 'combinations' as a way to create every possible combination of players.
## This will not be feasible for full squads: by my calculations it would take 33 billion millennia to run.

## Idea: build separate lists of GKP, DEF, MID and FWD and pass those through combinations instead, possibly using product?
from itertools import combinations  # imported here so the cell does not depend on another cell having run

start_time = time.time()

# creating a list of all player ids
all_player_id_list = list(player_score_top_players.id)

# Every combination of players from all_player_id_list, one tuple of ids per row.
# The real target is groups of 15 players; testing with groups of 2 for now.
# The frame is built in one step: appending row by row copies the whole frame
# each time and DataFrame.append was removed in pandas 2.0.
all_player_combo_df = pd.DataFrame(
    {'id_combinations': pd.Series(list(combinations(all_player_id_list, 2)), dtype=object)}
)

print("--- Time taken to run: %s seconds ---" % (time.time() - start_time))


In [None]:
## Testing 'combinations' as a way to create every possible combination of players.
## This will not be feasible for full squads: by my calculations it would take 33 billion millennia to run.

## Idea: build separate lists of GKP, DEF, MID and FWD and pass those through combinations instead, possibly using product?
from itertools import combinations  # imported here so the cell does not depend on another cell having run

start_time = time.time()

# creating a list of all player ids
all_player_id_list = list(player_score_top_players.id)

# Every combination of players from all_player_id_list, one tuple of ids per row.
# The real target is groups of 15 players; testing with groups of 2 for now.
# The frame is built in one step: appending row by row copies the whole frame
# each time and DataFrame.append was removed in pandas 2.0.
all_player_combo_df = pd.DataFrame(
    {'id_combinations': pd.Series(list(combinations(all_player_id_list, 2)), dtype=object)}
)

print("--- Time taken to run: %s seconds ---" % (time.time() - start_time))

# making a copy of the above dataframe and splitting each pair into its two ids
all_player_combo_df2 = all_player_combo_df.copy()
all_player_combo_df2[['id1', 'id2']] = pd.DataFrame(all_player_combo_df2['id_combinations'].tolist(), index=all_player_combo_df2.index)  

# look up cost, club, position and predicted score of the first player of each pair
all_player_combo_df2 = all_player_combo_df2.merge(points_top_players[['id','cost_in_mil','team_abv','position_abv','nxt_3gw_pred_score']],
                                                  how = 'left', left_on = ['id1'], right_on = ['id']).copy()
all_player_combo_df2 = all_player_combo_df2.drop(['id'], axis=1)

# same lookup for the second player; the clashing column names get pandas'
# default '_x' (first player) and '_y' (second player) suffixes
all_player_combo_df2 = all_player_combo_df2.merge(points_top_players[['id','cost_in_mil','team_abv','position_abv','nxt_3gw_pred_score']],
                                                  how = 'left', left_on = ['id2'], right_on = ['id']).copy()
all_player_combo_df2 = all_player_combo_df2.drop(['id'], axis=1)

# give the suffixed columns readable id1_ / id2_ names
all_player_combo_df2 = pd.DataFrame(all_player_combo_df2.to_records()).rename(columns={'cost_in_mil_x':        'id1_cost_in_mil',
                                                                                       'team_abv_x':           'id1_team_abv',
                                                                                       'position_abv_x':       'id1_position_abv',
                                                                                       'nxt_3gw_pred_score_x': 'id1_pred_score',
                                                                                       'cost_in_mil_y':        'id2_cost_in_mil',
                                                                                       'team_abv_y':           'id2_team_abv',
                                                                                       'position_abv_y':       'id2_position_abv',
                                                                                       'nxt_3gw_pred_score_y': 'id2_pred_score'
                                                                                      })

# combined cost and combined predicted points of each pair
all_player_combo_df2['total_cost'] = all_player_combo_df2.id1_cost_in_mil + \
                                     all_player_combo_df2.id2_cost_in_mil

all_player_combo_df2['total_points'] = all_player_combo_df2.id1_pred_score + \
                                       all_player_combo_df2.id2_pred_score

# how many players of each position the pair contains (0, 1 or 2)
for position_abv in ('GKP', 'DEF', 'MID', 'FWD'):
    all_player_combo_df2[position_abv + '_count'] = (
        (all_player_combo_df2['id1_position_abv'] == position_abv).astype(int)
        + (all_player_combo_df2['id2_position_abv'] == position_abv).astype(int)
    )


all_player_combo_df2.head()



In [None]:
##############################################
####### Show Datatypes Of Dataframe ##########
##############################################

# dataTypeSeries = <insert dataframe name>.dtypes
# print('Data type of each column of Dataframe :')
# print(dataTypeSeries)

In [None]:
###########################################
### print column names from a dataframe ###
###########################################

# prints every column name of club_df, one per line
for col in club_df.columns: 
    print(col) 

In [None]:
## LOGIC TO MAKE A PIVOT TABLE FOR AVERAGE COST PER POSITION (ONLY PLAYERS WHO HAVE PLAYED) AND CONVERT TO A DATAFRAME ##
# The plan was to blend this with player value (points per mil),
# but I realised this would be detrimental to those players who cost less than the average,
# so I shelved this logic.

# There might be something in a blend: use it if the player costs more than average, else just use the points per mil,
# so that more expensive players are not penalised as much for their higher price.

# Without logic like this the very top expensive players like Salah would not have made it into my 2020 team
# and therefore not banked 40 points as my captain in Gameweek 1!

# Adding the average cost of each position (the points per average mil cost of position is not calculated here):

# A cut of full_player_df with only players whose minutes value is not 0
player_has_played_df = full_player_df[full_player_df.minutes != 0]
# Pivot table of the mean cost_in_mil per position_abv (only using the players kept above)
average_cost_pivot = player_has_played_df.pivot_table(index=['position_abv'],
                                                      values=['cost_in_mil'], 
                                                      aggfunc='mean')
# Converting the pivot table back to a plain dataframe and renaming the cost column
average_cost_df = pd.DataFrame(average_cost_pivot.to_records()).rename(columns={"cost_in_mil": "avg_position_cost"})

In [None]:
# https://github.com/vaastav/Fantasy-Premier-League/tree/master/data
# GitHub for another FPL moneyball

In [None]:
# Make a dataframe from player history_past which covers all previous seasons, 
# Rather than play_history which is just the current season player history

# test_url = 'https://fantasy.premierleague.com/api/element-summary/1/'
# test_req = requests.get(test_url)
# test_json = test_req.json()
# test_df = pd.DataFrame(test_json['history_past'])
# test_df.head(10)

In [None]:
## old logic for player score  

#    - Use total points per million
#    - Add on the previous season's total points per million (divided by 38 and multiplied by the current gameweek number - 1, so that it covers the same number of gameweeks as the current season's total points)
#    - The next-3-gameweek blended multiplier is then applied (weighted toward the next gameweek)
#    - Multiplied by the chance to play next round
#    - Then any players with a 'Next 3 gameweek blended multiplier' of less than 1 are removed.
    
    

##################################
###### Player Score V1 Calc ######
##################################

# Player score v1 calc = 
    # if E = True then 0.0 else ((A+B) X C) X D
        # A - Current season points per mil (Float)
        # B - Previous season points per mil (Float)
        # C - Next 3 Gameweek Multiplier (Float)
        # D - Chance to play next round (Float)
        # E - Has Negative Multiplier (Boolean)

## create a variable containing the player score v1 calculation        
# calc_player_score_v1 = (((slim_player_df.points_per_mil +
#                        slim_player_df.prev_season_points_per_mil) *
#                       slim_player_df.nxt_3_wk_blended_match_multi) *
#                      slim_player_df.nxt_round_play_chance) * slim_player_df['nxt_3_wk_blended_match_multi'].apply(lambda x: 0.0 if x < 1 else 1.0)

# slim_player_df['player_score_v1'] = calc_player_score_v1

In [None]:
### Logic to make optimal team from version 1 of the player score. potentially use this instead of predicted score logic?


# ##########################################################
# ### TOP POINTS PLAYERS AND TOP PLAYER SCORE DATAFRAMES ###
# ###           BASED ON PLAYER SCORE CALC V1            ###
# ##########################################################


# ##################################################################
# # Creating a data frame of players ordered by their total points #
# ##################################################################

# points_top_players = slim_player_df[['cost_in_mil',
#                                          'team_abv',
#                                          'position_abv',
#                                          'web_name',
#                                          'id',
#                                          'total_points_inc_prv_season',
#                                          'gw_multi',
#                                          'nxt_3_wk_blended_match_multi',
#                                          'player_score_v1'
#                                         ]].copy()

# points_top_players = points_top_players[points_top_players['player_score_v1'] > 0.0]
# points_top_players = points_top_players[points_top_players['nxt_3_wk_blended_match_multi'] > 0.98]
# points_top_players = points_top_players.sort_values(by=['total_points_inc_prv_season'], ascending=False).reset_index(drop = True)


# ###########################################################################################################
# # Creating a data frame of players ordered by their player score (to get the most cost effective players) #
# ###########################################################################################################

# player_score_top_players = slim_player_df[['cost_in_mil',
#                                                'team_abv',
#                                                'position_abv',
#                                                'web_name',
#                                                'id',
#                                                'total_points_inc_prv_season',
#                                                'gw_multi',
#                                                'nxt_3_wk_blended_match_multi',
#                                                'player_score_v1'
#                                               ]].copy()

# player_score_top_players = player_score_top_players[player_score_top_players['player_score_v1'] > 0.0]
# player_score_top_players = player_score_top_players[player_score_top_players['nxt_3_wk_blended_match_multi'] > 0.98]
# player_score_top_players = player_score_top_players.sort_values(by=['player_score_v1'], ascending=False).reset_index(drop = True)


# # print('Table of Top Players Ordered By Total Points:')
# # points_top_players.head(20)

# # print('Table of Top Players Ordered By Player Score:')
# # player_score_top_players.head(20)

# ################################################
# ### Optimal team logic Using Player Score V1 ###
# ################################################

# # Function which produces the optimal team based on:
#     # A specified number of star players (picking solely based on top points)
#     # Filling the rest of the team based on points per million 
#     # A set the budget you want to spend on your team
#     # Specify the minimum cost you want left to spend on the final players (most likely your subs)

# def get_money_team_objects(points_top_players=points_top_players, 
#                            player_score_top_players=player_score_top_players,
#                            budget = 100, 
#                            star_player_limit = 3, 
#                            gk = 2, 
#                            df = 5, 
#                            md = 5, 
#                            fwd = 3, 
#                            min_player_cost = 4.5):
    
#     money_team = pd.DataFrame(columns = ['id', 'web_name', 'position_abv', 'cost_in_mil', 'total_points_inc_prv_season', 'team_abv'])
#     star_player_limit = star_player_limit
#     budget = budget
#     positions = {'GKP': gk, 'DEF': df, 'MID': md, 'FWD': fwd}
#     club_player_count = dict((club, 3) for club in list(club_df['short_name'].unique()))
#     players_left = 15
#     min_player_cost = min_player_cost
    
#     for index, player in points_top_players.iterrows():
#         if len(money_team) < star_player_limit and budget >= player.cost_in_mil and positions[player.position_abv] > 0  and club_player_count[player.team_abv] > 0 and (budget - player.cost_in_mil) >= ((players_left - 1) * min_player_cost):
#             my_dict = {'id': player.id,
#                        'web_name': player.web_name, 
#                        'position_abv': player.position_abv, 
#                        'cost_in_mil': player.cost_in_mil, 
#                        'total_points_inc_prv_season': player.total_points_inc_prv_season, 
#                        'team_abv': player.team_abv}
#             money_team = money_team.append(my_dict, ignore_index=True)
#             budget -= player.cost_in_mil
#             players_left = players_left - 1 
#             positions[player.position_abv] = positions[player.position_abv] -1
#             club_player_count[player.team_abv] = club_player_count[player.team_abv] -1
#     for index, player in player_score_top_players.iterrows():
#         if player.id not in money_team['id'].to_list() and budget >= player.cost_in_mil and positions[player.position_abv] > 0 and club_player_count[player.team_abv] > 0 and (budget - player.cost_in_mil) >= ((players_left - 1) * min_player_cost):
#             my_dict2 = {'id':player.id,
#                         'web_name': player.web_name, 
#                         'position_abv': player.position_abv, 
#                         'cost_in_mil': player.cost_in_mil, 
#                         'total_points_inc_prv_season': player.total_points_inc_prv_season, 
#                         'team_abv': player.team_abv}
#             money_team = money_team.append(my_dict2, ignore_index=True) 
#             budget -= player.cost_in_mil
#             players_left = players_left - 1 
#             positions[player.position_abv] = positions[player.position_abv] -1
#             club_player_count[player.team_abv] = club_player_count[player.team_abv] -1
#     return money_team

# optimal_team_df = get_money_team_objects(star_player_limit = 6, budget = 99, min_player_cost = 5)
# print('Total Team Points: ' + str(optimal_team_df['total_points_inc_prv_season'].sum()))
# print('Total Team Cost: ' + str(optimal_team_df['cost_in_mil'].sum()))

# optimal_team_df.head(15)