In [1]:
import pandas as pd
import numpy as np
#import utils
import io
import os
from ipywidgets import IntProgress
from IPython.display import display
import itertools
import matplotlib.pyplot as plt
import time
from datetime import datetime
import math
import unidecode
import requests
import seaborn as sns
import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from scipy import stats
from scipy.stats import norm, skew, pearsonr #for some statistics
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import re
import sys, getopt
import csv
import random
from collections import Counter
pd.set_option('display.max_columns', None)

pd.options.display.max_columns=999
pd.options.display.max_rows=100

import warnings
warnings.filterwarnings('ignore')

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

### Function to scrape FBRef for the last five games
### Functions to make finished spreadsheets for plain xG and p90 xG, respectively

In [2]:
def scrapeURL(url, homeoraway):
    res = requests.get(url)
    ## The next two lines get around the issue with comments breaking the parsing
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("",res.text),'lxml')
    all_tables = soup.findAll("tbody")

    if (homeoraway == "home"):
        summary_table = all_tables[0] # 'player', minutes', 'goals', 'assists', 'shots_total', shots_on_target', 'xg', 'xa'
        passing_table = all_tables[1] # 'assisted_shots', 'passes_completed'
        misc_table = all_tables[5] # 'crosses' ,'fouled', 'fouls', 'tackles_won', 'interceptions', 'cards_yellow', 'cards_red'
    elif (homeoraway == "away"):
        summary_table = all_tables[7]
        passing_table = all_tables[8]
        misc_table = all_tables[12]
    else:
        return("Have to supply home or away")

    # parse each table and create dict
    pre_df_player = dict()
    features_from_summary =["shirtnumber", "minutes", "goals", "assists", "shots_total", "shots_on_target", "xg", "xa"]
    features_from_passing = ["assisted_shots", "passes_completed"]
    features_from_misc = ["crosses", "fouled", "fouls", "tackles_won", "interceptions", "cards_yellow"]

    rows_summary = summary_table.find_all('tr')
    rows_passing = passing_table.find_all('tr')
    rows_misc = misc_table.find_all('tr')
    for row in range(len(rows_summary)):
        # get the names of each player
        cell = rows_summary[row].find("th", {"data-stat": "player"})
        text = cell.text.strip()
        text = unidecode.unidecode(text)
        if "player" in pre_df_player:
            pre_df_player["player"].append(text)
        else:
            pre_df_player["player"] = [text]

        for feature in features_from_summary:
            cell = rows_summary[row].find("td", {"data-stat": feature})
            if cell.text == "":
                stat = 0.0
            else:
                stat = float(cell.text)
            if feature in pre_df_player:
                pre_df_player[feature].append(stat)
            else:
                pre_df_player[feature] = [stat]

        for feature in features_from_passing:
            cell = rows_passing[row].find("td", {"data-stat": feature})
            if cell.text == "":
                stat = 0.0
            else:
                stat = float(cell.text)
            if feature in pre_df_player:
                pre_df_player[feature].append(stat)
            else:
                pre_df_player[feature] = [stat]

        for feature in features_from_misc:
            cell = rows_misc[row].find("td", {"data-stat": feature})
            if cell.text == "":
                stat = 0.0
            else:
                stat = float(cell.text)
            if feature in pre_df_player:
                pre_df_player[feature].append(stat)
            else:
                pre_df_player[feature] = [stat]

    df_player = pd.DataFrame.from_dict(pre_df_player)
    df_player = df_player.set_index("player").sort_values(by="player")
    
    df_player['starts'] = 0
    df_player['Start_Mins'] = 0
    df_player['subs'] = 0
    df_player['Sub_Mins'] = 0
    df_player['FPTS'] = df_player['goals']*10 + df_player['assists']*6 + \
                        df_player['shots_total'] + df_player['shots_on_target'] + \
                        df_player['assisted_shots'] + df_player['passes_completed']*0.02 + \
                        df_player['crosses']*0.7 + df_player['fouled'] - \
                        df_player['fouls']*0.5 + df_player['tackles_won'] + \
                        df_player['interceptions']*0.5 - df_player['cards_yellow']*1.5
    
    df_player['FPTS_min'] = np.nan
    for name in df_player.index:
        df_player.loc[name,'FPTS_min'] = ( df_player.loc[name]['FPTS'] / max(30,df_player.loc[name]['minutes']) )*90
    for name in df_player.index:
        if (df_player.loc[name]['minutes'] >= 45):
            df_player.loc[name, 'starts'] = 1
            df_player.loc[name, 'Start_Mins'] = df_player.loc[name]['minutes']
        else:
            df_player.loc[name, 'subs'] = 1
            df_player.loc[name, 'Sub_Mins'] = df_player.loc[name]['minutes'] 
    return(df_player)
    #return(pre_df_player)



def make_finished_spreadsheet(starters, team_names, team_abbrevs, predictions_matrix, model, date):
    starters["90s"] = starters["90s"].astype('float')
    for i in range(len(starters)):
        if starters["90s"][i] > 67.5:
            starters["90s"][i] = starters["90s"][i]/90
        else:
            starters["90s"][i] = 0.75
    starters["Floor"] = (starters["Sh"] + starters["SoT"] + starters["Crs"]*0.7 + starters["KP"] + starters["Pass_Cmp"]*0.02 + starters["Fld"] - starters["Fls"]*0.5 + starters["TklW"] + starters["Int"]*0.5) / starters["90s"]
    starters["FPTS"] = (starters["Gls"]*10 + starters["Ast"]*6 + starters["Sh"] + starters["SoT"] + starters["Crs"]*0.7 + starters["KP"] + starters["Pass_Cmp"]*0.02 + starters["Fld"] - starters["Fls"]*0.5 + starters["TklW"] + starters["Int"]*0.5 - starters["CrdY"]*1.5) / starters["90s"]

    # Minutes (to simplify some calculations)
    starters["Proj_Mins"] = 0.1
    for i in range(len(starters)):
        if (starters["Starting"][i] == "y"):
            starters["Proj_Mins"][i] = starters["Start_Mins"][i]
        else:
            starters["Proj_Mins"][i] = starters["Sub_Mins"][i]

    starters_final = pd.DataFrame()
    for game_i in range(len(team_abbrevs)):
        home_name = team_names[game_i][0]
        away_name = team_names[game_i][1]
        home_abbrev = team_abbrevs[game_i][0]
        away_abbrev = team_abbrevs[game_i][1]
        game_starters = starters[(starters['Team'] == home_abbrev) | (starters['Team'] == away_abbrev)]
        game_starters = game_starters.reset_index().drop(columns=['index'])
        game_preds = predictions_matrix[(predictions_matrix['Team'] == home_name) | (predictions_matrix['Team'] == away_name)]
        game_preds = game_preds.reset_index().drop(columns=['index'])
        # xG
        game_starters["xG_p90"] = game_starters['xG']/game_starters['90s'] # create a p90 column for the stat
        game_starters["Raw_Goals"] = game_starters["xG_p90"] * (starters['Proj_Mins']/90) # this is (stat_p90)*projmins
        home_stat = sum(game_starters[game_starters['Team'] == home_abbrev]["Raw_Goals"]) # get the sum (stat_p90)*proj_mins for each team
        away_stat = sum(game_starters[game_starters['Team'] == away_abbrev]["Raw_Goals"])
        game_starters['Goal_Share'] = 0.1
        for i in range(len(game_starters['Player'])):
            if (game_starters['Team'][i] == home_abbrev):
                game_starters['Goal_Share'][i] = game_starters['Raw_Goals'][i] / home_stat
            else:
                game_starters['Goal_Share'][i] = game_starters['Raw_Goals'][i] / away_stat
        game_starters['Team_Goal_Proj'] = 0.1
        #print(game_preds)
        for i in range(len(game_starters['Player'])):
            if (game_starters["Team"][i] == home_abbrev):
                game_starters["Team_Goal_Proj"][i] = game_preds["Real_Goals"][0]
            else:
                game_starters["Team_Goal_Proj"][i] = game_preds["Real_Goals"][1]
        game_starters['Proj_Gls'] = game_starters['Goal_Share'] * game_starters['Team_Goal_Proj']

        # xA
        game_starters["xA_p90"] = game_starters['xA']/game_starters['90s'] # create a p90 column for the stat
        game_starters["Raw_Ast"] = game_starters["xA_p90"] * (game_starters['Proj_Mins']/90) # this is (stat_p90)*projmins
        home_stat = sum(game_starters[game_starters['Team'] == home_abbrev]["Raw_Ast"]) # get the sum (stat_p90)*proj_mins for each team
        away_stat = sum(game_starters[game_starters['Team'] == away_abbrev]["Raw_Ast"])
        game_starters['Ast_Share'] = 0.1
        for i in range(len(game_starters['Player'])):
            if (game_starters['Team'][i] == home_abbrev):
                game_starters['Ast_Share'][i] = game_starters['Raw_Ast'][i] / home_stat
            else:
                game_starters['Ast_Share'][i] = game_starters['Raw_Ast'][i] / away_stat
        game_starters['Team_Ast_Proj'] = 0.1
        for i in range(len(game_starters['Player'])):
            if (game_starters["Team"][i] == home_abbrev):
                game_starters["Team_Ast_Proj"][i] = game_preds["Real_Goals"][0]
            else:
                game_starters["Team_Ast_Proj"][i] = game_preds["Real_Goals"][1]
        game_starters['Proj_Ast'] = game_starters['Ast_Share'] * game_starters['Team_Ast_Proj']


        # NonGoal Stats
        statnames = ['Sh', 'SoT', 'KP', 'Fls', 'Fld', 'Crs', 'TklW', 'Int']
        for stat in statnames:
            game_starters[stat+"_p90"] = game_starters[stat]/game_starters['90s'] # create a p90 column for the stat
            game_starters["Raw_"+stat] = game_starters[stat+"_p90"] * game_starters['Proj_Mins'] # this is (stat_p90)*projmins
            home_stat = sum(game_starters[game_starters['Team'] == home_abbrev]["Raw_"+stat]) # get the sum (stat_p90)*proj_mins for each team
            away_stat = sum(game_starters[game_starters['Team'] == away_abbrev]["Raw_"+stat])
            game_starters[stat+'_Share'] = 0.1
            for i in range(len(game_starters['Player'])):
                if (game_starters['Team'][i] == home_abbrev):
                    game_starters[stat+'_Share'][i] = game_starters['Raw_'+stat][i] / home_stat
                else:
                    game_starters[stat+'_Share'][i] = game_starters['Raw_'+stat][i] / away_stat
            game_starters['Team_'+stat+'_Proj'] = 0.1
            for i in range(len(game_starters['Player'])):
                if (game_starters["Team"][i] == home_abbrev):
                    game_starters["Team_"+stat+"_Proj"][i] = game_preds["Real_"+stat][0]
                else:
                    game_starters["Team_"+stat+"_Proj"][i] = game_preds["Real_"+stat][1]
            game_starters['Proj_'+stat] = game_starters[stat+'_Share'] * game_starters['Team_'+stat+'_Proj']        


        # Fill NA's with zeros for later arithmetic
        game_starters = game_starters.fillna(0)

        # Pts_w_StartMins
        game_starters["Pts_w_StartMins"] = 0.1
        for i in range(len(game_starters["Player"])):
            if (game_starters["Starting"][i] == "y"):
                game_starters["Pts_w_StartMins"][i] = game_starters["Proj_Gls"][i]*10 + game_starters["Proj_Ast"][i]*6 + \
                                                 game_starters["Proj_Sh"][i] + game_starters["Proj_SoT"][i] +    \
                                                 game_starters["Proj_KP"][i] + game_starters["Proj_Crs"][i]*0.7 +   \
                                                 game_starters["Proj_Fld"][i] - game_starters["Proj_Fls"][i]*0.5 +  \
                                                 game_starters["Proj_TklW"][i] + game_starters["Proj_Int"][i]*0.5 + \
                                                (game_starters["Pass_Cmp"][i]*0.02)/game_starters["90s"][i] *(game_starters["Start_Mins"][i]/90)
            else:
                game_starters["Pts_w_StartMins"][i] = game_starters["Proj_Gls"][i]*10 + game_starters["Proj_Ast"][i]*6 + \
                                                 game_starters["Proj_Sh"][i] + game_starters["Proj_SoT"][i] +    \
                                                 game_starters["Proj_KP"][i] + game_starters["Proj_Crs"][i]*0.7 +   \
                                                 game_starters["Proj_Fld"][i] - game_starters["Proj_Fls"][i]*0.5 +  \
                                                 game_starters["Proj_TklW"][i] + game_starters["Proj_Int"][i]*0.5 + \
                                                (game_starters["Pass_Cmp"][i]*0.02)/game_starters["90s"][i] * (game_starters["Sub_Mins"][i]/90)
        starters_final = pd.concat([starters_final, game_starters])

    starters_final = starters_final.reset_index()

    # Now get goalkeeper projections that are actually based on gk stats and scoring
    data = pd.read_csv("Shotlogs/"+regression_league+"_shotlog2021.csv")
    # Create a logistic regression to predict clean sheet odds,
    # because CS odds stopped being scrapeable online
    data['Opp_538_Goals'] = np.nan
    for i in range(len(data)):
        if (data['HorA'][i] == 'H'): data['Opp_538_Goals'][i] = data['FiveThirtyEight_Goals'][i+1]
        elif (data['HorA'][i] == 'A'): data['Opp_538_Goals'][i] = data['FiveThirtyEight_Goals'][i-1]
    model_data = data[data['VOID'] != 'void']
    model_data = model_data[~pd.isna(model_data['Real_Goals'])]
    model_data = model_data[~pd.isna(model_data['FiveThirtyEight_Goals'])].reset_index().drop(columns=['index'])
    model_data['CS'] = 0
    for i in range(len(model_data)):
        if (model_data['HorA'][i] == 'H'):
            if (model_data['Real_Goals'][i+1] == 0): model_data['CS'][i] = 1
        elif(model_data['HorA'][i] == 'A'):
            if (model_data['Real_Goals'][i-1] == 0): model_data['CS'][i] = 1
    X = np.array(model_data['Opp_538_Goals']).reshape(-1,1)
    y = np.array(model_data['CS'])
    cs_model = LogisticRegression().fit(X,y)
    #data = data[pd.isna(data['Real_Goals'])] # focus on upcoming games
    if ('/' in str(data['Date'][0])):
        for i in range(len(data)):
            year ='20' + data['Date'][i].split('/')[2]
            month = data['Date'][i].split('/')[0]
            if (int(month) < 10): month = '0' + month
            day = data['Date'][i].split('/')[1]
            if (int(day) < 10): day = '0' + day
            data['Date'][i] = int(year+month+day)
    data = data[data['Date'] >= int(date)]
    gk_df = pd.DataFrame(columns=['Player', 'Team', 'Pos', 'Salary', 'proj_GA', 'proj_Saves', 'WinOdds', 'CSOdds', 'Pts_w_StartMins'],
                         index=list(range(len(team_names)*2)))
    gk_df['Pos'] = 'GK'
    gk_ctr=0
    for game_i in range(len(team_names)):
        home_name = team_names[game_i][0]
        away_name = team_names[game_i][1]
        home_abbrev = team_abbrevs[game_i][0]
        away_abbrev = team_abbrevs[game_i][1]
        
        #home gk
        gk_i = starters_final[(starters_final['Pos'] == 'GK') & (starters_final['Team'] == home_abbrev)].index[0]
        proj_GoalsA = predictions_matrix[predictions_matrix['Opp'] == home_name]['Real_Goals'].iloc[0]
        proj_SoTA = predictions_matrix[predictions_matrix['Opp'] == home_name]['Real_SoT'].iloc[0]
        win_odds = data[data['Team'] == home_name]['Win_Odds'].iloc[0]
        cs_odds = cs_model.predict_proba(data[data['Team'] == home_name]['Opp_538_Goals'].iloc[0].reshape(-1,1))[0][1]*100
        starters_final['Pts_w_StartMins'][gk_i] = (win_odds/100)*5 + (cs_odds/100)*5 + (proj_SoTA - proj_GoalsA)*2 - proj_GoalsA*2
        gk_df['Player'][gk_ctr] = starters_final['Player'][gk_i]
        gk_df['Team'][gk_ctr] = home_abbrev
        gk_df['Salary'][gk_ctr] = starters_final['Salary'][gk_i]
        gk_df['proj_GA'][gk_ctr] = proj_GoalsA
        gk_df['proj_Saves'][gk_ctr] = proj_SoTA - proj_GoalsA
        gk_df['WinOdds'][gk_ctr] = win_odds
        gk_df['CSOdds'][gk_ctr] = cs_odds
        gk_df['Pts_w_StartMins'][gk_ctr] = starters_final['Pts_w_StartMins'][gk_i]
        gk_ctr+=1
        #away_gk
        gk_i = starters_final[(starters_final['Pos'] == 'GK') & (starters_final['Team'] == away_abbrev)].index[0]
        proj_GoalsA = predictions_matrix[predictions_matrix['Opp'] == away_name]['Real_Goals'].iloc[0]
        proj_SoTA = predictions_matrix[predictions_matrix['Opp'] == away_name]['Real_SoT'].iloc[0]
        win_odds = data[data['Team'] == away_name]['Win_Odds'].iloc[0]
        cs_odds = cs_model.predict_proba(data[data['Team'] == away_name]['Opp_538_Goals'].iloc[0].reshape(-1,1))[0][1]*100
        starters_final['Pts_w_StartMins'][gk_i] = (win_odds/100)*5 + (cs_odds/100)*5 + (proj_SoTA - proj_GoalsA)*2 - proj_GoalsA*2
        gk_df['Player'][gk_ctr] = starters_final['Player'][gk_i]
        gk_df['Team'][gk_ctr] = away_abbrev
        gk_df['Salary'][gk_ctr] = starters_final['Salary'][gk_i]
        gk_df['proj_GA'][gk_ctr] = proj_GoalsA
        gk_df['proj_Saves'][gk_ctr] = proj_SoTA - proj_GoalsA
        gk_df['WinOdds'][gk_ctr] = win_odds
        gk_df['CSOdds'][gk_ctr] = cs_odds
        gk_df['Pts_w_StartMins'][gk_ctr] = starters_final['Pts_w_StartMins'][gk_i]
        gk_ctr+=1

    starters_final["Player_"] = starters_final["Player"] # add name column on right side for easy reading
    starters_final = starters_final.sort_values(by="Pts_w_StartMins", ascending=False).reset_index().drop(columns=["index"])

    cols = ["Player", "Team", "Pos", "Salary", "90s", "Gls", "Ast", "Sh", "SoT", "Crs", "KP", "Pass_Cmp",
            "Fld", "Fls", "TklW", "Int", "CrdY", "Floor", "FPTS", "FPTS_med", "xG", "xA",
            "Raw_Goals", "Goal_Share", "Team_Goal_Proj", "Proj_Gls",
            "Raw_Ast", "Ast_Share", "Team_Ast_Proj", "Proj_Ast",
            "Raw_Sh", "Sh_Share", "Team_Sh_Proj", "Proj_Sh",
            "Raw_SoT", "SoT_Share", "Team_SoT_Proj", "Proj_SoT",
            "Raw_KP", "KP_Share", "Team_KP_Proj", "Proj_KP",
            "Raw_Crs", "Crs_Share", "Team_Crs_Proj", "Proj_Crs",
            "Raw_Fld", "Fld_Share", "Team_Fld_Proj", "Proj_Fld",
            "Raw_Fls", "Fls_Share", "Team_Fls_Proj", "Proj_Fls",
            "Raw_TklW", "TklW_Share", "Team_TklW_Proj", "Proj_TklW",
            "Raw_Int", "Int_Share", "Team_Int_Proj", "Proj_Int",
            "Pts_w_StartMins", "Start_Mins", "Sub_Mins", "Starting", "starts", "subs", "Player_"]
    starters_final = starters_final[cols]
    
    starters_final['Proj_Mins'] = np.nan
    for i in starters_final.index:
        if (starters_final['Starting'][i] == 'y'): starters_final['Proj_Mins'][i] = starters_final['Start_Mins'][i]
        elif (starters_final['Starting'][i] == 'b'): starters_final['Proj_Mins'][i] = starters_final['Sub_Mins'][i]

    y_pred = model.predict(np.array(starters_final[['Salary','Pts_w_StartMins','Proj_Mins']]))
    starters_final['Reg_Pts'] = y_pred
    starters_final['FPTS_med_w_mins'] = starters_final['FPTS_med'] * (starters_final['Proj_Mins'] / 90)
    
    gk_df = gk_df.merge(starters_final[['Player','Reg_Pts']])
    
    starters_final = starters_final.sort_values(by='Reg_Pts', ascending=False).reset_index().drop(columns=['index'])
    gk_df = gk_df.sort_values(by='Reg_Pts', ascending=False).reset_index().drop(columns=['index'])

    starters_final.to_csv("Matchup_Spreadsheets/"+league+"/"+contest_name+"_"+date+"_spreadsheet_finished.csv")

    return(starters_final, gk_df)

### Scrape the last five games and write out the basic stats

In [14]:
league = 'MLS'
fbref_league = ['MLS']*2
regression_league = 'MLS'
contest_name = 'MLS_Late' # Ex: MLS, Ex: MLS_Turbo

team_names = [['Portland Timbers', 'San Jose'], ['Vancouver', "Minnesota Utd"]]
#team_abbrevs = [['MIN', 'LAFC'], ['', ''],
#                ['', '']]

date = "20211027" # Use form YYYYMMDD

      
teamname_df = pd.read_csv('TeamNameConventions.csv')
name_to_url = {}
name_to_abbrev = {}
for i in range(len(teamname_df)):
    name_to_url[teamname_df['FBRef'][i]] = teamname_df['FBRef_URL'][i]
    name_to_abbrev[teamname_df['FBRef'][i]] = teamname_df['DK_Abbrev'][i]

team_urls = []
team_abbrevs = []
#team_urls = [['', ''], ['', '']]
for game_i in range(len(team_names)):
    home_abbrev = name_to_abbrev[team_names[game_i][0]]
    away_abbrev = name_to_abbrev[team_names[game_i][1]]
    team_abbrevs.append([home_abbrev, away_abbrev])
    print(team_abbrevs[game_i][0], 'vs.', team_abbrevs[game_i][1])
    home_url = name_to_url[team_names[game_i][0]]
    away_url = name_to_url[team_names[game_i][1]]
    team_urls.append([home_url, away_url])
    print(team_urls[game_i][0])
    print(team_urls[game_i][1])
    print('')

#8, 8, 7, 7, 8, 8
prevgames = 8

POR vs. SJ
https://fbref.com/en/squads/d076914e/Portland-Timbers-Stats
https://fbref.com/en/squads/ca460650/San-Jose-Earthquakes-Stats

VAN vs. MIN
https://fbref.com/en/squads/ab41cb90/Vancouver-Whitecaps-FC-Stats
https://fbref.com/en/squads/99ea75a6/Minnesota-United-Stats



In [15]:
print(team_abbrevs)
team_urls

[['POR', 'SJ'], ['VAN', 'MIN']]


[['https://fbref.com/en/squads/d076914e/Portland-Timbers-Stats',
  'https://fbref.com/en/squads/ca460650/San-Jose-Earthquakes-Stats'],
 ['https://fbref.com/en/squads/ab41cb90/Vancouver-Whitecaps-FC-Stats',
  'https://fbref.com/en/squads/99ea75a6/Minnesota-United-Stats']]

In [16]:
cols = ["Team", "Pos", "Salary", "90s", "Gls", "Ast", "Sh", "SoT", "Crs", "KP", "Pass_Cmp", "Fld", "Fls",
        "TklW", "Int", "CrdY", "xG", "xA", "Start_Mins", "Sub_Mins", "Starting", "starts", "subs"]
full_stats = pd.DataFrame(columns=cols)

for game_i in range(len(team_names)):
    # scrape in the url of the last 5 matches played (in the league) for the home and away teams, and indicate if they were home or away
    home_urls = ['']*prevgames
    home_homeoraways = ['']*prevgames
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    res = requests.get(team_urls[game_i][0], headers=headers)
    # The next two lines get around the issue with comments breaking the parsing
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("",res.text),'lxml')
    #options = Options()
    #options.headless = True
    #driver_path = 'cdriver/chromedriver'
    #driver = webdriver.Chrome(executable_path=driver_path)
    #driver.get(team_urls[game_i][0])
    #soup = BeautifulSoup(driver.page_source)
    #driver.quit()
    all_tables = soup.findAll("tbody")
    for i in range(1,len(all_tables[1].findAll("tr"))):
        game_date = all_tables[1].findAll("tr")[i].findAll("a", href=True)[0].text.replace('-', '')
        note = all_tables[1].findAll("tr")[i].findAll('td', {'data-stat':'notes'})[0].text
        if (not game_date.isnumeric() and (note == '')):
            break
    ctr=0
    for k in reversed(range(i)):
        if (ctr == prevgames):
            break
        if (all_tables[1].findAll("tr")[k].findAll("a", href=True)[1].text.replace(' ', '') == fbref_league[game_i]):
            url = all_tables[1].findAll("tr")[k].findAll("a", href=True)[0]['href']
            if (url == ''): continue
            home_urls[ctr] = 'https://fbref.com/' + url
            home_homeoraways[ctr] = all_tables[1].findAll("tr")[k].findAll("td", {"data-stat":"venue"})[0].text.lower()
            ctr+=1

    away_urls = ['']*prevgames
    away_homeoraways = ['']*prevgames
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    res = requests.get(team_urls[game_i][1], headers=headers)
    # The next two lines get around the issue with comments breaking the parsing
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("",res.text),'lxml')
    #options = Options()
    #options.headless = True
    #driver_path = 'cdriver/chromedriver'
    #driver = webdriver.Chrome(executable_path=driver_path)
    #driver.get(team_urls[game_i][1])
    #soup = BeautifulSoup(driver.page_source)
    #driver.quit()
    all_tables = soup.findAll("tbody")
    
    for i in range(1,len(all_tables[1].findAll("tr"))):
        game_date = all_tables[1].findAll("tr")[i].findAll("a", href=True)[0].text.replace('-', '')
        note = all_tables[1].findAll("tr")[i].findAll('td', {'data-stat':'notes'})[0].text
        #print(game_date)
        if (not game_date.isnumeric() and (note == '')):
            break
    #print(i)
    ctr=0
    for k in reversed(range(i)):
        if (ctr == prevgames):
            break
        if (all_tables[1].findAll("tr")[k].findAll("a", href=True)[1].text.replace(' ', '') == fbref_league[game_i]):
            url = all_tables[1].findAll("tr")[k].findAll("a", href=True)[0]['href']
            if (url == ''): continue
            away_urls[ctr] = 'https://fbref.com/' + url
            away_homeoraways[ctr] = all_tables[1].findAll("tr")[k].findAll("td", {"data-stat":"venue"})[0].text.lower()
            ctr+=1
            
    # COMMENT THESE OUT IF PREPARING FOR GAME THAT HAS NOT YET OCCURRED
    #home_urls = home_urls[1:]
    #home_homeoraways = home_homeoraways[1:]
    #away_urls = away_urls[1:]
    #away_homeoraways = away_homeoraways[1:]
    #prevgames = prevgames-1

# alright now you're good to run the cell
##################################################################################################################

    # alright now you're good to run the cell
    ##################################################################################################################
    print(home_urls)
    home_stats = scrapeURL(home_urls[0], home_homeoraways[0])
    home_fpts = {}
    for name in home_stats.index:
        home_fpts[name] = [home_stats.loc[name,'FPTS_min']]
    for i in range(1,prevgames):
        print(home_urls[i])
        tmp = scrapeURL(home_urls[i], home_homeoraways[i])
        for name in tmp.index:
            if name in home_stats.index:
                home_fpts[name] = home_fpts[name] + [tmp.loc[name,'FPTS_min']]
                for col in home_stats.columns:
                    home_stats.loc[name][col] = home_stats.loc[name][col] + tmp.loc[name][col]
            else:
                home_fpts[name] = [tmp.loc[name,'FPTS_min']]
                home_stats = home_stats.append(tmp.loc[name])
                if (tmp.loc[name]['minutes'] >= 45):
                    home_stats.loc[name]['starts'] = 1
                    home_stats.loc[name]['Start_Mins'] = tmp.loc[name]['minutes']
                else:
                    home_stats.loc[name]['subs'] = 1
                    home_stats.loc[name]['Sub_Mins'] = tmp.loc[name]['minutes']
    home_stats["Team"] = team_abbrevs[game_i][0]
    #home_stats = home_stats.sort_values(by="minutes")

    print(away_urls)
    away_stats = scrapeURL(away_urls[0], away_homeoraways[0])
    away_fpts = {}
    for name in away_stats.index:
        away_fpts[name] = [away_stats.loc[name,'FPTS_min']]
    for i in range(1,prevgames):
        print(away_urls[i])
        tmp = scrapeURL(away_urls[i], away_homeoraways[i])
        for name in tmp.index:
            if name in away_stats.index:
                away_fpts[name] = away_fpts[name] + [tmp.loc[name,'FPTS_min']]
                for col in away_stats.columns:
                    away_stats.loc[name][col] = away_stats.loc[name][col] + tmp.loc[name][col]
            else:
                away_fpts[name] = [tmp.loc[name,'FPTS_min']]
                away_stats = away_stats.append(tmp.loc[name])      
    away_stats["Team"] = team_abbrevs[game_i][1]
    #away_stats = away_stats.sort_values(by="minutes")

    game_stats = pd.concat([home_stats, away_stats])
    game_stats["Pos"] = np.nan
    game_stats["Salary"] = np.nan
    game_stats["Start_Mins"] = game_stats['Start_Mins'] / game_stats['starts']
    game_stats["Sub_Mins"] = game_stats['Sub_Mins'] / game_stats['subs']
    game_stats["Starting"] = 'y'
    game_stats = game_stats.drop(labels="shirtnumber", axis=1)
    #full_stats = full_stats.reset_index()
    game_stats = game_stats.rename(columns={"player": "Player",
                                            "minutes":"90s",
                                            "goals":"Gls",
                                            "assists":"Ast",
                                            "shots_total":"Sh",
                                            "shots_on_target":"SoT",
                                            "crosses":"Crs",
                                            "assisted_shots":"KP",
                                            "passes_completed":"Pass_Cmp",
                                            "fouled":"Fld",
                                            "fouls":"Fls",
                                            "tackles_won":"TklW",
                                            "interceptions":"Int",
                                            "cards_yellow":"CrdY",
                                            "xg":"xG",
                                            "xa":"xA"})
    game_stats = game_stats[cols]
    game_stats['FPTS_stdev'] = np.nan
    game_stats['FPTS_med'] = np.nan
    for name in game_stats[game_stats['Team'] == team_abbrevs[game_i][0]].index:
        game_stats.loc[name,'FPTS_stdev'] = np.array(home_fpts[name]).std()
        game_stats.loc[name,'FPTS_med'] = np.median(home_fpts[name])
    for name in game_stats[game_stats['Team'] == team_abbrevs[game_i][1]].index:
        game_stats.loc[name,'FPTS_stdev'] = np.array(away_fpts[name]).std()
        game_stats.loc[name,'FPTS_med'] = np.median(away_fpts[name])
    full_stats = pd.concat([full_stats, game_stats])
full_stats.index.name = 'player'

['https://fbref.com//en/matches/269901bf/Colorado-Rapids-Portland-Timbers-October-23-2021-Major-League-Soccer', 'https://fbref.com//en/matches/b5b8656f/Portland-Timbers-Vancouver-Whitecaps-FC-October-20-2021-Major-League-Soccer', 'https://fbref.com//en/matches/ec9883a5/LA-Galaxy-Portland-Timbers-October-16-2021-Major-League-Soccer', 'https://fbref.com//en/matches/63b6da89/Portland-Timbers-Inter-Miami-October-3-2021-Major-League-Soccer', 'https://fbref.com//en/matches/39fa0b7e/Los-Angeles-FC-Portland-Timbers-September-29-2021-Major-League-Soccer', 'https://fbref.com//en/matches/1e419f66/Portland-Timbers-Real-Salt-Lake-September-25-2021-Major-League-Soccer', 'https://fbref.com//en/matches/bc9a8903/Portland-Timbers-Los-Angeles-FC-September-19-2021-Major-League-Soccer', 'https://fbref.com//en/matches/e3533e2b/Portland-Timbers-Colorado-Rapids-September-15-2021-Major-League-Soccer']
https://fbref.com//en/matches/b5b8656f/Portland-Timbers-Vancouver-Whitecaps-FC-October-20-2021-Major-League-So

In [17]:
full_stats

Unnamed: 0_level_0,Team,Pos,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,xG,xA,Start_Mins,Sub_Mins,Starting,starts,subs,FPTS_stdev,FPTS_med
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Bill Tuiloma,POR,,,221.0,0.0,0.0,2.0,0.0,3.0,0.0,79.0,2.0,2.0,1.0,6.0,2.0,0.1,0.0,90.0,41.0,y,2.0,1.0,2.359642,2.24
Dairon Asprilla,POR,,,574.0,4.0,1.0,16.0,7.0,9.0,5.0,164.0,9.0,10.0,9.0,6.0,2.0,2.1,0.6,77.857143,29.0,y,7.0,1.0,10.773592,14.176667
Dario Zuparic,POR,,,720.0,0.0,2.0,2.0,1.0,0.0,3.0,242.0,7.0,6.0,6.0,14.0,0.0,0.1,0.6,90.0,,y,8.0,0.0,2.231255,5.01
Diego Chara,POR,,,630.0,1.0,0.0,1.0,1.0,3.0,4.0,296.0,15.0,20.0,16.0,16.0,2.0,0.1,0.4,90.0,,y,7.0,0.0,4.627109,6.8
Diego Valeri,POR,,,167.0,0.0,0.0,4.0,2.0,5.0,6.0,75.0,2.0,4.0,1.0,1.0,2.0,0.3,0.5,75.0,18.4,y,1.0,5.0,3.979847,5.01
Felipe Mora,POR,,,622.0,2.0,0.0,21.0,7.0,2.0,4.0,129.0,13.0,2.0,1.0,2.0,0.0,3.0,0.2,85.142857,26.0,y,7.0,1.0,6.411382,8.536207
George Fochive,POR,,,315.0,1.0,0.0,4.0,2.0,3.0,2.0,88.0,10.0,8.0,10.0,7.0,1.0,0.6,0.1,77.0,16.8,y,3.0,5.0,15.444636,5.04
Jaroslaw Niezgoda,POR,,,109.0,2.0,0.0,2.0,2.0,0.0,2.0,22.0,0.0,0.0,1.0,0.0,0.0,0.3,0.1,64.0,6.428571,y,1.0,7.0,16.098849,0.18
Jose Van Rankin,POR,,,595.0,0.0,0.0,1.0,0.0,12.0,4.0,289.0,4.0,9.0,6.0,13.0,0.0,0.1,0.2,82.857143,15.0,y,7.0,1.0,2.704898,4.32
Larrys Mabiala,POR,,,615.0,0.0,0.0,4.0,1.0,0.0,0.0,239.0,2.0,4.0,5.0,15.0,1.0,0.5,0.0,87.857143,,y,7.0,0.0,1.046326,3.56


### Get the stat projections using linear regression
#### This will find a file generated by an R script, which contains predictions for each upcoming game for the stats relevant to DFS
#### Ex: For an MLS game in the weekend of 8/13/21 to 8/15/21, the predictions are in the file: 'Regression_Matrices/MLS/MLS_predictions_matrix_20210813-20210815'

In [18]:
## Run this cell to get the shot and SoT projections
## Fill in the league to get the write patch to the shotlog file
#################################################################################################################

#league = "LaLiga"  # MLS, PremierLeague, Bundesliga, LaLiga, Ligue1, SerieA

#################################################################################################################
#filename = "Shotlogs/" + league + "_shotlog2021.csv"
#filename = 'Regression_Matrices/MLS/MLS_predictions_matrix_20210813-20210815.csv'

directory_name = 'Regression_Matrices/'+regression_league+'/'
directory_list = os.listdir(directory_name)
for file in directory_list:
    if ('predictions_matrix' in file):
        start_date = file.split('-')[0][-8:]
        end_date = file.split('-')[1][:-4]
        if (end_date[-1] not in ['1','2','3','4','5','6','7','8','9','0']):
            continue
        else:
            start_date = int(start_date)
            end_date = int(end_date)
        if (int(date) in range(start_date, end_date+1)):
            print('Found File:', file)
            break

full_preds = pd.read_csv(directory_name + file)
predictions_matrix = pd.DataFrame()
for game_i in range(len(team_names)):
    home_full = team_names[game_i][0]
    idx = full_preds[full_preds['Team'] == home_full].index[0]
    game_preds = full_preds.loc[idx:idx+1]
    game_preds = game_preds.reset_index().drop(columns=['index'])
    predictions_matrix = pd.concat([predictions_matrix, game_preds])
    
predictions_matrix = predictions_matrix.rename(columns={'Pred.Goals':'Real_Goals', 'Pred.Sh':'Real_Sh', 'Pred.SoT':'Real_SoT',
                                                        'Pred.KP':'Real_KP', 'Pred.Fls':'Real_Fls', 'Pred.Fld':'Real_Fld',
                                                        'Pred.Crs':'Real_Crs', 'Pred.Int':'Real_Int', 'Pred.TklW':'Real_TklW'})
predictions_matrix

Found File: MLS_predictions_matrix_20211026-20211027.csv


Unnamed: 0,Date,Team,Opp,League,Real_Goals,Real_Sh,Real_SoT,Real_KP,Real_Fls,Real_Fld,Real_Crs,Real_Int,Real_TklW
0,20211027,Portland Timbers,San Jose,MLS,1.771649,14.581081,5.506159,12.220174,14.625359,12.843254,12.130337,16.337537,11.881695
1,20211027,San Jose,Portland Timbers,MLS,1.448446,12.657065,4.971703,9.030093,13.622397,12.716513,12.68458,16.272469,12.042626
0,20211027,Vancouver,Minnesota Utd,MLS,1.381876,11.984526,4.022533,8.613686,12.880172,13.677803,12.073382,15.374641,10.80107
1,20211027,Minnesota Utd,Vancouver,MLS,1.363713,13.083653,4.719975,10.013969,13.497063,12.276763,12.008485,15.032581,10.961626


In [19]:
## ADJUST SALARY SHEET FOR :
### PremierLeague - Arsenal - Martin Odegaard
### PremierLeague - Brentford - Christian Norgaard
### PremierLeague - Brentford - Mads Bech Sorensen
### PremierLeague - Brighton - Pascal Gross
### PremierLeague - Tottenham - Pierre-Emile Hojbjerg
### LaLiga - Real Sociedad - Alexander Sorloth
### Bundesliga - Union Berlin - Niko Giesselmann
### Bundesliga - Freiburg - Noah Weisshaupt
filename = 'DK_Salaries/' + league + '/' + contest_name + '_' + date + '_Salaries.csv'
DK_Salaries = pd.read_csv(filename, encoding = "ISO-8859-1")

for i in range(len(DK_Salaries)):
    if (('Martin ' in DK_Salaries['Name'][i]) & ('degaard' in DK_Salaries['Name'][i])): DK_Salaries['Name'][i] = 'Martin Odegaard'
    elif (('Christian N' in DK_Salaries['Name'][i]) & ('rgaard' in DK_Salaries['Name'][i])): DK_Salaries['Name'][i] = 'Christian Norgaard'
    elif (('Mads Bech S' in DK_Salaries['Name'][i]) & ('rensen' in DK_Salaries['Name'][i])): DK_Salaries['Name'][i] = 'Mads Bech Sorensen'
    elif ('Pascal Gro' in DK_Salaries['Name'][i]): DK_Salaries['Name'][i] = 'Pascal Gross'
    elif (('Pierre-Emile H' in DK_Salaries['Name'][i]) & ('jbjerg' in DK_Salaries['Name'][i])): DK_Salaries['Name'][i] = 'Pierre-Emile Hojbjerg'
    elif (('Alexander S' in DK_Salaries['Name'][i]) & ('rloth' in DK_Salaries['Name'][i])): DK_Salaries['Name'][i] = 'Alexander Sorloth'
    elif (('Niko Gie' in DK_Salaries['Name'][i]) & ('elmann' in DK_Salaries['Name'][i])): DK_Salaries['Name'][i] = 'Niko Giesselmann'
    elif (('Noah Wei' in DK_Salaries['Name'][i]) & ('haupt' in DK_Salaries['Name'][i])): DK_Salaries['Name'][i] = 'Noah Weisshaupt'
    elif (('Bj' in DK_Salaries['Name'][i]) & ('rn Johnsen' in DK_Salaries['Name'][i])): DK_Salaries['Name'][i] = 'Bjorn Johnsen'
        
DK_Salaries.to_csv(filename, index=False)
    


NameMatching = pd.read_csv("NameMatching.csv", encoding = "ISO-8859-1")
fbref_to_dk = {}
for i in range(len(NameMatching)):
    if (pd.notna(NameMatching['FBRef'][i])):
        fbref_to_dk[NameMatching['FBRef'][i]] = NameMatching['Name'][i]

ctr = 0
for fbref_name in list(full_stats.index):
    if (fbref_name not in fbref_to_dk):
        if (fbref_name in list(DK_Salaries['Name'])):
            idx = DK_Salaries[DK_Salaries['Name'] == fbref_name].index[0]
            fbref_to_dk[fbref_name] = DK_Salaries['Name'][idx]
            if (type(full_stats['Team'][fbref_name]) != str):
                for i in range(len(full_stats['Team'][fbref_name])):
                    missing = league + ',' + full_stats['Team'][fbref_name][i] + ',' + DK_Salaries['Position'][idx] + ',,' + DK_Salaries['Name'][idx] + ',,,,' + fbref_name
            else:
                missing = league + ',' + full_stats['Team'][fbref_name] + ',' + DK_Salaries['Position'][idx] + ',,' + DK_Salaries['Name'][idx] + ',,,,' + fbref_name
            print(missing)
            ctr+=1
if (ctr == 0): print("No players missing")
            
print('')
ctr=0
for fbref_name in list(full_stats.index):
    if (fbref_name not in fbref_to_dk):
        if (ctr==0): print('Still Missing:')
        print(fbref_name)
        ctr+=1
if (ctr == 0): print("None still missing")

No players missing

None still missing


In [20]:
## MAKE SURE THE NAMEMATCHING CSV IS UPDATED AT THIS POINT
NameMatching = pd.read_csv("NameMatching.csv", encoding = "ISO-8859-1")
fbref_to_dk = {}
for i in range(len(NameMatching)):
    if (pd.notna(NameMatching['FBRef'][i])):
        fbref_to_dk[NameMatching['FBRef'][i]] = NameMatching['Name'][i]
        
for fbref_name in list(full_stats.index):
    if (fbref_name not in fbref_to_dk):
        fbref_to_dk[fbref_name] = ''
        
for name in list(full_stats.index):
    if (fbref_to_dk[name] not in list(DK_Salaries['Name'])):
        full_stats = full_stats.drop(name)
        print(name, "dropped")
full_stats

Unnamed: 0_level_0,Team,Pos,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,xG,xA,Start_Mins,Sub_Mins,Starting,starts,subs,FPTS_stdev,FPTS_med
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Bill Tuiloma,POR,,,221.0,0.0,0.0,2.0,0.0,3.0,0.0,79.0,2.0,2.0,1.0,6.0,2.0,0.1,0.0,90.0,41.0,y,2.0,1.0,2.359642,2.24
Dairon Asprilla,POR,,,574.0,4.0,1.0,16.0,7.0,9.0,5.0,164.0,9.0,10.0,9.0,6.0,2.0,2.1,0.6,77.857143,29.0,y,7.0,1.0,10.773592,14.176667
Dario Zuparic,POR,,,720.0,0.0,2.0,2.0,1.0,0.0,3.0,242.0,7.0,6.0,6.0,14.0,0.0,0.1,0.6,90.0,,y,8.0,0.0,2.231255,5.01
Diego Chara,POR,,,630.0,1.0,0.0,1.0,1.0,3.0,4.0,296.0,15.0,20.0,16.0,16.0,2.0,0.1,0.4,90.0,,y,7.0,0.0,4.627109,6.8
Diego Valeri,POR,,,167.0,0.0,0.0,4.0,2.0,5.0,6.0,75.0,2.0,4.0,1.0,1.0,2.0,0.3,0.5,75.0,18.4,y,1.0,5.0,3.979847,5.01
Felipe Mora,POR,,,622.0,2.0,0.0,21.0,7.0,2.0,4.0,129.0,13.0,2.0,1.0,2.0,0.0,3.0,0.2,85.142857,26.0,y,7.0,1.0,6.411382,8.536207
George Fochive,POR,,,315.0,1.0,0.0,4.0,2.0,3.0,2.0,88.0,10.0,8.0,10.0,7.0,1.0,0.6,0.1,77.0,16.8,y,3.0,5.0,15.444636,5.04
Jaroslaw Niezgoda,POR,,,109.0,2.0,0.0,2.0,2.0,0.0,2.0,22.0,0.0,0.0,1.0,0.0,0.0,0.3,0.1,64.0,6.428571,y,1.0,7.0,16.098849,0.18
Jose Van Rankin,POR,,,595.0,0.0,0.0,1.0,0.0,12.0,4.0,289.0,4.0,9.0,6.0,13.0,0.0,0.1,0.2,82.857143,15.0,y,7.0,1.0,2.704898,4.32
Larrys Mabiala,POR,,,615.0,0.0,0.0,4.0,1.0,0.0,0.0,239.0,2.0,4.0,5.0,15.0,1.0,0.5,0.0,87.857143,,y,7.0,0.0,1.046326,3.56


In [21]:
for name in list(full_stats.index):
    tmp = DK_Salaries[DK_Salaries['Name'] == fbref_to_dk[name]]
    #tmp = tmp[tmp['Roster Position'] == 'FLEX']
    full_stats['Pos'][name] = tmp['Position'].iloc[0]
    full_stats['Salary'][name] = tmp['Salary'].iloc[0]

sorter = {}
ctr=0
for game_i in range(len(team_abbrevs)):
    sorter[team_abbrevs[game_i][0]] = ctr
    sorter[team_abbrevs[game_i][1]] = ctr+1
    ctr+=2
full_stats['TeamRk'] = full_stats['Team'].map(sorter)
full_stats = full_stats.sort_values(by=['TeamRk', 'player']).drop(columns=['TeamRk'])
full_stats.index.names = ['Player']
full_stats.to_csv("Matchup_Spreadsheets/"+league + '/' + contest_name +"_" + date + "_spreadsheet.csv")
full_stats

Unnamed: 0_level_0,Team,Pos,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,xG,xA,Start_Mins,Sub_Mins,Starting,starts,subs,FPTS_stdev,FPTS_med
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Bill Tuiloma,POR,D,2600.0,221.0,0.0,0.0,2.0,0.0,3.0,0.0,79.0,2.0,2.0,1.0,6.0,2.0,0.1,0.0,90.0,41.0,y,2.0,1.0,2.359642,2.24
Claudio Bravo,POR,D,4400.0,450.0,0.0,0.0,2.0,0.0,6.0,4.0,216.0,4.0,9.0,13.0,17.0,1.0,0.0,0.4,90.0,,y,5.0,0.0,1.496577,6.62
Cristhian Paredes,POR,M,4000.0,420.0,1.0,1.0,7.0,1.0,1.0,1.0,144.0,6.0,4.0,7.0,14.0,3.0,0.6,0.2,76.2,19.5,y,5.0,2.0,11.192433,7.046341
Dairon Asprilla,POR,M/F,8400.0,574.0,4.0,1.0,16.0,7.0,9.0,5.0,164.0,9.0,10.0,9.0,6.0,2.0,2.1,0.6,77.857143,29.0,y,7.0,1.0,10.773592,14.176667
Dario Zuparic,POR,D,3100.0,720.0,0.0,2.0,2.0,1.0,0.0,3.0,242.0,7.0,6.0,6.0,14.0,0.0,0.1,0.6,90.0,,y,8.0,0.0,2.231255,5.01
Diego Chara,POR,M,3000.0,630.0,1.0,0.0,1.0,1.0,3.0,4.0,296.0,15.0,20.0,16.0,16.0,2.0,0.1,0.4,90.0,,y,7.0,0.0,4.627109,6.8
Diego Valeri,POR,M,6600.0,167.0,0.0,0.0,4.0,2.0,5.0,6.0,75.0,2.0,4.0,1.0,1.0,2.0,0.3,0.5,75.0,18.4,y,1.0,5.0,3.979847,5.01
Felipe Mora,POR,F,7500.0,622.0,2.0,0.0,21.0,7.0,2.0,4.0,129.0,13.0,2.0,1.0,2.0,0.0,3.0,0.2,85.142857,26.0,y,7.0,1.0,6.411382,8.536207
George Fochive,POR,M,3400.0,315.0,1.0,0.0,4.0,2.0,3.0,2.0,88.0,10.0,8.0,10.0,7.0,1.0,0.6,0.1,77.0,16.8,y,3.0,5.0,15.444636,5.04
Jaroslaw Niezgoda,POR,F,6100.0,109.0,2.0,0.0,2.0,2.0,0.0,2.0,22.0,0.0,0.0,1.0,0.0,0.0,0.3,0.1,64.0,6.428571,y,1.0,7.0,16.098849,0.18


In [78]:
#fbref_to_dk['Niko Giesselmann']
#fbref_to_dk['Niko Giesselmann'] == NameMatching[NameMatching['FBRef'] == 'Niko Giesselmann']['Name'].iloc[0]
#print(fbref_to_dk['Niko Giesselmann'])
#print(NameMatching[NameMatching['FBRef'] == 'Niko Giesselmann']['Name'].iloc[0])

### Make a quick regression for projecting player fantasy points
#### Initial predictors are Salary, Projected Points, and Project Minutes
#### Data stored in `PlayerPointsAnalysis2.csv`, with a scraper here to calculate real player points from FBRef from provided URL

In [23]:
points_data = pd.read_csv('PlayerPointsAnalysis2.csv')
points_data = points_data[~pd.isna(points_data['MatchReport_URL'])]

unfilled = points_data[pd.isna(points_data['Real_Pts'])]

for i in unfilled.index:
    if (points_data['Starting'][i] == 'y'): points_data['Proj_Mins'][i] = points_data['Start_Mins'][i]
    elif (points_data['Starting'][i] == 'b'): points_data['Proj_Mins'][i] = points_data['Sub_Mins'][i]

for url in unfilled['MatchReport_URL'].unique():
    match = unfilled[points_data['MatchReport_URL'] == url]
    
    # Time to get stats from fbref
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    res = requests.get(url, headers=headers)
    ## The next two lines get around the issue with comments breaking the parsing
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("",res.text),'lxml')
    all_tables = soup.findAll("tbody")
    
    homedata = match[match['HorA'] == 'H']
    for i in homedata.index:
        found=0
        for player_i in range(len(all_tables[0].findAll('th'))):
            if (unidecode.unidecode(all_tables[0].findAll('th')[player_i].text.replace('\xa0', '')) == points_data['Player'][i]):
                found = 1
                break
        if (found):
            points_data['Real_Mins'][i] = int(all_tables[0].findAll('td', {'data-stat':'minutes'})[player_i].text)
            goals = int(all_tables[0].findAll('td', {'data-stat':'goals'})[player_i].text)
            assists = int(all_tables[0].findAll('td', {'data-stat':'assists'})[player_i].text)
            shots = int(all_tables[0].findAll('td', {'data-stat':'shots_total'})[player_i].text)
            sot = int(all_tables[0].findAll('td', {'data-stat':'shots_on_target'})[player_i].text)
            passes = 0 if (all_tables[0].findAll('td', {'data-stat':'passes_completed'})[player_i].text == '') else int(all_tables[0].findAll('td', {'data-stat':'passes_completed'})[player_i].text)
            cardsy = int(all_tables[0].findAll('td', {'data-stat':'cards_yellow'})[player_i].text)
            cardsr = int(all_tables[0].findAll('td', {'data-stat':'cards_red'})[player_i].text)
            if (cardsy==2): cardsr=0
            kp = 0 if (all_tables[1].findAll('td', {'data-stat':'assisted_shots'})[player_i].text == '') else int(all_tables[1].findAll('td', {'data-stat':'assisted_shots'})[player_i].text)
            fls = int(all_tables[5].findAll('td', {'data-stat':'fouls'})[player_i].text)
            fld = int(all_tables[5].findAll('td', {'data-stat':'fouled'})[player_i].text)
            crs = int(all_tables[5].findAll('td', {'data-stat':'crosses'})[player_i].text)
            ints = int(all_tables[5].findAll('td', {'data-stat':'interceptions'})[player_i].text)
            tklw = int(all_tables[5].findAll('td', {'data-stat':'tackles_won'})[player_i].text)
            cs = 3 if ((int(soup.findAll('div', {'class':'score'})[1].text) == 0) and (points_data['Pos'][i] == 'D')) else 0
            points_data['Real_Pts'][i] = goals*10 + assists*6 + shots + sot + passes*.02 + kp + fld + crs*0.7 + ints*0.5 + tklw + cs - cardsy*1.5 - cardsr*3 - fls*0.5
            points_data['TeamGoals'][i] = int(soup.findAll('div', {'class':['score']})[0].text)
    
    awaydata = match[match['HorA'] == 'A']
    for i in awaydata.index:
        found=0
        for player_i in range(len(all_tables[7].findAll('th'))):
            if (unidecode.unidecode(all_tables[7].findAll('th')[player_i].text.replace('\xa0', '')) == points_data['Player'][i]):
                found = 1
                break
        if (found):
            points_data['Real_Mins'][i] = int(all_tables[7].findAll('td', {'data-stat':'minutes'})[player_i].text)
            goals = int(all_tables[7].findAll('td', {'data-stat':'goals'})[player_i].text)
            assists = int(all_tables[7].findAll('td', {'data-stat':'assists'})[player_i].text)
            shots = int(all_tables[7].findAll('td', {'data-stat':'shots_total'})[player_i].text)
            sot = int(all_tables[7].findAll('td', {'data-stat':'shots_on_target'})[player_i].text)
            passes = int(all_tables[7].findAll('td', {'data-stat':'passes_completed'})[player_i].text)
            cardsy = int(all_tables[7].findAll('td', {'data-stat':'cards_yellow'})[player_i].text)
            cardsr = int(all_tables[7].findAll('td', {'data-stat':'cards_red'})[player_i].text)
            if (cardsy==2): cardsr=0
            kp = int(all_tables[8].findAll('td', {'data-stat':'assisted_shots'})[player_i].text)
            fls = int(all_tables[12].findAll('td', {'data-stat':'fouls'})[player_i].text)
            fld = int(all_tables[12].findAll('td', {'data-stat':'fouled'})[player_i].text)
            crs = int(all_tables[12].findAll('td', {'data-stat':'crosses'})[player_i].text)
            ints = int(all_tables[12].findAll('td', {'data-stat':'interceptions'})[player_i].text)
            tklw = int(all_tables[12].findAll('td', {'data-stat':'tackles_won'})[player_i].text)
            cs = 3 if ((int(soup.findAll('div', {'class':'score'})[0].text) == 0) and (points_data['Pos'][i] == 'D')) else 0
            points_data['Real_Pts'][i] = goals*10 + assists*6 + shots + sot + passes*.02 + kp + fld + crs*0.7 + ints*0.5 + tklw + cs - cardsy*1.5 - cardsr*3 - fls*0.5
            points_data['TeamGoals'][i] = int(soup.findAll('div', {'class':['score']})[1].text)
            
points_data = points_data[~pd.isna(points_data['Real_Pts'])].reset_index().drop(columns=['index'])
points_data.to_csv('PlayerPointsAnalysis2.csv', index=False)

#for x in [.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2]:
    #print('')
    #print('Exponential Value:', x)
    #points_data = pd.read_csv('PlayerPointsAnalysis2.csv')
points_data = points_data[points_data['GameStyle'] == 'Classic']
    #points_data['Salary'] = points_data['Salary'] ** x # test poly fit
# Now that the data is updated and clean, time for the regressuibs
X_full = np.array(points_data[['Salary','Pts_w_StartMins','Proj_Mins']])
X = np.array(points_data[['Salary','Pts_w_StartMins']])
pts = np.array(points_data['Pts_w_StartMins'])
y = np.array(points_data['Real_Pts'])

n = 5000
full_rmse_list = [np.nan]*n
rmse_list = [np.nan]*n
pts_rmse_list = [np.nan]*n

full_mae_list = [np.nan]*n
mae_list = [np.nan]*n
pts_mae_list = [np.nan]*n

full_r2_list = [np.nan]*n
r2_list = [np.nan]*n
pts_r2_list = [np.nan]*n
for i in range(n):
    seed = random.randint(0,999999999)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.1, random_state=seed)
    Xfull_train, Xfull_test, y_train, y_test1 = train_test_split(X_full, y, test_size=.1, random_state=seed)
    pts_X_train, pts_X_test, y_train, y_test2 = train_test_split(pts, y, test_size=.1, random_state=seed)
    pts_X_train = pts_X_train.reshape(-1,1)
    pts_X_test = pts_X_test.reshape(-1,1)
    y_pred1 = LinearRegression().fit(X_train, y_train).predict(X_test)
    rmse_list[i] = np.sqrt(MSE(y_test, y_pred1))
    mae_list[i] = np.mean(abs(y_test - y_pred1))
    r2_list[i] = r2_score(y_test, y_pred1)
    y_pred2 = LinearRegression().fit(Xfull_train, y_train).predict(Xfull_test)
    full_rmse_list[i] = np.sqrt(MSE(y_test, y_pred2))
    full_mae_list[i] = np.mean(abs(y_test - y_pred2))
    full_r2_list[i] = r2_score(y_test, y_pred2)
    y_pred3 = LinearRegression().fit(pts_X_train, y_train).predict(pts_X_test)
    pts_rmse_list[i] = np.sqrt(MSE(y_test, pts_X_test))
    pts_mae_list[i] = np.mean(abs(y_test - pts_X_test))
    pts_r2_list[i] = r2_score(y_test, pts_X_test)

print('RMSEs')
print('Just the algorithm:  ', sum(pts_rmse_list)/len(pts_rmse_list))
print('Algo w/ Salary reg:  ', sum(rmse_list)/len(rmse_list))
print('Algo+Salary+Mins reg:', sum(full_rmse_list)/len(full_rmse_list))

print('')
print('MAEs')
print('Just the algorithm:  ', sum(pts_mae_list)/len(pts_mae_list))
print('Algo w/ Salary reg:  ', sum(mae_list)/len(mae_list))
print('Algo+Salary+Mins reg:', sum(full_mae_list)/len(full_mae_list))

print('')
print('R2s')
print('Just the algorithm:  ', sum(pts_r2_list)/len(pts_r2_list))
print('Algo w/ Salary reg:  ', sum(r2_list)/len(r2_list))
print('Algo+Salary+Mins reg:', sum(full_r2_list)/len(full_r2_list))

model = LinearRegression().fit(X_full, y)


RMSEs
Just the algorithm:   5.637854598681987
Algo w/ Salary reg:   5.339423337288669
Algo+Salary+Mins reg: 5.316873228445343

MAEs
Just the algorithm:   5.524734721350159
Algo w/ Salary reg:   3.900625934625638
Algo+Salary+Mins reg: 3.861129337125351

R2s
Just the algorithm:   0.11199292278237147
Algo w/ Salary reg:   0.2059352746679516
Algo+Salary+Mins reg: 0.21267856638449506


In [24]:
model.coef_

array([0.00071763, 0.35109623, 0.02282686])

In [25]:
print(len(points_data[points_data['Pos'] == 'GK']))
print(len(points_data[points_data['Pos'] != 'GK']))

231
2995


### Make sure you open the file to select which players are starting

In [9]:
starters = pd.read_csv("Matchup_Spreadsheets/"+league+"/"+contest_name+"_"+date+"_spreadsheet.csv")
starters = starters[starters["Starting"].notna()]
starters = starters.reset_index().drop(columns=["index"])
#starters = starters[starters["Pos"] != "GK"] # these projections really only work for field players, GKs are a different beast
starters = starters.reset_index().drop(columns=["index"])

# adjust projected minutes so each team sums to 990 (or 900 if a player is purposefully left out)
#starters = starters[starters['Starting'] == 'y']
#starters = starters.reset_index().drop(columns=["index"])
for game in team_abbrevs:
    print(game[0])
    for k in range(10):
        home = starters[starters['Team'] == game[0]]
        target_mins = len(home[home['Starting'] == 'y'])*90
        real_mins = home[home['Starting'] == 'y']['Start_Mins'].sum() + home[home['Starting'] == 'b']['Sub_Mins'].sum()
        non_90s = home[~((home['Starting'] == 'y') & (home['Start_Mins'] == 90))]
        if (k==0):
            print('Target:', target_mins)
            print('Real:  ',real_mins)

        for i in non_90s.index:
            if (starters['Starting'][i] == 'y'):
                starters['Start_Mins'][i] = min([starters['Start_Mins'][i] * (target_mins/real_mins), 90])
            elif (starters['Starting'][i] == 'b'):
                starters['Sub_Mins'][i] = min(starters['Sub_Mins'][i] * (target_mins/real_mins), 45)
    home = starters[starters['Team'] == game[0]]
    print('Result:', home[home['Starting'] == 'y']['Start_Mins'].sum() + home[home['Starting'] == 'b']['Sub_Mins'].sum())
    print('')
    
    print(game[1])
    for k in range(10):
        away = starters[starters['Team'] == game[1]]
        target_mins = len(away[away['Starting'] == 'y'])*90
        real_mins = away[away['Starting'] == 'y']['Start_Mins'].sum() + away[away['Starting'] == 'b']['Sub_Mins'].sum()
        non_90s = away[~((away['Starting'] == 'y') & (away['Start_Mins'] == 90))]
        if (k==0):
            print('Target:', target_mins)
            print('Real:  ',real_mins)

        for i in non_90s.index:
            if (starters['Starting'][i] == 'y'):
                starters['Start_Mins'][i] = min([starters['Start_Mins'][i] * (target_mins/real_mins), 90])
            elif (starters['Starting'][i] == 'b'):
                starters['Sub_Mins'][i] = min(starters['Sub_Mins'][i] * (target_mins/real_mins), 45)
    away = starters[starters['Team'] == game[1]]
    print('Result:', away[away['Starting'] == 'y']['Start_Mins'].sum() + away[away['Starting'] == 'b']['Sub_Mins'].sum())
    print('')

#################################################################################################################
# Utilized predictions matrix that was read in earlier

starters, gk_df = make_finished_spreadsheet(starters, team_names, team_abbrevs, predictions_matrix, model, date)
#game_preds = make_finished_spreadsheet(starters, team_names, team_abbrevs, predictions_matrix, model, date)

starters[['Player', 'Team', 'Salary', '90s', 'Gls', 'Ast', 'Sh', 'SoT', 'Crs', 'KP', 'Pass_Cmp', 'Fld', 'Fls', 'TklW', 'Int', 'CrdY', 'Floor', 'FPTS',
         'xG', 'xA', 'Proj_Gls', 'Proj_Ast', 'Proj_Sh', 'Proj_SoT', 'Proj_KP', 'Proj_Crs', 'Proj_Fld', 'Proj_Fls', 'Proj_TklW', 'Proj_Int',
         'Pts_w_StartMins',  'Reg_Pts', 'FPTS_med', 'Start_Mins', 'Sub_Mins', 'Starting', 'Player_']]

MAR
Target: 990
Real:   993.5619047800001
Result: 990.0001431881109

PSG
Target: 990
Real:   1053.4952381
Result: 990.0001328382259

INT
Target: 990
Real:   1068.44523809
Result: 990.0000028235407

JUV
Target: 990
Real:   1011.5999999999999
Result: 990.0008442807139

ATL
Target: 990
Real:   929.8333333329999
Result: 989.9998968598368

RSO
Target: 990
Real:   995.9345238
Result: 990.0002376770498



Unnamed: 0,Player,Team,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,Floor,FPTS,xG,xA,Proj_Gls,Proj_Ast,Proj_Sh,Proj_SoT,Proj_KP,Proj_Crs,Proj_Fld,Proj_Fls,Proj_TklW,Proj_Int,Pts_w_StartMins,Reg_Pts,FPTS_med,Start_Mins,Sub_Mins,Starting,Player_
0,Kylian Mbappe,PSG,11400,7.711111,5,2,31,9,22,14,269,13,7,4,3,2,11.642939,19.294236,5.4,1.9,0.663579,0.322574,3.416557,1.143141,1.540497,2.392831,1.235444,1.238601,0.571759,0.383681,18.344154,14.807328,19.625000,79.719708,0.000000,y,Kylian Mbappe
1,Lionel Messi,PSG,10800,2.111111,0,0,9,2,1,5,114,6,1,1,0,0,12.069474,12.069474,0.9,0.5,0.384177,0.294874,3.445561,0.882424,1.911143,0.377815,1.980712,0.614644,0.496528,0.000000,15.194302,13.208584,10.760000,75.814131,25.000000,y,Lionel Messi
2,Neymar,PSG,11000,4.555556,1,2,8,1,11,11,208,18,4,4,4,1,11.822927,16.322927,1.7,1.1,0.334247,0.298805,1.410708,0.203225,1.936624,1.914264,2.736984,1.132436,0.914815,0.818520,14.285262,13.031528,17.020000,75.354652,0.000000,y,Neymar
3,Dimitri Payet,MAR,10000,5.000000,3,3,11,4,11,23,179,4,1,0,1,0,10.656000,20.256000,1.9,3.0,0.278257,0.512813,1.787967,0.669318,2.859497,2.136124,0.758945,0.283156,0.000000,0.323862,14.166816,12.635428,18.380000,90.000000,0.000000,y,Dimitri Payet
4,Joao Felix,ATL,7000,1.355556,0,0,3,1,0,2,35,9,1,0,2,3,11.950820,8.631148,0.3,0.4,0.427084,0.382215,1.788494,0.858812,1.079390,0.000000,5.010870,0.572891,0.000000,1.342384,16.127615,10.825244,9.887324,76.891440,25.500000,y,Joao Felix
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Andrea Ranocchia,INT,3000,0.750000,0,0,0,0,0,0,9,0,0,0,0,0,0.240000,0.240000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.058349,1.127971,0.540000,0.000000,21.880921,b,Andrea Ranocchia
104,Sime Vrsaljko,ATL,2800,0.750000,0,1,0,0,3,1,21,0,0,0,1,0,5.360000,13.360000,0.0,0.2,0.000000,0.040541,0.000000,0.000000,0.114489,0.415067,0.000000,0.000000,0.000000,0.142385,0.775629,0.910704,4.260000,0.000000,9.024817,b,Sime Vrsaljko
105,Leonardo Balerdi,MAR,3100,5.988889,0,0,0,0,0,1,447,5,7,5,21,3,4.498330,3.746939,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.003440,0.000000,0.026253,0.054850,0.029657,0.188206,0.175507,0.772556,3.331034,89.333333,2.983135,b,Leonardo Balerdi
106,Alvaro Gonzalez,MAR,2600,1.011111,0,0,1,0,0,0,84,0,2,0,0,0,1.661538,1.661538,0.1,0.0,0.000800,0.000000,0.008881,0.000000,0.000000,0.000000,0.000000,0.030941,0.000000,0.000000,0.019769,0.311545,0.980000,90.000000,0.994378,b,Alvaro Gonzalez


In [10]:
gk_df

Unnamed: 0,Player,Team,Pos,Salary,proj_GA,proj_Saves,WinOdds,CSOdds,Pts_w_StartMins,Reg_Pts
0,Jan Oblak,ATL,GK,5500,0.825756,2.051921,59.090909,40.634243,7.438588,7.11661
1,Keylor Navas,PSG,GK,5500,1.190652,2.301073,59.375,29.112366,6.64521,6.846707
2,Samir Handanovic,INT,GK,5100,1.339691,3.000201,46.666667,22.771129,6.79291,6.609851
3,Wojciech Szczesny,JUV,GK,4400,1.773846,3.040035,28.571429,14.682558,4.695078,5.393748
4,Alex Remiro,RSO,GK,3900,1.710082,2.498345,16.949153,21.11641,3.479804,4.621439
5,Pau Lopez,MAR,GK,4000,1.965138,2.769103,20.0,12.422613,3.229062,4.607913


In [22]:
#starters.head(50)

### Optimizer to get best lineups

In [11]:
def expandgrid(*itrs):
    product = list(itertools.product(*itrs))
    return {'Var{}'.format(i+1):[x[i] for x in product] for i in range(len(itrs))}
    #return(product)

start_full = time.time()
full_starters = starters.copy()
starters = starters[starters['Starting'] == 'y']
starters['Pts_Ratio'] = (starters['Reg_Pts'] / starters['Salary'])*10000
starters['Pts_Ratio2'] = (starters['Pts_w_StartMins'] / starters['Salary'])*10000
#starters = starters.sort_values(by='Pts_Ratio', ascending=False).reset_index().drop(columns=['index']) ## Comment this out to filter by reg_pts
#starters = starters.sort_values(by='Pts_w_StartMins', ascending=False).reset_index().drop(columns=['index']) ## Comment this out to filter by reg_pts
starters = starters.sort_values(by='Pts_Ratio2', ascending=False).reset_index().drop(columns=['index']) ## Comment this out to filter by reg_pts
#line = min(1 + ( (len(starters['Team'].unique())/2) / 10),  1.42)
line = 1.25
avg_pts = starters['Reg_Pts'].mean() * 8
min_pts = avg_pts * line
#min_pts = starters['Reg_Pts'].mean() * (8)

s1 = time.time()
forwards = starters[(starters['Pos'] == 'F') | (starters['Pos'] == 'M/F')]
print('F: ', len(forwards))
forwards = forwards.iloc[0:min(7,len(forwards))]
#forwards = forwards.iloc[0:min(10,len(forwards))]
mids = starters[(starters['Pos'] == 'M') | (starters['Pos'] == 'M/F')]
print('M: ', len(mids))
mids = mids.iloc[0:min(17,len(mids))]
#mids = mids.iloc[0:min(18,len(mids))] ## CHANGE BACK TO 17
defenders = starters[starters['Pos'] == 'D']
print('D: ', len(defenders))
defenders = defenders.iloc[0:min(17,len(defenders))]
#defenders = defenders.iloc[0:min(16,len(defenders))]
gks = starters[starters['Pos'] == 'GK']
print('GK: ', len(gks))
gks = gks.iloc[0:min(8,len(gks))]
utils = starters[starters['Pos'] != 'GK']
print('U: ', len(utils))
#utils = utils.iloc[0:min(35,len(utils))]
utils = utils.iloc[0:min(60,len(utils))]
print( [len(forwards), len(mids), len(defenders), len(gks), len(utils)])
f = math.factorial(len(forwards))/(math.factorial(2)*(math.factorial(len(forwards)-2)))
m = math.factorial(len(mids))/(math.factorial(2)*(math.factorial(len(mids)-2)))
d = math.factorial(len(defenders))/(math.factorial(2)*(math.factorial(len(defenders)-2)))
gk = len(gks)
u = len(utils)
print('Total Possible Combinations:', f*m*d*gk*u)
print('line:',line)
print('avg_pts:', round(avg_pts,4))
print('min_pts:', round(min_pts,4))
print('Took', round(time.time()-s1,2), 'seconds')
print('')

F:  14
M:  27
D:  24
GK:  6
U:  60
[7, 17, 17, 6, 60]
Total Possible Combinations: 139829760.0
line: 1.25
avg_pts: 54.2227
min_pts: 67.7784
Took 0.0 seconds



In [12]:
print('f_pts:', round(forwards['Reg_Pts'].mean(), 4))
print('m_pts:', round(mids['Reg_Pts'].mean(), 4))
print('d_pts:', round(defenders['Reg_Pts'].mean(), 4))
print('gk_pts:', round(gks['Reg_Pts'].mean(), 4))
print('u_pts:', round(utils['Reg_Pts'].mean(), 4))


f_pts: 11.8357
m_pts: 8.2705
d_pts: 5.4251
gk_pts: 5.866
u_pts: 6.869


In [13]:
forwards

Unnamed: 0,Player,Team,Pos,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,Floor,FPTS,FPTS_med,xG,xA,Raw_Goals,Goal_Share,Team_Goal_Proj,Proj_Gls,Raw_Ast,Ast_Share,Team_Ast_Proj,Proj_Ast,Raw_Sh,Sh_Share,Team_Sh_Proj,Proj_Sh,Raw_SoT,SoT_Share,Team_SoT_Proj,Proj_SoT,Raw_KP,KP_Share,Team_KP_Proj,Proj_KP,Raw_Crs,Crs_Share,Team_Crs_Proj,Proj_Crs,Raw_Fld,Fld_Share,Team_Fld_Proj,Proj_Fld,Raw_Fls,Fls_Share,Team_Fls_Proj,Proj_Fls,Raw_TklW,TklW_Share,Team_TklW_Proj,Proj_TklW,Raw_Int,Int_Share,Team_Int_Proj,Proj_Int,Pts_w_StartMins,Start_Mins,Sub_Mins,Starting,starts,subs,Player_,Proj_Mins,Reg_Pts,FPTS_med_w_mins,Pts_Ratio,Pts_Ratio2
1,Joao Felix,ATL,M/F,7000,1.355556,0,0,3,1,0,2,35,9,1,0,2,3,11.95082,8.631148,9.887324,0.3,0.4,0.221311,0.249744,1.710082,0.427084,0.252103,0.223507,1.710082,0.382215,170.169581,0.139463,12.824161,1.788494,56.723194,0.20407,4.208427,0.858812,113.446388,0.116969,9.228013,1.07939,0.0,0.0,12.576603,0.0,510.508744,0.340651,14.7097,5.01087,56.723194,0.042017,13.634803,0.572891,0.0,0.0,10.713856,0.0,113.446388,0.10043,13.366331,1.342384,16.127615,76.89144,25.5,y,1,2,Joao Felix,76.89144,10.825244,8.447229,15.464634,23.03945
12,Edin Dzeko,INT,F,8200,6.611111,6,1,17,8,8,11,140,12,4,2,2,1,8.682353,18.438655,21.158261,3.4,0.7,0.514286,0.30147,1.773846,0.534761,0.085961,0.072617,1.773846,0.128811,187.885663,0.14957,14.344536,2.145517,88.416783,0.202335,4.813882,0.974015,121.573076,0.128347,10.867015,1.394749,88.416783,0.089188,12.731561,1.1355,132.625174,0.139538,12.968348,1.809577,44.208391,0.034682,15.810967,0.548357,22.104196,0.027441,10.876046,0.298451,22.104196,0.025542,13.699217,0.349908,13.782259,73.066647,34.0,y,7,1,Edin Dzeko,73.066647,10.794144,17.177369,13.16359,16.807633
16,Kylian Mbappe,PSG,F,11400,7.711111,5,2,31,9,22,14,269,13,7,4,3,2,11.642939,19.294236,19.625,5.4,1.9,0.620297,0.337676,1.965138,0.663579,0.218253,0.164148,1.965138,0.322574,320.48701,0.241967,14.119949,3.416557,93.044616,0.241462,4.734241,1.143141,144.736069,0.142353,10.821648,1.540497,227.442394,0.203368,11.766012,2.392831,134.397778,0.117878,10.480741,1.235444,72.368035,0.096064,12.893521,1.238601,41.353163,0.049696,11.50508,0.571759,31.014872,0.027686,13.858211,0.383681,18.344154,79.719708,0.0,y,8,0,Kylian Mbappe,79.719708,14.807328,17.383325,12.988884,16.091363
27,Lionel Messi,PSG,M/F,10800,2.111111,0,0,9,2,1,5,114,6,1,1,0,0,12.069474,12.069474,10.76,0.9,0.5,0.35912,0.195496,1.965138,0.384177,0.199511,0.150053,1.965138,0.294874,323.207613,0.244021,14.119949,3.445561,71.823914,0.186392,4.734241,0.882424,179.559785,0.176604,10.821648,1.911143,35.911957,0.032111,11.766012,0.377815,215.471742,0.188986,10.480741,1.980712,35.911957,0.047671,12.893521,0.614644,35.911957,0.043157,11.50508,0.496528,0.0,0.0,13.858211,0.0,15.194302,75.814131,25.0,y,2,1,Lionel Messi,75.814131,13.208584,9.064001,12.23017,14.068798
32,Angel Di Maria,PSG,M/F,8400,4.155556,1,2,10,4,15,9,157,3,3,3,3,1,10.260963,15.194118,14.15625,0.4,1.0,0.08305,0.045211,1.965138,0.088845,0.207626,0.156156,1.965138,0.306868,186.863221,0.141081,14.119949,1.992059,74.745289,0.193973,4.734241,0.918316,168.176899,0.165408,10.821648,1.789989,280.294832,0.250626,11.766012,2.94887,56.058966,0.049168,10.480741,0.515319,56.058966,0.074415,12.893521,0.959467,56.058966,0.067369,11.50508,0.775085,56.058966,0.050042,13.858211,0.693499,11.303596,77.65205,18.0,y,4,2,Angel Di Maria,77.65205,10.20779,12.21402,12.152131,13.456662
34,Alvaro Morata,JUV,F,8500,4.255556,2,0,13,6,8,8,76,14,9,2,0,1,10.720104,15.067363,11.355556,1.4,1.3,0.094055,0.083911,1.339691,0.112415,0.217251,0.238694,1.339691,0.319776,195.526278,0.186981,13.113785,2.452028,90.242898,0.256535,4.339892,1.113335,120.323863,0.140852,10.057453,1.416614,120.323863,0.141375,10.147931,1.43466,210.566761,0.14952,15.784274,2.360072,135.364346,0.091496,14.18414,1.297796,30.080966,0.040607,10.02095,0.406925,0.0,0.0,14.694037,0.0,11.401164,64.005611,26.0,y,5,2,Alvaro Morata,64.005611,9.975494,8.07577,11.735876,13.413134
38,Neymar,PSG,M/F,11000,4.555556,1,2,8,1,11,11,208,18,4,4,4,1,11.822927,16.322927,17.02,1.7,1.1,0.312446,0.170088,1.965138,0.334247,0.202171,0.152053,1.965138,0.298805,132.33012,0.099909,14.119949,1.410708,16.541265,0.042927,4.734241,0.203225,181.953915,0.178958,10.821648,1.936624,181.953915,0.162694,11.766012,1.914264,297.742771,0.261144,10.480741,2.736984,66.16506,0.08783,12.893521,1.132436,66.16506,0.079514,11.50508,0.914815,66.16506,0.059064,13.858211,0.81852,14.285262,75.354652,0.0,y,5,0,Neymar,75.354652,13.031528,14.250402,11.846843,12.986602


In [14]:
mids

Unnamed: 0,Player,Team,Pos,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,Floor,FPTS,FPTS_med,xG,xA,Raw_Goals,Goal_Share,Team_Goal_Proj,Proj_Gls,Raw_Ast,Ast_Share,Team_Ast_Proj,Proj_Ast,Raw_Sh,Sh_Share,Team_Sh_Proj,Proj_Sh,Raw_SoT,SoT_Share,Team_SoT_Proj,Proj_SoT,Raw_KP,KP_Share,Team_KP_Proj,Proj_KP,Raw_Crs,Crs_Share,Team_Crs_Proj,Proj_Crs,Raw_Fld,Fld_Share,Team_Fld_Proj,Proj_Fld,Raw_Fls,Fls_Share,Team_Fls_Proj,Proj_Fls,Raw_TklW,TklW_Share,Team_TklW_Proj,Proj_TklW,Raw_Int,Int_Share,Team_Int_Proj,Proj_Int,Pts_w_StartMins,Start_Mins,Sub_Mins,Starting,starts,subs,Player_,Proj_Mins,Reg_Pts,FPTS_med_w_mins,Pts_Ratio,Pts_Ratio2
0,Valentin Rongier,MAR,M,3600,5.722222,0,1,9,2,5,5,394,7,7,17,9,0,9.153786,10.20233,8.76,1.1,0.5,0.168143,0.103409,1.190652,0.123124,0.076429,0.054863,1.190652,0.065323,123.814566,0.098066,11.40111,1.118062,27.514348,0.073252,3.491726,0.255775,68.78587,0.06136,7.742856,0.475104,68.78587,0.067918,10.926364,0.742097,96.300218,0.077588,13.083123,1.015091,96.300218,0.132558,11.428138,1.514886,233.871959,0.260633,10.68499,2.784857,123.814566,0.134769,16.529858,2.22771,9.352462,78.721607,20.0,y,6,2,Valentin Rongier,78.721607,6.125226,7.662236,17.014516,25.97906
1,Joao Felix,ATL,M/F,7000,1.355556,0,0,3,1,0,2,35,9,1,0,2,3,11.95082,8.631148,9.887324,0.3,0.4,0.221311,0.249744,1.710082,0.427084,0.252103,0.223507,1.710082,0.382215,170.169581,0.139463,12.824161,1.788494,56.723194,0.20407,4.208427,0.858812,113.446388,0.116969,9.228013,1.07939,0.0,0.0,12.576603,0.0,510.508744,0.340651,14.7097,5.01087,56.723194,0.042017,13.634803,0.572891,0.0,0.0,10.713856,0.0,113.446388,0.10043,13.366331,1.342384,16.127615,76.89144,25.5,y,1,2,Joao Felix,76.89144,10.825244,8.447229,15.464634,23.03945
4,Mikel Merino,RSO,M,4900,7.877778,1,0,14,7,7,8,364,14,16,16,6,1,8.400846,9.479831,8.51,1.2,1.0,0.015165,0.012454,0.825756,0.010284,0.123834,0.150944,0.825756,0.124643,156.030273,0.158878,8.264696,1.313079,78.015137,0.229534,2.877677,0.660524,89.160156,0.142813,5.764444,0.82324,78.015137,0.120983,9.932205,1.201626,156.030273,0.131822,12.606239,1.661775,178.320312,0.13275,15.005223,1.991942,178.320312,0.220052,10.666128,2.347105,66.870117,0.095468,14.550278,1.389091,9.097637,87.797987,0.0,y,8,0,Mikel Merino,87.797987,7.195936,8.301787,14.685584,18.566606
7,Dejan Kulusevski,JUV,M,6800,2.955556,0,1,7,2,6,4,58,5,17,3,2,1,6.381203,7.903759,2.46,0.9,0.9,0.269728,0.240638,1.339691,0.322381,0.261704,0.287534,1.339691,0.385207,183.193034,0.175187,13.113785,2.297361,52.340867,0.14879,4.339892,0.645734,104.681734,0.122541,10.057453,1.232454,157.022601,0.184494,10.147931,1.87223,130.852167,0.092916,15.784274,1.466616,444.897369,0.300718,14.18414,4.265421,78.5113,0.105985,10.02095,1.062075,52.340867,0.054108,14.694037,0.795061,12.151975,77.34817,17.666667,y,2,6,Dejan Kulusevski,77.34817,9.340485,2.114183,13.736007,17.870552
8,Danilo Pereira,PSG,M,4200,3.0,1,0,2,1,0,1,251,2,4,5,6,0,5.673333,9.006667,5.0,0.5,0.0,0.166667,0.090729,1.965138,0.178296,0.0,0.0,1.965138,0.0,60.0,0.0453,14.119949,0.639631,30.0,0.077854,4.734241,0.368578,30.0,0.029506,10.821648,0.319305,0.0,0.0,11.766012,0.0,60.0,0.052625,10.480741,0.551547,120.0,0.159292,12.893521,2.053837,150.0,0.180263,11.50508,2.073938,180.0,0.160682,13.858211,2.226759,7.495752,90.0,0.0,y,3,0,Danilo Pereira,90.0,6.202974,5.0,14.768985,17.847028
9,Weston McKennie,JUV,M,3000,3.388889,0,0,7,0,4,1,78,1,3,3,5,0,5.122623,5.122623,3.44,0.9,0.0,0.049322,0.044002,1.339691,0.05895,0.0,0.0,1.339691,0.0,147.785725,0.141327,13.113785,1.85333,0.0,0.0,4.339892,0.0,21.112246,0.024714,10.057453,0.248562,84.448986,0.099223,10.147931,1.006912,21.112246,0.014992,15.784274,0.23663,63.336739,0.042811,14.18414,0.607236,63.336739,0.085501,10.02095,0.856799,105.561232,0.109125,14.694037,1.603481,5.353723,71.547057,9.0,y,4,1,Weston McKennie,71.547057,4.156904,2.734688,13.856346,17.845744
10,Nicolo Barella,INT,M,7000,7.533333,1,5,13,2,12,17,302,13,8,11,3,2,9.018584,13.930088,14.498696,1.1,2.3,0.127719,0.074868,1.773846,0.132804,0.262115,0.221427,1.773846,0.392777,133.336862,0.106146,14.344536,1.52261,20.513363,0.046943,4.813882,0.225979,174.363589,0.184079,10.867015,2.00039,123.080181,0.124154,12.731561,1.580668,133.336862,0.140287,12.968348,1.819288,82.053454,0.064372,15.810967,1.017785,112.823499,0.140064,10.876046,1.523343,30.770045,0.035556,13.699217,0.487088,12.305769,77.267002,0.0,y,8,0,Nicolo Barella,77.267002,9.53435,12.447453,13.6205,17.57967
14,Matteo Guendouzi,MAR,M,5300,6.644444,2,2,13,5,2,8,381,20,5,6,4,1,9.108361,13.698662,12.59875,0.7,0.5,0.098274,0.060439,1.190652,0.071962,0.070196,0.050389,1.190652,0.059995,164.25769,0.130099,11.40111,1.483269,63.176035,0.168195,3.491726,0.587289,101.081655,0.09017,7.742856,0.698171,25.270414,0.024952,10.926364,0.27263,252.704138,0.2036,13.083123,2.663728,63.176035,0.086962,11.428138,0.993814,75.811242,0.084486,10.68499,0.902731,50.540828,0.055012,16.529858,0.909346,8.633165,83.95393,7.0,y,7,1,Matteo Guendouzi,83.95393,7.230024,11.752384,13.641554,16.28899
15,Marcelo Brozovic,INT,M,4000,7.111111,0,1,7,4,3,7,447,10,13,8,8,2,6.263438,6.685313,5.23,0.3,0.3,0.042188,0.02473,1.773846,0.043867,0.034189,0.028882,1.773846,0.051232,71.796772,0.057155,14.344536,0.819867,41.026727,0.093886,4.813882,0.451958,71.796772,0.075797,10.867015,0.82369,30.770045,0.031038,12.731561,0.395167,102.566817,0.107913,12.968348,1.399452,133.336862,0.104605,15.810967,1.6539,82.053454,0.101865,10.876046,1.107886,82.053454,0.094816,13.699217,1.298901,6.466862,72.936403,0.0,y,8,0,Marcelo Brozovic,72.936403,5.287682,4.238415,13.219205,16.167154
18,Thomas Lemar,ATL,M,7600,3.766667,2,2,8,3,5,7,146,5,5,4,3,2,8.60708,16.306195,15.946875,0.9,0.8,0.181276,0.204565,1.710082,0.349824,0.173276,0.153621,1.710082,0.262705,155.948837,0.127808,12.824161,1.639033,58.480814,0.210393,4.208427,0.885423,136.455232,0.140692,9.228013,1.298308,97.468023,0.089108,12.576603,1.120681,97.468023,0.065038,14.7097,0.956692,97.468023,0.072198,13.634803,0.984404,77.974419,0.095036,10.713856,1.018206,58.480814,0.051771,13.366331,0.691989,12.142859,73.425911,0.0,y,5,0,Thomas Lemar,73.425911,9.814652,13.010154,12.914016,15.977446


In [15]:
defenders

Unnamed: 0,Player,Team,Pos,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,Floor,FPTS,FPTS_med,xG,xA,Raw_Goals,Goal_Share,Team_Goal_Proj,Proj_Gls,Raw_Ast,Ast_Share,Team_Ast_Proj,Proj_Ast,Raw_Sh,Sh_Share,Team_Sh_Proj,Proj_Sh,Raw_SoT,SoT_Share,Team_SoT_Proj,Proj_SoT,Raw_KP,KP_Share,Team_KP_Proj,Proj_KP,Raw_Crs,Crs_Share,Team_Crs_Proj,Proj_Crs,Raw_Fld,Fld_Share,Team_Fld_Proj,Proj_Fld,Raw_Fls,Fls_Share,Team_Fls_Proj,Proj_Fls,Raw_TklW,TklW_Share,Team_TklW_Proj,Proj_TklW,Raw_Int,Int_Share,Team_Int_Proj,Proj_Int,Pts_w_StartMins,Start_Mins,Sub_Mins,Starting,starts,subs,Player_,Proj_Mins,Reg_Pts,FPTS_med_w_mins,Pts_Ratio,Pts_Ratio2
2,Joseba Zaldua,RSO,D,4100,2.088889,0,0,1,1,5,2,95,5,3,4,0,0,8.090426,8.090426,4.78,0.0,0.2,0.0,0.0,0.825756,0.0,0.087474,0.106624,0.825756,0.088045,39.36326,0.040082,8.264696,0.331263,39.36326,0.115813,2.877677,0.333274,78.726521,0.126101,5.764444,0.726903,196.816302,0.305215,9.932205,3.031459,196.816302,0.16628,12.606239,2.09616,118.089781,0.087912,15.005223,1.319132,157.453042,0.194301,10.666128,2.072443,0.0,0.0,14.550278,0.0,8.381772,82.225477,7.333333,y,2,3,Joseba Zaldua,82.225477,6.240476,4.367086,15.220672,20.443347
3,Leonardo Bonucci,JUV,D,3500,7.0,1,0,4,0,1,3,340,4,5,3,16,1,3.857143,5.071429,4.32,1.1,0.3,0.139059,0.124062,1.339691,0.166205,0.042857,0.047087,1.339691,0.063082,51.428571,0.049181,13.113785,0.644948,0.0,0.0,4.339892,0.0,38.571429,0.045152,10.057453,0.454115,12.857143,0.015107,10.147931,0.1533,51.428571,0.036519,15.784274,0.576421,64.285714,0.043452,14.18414,0.616335,38.571429,0.052069,10.02095,0.521782,205.714286,0.212659,14.694037,3.124812,6.570781,90.0,0.0,y,7,0,Leonardo Bonucci,90.0,5.385873,4.32,15.388209,18.773659
5,Aritz Elustondo,RSO,D,3300,7.011111,2,1,6,3,0,1,380,8,5,3,18,0,5.006339,8.714739,4.81,0.6,0.1,0.024467,0.020093,0.825756,0.016592,0.014263,0.017386,0.825756,0.014356,77.020602,0.078426,8.264696,0.64817,38.510301,0.113304,2.877677,0.326052,12.836767,0.020561,5.764444,0.118525,0.0,0.0,9.932205,0.0,102.694136,0.086761,12.606239,1.093727,64.183835,0.047781,15.005223,0.716971,38.510301,0.047523,10.666128,0.506884,231.061807,0.32988,14.550278,4.799841,6.070843,90.0,1.0,y,7,1,Aritz Elustondo,90.0,5.072245,4.81,15.37044,18.396494
6,Achraf Hakimi,PSG,D,5800,5.166667,2,2,6,4,11,9,298,2,3,6,6,0,8.16,14.353548,8.16,0.5,1.6,0.085177,0.046368,1.965138,0.09112,0.272565,0.204997,1.965138,0.402847,91.990777,0.069453,14.119949,0.980669,61.327185,0.159152,4.734241,0.753462,137.986166,0.135714,10.821648,1.468654,168.649758,0.150798,11.766012,1.774297,30.663592,0.026894,10.480741,0.281873,45.995389,0.061056,12.893521,0.787225,91.990777,0.11055,11.50508,1.271888,91.990777,0.082118,13.858211,1.138007,10.517532,79.21428,17.0,y,5,2,Achraf Hakimi,79.21428,8.112818,7.182095,13.987617,18.133676
11,William Saliba,MAR,D,3300,7.0,0,0,4,1,0,1,528,4,4,10,12,1,4.937143,4.722857,4.92,0.3,0.0,0.042857,0.026357,1.190652,0.031382,0.0,0.0,1.190652,0.0,51.428571,0.040733,11.40111,0.464407,12.857143,0.03423,3.491726,0.119521,12.857143,0.011469,7.742856,0.088804,0.0,0.0,10.926364,0.0,51.428571,0.041435,13.083123,0.542103,51.428571,0.070792,11.428138,0.809016,128.571429,0.143283,10.68499,1.530979,154.285714,0.167936,16.529858,2.775957,5.55168,90.0,0.0,y,7,0,William Saliba,90.0,4.895628,4.92,14.835237,16.823273
13,Alex Sandro,JUV,D,4600,5.533333,0,0,1,1,10,9,242,9,7,5,10,0,6.928916,6.928916,6.64,0.2,0.5,0.005351,0.004774,1.339691,0.006396,0.080086,0.08799,1.339691,0.117879,14.415416,0.013785,13.113785,0.180779,14.415416,0.040979,4.339892,0.177844,129.738741,0.151873,10.057453,1.527458,144.154157,0.169374,10.147931,1.718796,129.738741,0.092126,15.784274,1.454136,100.90791,0.068206,14.18414,0.967447,72.077079,0.0973,10.02095,0.975035,144.154157,0.14902,14.694037,2.18971,7.676005,79.7653,3.0,y,6,1,Alex Sandro,79.7653,6.298453,5.884907,13.69229,16.686968
17,Mario Hermoso,ATL,D,3400,5.244444,0,1,3,0,4,4,300,3,7,6,11,2,5.110169,5.682203,4.65,0.1,0.5,0.000632,0.000713,1.710082,0.00122,0.084703,0.075095,1.710082,0.128419,45.739761,0.037486,12.824161,0.480728,0.0,0.0,4.208427,0.0,60.986348,0.06288,9.228013,0.580257,60.986348,0.055756,12.576603,0.701217,45.739761,0.030521,14.7097,0.448956,106.726109,0.079056,13.634803,1.077909,91.479522,0.111497,10.713856,1.194558,167.712456,0.14847,13.366331,1.984501,5.447796,79.959878,29.0,y,6,1,Mario Hermoso,79.959878,4.683928,4.13126,13.776257,16.022931
20,Pol Lirola,MAR,D,4400,4.644444,0,1,3,0,17,4,190,8,1,3,8,0,8.009569,9.301435,6.816,0.2,0.6,0.03267,0.020092,1.190652,0.023923,0.09801,0.070355,1.190652,0.083768,44.104717,0.034933,11.40111,0.398272,0.0,0.0,3.491726,0.0,58.806289,0.052458,7.742856,0.406175,249.92673,0.246773,10.926364,2.696336,117.612579,0.094759,13.083123,1.239742,14.701572,0.020237,11.428138,0.231269,44.104717,0.049151,10.68499,0.525182,117.612579,0.128018,16.529858,2.116122,6.761806,68.280636,6.0,y,6,1,Pol Lirola,68.280636,5.560058,5.17112,12.636496,15.36774
21,Juan Cuadrado,JUV,D,6000,5.0,1,0,3,3,10,10,275,7,5,9,6,1,9.0,10.7,10.255556,0.1,0.7,0.001991,0.001776,1.339691,0.00238,0.112799,0.123932,1.339691,0.166031,43.508346,0.041607,13.113785,0.545623,43.508346,0.123682,4.339892,0.536767,145.027819,0.169771,10.057453,1.707462,145.027819,0.170401,10.147931,1.729213,101.519473,0.072088,15.784274,1.137849,72.513909,0.049014,14.18414,0.695222,130.525037,0.176201,10.02095,1.7657,87.016691,0.089954,14.694037,1.321789,9.123398,72.513909,0.0,y,6,0,Juan Cuadrado,72.513909,7.616495,8.263005,12.694159,15.205664
22,Nuno Mendes,PSG,D,3800,2.944444,0,0,1,0,4,1,156,4,3,3,9,2,6.086038,5.06717,3.40625,0.0,0.2,0.0,0.0,1.965138,0.0,0.060108,0.045207,1.965138,0.088839,27.04861,0.020422,14.119949,0.288352,0.0,0.0,4.734241,0.0,27.04861,0.026603,10.821648,0.287892,108.194438,0.096742,11.766012,1.13827,108.194438,0.094895,10.480741,0.994571,81.145829,0.107716,12.893521,1.388836,81.145829,0.097517,11.50508,1.121943,243.437486,0.217311,13.858211,3.011537,5.771615,79.643128,5.0,y,3,1,Nuno Mendes,79.643128,5.073363,3.014271,13.350956,15.18846


In [16]:
gks

Unnamed: 0,Player,Team,Pos,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,Floor,FPTS,FPTS_med,xG,xA,Raw_Goals,Goal_Share,Team_Goal_Proj,Proj_Gls,Raw_Ast,Ast_Share,Team_Ast_Proj,Proj_Ast,Raw_Sh,Sh_Share,Team_Sh_Proj,Proj_Sh,Raw_SoT,SoT_Share,Team_SoT_Proj,Proj_SoT,Raw_KP,KP_Share,Team_KP_Proj,Proj_KP,Raw_Crs,Crs_Share,Team_Crs_Proj,Proj_Crs,Raw_Fld,Fld_Share,Team_Fld_Proj,Proj_Fld,Raw_Fls,Fls_Share,Team_Fls_Proj,Proj_Fls,Raw_TklW,TklW_Share,Team_TklW_Proj,Proj_TklW,Raw_Int,Int_Share,Team_Int_Proj,Proj_Int,Pts_w_StartMins,Start_Mins,Sub_Mins,Starting,starts,subs,Player_,Proj_Mins,Reg_Pts,FPTS_med_w_mins,Pts_Ratio,Pts_Ratio2
30,Jan Oblak,ATL,GK,5500,8.0,0,0,0,0,0,0,152,3,0,0,0,0,0.755,0.755,0.43,0.0,0.0,0.0,0.0,1.710082,0.0,0.0,0.0,1.710082,0.0,0.0,0.0,12.824161,0.0,0.0,0.0,4.208427,0.0,0.0,0.0,9.228013,0.0,0.0,0.0,12.576603,0.0,33.75,0.022521,14.7097,0.331271,0.0,0.0,13.634803,0.0,0.0,0.0,10.713856,0.0,0.0,0.0,13.366331,0.0,7.438588,90.0,0.0,y,8,0,Jan Oblak,90.0,7.11661,0.43,12.939291,13.524706
35,Samir Handanovic,INT,GK,5100,8.0,0,0,0,0,0,0,215,0,0,0,0,0,0.5375,0.5375,0.53,0.0,0.0,0.0,0.0,1.773846,0.0,0.0,0.0,1.773846,0.0,0.0,0.0,14.344536,0.0,0.0,0.0,4.813882,0.0,0.0,0.0,10.867015,0.0,0.0,0.0,12.731561,0.0,0.0,0.0,12.968348,0.0,0.0,0.0,15.810967,0.0,0.0,0.0,10.876046,0.0,0.0,0.0,13.699217,0.0,6.79291,90.0,0.0,y,8,0,Samir Handanovic,90.0,6.609851,0.53,12.960491,13.319431
42,Keylor Navas,PSG,GK,5500,4.0,0,0,0,0,0,0,84,0,0,0,0,0,0.42,0.42,0.43,0.0,0.0,0.0,0.0,1.965138,0.0,0.0,0.0,1.965138,0.0,0.0,0.0,14.119949,0.0,0.0,0.0,4.734241,0.0,0.0,0.0,10.821648,0.0,0.0,0.0,11.766012,0.0,0.0,0.0,10.480741,0.0,0.0,0.0,12.893521,0.0,0.0,0.0,11.50508,0.0,0.0,0.0,13.858211,0.0,6.64521,90.0,0.0,y,4,0,Keylor Navas,90.0,6.846707,0.43,12.448557,12.082201
51,Wojciech Szczesny,JUV,GK,4400,7.0,0,0,0,0,0,0,159,3,2,0,0,2,0.74,0.311429,0.38,0.0,0.0,0.0,0.0,1.339691,0.0,0.0,0.0,1.339691,0.0,0.0,0.0,13.113785,0.0,0.0,0.0,4.339892,0.0,0.0,0.0,10.057453,0.0,0.0,0.0,10.147931,0.0,38.571429,0.027389,15.784274,0.432316,25.714286,0.017381,14.18414,0.246534,0.0,0.0,10.02095,0.0,0.0,0.0,14.694037,0.0,4.695078,90.0,0.0,y,7,0,Wojciech Szczesny,90.0,5.393748,0.38,12.258518,10.670631
59,Alex Remiro,RSO,GK,3900,6.0,0,0,0,0,0,0,163,1,0,0,0,0,0.71,0.71,0.5,0.0,0.0,0.0,0.0,0.825756,0.0,0.0,0.0,0.825756,0.0,0.0,0.0,8.264696,0.0,0.0,0.0,2.877677,0.0,0.0,0.0,5.764444,0.0,0.0,0.0,9.932205,0.0,15.0,0.012673,12.606239,0.159755,0.0,0.0,15.005223,0.0,0.0,0.0,10.666128,0.0,0.0,0.0,14.550278,0.0,3.479804,90.0,0.0,y,6,0,Alex Remiro,90.0,4.621439,0.5,11.849844,8.922575
62,Pau Lopez,MAR,GK,4000,6.0,0,0,0,0,0,0,177,0,0,0,0,0,0.59,0.59,0.58,0.0,0.0,0.0,0.0,1.190652,0.0,0.0,0.0,1.190652,0.0,0.0,0.0,11.40111,0.0,0.0,0.0,3.491726,0.0,0.0,0.0,7.742856,0.0,0.0,0.0,10.926364,0.0,0.0,0.0,13.083123,0.0,0.0,0.0,11.428138,0.0,0.0,0.0,10.68499,0.0,0.0,0.0,16.529858,0.0,3.229062,90.0,0.0,y,6,0,Pau Lopez,90.0,4.607913,0.58,11.519783,8.072655


In [17]:
utils

Unnamed: 0,Player,Team,Pos,Salary,90s,Gls,Ast,Sh,SoT,Crs,KP,Pass_Cmp,Fld,Fls,TklW,Int,CrdY,Floor,FPTS,FPTS_med,xG,xA,Raw_Goals,Goal_Share,Team_Goal_Proj,Proj_Gls,Raw_Ast,Ast_Share,Team_Ast_Proj,Proj_Ast,Raw_Sh,Sh_Share,Team_Sh_Proj,Proj_Sh,Raw_SoT,SoT_Share,Team_SoT_Proj,Proj_SoT,Raw_KP,KP_Share,Team_KP_Proj,Proj_KP,Raw_Crs,Crs_Share,Team_Crs_Proj,Proj_Crs,Raw_Fld,Fld_Share,Team_Fld_Proj,Proj_Fld,Raw_Fls,Fls_Share,Team_Fls_Proj,Proj_Fls,Raw_TklW,TklW_Share,Team_TklW_Proj,Proj_TklW,Raw_Int,Int_Share,Team_Int_Proj,Proj_Int,Pts_w_StartMins,Start_Mins,Sub_Mins,Starting,starts,subs,Player_,Proj_Mins,Reg_Pts,FPTS_med_w_mins,Pts_Ratio,Pts_Ratio2
0,Valentin Rongier,MAR,M,3600,5.722222,0,1,9,2,5,5,394,7,7,17,9,0,9.153786,10.20233,8.76,1.1,0.5,0.168143,0.103409,1.190652,0.123124,0.076429,0.054863,1.190652,0.065323,123.814566,0.098066,11.40111,1.118062,27.514348,0.073252,3.491726,0.255775,68.78587,0.06136,7.742856,0.475104,68.78587,0.067918,10.926364,0.742097,96.300218,0.077588,13.083123,1.015091,96.300218,0.132558,11.428138,1.514886,233.871959,0.260633,10.68499,2.784857,123.814566,0.134769,16.529858,2.22771,9.352462,78.721607,20.0,y,6,2,Valentin Rongier,78.721607,6.125226,7.662236,17.014516,25.97906
1,Joao Felix,ATL,M/F,7000,1.355556,0,0,3,1,0,2,35,9,1,0,2,3,11.95082,8.631148,9.887324,0.3,0.4,0.221311,0.249744,1.710082,0.427084,0.252103,0.223507,1.710082,0.382215,170.169581,0.139463,12.824161,1.788494,56.723194,0.20407,4.208427,0.858812,113.446388,0.116969,9.228013,1.07939,0.0,0.0,12.576603,0.0,510.508744,0.340651,14.7097,5.01087,56.723194,0.042017,13.634803,0.572891,0.0,0.0,10.713856,0.0,113.446388,0.10043,13.366331,1.342384,16.127615,76.89144,25.5,y,1,2,Joao Felix,76.89144,10.825244,8.447229,15.464634,23.03945
2,Joseba Zaldua,RSO,D,4100,2.088889,0,0,1,1,5,2,95,5,3,4,0,0,8.090426,8.090426,4.78,0.0,0.2,0.0,0.0,0.825756,0.0,0.087474,0.106624,0.825756,0.088045,39.36326,0.040082,8.264696,0.331263,39.36326,0.115813,2.877677,0.333274,78.726521,0.126101,5.764444,0.726903,196.816302,0.305215,9.932205,3.031459,196.816302,0.16628,12.606239,2.09616,118.089781,0.087912,15.005223,1.319132,157.453042,0.194301,10.666128,2.072443,0.0,0.0,14.550278,0.0,8.381772,82.225477,7.333333,y,2,3,Joseba Zaldua,82.225477,6.240476,4.367086,15.220672,20.443347
3,Leonardo Bonucci,JUV,D,3500,7.0,1,0,4,0,1,3,340,4,5,3,16,1,3.857143,5.071429,4.32,1.1,0.3,0.139059,0.124062,1.339691,0.166205,0.042857,0.047087,1.339691,0.063082,51.428571,0.049181,13.113785,0.644948,0.0,0.0,4.339892,0.0,38.571429,0.045152,10.057453,0.454115,12.857143,0.015107,10.147931,0.1533,51.428571,0.036519,15.784274,0.576421,64.285714,0.043452,14.18414,0.616335,38.571429,0.052069,10.02095,0.521782,205.714286,0.212659,14.694037,3.124812,6.570781,90.0,0.0,y,7,0,Leonardo Bonucci,90.0,5.385873,4.32,15.388209,18.773659
4,Mikel Merino,RSO,M,4900,7.877778,1,0,14,7,7,8,364,14,16,16,6,1,8.400846,9.479831,8.51,1.2,1.0,0.015165,0.012454,0.825756,0.010284,0.123834,0.150944,0.825756,0.124643,156.030273,0.158878,8.264696,1.313079,78.015137,0.229534,2.877677,0.660524,89.160156,0.142813,5.764444,0.82324,78.015137,0.120983,9.932205,1.201626,156.030273,0.131822,12.606239,1.661775,178.320312,0.13275,15.005223,1.991942,178.320312,0.220052,10.666128,2.347105,66.870117,0.095468,14.550278,1.389091,9.097637,87.797987,0.0,y,8,0,Mikel Merino,87.797987,7.195936,8.301787,14.685584,18.566606
5,Aritz Elustondo,RSO,D,3300,7.011111,2,1,6,3,0,1,380,8,5,3,18,0,5.006339,8.714739,4.81,0.6,0.1,0.024467,0.020093,0.825756,0.016592,0.014263,0.017386,0.825756,0.014356,77.020602,0.078426,8.264696,0.64817,38.510301,0.113304,2.877677,0.326052,12.836767,0.020561,5.764444,0.118525,0.0,0.0,9.932205,0.0,102.694136,0.086761,12.606239,1.093727,64.183835,0.047781,15.005223,0.716971,38.510301,0.047523,10.666128,0.506884,231.061807,0.32988,14.550278,4.799841,6.070843,90.0,1.0,y,7,1,Aritz Elustondo,90.0,5.072245,4.81,15.37044,18.396494
6,Achraf Hakimi,PSG,D,5800,5.166667,2,2,6,4,11,9,298,2,3,6,6,0,8.16,14.353548,8.16,0.5,1.6,0.085177,0.046368,1.965138,0.09112,0.272565,0.204997,1.965138,0.402847,91.990777,0.069453,14.119949,0.980669,61.327185,0.159152,4.734241,0.753462,137.986166,0.135714,10.821648,1.468654,168.649758,0.150798,11.766012,1.774297,30.663592,0.026894,10.480741,0.281873,45.995389,0.061056,12.893521,0.787225,91.990777,0.11055,11.50508,1.271888,91.990777,0.082118,13.858211,1.138007,10.517532,79.21428,17.0,y,5,2,Achraf Hakimi,79.21428,8.112818,7.182095,13.987617,18.133676
7,Dejan Kulusevski,JUV,M,6800,2.955556,0,1,7,2,6,4,58,5,17,3,2,1,6.381203,7.903759,2.46,0.9,0.9,0.269728,0.240638,1.339691,0.322381,0.261704,0.287534,1.339691,0.385207,183.193034,0.175187,13.113785,2.297361,52.340867,0.14879,4.339892,0.645734,104.681734,0.122541,10.057453,1.232454,157.022601,0.184494,10.147931,1.87223,130.852167,0.092916,15.784274,1.466616,444.897369,0.300718,14.18414,4.265421,78.5113,0.105985,10.02095,1.062075,52.340867,0.054108,14.694037,0.795061,12.151975,77.34817,17.666667,y,2,6,Dejan Kulusevski,77.34817,9.340485,2.114183,13.736007,17.870552
8,Danilo Pereira,PSG,M,4200,3.0,1,0,2,1,0,1,251,2,4,5,6,0,5.673333,9.006667,5.0,0.5,0.0,0.166667,0.090729,1.965138,0.178296,0.0,0.0,1.965138,0.0,60.0,0.0453,14.119949,0.639631,30.0,0.077854,4.734241,0.368578,30.0,0.029506,10.821648,0.319305,0.0,0.0,11.766012,0.0,60.0,0.052625,10.480741,0.551547,120.0,0.159292,12.893521,2.053837,150.0,0.180263,11.50508,2.073938,180.0,0.160682,13.858211,2.226759,7.495752,90.0,0.0,y,3,0,Danilo Pereira,90.0,6.202974,5.0,14.768985,17.847028
9,Weston McKennie,JUV,M,3000,3.388889,0,0,7,0,4,1,78,1,3,3,5,0,5.122623,5.122623,3.44,0.9,0.0,0.049322,0.044002,1.339691,0.05895,0.0,0.0,1.339691,0.0,147.785725,0.141327,13.113785,1.85333,0.0,0.0,4.339892,0.0,21.112246,0.024714,10.057453,0.248562,84.448986,0.099223,10.147931,1.006912,21.112246,0.014992,15.784274,0.23663,63.336739,0.042811,14.18414,0.607236,63.336739,0.085501,10.02095,0.856799,105.561232,0.109125,14.694037,1.603481,5.353723,71.547057,9.0,y,4,1,Weston McKennie,71.547057,4.156904,2.734688,13.856346,17.845744


In [18]:
start_cell = time.time()

print('Getting Name Combinations')
start = time.time()
s1 = time.time()
F_combos_name = list(itertools.combinations(list(forwards['Player']), 2))
print('combos took', round(time.time()-s1,2), 'sec')
M_combos_name = list(itertools.combinations(list(mids['Player']), 2))
D_combos_name = list(itertools.combinations(list(defenders['Player']), 2))
#GK_combos_name = list(itertools.combinations(list(gks['Player']), 1))
#U_combos_name = list(itertools.combinations(list(utils['Player']), 1))
GK_combos_name = list(gks['Player'])
U_combos_name = list(utils['Player'])
s1 = time.time()
res_name = expandgrid(F_combos_name, M_combos_name, D_combos_name, GK_combos_name, U_combos_name)
print('expandgrid took', round(time.time()-s1,2), 'sec')
stop = time.time()
print('Took', round(stop-start,2), 'seconds')
print('')

print('Getting Salary Combinations')
start = time.time()
F_combos_salary = list(itertools.combinations(list(forwards['Salary']), 2))
M_combos_salary = list(itertools.combinations(list(mids['Salary']), 2))
D_combos_salary = list(itertools.combinations(list(defenders['Salary']), 2))
#GK_combos_salary = list(itertools.combinations(list(gks['Salary']), 1))
#U_combos_salary = list(itertools.combinations(list(utils['Salary']), 1))
GK_combos_salary = list(gks['Salary'])
U_combos_salary = list(utils['Salary'])
res_salary = expandgrid(F_combos_salary, M_combos_salary, D_combos_salary, GK_combos_salary, U_combos_salary)
stop = time.time()
print('Took', round(stop-start,2), 'seconds')
print('')

print('Getting Point Projection Combinations')
start = time.time()
F_combos_pts = list(itertools.combinations(list(forwards['Reg_Pts']), 2))
M_combos_pts = list(itertools.combinations(list(mids['Reg_Pts']), 2))
D_combos_pts = list(itertools.combinations(list(defenders['Reg_Pts']), 2))
#GK_combos_pts = list(itertools.combinations(list(gks['Reg_Pts']), 1))
#U_combos_pts = list(itertools.combinations(list(utils['Reg_Pts']), 1))
GK_combos_pts = list(gks['Reg_Pts'])
U_combos_pts = list(utils['Reg_Pts'])
res_pts = expandgrid(F_combos_pts, M_combos_pts, D_combos_pts, GK_combos_pts, U_combos_pts)
stop = time.time()
print('Took', round(stop-start,2), 'seconds')
print('')
stop_cell = time.time()
print('Took', round(stop_cell-start_cell,2), 'seconds total')

Getting Name Combinations
combos took 0.0 sec
expandgrid took 61.41 sec
Took 61.42 seconds

Getting Salary Combinations
Took 70.93 seconds

Getting Point Projection Combinations
Took 68.33 seconds

Took 200.69 seconds total


In [19]:
tmp_df = pd.DataFrame(columns=['F1_name','F2_name','M1_name','M2_name','D1_name','D2_name','GK_name','UTIL_name','Price','Proj_Pts'])
start = time.time()
N = len(res_salary['Var2'])
print(N, 'lineups to start')
print('')
#N = int(N/100)
s1 = time.time()
print('getting points in')
proj_pts = np.zeros(N)
for i in range(N):
    proj_pts[i] = res_pts['Var1'][i][0] + res_pts['Var1'][i][1] + res_pts['Var2'][i][0] + res_pts['Var2'][i][1] + res_pts['Var3'][i][0] + res_pts['Var3'][i][1] + res_pts['Var4'][i] + res_pts['Var5'][i]
tmp_df['Proj_Pts'] = proj_pts
tmp_df = tmp_df[tmp_df['Proj_Pts'] > min_pts]
print(len(tmp_df), 'Lineups Remaining')
print('points took', round(time.time()-s1,2), 'sec')
print('')

s1 = time.time()
print('getting prices in')
prices = np.zeros(len(tmp_df), dtype=int)
ctr=0
for i in tmp_df.index:
    prices[ctr] = res_salary['Var1'][i][0] + res_salary['Var1'][i][1] + res_salary['Var2'][i][0] + res_salary['Var2'][i][1] + res_salary['Var3'][i][0] + res_salary['Var3'][i][1] + res_salary['Var4'][i] + res_salary['Var5'][i]
    ctr+=1
tmp_df['Price'] = prices
tmp_df = tmp_df[tmp_df['Price'] <= 50000]
print(len(tmp_df), 'Lineups Remaining')
print('prices took', round(time.time()-s1,2), 'sec')
print('')

s1 = time.time()
print('getting names in')
dtype = np.dtype('U26')
data = ['']*len(tmp_df)
f1, f2, m1, m2 = np.array(data, dtype=dtype), np.array(data, dtype=dtype), np.array(data, dtype=dtype), np.array(data, dtype=dtype)
d1, d2, gk, u = np.array(data, dtype=dtype), np.array(data, dtype=dtype), np.array(data, dtype=dtype), np.array(data, dtype=dtype)
ctr=0
for i in tmp_df.index:
    f1[ctr] = res_name['Var1'][i][0]
    f2[ctr] = res_name['Var1'][i][1]
    m1[ctr] = res_name['Var2'][i][0]
    m2[ctr] = res_name['Var2'][i][1]
    d1[ctr] = res_name['Var3'][i][0]
    d2[ctr] = res_name['Var3'][i][1]
    gk[ctr] = res_name['Var4'][i]
    u[ctr] = res_name['Var5'][i]
    ctr+=1
    
tmp_df['F1_name'] = f1
tmp_df['F2_name'] = f2
tmp_df['M1_name'] = m1
tmp_df['M2_name'] = m2
tmp_df['D1_name'] = d1
tmp_df['D2_name'] = d2
tmp_df['GK_name'] = gk
tmp_df['UTIL_name'] = u
print(len(tmp_df), 'Lineups Remaining')
print('names took', round(time.time()-s1,2), 'sec')
print('')


print('Removing Lineups with Repeat Players')
s1 = time.time()
players_array = np.array(tmp_df[['F1_name', 'F2_name', 'M1_name', 'M2_name', 'D1_name', 'D2_name', 'GK_name', 'UTIL_name']])
sets2 = list(map(len, list(map(np.unique, players_array))))
tmp_df['Num_Players'] = sets2
tmp_df = tmp_df[tmp_df['Num_Players'] == 8]
tmp_df = tmp_df.reset_index().drop(columns=['index', 'Num_Players'])
s2 = time.time()
print(len(tmp_df), 'Lineups Remaining')
print('Took', round(s2-s1,2), 'seconds')
print('')

print('Checking for repeat lineups')
s1 = time.time()
tmp_df['Price'] = tmp_df['Price'].astype(str)
tmp_df['Proj_Pts'] = tmp_df['Proj_Pts'].astype(str)
tmp = pd.DataFrame(np.sort(tmp_df.values, axis=1), index=tmp_df.index, columns=tmp_df.columns)
tmp_df = tmp_df[~tmp.duplicated()]
tmp_df = tmp_df.reset_index().drop(columns='index')
tmp_df['Price'] = tmp_df['Price'].astype(int)
tmp_df['Proj_Pts'] = tmp_df['Proj_Pts'].astype(float)
s2 = time.time()
print(len(tmp_df), 'Lineups Remaining')
print('Took', round(s2-s1,2), 'seconds')
print('')

stop = time.time()
print('Took', round(stop-start,2), 'seconds')
#print('Should Take', round((stop-start)*100,2), 'seconds')


139829760 lineups to start

getting points in
31227395 Lineups Remaining
points took 233.49 sec

getting prices in
274227 Lineups Remaining
prices took 50.89 sec

getting names in
274227 Lineups Remaining
names took 3.73 sec

Removing Lineups with Repeat Players
75769 Lineups Remaining
Took 2.64 seconds

Checking for repeat lineups
31956 Lineups Remaining
Took 0.47 seconds

Took 291.24 seconds


In [20]:
final_df = tmp_df.sort_values(by='Proj_Pts', ascending=False).reset_index().drop(columns='index')
final_df

Unnamed: 0,F1_name,F2_name,M1_name,M2_name,D1_name,D2_name,GK_name,UTIL_name,Price,Proj_Pts
0,Joao Felix,Kylian Mbappe,Dejan Kulusevski,Nicolo Barella,Aritz Elustondo,Achraf Hakimi,Samir Handanovic,Valentin Rongier,50000,70.427546
1,Joao Felix,Kylian Mbappe,Valentin Rongier,Dejan Kulusevski,Aritz Elustondo,Achraf Hakimi,Samir Handanovic,Nicolo Barella,50000,70.427546
2,Joao Felix,Edin Dzeko,Valentin Rongier,Mikel Merino,Leonardo Bonucci,Achraf Hakimi,Jan Oblak,Kylian Mbappe,49900,70.363178
3,Joao Felix,Kylian Mbappe,Valentin Rongier,Mikel Merino,Joseba Zaldua,Leonardo Bonucci,Jan Oblak,Dimitri Payet,50000,70.332120
4,Edin Dzeko,Kylian Mbappe,Joao Felix,Nicolo Barella,Joseba Zaldua,Leonardo Bonucci,Samir Handanovic,Valentin Rongier,49900,70.322490
...,...,...,...,...,...,...,...,...,...,...
31951,Joao Felix,Kylian Mbappe,Dejan Kulusevski,Nicolo Barella,Giorgio Chiellini,Renan Lodi,Jan Oblak,Aritz Elustondo,49900,67.778496
31952,Joao Felix,Alvaro Morata,Valentin Rongier,Mikel Merino,Achraf Hakimi,Alex Sandro,Samir Handanovic,Dimitri Payet,49500,67.778449
31953,Joao Felix,Alvaro Morata,Valentin Rongier,Dimitri Payet,Achraf Hakimi,Alex Sandro,Samir Handanovic,Mikel Merino,49500,67.778449
31954,Joao Felix,Edin Dzeko,Mikel Merino,Danilo Pereira,Alex Sandro,Giorgio Chiellini,Jan Oblak,Kylian Mbappe,49000,67.778385


In [21]:
pos_depth = pd.DataFrame(columns=['F1','F2','M1','M2','D1','D2','GK','UTIL'], index=[0])
f2 = forwards.reset_index(drop=True)
m2 = mids.reset_index(drop=True)
d2 = defenders.reset_index(drop=True)
gk2 = gks.reset_index(drop=True)
u2 = utils.reset_index(drop=True)
pos_depth['F1'][0] = f2[f2['Player'] == final_df['F1_name'][0]].index[0] + 1
pos_depth['F2'][0] = f2[f2['Player'] == final_df['F2_name'][0]].index[0] + 1
pos_depth['M1'][0] = m2[m2['Player'] == final_df['M1_name'][0]].index[0] + 1
pos_depth['M2'][0] = m2[m2['Player'] == final_df['M2_name'][0]].index[0] + 1
pos_depth['D1'][0] = d2[d2['Player'] == final_df['D1_name'][0]].index[0] + 1
pos_depth['D2'][0] = d2[d2['Player'] == final_df['D2_name'][0]].index[0] + 1
pos_depth['GK'][0] = gk2[gk2['Player'] == final_df['GK_name'][0]].index[0] + 1
pos_depth['UTIL'][0] = u2[u2['Player'] == final_df['UTIL_name'][0]].index[0] + 1

pos_depth

Unnamed: 0,F1,F2,M1,M2,D1,D2,GK,UTIL
0,1,3,4,7,3,4,2,1


In [22]:
final_df.to_csv('Generated_Lineups/'+league+'/'+contest_name+'_'+date+'_lineups.csv')

In [24]:
f1 = 26.5  #25.72 # B. Vazquez - originalseph
f2 = 6.4  #4.86 # J. Shaffelburg - gandriole (2)
m1 = 21.98  #16.74 # L. Acosta - Alex2rich (7)
m2 = 15.14  #8.62 # J. Gressel - originalseph
d1 = 10.88  #5.92 # G. Bello - ederg
d2 = 13.24  #9.2 # M. Pineda - Alex2rich (4)
gk = 2.3  #-1.84 # A. Bono - 94expos4ever (2)
u = 26.48   #20.48 # M. Moreno - Alex2rich (7)
f1 + f2 + m1 + m2 + d1 + d2 + gk + u

122.91999999999999