<a href="https://colab.research.google.com/github/Bryanwaterhouse19/SoccerStats/blob/main/Fantasy_Scoring_Test_Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [209]:
import pandas as pd

## Functions

In [210]:
def get_player_name_from_url(url):
  """Extract a human-readable player name from an FBref match-log URL.

  The name is taken from the last path segment of the URL (for example
  ``Kylian-Mbappe-Match-Logs``): the trailing ``-Match-Logs`` marker is
  removed and the remaining hyphens are turned into spaces.

  Parsing the last segment instead of slicing at fixed character offsets
  keeps the result correct when the path before the name has a different
  length, e.g. ``.../keeper/...`` versus ``.../summary/...``.

  Args:
    url: Match-log URL whose last path segment is ``<First>-<Last>-Match-Logs``.

  Returns:
    The player name with spaces instead of hyphens, e.g. ``"Kylian Mbappe"``.
  """
  suffix = "-Match-Logs"

  # Last path segment; tolerate a trailing slash.
  slug = url.rstrip("/").rsplit("/", 1)[-1]

  if slug.endswith(suffix):
    slug = slug[:-len(suffix)]

  return slug.replace("-", " ")

In [211]:
def get_url_from_player_name(name):
  """Placeholder for a name -> match-log URL lookup (not implemented yet).

  The player URLs contain a seemingly random id of letters and digits, so
  the URL cannot simply be assembled from the name alone.

  Args:
    name: Player name (currently unused).

  Returns:
    Always ``None``.
  """
  # TODO: figure out how to resolve the per-player id that appears in the URL.
  return None

In [212]:
def get_outfield_player_match_logs(url):
  """Load an outfield player's match-log table from `url` into a DataFrame.

  The first HTML table found at `url` is used. Its column labels are
  replaced with the flat, readable names listed below, and a
  'Player Name' column (derived from the URL) is appended.

  Args:
    url: Match-log page URL for an outfield player.

  Returns:
    The relabelled DataFrame with the extra 'Player Name' column.
  """
  # Replacement labels, in the table's column order.
  # NOTE(review): assumes the page's first table has exactly these columns in this order - confirm.
  readable_columns = [
      'Date','Day','Competition','Round','Venue','Result','Squad','Opponent','Start','Position',
      'Mins','Goals','Assists','Penalties Made', 'Penalties Taken','Shots','Shots on Target',
      'Yellow Cards','Red Cards','Touches','Presses','Tackles','Interceptions','Blocks',
      'Expected Goals', 'Non Penalty Expected Goals', 'Expected Assists','Shot Creating Actions',
      'Goal Creating Actions','Passes Completed','Passes Attempted','Pass Completion %',
      'Progressive Passes','Carries','Progressive Carries','Dribbles Succeeded', 'Dribbles Attempted',
      'Match Report',
  ]

  match_log = pd.read_html(url)[0]
  match_log.columns = readable_columns
  match_log['Player Name'] = get_player_name_from_url(url)
  return match_log

In [213]:
def get_gk_match_logs(url):
  """Load a goalkeeper's match-log table from `url` into a DataFrame.

  The first HTML table found at `url` is used. Its column labels are
  replaced with the flat, readable names listed below, and a
  'Player Name' column (derived from the URL) is appended.

  Args:
    url: Match-log page URL for a goalkeeper.

  Returns:
    The relabelled DataFrame with the extra 'Player Name' column.
  """
  # Replacement labels, in the table's column order.
  # NOTE(review): assumes the page's first table has exactly these columns in this order - confirm.
  readable_columns = [
      'Date','Day','Competition','Round','Venue','Result','Squad','Opponent','Start','Position',
      'Mins','Shots on Target Against','Goals Allowed','Saves', 'Save Percentage','Clean Sheets',
      'Post Shot Expected Goals','Penalty Kicks Attempted','Penalty Kicks Allowed',
      'Penalty Kicks Saved','Penalty Kicks Missed','Passes over 40 yards Completed',
      'Passes over 40 yards Attempted','Passes over 40 yards completion %','Passes Attempted',
      'Throws Attempted', 'Percent of passes over 40 yards','Average Pass Length',
      'Goal Kicks Attempted','Goal Kicks over 40 yards','Goal Kick Average Length',
      'Crosses Attempted','Crosses Stopped','Cross Stop %','Defensive Actions outside Penalty Area',
      'Average Distance of Defensive Actions outside Penalty Area','Match Report',
  ]

  match_log = pd.read_html(url)[0]
  match_log.columns = readable_columns
  match_log['Player Name'] = get_player_name_from_url(url)
  return match_log

In [218]:
def _goals_conceded_from_result(result):
  """Return the second score of a result string such as 'W 3-1' as an int.

  The whole number after the dash is read (not just the last character), so
  a double-digit score is not mistaken for a single digit. Hyphen, en dash
  and em dash are all accepted as the separator.

  Raises:
    ValueError: if no '<number><dash><number>' score is present.
  """
  import re

  score = re.search(r"(\d+)\s*[\u2013\u2014-]\s*(\d+)", str(result))
  if score is None:
    raise ValueError(f"Cannot read a score from result: {result!r}")
  return int(score.group(2))


def calculate_game_week_points(df):
  """Compute the fantasy score for a single match.

  Only the first row of `df` is scored; callers are expected to pass a
  one-row frame for the match of interest.

  Args:
    df: DataFrame whose first row holds one match. Every position needs the
      'Position', 'Player Name', 'Round', 'Result' and 'Mins' columns.
      Outfield positions additionally need the outfield stat columns read
      below; 'GK' needs the goalkeeper stat columns read below.

  Returns:
    A tuple ``(identifier, points)`` where identifier is
    ``"<Player Name> <Round>"`` and points is rounded to 3 decimals.

  Raises:
    ValueError: if the position is not one of the handled codes, or the
      'Result' value contains no score.
    IndexError: if `df` has no rows.
  """
  #establish all the weights for different stats

  #Outfield Multipliers
  mins_multiplier = 0.025
  goals_multiplier = 5
  assists_multiplier = 2.5
  penalty_miss_multiplier = -2
  shots_on_target_multiplier = 0.25
  yellow_card_multiplier = -1
  red_card_multiplier = -3
  press_multiplier = 0.25
  tackle_multiplier = 0.25
  interception_multiplier = 0.25
  block_multiplier = 0.25
  no_penalty_xG_multiplier = 0.5
  expected_assist_multiplier = 0.25
  shot_creating_actions_multiplier = 0.25
  passes_completed_multiplier = 0.1 #will multiply by pass completion percentage as well
  successful_dribble_multiplier = 0.25

  #Goalkeeper/def multipliers
  clean_sheet_def_gk_multiplier = 4
  gk_df_goal_conceded_multiplier = -1

  #Just gk multipliers
  # NOTE(review): penalty_save_multiplier is defined but was never applied in the
  # goalkeeper formula; it is still not applied here so existing scores do not change.
  penalty_save_multiplier = 4
  gk_save_multiplier = 2
  gk_penalty_allowed_multiplier = -1
  gk_pass_longer_than_40_yds_multiplier = 0.5
  gk_cross_stopped_multiplier = 1
  gk_defensive_action_outside_box_multiplier = 1

  attacking_positions = ['FW','RW','LW','CM','DM','MF','AM','RM','LM']
  defensive_positions = ['CB','LB','RB','WB','DF']

  ######calculate game week points##########

  # Work on one explicit row: float() on a whole column only works for
  # single-element Series and is deprecated in recent pandas releases.
  row = df.iloc[0]

  def stat(column):
    """Return the numeric value of `column` for the scored match."""
    return float(row[column])

  #sometimes FBref lists positions like "RW,WB", or "CM,DM", I don't want to deal with that so I'm just going to take the first position
  position = str(row['Position'])[:2]
  name = row['Player Name']
  gameweek = row['Round']

  goals_conceded = _goals_conceded_from_result(row['Result'])
  clean_sheet = goals_conceded == 0

  if position in attacking_positions or position in defensive_positions:
    #attackers, midfielders and defenders share the same base formula
    penalties_missed = stat('Penalties Taken') - stat('Penalties Made')
    pass_completion_percentage = stat('Pass Completion %') / 100

    points = (stat('Mins') * mins_multiplier
              + stat('Goals') * goals_multiplier
              + stat('Assists') * assists_multiplier
              + penalties_missed * penalty_miss_multiplier
              + stat('Shots on Target') * shots_on_target_multiplier
              + stat('Yellow Cards') * yellow_card_multiplier
              + stat('Red Cards') * red_card_multiplier
              + stat('Presses') * press_multiplier
              + stat('Tackles') * tackle_multiplier
              + stat('Interceptions') * interception_multiplier
              + stat('Blocks') * block_multiplier
              + stat('Non Penalty Expected Goals') * no_penalty_xG_multiplier
              + stat('Expected Assists') * expected_assist_multiplier
              + stat('Passes Completed') * passes_completed_multiplier * pass_completion_percentage
              + stat('Shot Creating Actions') * shot_creating_actions_multiplier
              + stat('Dribbles Succeeded') * successful_dribble_multiplier)

    #Defenders get scored a little differently: goals conceded and clean sheets count too
    if position in defensive_positions:
      points = (points
                + goals_conceded * gk_df_goal_conceded_multiplier
                + clean_sheet * clean_sheet_def_gk_multiplier)
  elif position == 'GK':
    points = (stat('Mins') * mins_multiplier
              + stat('Saves') * gk_save_multiplier
              + goals_conceded * gk_df_goal_conceded_multiplier
              + clean_sheet * clean_sheet_def_gk_multiplier
              + stat('Penalty Kicks Allowed') * gk_penalty_allowed_multiplier
              + stat('Passes over 40 yards Completed') * gk_pass_longer_than_40_yds_multiplier
              + stat('Crosses Stopped') * gk_cross_stopped_multiplier
              + stat('Defensive Actions outside Penalty Area') * gk_defensive_action_outside_box_multiplier
              # credit (or penalty) for conceding fewer (or more) goals than the post-shot xG faced
              + (stat('Post Shot Expected Goals') - stat('Goals Allowed')))
  else:
    # Fail with a clear message instead of falling through to an unbound `points`.
    raise ValueError(f"You forgot to include this position: {position}")

  identifier = f"{name} {gameweek}"
  return identifier, round(points, 3)


In [221]:
def clean_up_data(url):
  """Fetch a player's match logs and reduce them to league matches.

  URLs containing "keeper" are loaded with the goalkeeper loader, all
  others with the outfield loader. The 'Day', 'Venue' and 'Match Report'
  columns are dropped, missing values are replaced with 0, and only rows
  whose 'Competition' is one of the five listed leagues are kept.

  Args:
    url: Match-log page URL for the player.

  Returns:
    The filtered DataFrame of league matches.
  """
  load_match_logs = get_gk_match_logs if "keeper" in url else get_outfield_player_match_logs
  raw_logs = load_match_logs(url)

  trimmed = raw_logs.drop(columns=['Day','Venue','Match Report']).fillna(value=0)

  leagues = ['Ligue 1','La Liga','Premier League','Serie A',"Bundesliga"]
  is_league_match = trimmed['Competition'].isin(leagues)
  return trimmed[is_league_match]

In [220]:
def get_scores(url):
  """Score every league match in the player's match log.

  Each row of the cleaned match log is passed to the scorer as its own
  one-row DataFrame. Rows are selected by position rather than by matching
  on the 'Round' label, so two matches that share a round label (for
  example after a mid-season move between leagues) are scored separately
  instead of being merged into one multi-row frame, and the log is not
  re-filtered once per row.

  Args:
    url: Match-log page URL for the player.

  Returns:
    A list of ``(identifier, points)`` tuples, one per league match, in
    match-log order.
  """
  df = clean_up_data(url)

  # iloc[[i]] (list indexer) keeps a one-row DataFrame rather than a Series.
  return [calculate_game_week_points(df.iloc[[i]]) for i in range(len(df))]

## Main

In [207]:
# Sample 2022-2023 match-log pages.

# "summary" pages (handled by the outfield loader)
url = "https://fbref.com/en/players/42fd9c7f/matchlogs/2022-2023/summary/Kylian-Mbappe-Match-Logs"
url2 = "https://fbref.com/en/players/70d74ece/matchlogs/2022-2023/summary/Karim-Benzema-Match-Logs"
url3 = "https://fbref.com/en/players/1f44ac21/matchlogs/2022-2023/summary/Erling-Haaland-Match-Logs"
url4 = "https://fbref.com/en/players/aef6e670/matchlogs/2022-2023/summary/Presnel-Kimpembe-Match-Logs"
url5 = "https://fbref.com/en/players/da974c7b/matchlogs/2022-2023/summary/Kalidou-Koulibaly-Match-Logs"

# "keeper" page (handled by the goalkeeper loader)
url6 = "https://fbref.com/en/players/6b625ac2/matchlogs/2022-2023/keeper/Illan-Meslier-Match-Logs"

In [223]:
# Main entry point.
#
# Input:  the match-log URL of the player to score (here url5).
# Output: a list of tuples of the form (identifier, points), where identifier
#         is "<player name> <round>" and points is that match's score rounded
#         to 3 decimals. As the last expression of the cell, the list is
#         displayed as the cell output.
get_scores(url5)

[('Kalidou Koulibaly Matchweek 1', 15.133),
 ('Kalidou Koulibaly Matchweek 2', 15.547),
 ('Kalidou Koulibaly Matchweek 3', 4.223),
 ('Kalidou Koulibaly Matchweek 5', 6.627),
 ('Kalidou Koulibaly Matchweek 6', 10.042)]