<a href="https://colab.research.google.com/github/Bryanwaterhouse19/SoccerStats/blob/main/Fantasy_Scoring_Test_Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import sys, getopt
import csv
import time

## Functions

In [52]:
def get_outfield_player_name_from_url(url):
  """Return the player's name encoded in an outfield match-log URL.

  The name is taken from the last path segment of ``url`` (for example
  ``.../summary/Kevin-De-Bruyne-Match-Logs``): the trailing ``-Match-Logs``
  is removed and the remaining hyphens become spaces. Working from the last
  segment, rather than from fixed character offsets, keeps the result
  correct when the player id, season label or stats-page segment has a
  different length.

  Args:
    url: Match-log URL (or path) as a string.

  Returns:
    The player name with spaces instead of hyphens, e.g. "Kevin De Bruyne".
  """
  suffix = "-Match-Logs"

  # Everything after the final "/" is the "<Name>-Match-Logs" slug.
  slug = url.rstrip("/").rsplit("/", 1)[-1]
  if slug.endswith(suffix):
    slug = slug[:-len(suffix)]

  return slug.replace("-", " ")

In [22]:
def get_gk_name_from_url(url):
  """Return the goalkeeper's name encoded in a keeper match-log URL.

  The name is taken from the last path segment of ``url`` (for example
  ``.../keeper/Alisson-Match-Logs``): the trailing ``-Match-Logs`` is
  removed and the remaining hyphens become spaces. Working from the last
  segment, rather than from fixed character offsets, keeps the result
  correct when the player id or season label has a different length.

  Args:
    url: Match-log URL (or path) as a string.

  Returns:
    The player name with spaces instead of hyphens, e.g. "Alisson".
  """
  suffix = "-Match-Logs"

  # Everything after the final "/" is the "<Name>-Match-Logs" slug.
  slug = url.rstrip("/").rsplit("/", 1)[-1]
  if slug.endswith(suffix):
    slug = slug[:-len(suffix)]

  return slug.replace("-", " ")

In [48]:
def make_player_name_player_links_dict(url):
  """Scrape a stats page and map each player name to their match-log URL.

  Downloads ``url``, parses every anchor on the page and keeps those whose
  ``href`` contains "Match-Logs". Each kept href is prefixed with
  ``https://fbref.com`` and the player name is derived from the resulting
  URL (keeper helper when the URL contains "keeper", outfield helper
  otherwise).

  Args:
    url: Address of the page listing the players.

  Returns:
    dict mapping player name -> absolute match-log URL. When two links
    yield the same name, the later link on the page wins.

  Raises:
    requests.HTTPError: if the server answers with an error status (for
      instance when the site rate-limits the request), instead of silently
      returning an empty dictionary.
    requests.RequestException: on connection problems or timeout.
  """
  player_name_player_links_dict = {}

  # A timeout keeps a stalled connection from hanging the notebook forever.
  res = requests.get(url, timeout=30)
  res.raise_for_status()

  ## The next two lines get around the issue with comments breaking the parsing.
  comm = re.compile("<!--|-->")
  soup = BeautifulSoup(comm.sub("", res.text), 'lxml')

  # Read the href attribute directly rather than slicing the tag's string
  # form, which only works for one exact tag layout.
  for anchor in soup.find_all("a", href=True):
    href = anchor["href"]
    if "Match-Logs" not in href:
      continue

    link = f"https://fbref.com{href}"
    if "keeper" in link:
      name = get_gk_name_from_url(link)
    else:
      name = get_outfield_player_name_from_url(link)

    player_name_player_links_dict[name] = link

  return player_name_player_links_dict

In [4]:
def get_outfield_player_match_logs(url):
  """Load an outfield player's match-log table and give it flat column names.

  Reads the first table pandas finds at ``url``, replaces its column labels
  positionally with the names below, and adds a 'Player Name' column holding
  the name parsed from ``url``.

  Args:
    url: Match-log page address for an outfield player.

  Returns:
    DataFrame with the renamed columns plus 'Player Name'.
  """
  # Labels are applied by position, so the table must have exactly this many
  # columns in this order.
  outfield_columns = [
      'Date', 'Day', 'Competition', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Position', 'Mins', 'Goals', 'Assists', 'Penalties Made', 'Penalties Taken',
      'Shots', 'Shots on Target', 'Yellow Cards', 'Red Cards', 'Touches', 'Presses',
      'Tackles', 'Interceptions', 'Blocks', 'Expected Goals', 'Non Penalty Expected Goals',
      'Expected Assists', 'Shot Creating Actions', 'Goal Creating Actions',
      'Passes Completed', 'Passes Attempted', 'Pass Completion %', 'Progressive Passes',
      'Carries', 'Progressive Carries', 'Dribbles Succeeded', 'Dribbles Attempted',
      'Match Report',
  ]

  match_log = pd.read_html(url)[0]
  match_log.columns = outfield_columns
  match_log['Player Name'] = get_outfield_player_name_from_url(url)
  return match_log

In [23]:
def get_gk_match_logs(url):
  """Load a goalkeeper's match-log table and give it flat column names.

  Reads the first table pandas finds at ``url``, replaces its column labels
  positionally with the names below, and adds a 'Player Name' column holding
  the name parsed from ``url``.

  Args:
    url: Match-log page address for a goalkeeper.

  Returns:
    DataFrame with the renamed columns plus 'Player Name'.
  """
  # Labels are applied by position, so the table must have exactly this many
  # columns in this order.
  keeper_columns = [
      'Date', 'Day', 'Competition', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Position', 'Mins', 'Shots on Target Against', 'Goals Allowed', 'Saves',
      'Save Percentage', 'Clean Sheets', 'Post Shot Expected Goals',
      'Penalty Kicks Attempted', 'Penalty Kicks Allowed', 'Penalty Kicks Saved',
      'Penalty Kicks Missed', 'Passes over 40 yards Completed',
      'Passes over 40 yards Attempted', 'Passes over 40 yards completion %',
      'Passes Attempted', 'Throws Attempted', 'Percent of passes over 40 yards',
      'Average Pass Length', 'Goal Kicks Attempted', 'Goal Kicks over 40 yards',
      'Goal Kick Average Length', 'Crosses Attempted', 'Crosses Stopped', 'Cross Stop %',
      'Defensive Actions outside Penalty Area',
      'Average Distance of Defensive Actions outside Penalty Area', 'Match Report',
  ]

  match_log = pd.read_html(url)[0]
  match_log.columns = keeper_columns
  match_log['Player Name'] = get_gk_name_from_url(url)
  return match_log

In [24]:
def calculate_game_week_points(df):
  """Score a single match for one player under this notebook's fantasy rules.

  Only the first row of ``df`` is read; every field (position, name, round,
  result and all stats) comes from that row.

  Scoring groups, chosen from the first two characters of 'Position':
    * attackers / midfielders: the outfield formula;
    * defenders: the outfield formula plus goals-conceded and clean-sheet terms;
    * goalkeepers: their own formula, including (post-shot xG - goals allowed).

  Args:
    df: DataFrame with at least one row containing 'Position', 'Player Name',
      'Round', 'Result' and the stat columns used by the relevant formula.

  Returns:
    Tuple ``(identifier, points)``: identifier is "<Player Name> <Round>" and
    points is the score rounded to 3 decimals.

  Raises:
    ValueError: if 'Result' does not end in a number, or the position code is
      not one of the handled groups.
  """
  # establish all the weights for different stats

  # Outfield multipliers
  mins_multiplier = 0.025
  goals_multiplier = 6
  assists_multiplier = 3
  penalty_miss_multiplier = -2
  shots_on_target_multiplier = 0.5
  yellow_card_multiplier = -1
  red_card_multiplier = -3
  press_multiplier = 0.5
  tackle_multiplier = 1
  interception_multiplier = 1
  block_multiplier = 0.5
  no_penalty_xG_multiplier = 1
  expected_assist_multiplier = 0.5
  shot_creating_actions_multiplier = 0.25
  passes_completed_multiplier = 0.1  # also scaled by pass completion percentage
  successful_dribble_multiplier = 0.25

  # Goalkeeper / defender multipliers
  clean_sheet_def_gk_multiplier = 6
  gk_df_goal_conceded_multiplier = -1

  # Goalkeeper-only multipliers
  # NOTE(review): penalty_save_multiplier is defined but not applied to
  # 'Penalty Kicks Saved' in the goalkeeper formula below; confirm whether
  # that omission is intended before wiring it in (it would change scores).
  penalty_save_multiplier = 4
  gk_save_multiplier = 2.5
  gk_penalty_allowed_multiplier = -1
  gk_pass_longer_than_40_yds_multiplier = 0.5
  gk_cross_stopped_multiplier = 1
  gk_defensive_action_outside_box_multiplier = 1

  ###### calculate game week points ##########

  # Work on the first row explicitly: calling float() on a whole column is
  # deprecated in recent pandas and fails when the frame has several rows.
  row = df.iloc[0]

  def stat(column):
    return float(row[column])

  # FBref sometimes lists positions like "RW,WB" or "CM,DM"; only the first
  # position is used.
  position = str(row['Position'])[:2]
  name = row['Player Name']
  gameweek = row['Round']

  # Goals conceded is the number that ends the result string (presumably
  # FBref's "W 2–1" layout; verify against the scraped data). Reading the
  # whole trailing number, not just the last character, keeps scores such as
  # "L 0–10" from being treated as a clean sheet.
  result = row['Result']
  conceded_match = re.search(r"(\d+)\s*$", str(result))
  if conceded_match is None:
    raise ValueError(f"Cannot read goals conceded from result: {result!r}")
  goals_conceded = int(conceded_match.group(1))
  clean_sheet = goals_conceded == 0

  def outfield_points():
    # Shared by attackers, midfielders and defenders.
    penalties_missed = stat('Penalties Taken') - stat('Penalties Made')
    pass_completion_percentage = stat('Pass Completion %') / 100
    return (stat('Mins') * mins_multiplier + stat('Goals') * goals_multiplier
            + stat('Assists') * assists_multiplier + penalties_missed * penalty_miss_multiplier
            + stat('Shots on Target') * shots_on_target_multiplier
            + stat('Yellow Cards') * yellow_card_multiplier + stat('Red Cards') * red_card_multiplier
            + stat('Presses') * press_multiplier + stat('Tackles') * tackle_multiplier
            + stat('Interceptions') * interception_multiplier + stat('Blocks') * block_multiplier
            + stat('Non Penalty Expected Goals') * no_penalty_xG_multiplier
            + stat('Expected Assists') * expected_assist_multiplier
            + stat('Passes Completed') * passes_completed_multiplier * pass_completion_percentage
            + stat('Shot Creating Actions') * shot_creating_actions_multiplier
            + stat('Dribbles Succeeded') * successful_dribble_multiplier)

  # attackers and midfielders get scored the same
  if position in ['FW', 'RW', 'LW', 'CM', 'DM', 'MF', 'AM', 'RM', 'LM']:
    points = outfield_points()

  # defenders additionally pay for goals conceded and earn a clean-sheet bonus
  elif position in ['CB', 'LB', 'RB', 'WB', 'DF']:
    points = (outfield_points() + goals_conceded * gk_df_goal_conceded_multiplier
              + clean_sheet * clean_sheet_def_gk_multiplier)

  elif position == 'GK':
    points = (stat('Mins') * mins_multiplier + stat('Saves') * gk_save_multiplier
              + goals_conceded * gk_df_goal_conceded_multiplier
              + clean_sheet * clean_sheet_def_gk_multiplier
              + stat('Penalty Kicks Allowed') * gk_penalty_allowed_multiplier
              + stat('Passes over 40 yards Completed') * gk_pass_longer_than_40_yds_multiplier
              + stat('Crosses Stopped') * gk_cross_stopped_multiplier
              + stat('Defensive Actions outside Penalty Area') * gk_defensive_action_outside_box_multiplier
              + (stat('Post Shot Expected Goals') - stat('Goals Allowed')))

  else:
    # Previously this branch only printed and then crashed on the undefined
    # `points`; fail with a message that names the unhandled position.
    raise ValueError(f"You forgot to include this position: {position}")

  identifier = name + " " + gameweek
  points = round(points, 3)

  return identifier, points


In [25]:
def clean_up_data(url):
  """Fetch a player's match logs and reduce them to domestic league matches.

  Picks the goalkeeper loader when ``url`` contains "keeper" and the outfield
  loader otherwise, drops the 'Day', 'Venue' and 'Match Report' columns,
  replaces missing values with 0 and keeps only rows whose 'Competition' is
  one of the five leagues listed below.

  Args:
    url: Match-log page address for the player.

  Returns:
    DataFrame of the player's league matches.
  """
  load_match_logs = get_gk_match_logs if "keeper" in url else get_outfield_player_match_logs

  trimmed = (
      load_match_logs(url)
      .drop(columns=['Day','Venue','Match Report'])
      .fillna(value=0)
  )

  top_five_leagues = ['Ligue 1','La Liga','Premier League','Serie A',"Bundesliga"]
  in_top_five = trimmed['Competition'].isin(top_five_leagues)
  return trimmed[in_top_five]

In [26]:
def get_scores(name):
  """Return the fantasy score of every league match for the named player.

  Looks ``name`` up in the module-level ``Big_5_player_name_dict`` (so that
  dictionary must already have been built), loads and cleans the player's
  match logs, and scores each row by selecting the rows that share its
  'Round' value.

  Args:
    name: Player name exactly as it appears as a key of
      ``Big_5_player_name_dict``.

  Returns:
    List of ``(identifier, points)`` tuples, one per row of the cleaned logs.
  """
  league_logs = clean_up_data(Big_5_player_name_dict[name])

  # One entry per row, in row order.
  rounds = [league_logs.iloc[idx]['Round'] for idx in range(len(league_logs))]

  return [
      calculate_game_week_points(league_logs.loc[league_logs['Round'] == matchweek])
      for matchweek in rounds
  ]

In [27]:
def Merge(dict1, dict2):
    """Copy every entry of ``dict2`` into ``dict1`` and return ``dict1``.

    ``dict1`` is modified in place; on duplicate keys the value from
    ``dict2`` replaces the one in ``dict1``.
    """
    dict1.update(dict2)
    return dict1
     


## Main

In [53]:
# Run this cell once to build the name -> match-log URL dictionary. It
# downloads and parses two full stats pages, so it is slow; do not repeat it
# for every lookup.
Big_5_outfield_url = "https://fbref.com/en/comps/Big5/stats/players/Big-5-European-Leagues-Stats"
Big_5_outfield_player_name_dict = make_player_name_player_links_dict(Big_5_outfield_url)
Big_5_keeper_url = "https://fbref.com/en/comps/Big5/keepers/players/Big-5-European-Leagues-Stats"
Big_5_keeper_player_name_dict = make_player_name_player_links_dict(Big_5_keeper_url)



# Merge writes the keeper entries into the outfield dictionary and returns
# that same object, so after this line Big_5_player_name_dict and
# Big_5_outfield_player_name_dict are one dictionary holding both groups.
# get_scores() looks names up in Big_5_player_name_dict.
Big_5_player_name_dict = Merge(Big_5_outfield_player_name_dict,Big_5_keeper_player_name_dict)

In [55]:
# Example lookup with a hard-coded player name.

# Input: the player's name as a string; it must be a key of Big_5_player_name_dict.

# Output: list of tuples of the form ("<player name> <matchweek>", points for that matchweek).

get_scores("Alisson")

https://fbref.com/en/players/7a2e46a8/matchlogs/2022-2023/keeper/Alisson-Match-Logs
player name Alisson


[('Alisson Matchweek 1', 3.45),
 ('Alisson Matchweek 2', 7.25),
 ('Alisson Matchweek 3', 14.05),
 ('Alisson Matchweek 4', 17.35),
 ('Alisson Matchweek 5', 11.55),
 ('Alisson Matchweek 6', 23.85)]

In [56]:
# Same lookup, with the player name typed in by the user.

# Input: player name entered at the prompt; it must be a key of Big_5_player_name_dict.

# Output: list of tuples of the form ("<player name> <matchweek>", points for that matchweek).
player_name = input("Enter the name of a player in the top 5 European Soccer Leagues: ")
get_scores(player_name)

Enter the name of a player in the top 5 European Soccer Leagues: Alisson
https://fbref.com/en/players/7a2e46a8/matchlogs/2022-2023/keeper/Alisson-Match-Logs
player name Alisson


[('Alisson Matchweek 1', 3.45),
 ('Alisson Matchweek 2', 7.25),
 ('Alisson Matchweek 3', 14.05),
 ('Alisson Matchweek 4', 17.35),
 ('Alisson Matchweek 5', 11.55),
 ('Alisson Matchweek 6', 23.85)]

In [15]:
# Standard-stats pages for each of the five leagues on FBref, plus the
# combined "Big 5" player stats page.
PL_url = "https://fbref.com/en/comps/9/stats/Premier-League-Stats"
Serie_A_url = "https://fbref.com/en/comps/11/stats/Serie-A-Stats"
# Now includes the "/stats/" segment so it points at the same kind of page as
# the other leagues.
Bundesliga_url = "https://fbref.com/en/comps/20/stats/Bundesliga-Stats"
La_Liga_url = "https://fbref.com/en/comps/12/stats/La-Liga-Stats"
# Was a copy of the La Liga address; Ligue 1 is competition 13 on FBref.
Ligue_1_url = "https://fbref.com/en/comps/13/stats/Ligue-1-Stats"

Big_5_url = "https://fbref.com/en/comps/Big5/stats/players/Big-5-European-Leagues-Stats"



