In [1]:
import json
from io import StringIO
from urllib.parse import urljoin

import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Download the competitions index CSV into a DataFrame.
seasons = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv"
)

In [3]:
# Keep only the rows with country 'ENG', tier '1st' and gender 'M'.
seasons = seasons[
    (seasons['country'] == 'ENG')
    & (seasons['tier'] == '1st')
    & (seasons['gender'] == 'M')
]

In [29]:
# Display the `seasons` frame (narrowed above to country 'ENG', tier '1st', gender 'M').
seasons

In [24]:
def get_player_matchlogs(url):
    """Download a page and return its ``matchlogs`` table as a DataFrame.

    Parameters
    ----------
    url : str
        Absolute URL of the page to fetch; it is passed to ``requests.get``
        unchanged.

    Returns
    -------
    pandas.DataFrame
        The first table parsed from the ``<table id="matchlogs">`` element,
        with every row that contains a missing value removed.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If the page contains no ``<table id="matchlogs">`` element.
    """
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to preserve existing behaviour -- confirm it is really required.
    # The timeout prevents the notebook from hanging forever on a stalled socket.
    response = requests.get(url, verify=False, timeout=30)
    # Surface HTTP failures directly instead of as a confusing parse error below.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', id='matchlogs')
    if table is None:
        raise ValueError(f"No table with id 'matchlogs' found at {url}")

    # read_html expects a path/URL/file-like object; passing literal HTML as a
    # plain string is deprecated, so wrap the markup in StringIO.
    match_logs = pd.read_html(StringIO(str(table)))[0]
    return match_logs.dropna()

def get_league_url(country, gender, season_end_year, tier):
    """Look up the season URL of one competition in the competitions CSV.

    The CSV is downloaded on every call and filtered on its ``country``,
    ``tier``, ``gender`` and ``season_end_year`` columns.

    Parameters
    ----------
    country : str
        Value to match in the ``country`` column (e.g. ``"ENG"``).
    gender : str
        Value to match in the ``gender`` column (e.g. ``"M"``).
    season_end_year : int
        Value to match in the ``season_end_year`` column.
    tier : str
        Value to match in the ``tier`` column (e.g. ``"1st"``).

    Returns
    -------
    str
        The ``seasons_urls`` value of the first matching row.

    Raises
    ------
    IndexError
        If no row matches the four filters.
    """
    competitions = pd.read_csv("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/all_leages_and_cups/all_competitions.csv")
    matches = competitions[
        (competitions.country == country)
        & (competitions.tier == tier)
        & (competitions.gender == gender)
        & (competitions.season_end_year == season_end_year)
    ]
    if matches.empty:
        # Same exception type that `.iloc[0]` on an empty frame would raise,
        # but with a message that names the filters that matched nothing.
        raise IndexError(
            f"No competition found for country={country!r}, gender={gender!r}, "
            f"season_end_year={season_end_year!r}, tier={tier!r}"
        )
    return matches.seasons_urls.iloc[0]

def get_teams_urls(league_url):
    """Collect the links found in a league page's squad table.

    Parameters
    ----------
    league_url : str
        Absolute URL of the league page; it is passed to ``requests.get``
        unchanged.

    Returns
    -------
    dict[str, str]
        Maps the text of every ``<a>`` inside the
        ``<table id="stats_squads_standard_for">`` element to its ``href``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If the page contains no ``stats_squads_standard_for`` table.
    """
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to preserve existing behaviour -- confirm it is really required.
    # The timeout prevents the notebook from hanging forever on a stalled socket.
    response = requests.get(league_url, verify=False, timeout=30)
    # Surface HTTP failures directly instead of as a missing-table error below.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', id='stats_squads_standard_for')
    if table is None:
        raise ValueError(
            f"No table with id 'stats_squads_standard_for' found at {league_url}"
        )
    return {link.text: link.get('href') for link in table.find_all('a')}

def get_players_urls(team_url, table_id='stats_standard_9'):
    """Collect the links found in the header cells of a squad page table.

    Parameters
    ----------
    team_url : str
        Squad page path (e.g. ``'/en/squads/18bb7c10/Arsenal-Stats'``); it is
        resolved against ``https://fbref.com/``.
    table_id : str, optional
        ``id`` attribute of the table to read. Defaults to
        ``'stats_standard_9'``, the value that used to be hard-coded.
        NOTE(review): the trailing number looks like a competition id --
        confirm before using other leagues.

    Returns
    -------
    dict[str, str]
        Maps the text of the first ``<a>`` in each ``<th>`` cell of the table
        to its ``href``; header cells without a link are skipped.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If the page contains no table with the requested id.
    """
    # urljoin avoids the 'https://fbref.com//en/...' double slash produced by
    # plain concatenation when team_url starts with '/'.
    url = urljoin('https://fbref.com/', team_url)
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to preserve existing behaviour -- confirm it is really required.
    # The timeout prevents the notebook from hanging forever on a stalled socket.
    response = requests.get(url, verify=False, timeout=30)
    # Surface HTTP failures directly instead of as a missing-table error below.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', id=table_id)
    if table is None:
        raise ValueError(f"No table with id {table_id!r} found at {url}")

    anchors = (th.find('a') for th in table.find_all('th'))
    return {a.text: a.get('href') for a in anchors if a is not None}

def get_scouting_report(player_url):
    """Download a player page and return its scouting summary table.

    Parameters
    ----------
    player_url : str
        Player page path (e.g. ``'/en/players/fed7cb61/Kai-Havertz'``); it is
        resolved against ``https://fbref.com/``.

    Returns
    -------
    pandas.DataFrame
        The first table parsed from the first ``<table>`` whose ``id`` starts
        with ``scout_summary_``, with every row that contains a missing value
        removed.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If the page contains no ``scout_summary_*`` table.
    """
    # urljoin avoids the 'https://fbref.com//en/...' double slash produced by
    # plain concatenation when player_url starts with '/'.
    url = urljoin('https://fbref.com/', player_url)
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to preserve existing behaviour -- confirm it is really required.
    # The timeout prevents the notebook from hanging forever on a stalled socket.
    response = requests.get(url, verify=False, timeout=30)
    # Surface HTTP failures directly instead of as a confusing parse error below.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', id=lambda value: value and value.startswith('scout_summary_'))
    if table is None:
        raise ValueError(f"No table with id starting 'scout_summary_' found at {url}")

    # read_html expects a path/URL/file-like object; passing literal HTML as a
    # plain string is deprecated, so wrap the markup in StringIO.
    scouting_report = pd.read_html(StringIO(str(table)))[0]
    return scouting_report.dropna()

# Resolve the league page, then its squad links, then the player links of the
# 'Arsenal' squad; the last expression displays the resulting mapping.
league_url = get_league_url(
    country="ENG",
    gender="M",
    season_end_year=2024,
    tier="1st",
)
teams_urls = get_teams_urls(league_url=league_url)
players_urls = get_players_urls(team_url=teams_urls['Arsenal'])
players_urls




{'William Saliba': '/en/players/972aeb2a/William-Saliba',
 'Declan Rice': '/en/players/1c7012b8/Declan-Rice',
 'Bukayo Saka': '/en/players/bc7dc64d/Bukayo-Saka',
 'Martin Ødegaard': '/en/players/79300479/Martin-Odegaard',
 'Ben White': '/en/players/35e413f1/Ben-White',
 'Gabriel Dos Santos': '/en/players/67ac5bb8/Gabriel-Dos-Santos',
 'Gabriel Martinelli': '/en/players/48a5a5d6/Gabriel-Martinelli',
 'David Raya': '/en/players/98ea5115/David-Raya',
 'Kai Havertz': '/en/players/fed7cb61/Kai-Havertz',
 'Oleksandr Zinchenko': '/en/players/51cf8561/Oleksandr-Zinchenko',
 'Gabriel Jesus': '/en/players/b66315ae/Gabriel-Jesus',
 'Eddie Nketiah': '/en/players/a53649b7/Eddie-Nketiah',
 'Leandro Trossard': '/en/players/38ceb24a/Leandro-Trossard',
 'Jakub Kiwior': '/en/players/dc3e663e/Jakub-Kiwior',
 'Jorginho': '/en/players/45db685d/Jorginho',
 'Takehiro Tomiyasu': '/en/players/b3af9be1/Takehiro-Tomiyasu',
 'Aaron Ramsdale': '/en/players/466fb2c5/Aaron-Ramsdale',
 'Thomas Partey': '/en/players/5

In [25]:
# Display the team-name -> squad-page-path mapping returned by get_teams_urls.
teams_urls

{'Arsenal': '/en/squads/18bb7c10/Arsenal-Stats',
 'Aston Villa': '/en/squads/8602292d/Aston-Villa-Stats',
 'Bournemouth': '/en/squads/4ba7cbea/Bournemouth-Stats',
 'Brentford': '/en/squads/cd051869/Brentford-Stats',
 'Brighton': '/en/squads/d07537b9/Brighton-and-Hove-Albion-Stats',
 'Burnley': '/en/squads/943e8050/Burnley-Stats',
 'Chelsea': '/en/squads/cff3d9bb/Chelsea-Stats',
 'Crystal Palace': '/en/squads/47c64c55/Crystal-Palace-Stats',
 'Everton': '/en/squads/d3fd31cc/Everton-Stats',
 'Fulham': '/en/squads/fd962109/Fulham-Stats',
 'Liverpool': '/en/squads/822bd0ba/Liverpool-Stats',
 'Luton Town': '/en/squads/e297cd13/Luton-Town-Stats',
 'Manchester City': '/en/squads/b8fd03ef/Manchester-City-Stats',
 'Manchester Utd': '/en/squads/19538871/Manchester-United-Stats',
 'Newcastle Utd': '/en/squads/b2b47a98/Newcastle-United-Stats',
 "Nott'ham Forest": '/en/squads/e4a775cb/Nottingham-Forest-Stats',
 'Sheffield Utd': '/en/squads/1df6b87e/Sheffield-United-Stats',
 'Tottenham': '/en/squads/

In [None]:
# Fetch and display the scouting report for 'Kai Havertz' without storing it.
# NOTE(review): a later cell makes the same call and keeps the result in
# `report`; this cell triggers an extra download -- consider removing it.
get_scouting_report(players_urls['Kai Havertz'])

In [26]:
from difflib import get_close_matches

In [28]:
# Fuzzy-match a player name against the names scraped into `players_urls`.
word = 'David Raya'
patterns = players_urls.keys()
get_close_matches(word=word, possibilities=patterns)

['David Raya']

In [30]:
def get_players_data():
    """Download the FPL ``bootstrap-static`` payload and build a players table.

    The ``elements`` records are merged with ``teams`` (on ``team`` == ``id``)
    and then with ``element_types`` (on ``element_type`` == ``id``). Columns
    that exist on both sides of a merge receive pandas' default ``_x`` / ``_y``
    suffixes.

    Returns
    -------
    pandas.DataFrame
        The merged table, where

        * ``now_cost`` has been divided by 10
          (NOTE(review): presumably converts the API's integer price into
          units of one -- confirm),
        * ``expected_goal_involvements`` and ``expected_goals_conceded`` are
          cast to ``float``,
        * ``full_name`` is ``first_name + ' ' + second_name``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    base_url = 'https://fantasy.premierleague.com/api/'
    # The timeout prevents the notebook from hanging forever on a stalled socket.
    response = requests.get(base_url + 'bootstrap-static/', timeout=30)
    # Fail on the HTTP error itself rather than on a JSON decoding error.
    response.raise_for_status()
    payload = response.json()

    players = pd.DataFrame(payload['elements'])
    teams = pd.DataFrame(payload['teams'])
    positions = pd.DataFrame(payload['element_types'])

    # Merge order matters: it decides which overlapping columns get suffixed.
    players = players.merge(right=teams, left_on='team', right_on='id')
    players['now_cost'] = players['now_cost'] / 10
    players = players.merge(right=positions, left_on='element_type', right_on='id')

    players['expected_goal_involvements'] = players['expected_goal_involvements'].astype(float)
    players['expected_goals_conceded'] = players['expected_goals_conceded'].astype(float)
    players['full_name'] = players['first_name'] + ' ' + players['second_name']

    return players

# Build the FPL players table and keep it in `players_fpl` for later cells.
players_fpl = get_players_data()

In [36]:
# Fetch the scouting report for 'Kai Havertz' and keep it in `report`.
report = get_scouting_report(players_urls['Kai Havertz'])



In [37]:
# Display the scouting report stored in `report`.
report

Unnamed: 0,Statistic,Per 90,Percentile
0,Non-Penalty Goals,0.20,91.0
1,npxG: Non-Penalty xG,0.35,99.0
2,Shots Total,2.26,96.0
3,Assists,0.06,40.0
4,xAG: Exp. Assisted Goals,0.12,69.0
5,npxG + xAG,0.47,98.0
6,Shot-Creating Actions,2.74,65.0
8,Passes Attempted,33.86,8.0
9,Pass Completion %,83.0%,50.0
10,Progressive Passes,3.95,30.0


In [42]:
# Fetch and display the scouting report for 'David Raya'; the result is not stored.
get_scouting_report(players_urls['David Raya'])



Unnamed: 0,Statistic,Per 90,Percentile
0,PSxG-GA,-0.02,45.0
1,Goals Against,0.93,89.0
2,Save Percentage,72.6%,57.0
3,PSxG/SoT,0.25,15.0
4,Save% (Penalty Kicks),33.3%,81.0
5,Clean Sheet Percentage,35.7%,79.0
7,Touches,38.45,59.0
8,Launch %,39.3%,65.0
9,Goal Kicks,4.26,4.0
10,Avg. Length of Goal Kicks,47.9,74.0


In [41]:
# Slice `report` by row *position* (not index label) into three groups.
# NOTE(review): the positions are magic numbers tied to the row order of one
# particular report layout -- confirm they select the intended statistics,
# in particular for `goalkeeping` when `report` is not a goalkeeper's report.
attacking = report.iloc[[2, 5, 6, 9, 12], :]
deffending = report.iloc[[13, 15, 16, 17], :]
goalkeeping = report.iloc[[0, 2, 3, 4], :]

Unnamed: 0,Statistic,Per 90,Percentile
2,Shots Total,2.26,96.0
5,npxG + xAG,0.47,98.0
6,Shot-Creating Actions,2.74,65.0
11,Progressive Carries,2.01,77.0
14,Progressive Passes Rec,6.61,98.0


In [46]:
# List the columns of the merged FPL table; the trailing expression is shown
# as the cell output, so no print() wrapper is needed.
players_fpl.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round',
       'code_x', 'cost_change_event', 'cost_change_event_fall',
       'cost_change_start', 'cost_change_start_fall', 'dreamteam_count',
       'element_type', 'ep_next',
       ...
       'plural_name_short', 'singular_name', 'singular_name_short',
       'squad_select', 'squad_min_play', 'squad_max_play', 'ui_shirt_specific',
       'sub_positions_locked', 'element_count', 'full_name'],
      dtype='object', length=121)
