# ScraperFC
## FBRef Examples

code: https://github.com/oseymour/ScraperFC/blob/3e3a56ee67375f3056d3b1f123dbcc245bafb8c3/ScraperFC/FBRef.py

In [77]:
import ScraperFC as sfc
import traceback
import warnings
import pandas as pd
import numpy as np
import os

In [66]:
cwd = os.getcwd()
data_dir = os.path.join(os.path.join(cwd, "data"), "fbref")
data_dir

'c:\\dev\\fantacalcio\\ScraperFC\\data\\fbref'

### League Table (e.g. Serie A)

In [110]:
# Initialize the FBRef scraper
scraper = sfc.FBRef()
try:
    # Scrape the table
    lg_table = scraper.scrape_league_table(year=2024, league='Serie A')
except:
    # Catch and print any exceptions. This allows us to still close the
    # scraper below, even if an exception occurs.
    traceback.print_exc()
finally:
    # It's important to close the scraper when you're done with it. Otherwise,
    # you'll have a bunch of webdrivers open and running in the background.
    scraper.close()

lg_table

Scraping 2024 Serie A league table


Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Last 5,Attendance,Top Team Scorer,Goalkeeper,Notes
0,1,Inter,34,28,5,1,81,18,63,89,2.62,72.6,27.4,45.2,1.29,W D W W L,68838,Lautaro Martínez - 23,Yann Sommer,
1,2,Milan,34,21,7,6,64,39,25,70,2.06,55.2,40.4,14.8,0.44,W W D L D,72041,Olivier Giroud - 13,Mike Maignan,
2,3,Juventus,34,18,11,5,47,26,21,65,1.91,49.5,25.5,24.0,0.71,L W D D D,37191,Dušan Vlahović - 16,Wojciech Szczęsny,
3,4,Bologna,34,17,12,5,49,27,22,63,1.85,44.2,33.4,10.8,0.31,D D W D D,24187,Joshua Zirkzee - 11,Łukasz Skorupski,
4,5,Roma,34,17,8,9,61,41,20,59,1.74,46.3,34.8,11.5,0.34,D W L W D,62602,Paulo Dybala - 13,Rui Patrício,
5,6,Atalanta,33,17,6,10,61,37,24,57,1.73,47.3,35.9,11.4,0.35,W L D W W,13786,Teun Koopmeiners - 11,Marco Carnesecchi,
6,7,Lazio,34,17,4,13,43,35,8,55,1.62,43.3,38.4,4.9,0.14,L W W W D,38947,Ciro Immobile - 7,Ivan Provedel,
7,8,Fiorentina,33,14,8,11,50,37,13,50,1.52,44.6,40.8,3.7,0.11,L L D W W,30142,Nicolás González - 9,Pietro Terracciano,
8,9,Napoli,34,13,11,10,52,43,9,50,1.47,56.5,35.1,21.3,0.63,L W D L D,43401,Victor Osimhen - 14,Alex Meret,
9,10,Torino,34,11,13,10,31,31,0,46,1.35,35.2,31.6,3.6,0.1,L D D L D,21321,Duván Zapata - 11,Vanja Milinković-Savić,


In [111]:
lg_table.to_csv(data_dir+"\\serieA_2024.csv")

### Scrape Stats
Scrapes a single stats category

stat_category = ['standard', 'goalkeeping', 'advanced goalkeeping', 'shooting', 'passing', 'pass types', 'goal and shot creation', 'defensive', 'possession', 'playing time', 'misc']

e.g. https://fbref.com/en/comps/11/stats/Serie-A-Stats

In [116]:
scraper = sfc.FBRef()
try:
    stats = scraper.scrape_stats(year=2024, league='Serie A', stat_category="standard", normalize=True)  
except:
    traceback.print_exc()
finally:
    scraper.close()

stats[0]

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Playing Time,Playing Time,Playing Time,Playing Time,Performance,Performance,Performance,Performance,Performance,Performance,Performance,Performance,Expected,Expected,Expected,Expected,Progression,Progression,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Team ID
Unnamed: 0_level_1,Squad,# Pl,Age,Poss,MP,Starts,Min,90s,Gls,Ast,G+A,G-PK,PK,PKatt,CrdY,CrdR,xG,npxG,xAG,npxG+xAG,PrgC,PrgP,Gls,Ast,G+A,G-PK,G+A-PK,xG,xAG,xG+xAG,npxG,npxG+xAG,Unnamed: 33_level_1
0,Atalanta,28,27.1,50.4,33,363,2970,33.0,61,49,110,57,4,4,70,1,47.3,44.2,35.3,79.4,600,1484,1.85,1.48,3.33,1.73,3.21,1.43,1.07,2.5,1.34,2.41,922493f3
1,Bologna,27,26.0,58.1,34,385,3150,35.0,47,29,76,43,4,5,81,2,44.2,40.2,29.9,70.1,620,1379,1.34,0.83,2.17,1.23,2.06,1.26,0.85,2.12,1.15,2.0,1d8099f8
2,Cagliari,28,26.9,42.4,34,374,3060,34.0,35,25,60,32,3,5,68,3,37.1,33.4,25.0,58.4,481,1012,1.03,0.74,1.76,0.94,1.68,1.09,0.74,1.83,0.98,1.72,c4260e09
3,Empoli,34,26.8,44.0,34,374,3060,34.0,23,17,40,19,4,4,74,0,31.3,28.3,22.5,50.8,478,965,0.68,0.5,1.18,0.56,1.06,0.92,0.66,1.58,0.83,1.49,a3d88bd8
4,Fiorentina,29,27.8,57.5,33,363,2970,33.0,49,33,82,46,3,7,71,0,44.6,39.0,29.6,68.6,558,1459,1.48,1.0,2.48,1.39,2.39,1.35,0.9,2.25,1.18,2.08,421387cf
5,Frosinone,34,25.0,50.2,34,374,3060,34.0,43,28,71,35,8,10,63,2,40.4,32.6,26.5,59.0,482,1192,1.26,0.82,2.09,1.03,1.85,1.19,0.78,1.97,0.96,1.74,6a7ad59d
6,Genoa,32,27.5,44.2,34,374,3060,34.0,38,23,61,34,4,5,76,3,31.8,28.0,20.6,48.6,423,954,1.12,0.68,1.79,1.0,1.68,0.94,0.61,1.54,0.82,1.43,658bf2de
7,Hellas Verona,34,26.5,44.8,34,374,3060,34.0,31,24,55,29,2,6,80,5,31.0,26.2,21.4,47.6,481,1002,0.91,0.71,1.62,0.85,1.56,0.91,0.63,1.54,0.77,1.4,0e72edf2
8,Inter,26,29.9,56.9,34,385,3150,35.0,79,55,134,66,13,14,45,1,72.6,61.7,51.4,113.1,666,1505,2.26,1.57,3.83,1.89,3.46,2.07,1.47,3.54,1.76,3.23,d609edc0
9,Juventus,29,27.3,48.7,34,374,3060,34.0,45,34,79,42,3,5,77,2,49.5,45.5,35.3,80.8,629,1306,1.32,1.0,2.32,1.24,2.24,1.46,1.04,2.49,1.34,2.38,e0652b02


In [118]:
stats[0].to_csv(data_dir+"\\serieA_2024_std_stats.csv")

### Scraper All Stats

Scrapes all stat categories
        
Runs scrape_stats() for each stats category on dumps the returned tuple of dataframes into a dict.

In [119]:
#Scrape All Stats
scraper = sfc.FBRef()
try:
    # Scrape the table
    all_stats = scraper.scrape_all_stats(year=2024, league='Serie A', normalize=False)  
except:
    traceback.print_exc()
finally:
    scraper.close()

all_stats[0]

100%|██████████| 11/11 [02:31<00:00, 13.80s/it]


KeyError: 0

In [None]:
all_stats[0].to_csv(data_dir+"\\serieA_2024_all_stats.csv")

### Scraper Matches

Scrapes the FBRef standard stats page of the chosen league season.
            
Works by gathering all of the match URL's from the homepage of the chosen league season on FBRef and then 
calling scrape_match() on each one.

In [None]:
warnings.filterwarnings('ignore')

scraper = sfc.FBRef()

try:
    # Scrape the table
    matches = scraper.scrape_matches(year=2024, league='Serie A', save=False) 
except:
    traceback.print_exc()
finally:
    scraper.close()

matches

#30 mins

### Scraper Match with link

Scrapes an FBRef match page.

In [23]:
warnings.filterwarnings('ignore')

scraper = sfc.FBRef()

try:
    # Scrape the match using the FBRef match link
    link = 'https://fbref.com/en/partite/07524327/Milan-Napoli-11-Febbraio-2024-Serie-A'
    match_report = scraper.scrape_match(link=link)
except:
    traceback.print_exc()
finally:
    scraper.close()

match_report

Unnamed: 0,Link,Date,Stage,Home Team,Away Team,Home Team ID,Away Team ID,Home Formation,Away Formation,Home Goals,...,Away Ast,Home xG,Away xG,Home npxG,Away npxG,Home xAG,Away xAG,Home Player Stats,Away Player Stats,Shots
0,https://fbref.com/en/partite/07524327/Milan-Na...,2024-02-11,Matchweek 24,Milan,Napoli,dc56fe14,d48ad4ff,4-2-3-1,3-5-1-1,1,...,0,0.9,0.7,0.9,0.7,0.5,0.5,Team...,Team...,...


In [26]:
match_report.transpose()

Unnamed: 0,0
Link,https://fbref.com/en/partite/07524327/Milan-Na...
Date,2024-02-11
Stage,Matchweek 24
Home Team,Milan
Away Team,Napoli
Home Team ID,dc56fe14
Away Team ID,d48ad4ff
Home Formation,4-2-3-1
Away Formation,3-5-1-1
Home Goals,1


#### Home/Away Player Stats

In [42]:
match_report['Home Player Stats'].values[0]

Unnamed: 0,Team Sheet,Summary,GK,Passing,Pass Types,Defense,Possession,Misc
0,Milan (4-2-3-1) Milan (4-2-3-1).1 0 ...,Unnamed: 0_level_0 Unnamed: 1_level_0 Unn...,Unnamed: 0_level_0 Unnamed: 1_level_0 Unname...,Unnamed: 0_level_0 Unnamed: 1_level_0 Unn...,Unnamed: 0_level_0 Unnamed: 1_level_0 Unn...,Unnamed: 0_level_0 Unnamed: 1_level_0 Unn...,Unnamed: 0_level_0 Unnamed: 1_level_0 Unn...,Unnamed: 0_level_0 Unnamed: 1_level_0 Unn...


In [44]:
match_report['Home Player Stats'].keys()

Index(['Team Sheet', 'Summary', 'GK', 'Passing', 'Pass Types', 'Defense',
       'Possession', 'Misc'],
      dtype='object')

In [69]:
match_report['Home Player Stats'].values[0].to_csv(data_dir+"\\home_player_stats.csv")

In [83]:
match_report['Home Player Stats'].values[0]['Summary'].to_csv(data_dir+"\\home_player_stats_sum.csv")

#### Shots

In [88]:
match_report['Shots'][0]['Both'].to_csv(data_dir+"\\shots.csv")

### Player Scout Reports

Scrapes the FBRef scouting reports for all players in the chosen league season.

In [None]:
warnings.filterwarnings('ignore')

scraper = sfc.FBRef()

try:
    scout_report = scraper.scrape_complete_scouting_reports(year=2024, league='Serie A',goalkeepers=False)
except:
    traceback.print_exc()
finally:
    scraper.close()

scout_report

#### Single Player Scout Report

In [101]:
warnings.filterwarnings('ignore')

scraper = sfc.FBRef()

try:
    player_link='https://fbref.com/en/players/20730eae/Rafael-Leao'
    scout_report = scraper.complete_report_from_player_link(player_link = player_link)
except:
    traceback.print_exc()
finally:
    scraper.close()

scout_report[0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Per 90,Percentile
Unnamed: 0_level_1,Statistic,Unnamed: 2_level_1,Unnamed: 3_level_1
Standard,Goals,0.35,77
Standard,Assists,0.33,88
Standard,Goals + Assists,0.68,88
Standard,Non-Penalty Goals,0.35,82
Standard,Penalty Kicks Made,0.00,35
...,...,...,...
Miscellaneous Stats,Own Goals,0.00,51
Miscellaneous Stats,Ball Recoveries,2.79,2
Miscellaneous Stats,Aerials Won,0.85,76
Miscellaneous Stats,Aerials Lost,0.70,66


In [103]:
scout_report[0].to_csv(data_dir+"\\leao_stats.csv")

In [108]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
scout_report[0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Per 90,Percentile
Unnamed: 0_level_1,Statistic,Unnamed: 2_level_1,Unnamed: 3_level_1
Standard,Goals,0.35,77
Standard,Assists,0.33,88
Standard,Goals + Assists,0.68,88
Standard,Non-Penalty Goals,0.35,82
Standard,Penalty Kicks Made,0.0,35
Standard,Penalty Kicks Attempted,0.0,33
Standard,Yellow Cards,0.18,42
Standard,Red Cards,0.0,58
Standard,xG: Expected Goals,0.32,78
Standard,npxG: Non-Penalty xG,0.32,83
