### Import and settings

In [69]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
import time

pd.set_option('display.max_rows', 25)
pd.set_option('display.max_columns', 50)

### Functions

In [92]:
def get_teams(year: int):
    start_year = year-1
    end_year = year
    
    url = f'https://fbref.com/en/comps/Big5/{start_year}-{end_year}/{start_year}-{end_year}-Big-5-European-Leagues-Stats'
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    result = requests.get(url, headers=headers)
    teams = pd.read_html(result.content)[0]
    soup = BeautifulSoup(result.content, 'html.parser')
    links = soup.find("table", {"id": "big5_table"}).find_all('tr')
    link_list = []
    
    for i in range(1, len(links)):
        link_list.append(links[i].find('a')['href'])

    teams['link'] = link_list
    return teams

def get_players(teams: pd.DataFrame):
    links = teams['link']
    team = teams['Squad']
    
    total = pd.DataFrame()
    
    for i in range(len(links)):
        time.sleep(3)
        url = f'https://fbref.com{links[i]}'
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
        result = requests.get(url, headers=headers)
        players = pd.read_html(result.content)[0]
        players.columns = players.columns.droplevel(0)
        players = players.drop(players.tail(2).index)
        soup = BeautifulSoup(result.content, 'html.parser')
        extra = soup.find("table", {"id": re.compile('stats_standard*')}).find_all('tr')
        id_list = []
        link_list = []
        
        for j in range(2, len(extra)-2):
            id_list.append(extra[j].find('th')['data-append-csv'])
            cur_link = extra[j].find('a')['href']
            if cur_link:
                link_list.append(cur_link)
            
        players['link'] = link_list
        players['id'] = id_list
        
        #print(type(players), type(total))
        total = pd.concat([players, total], ignore_index=True)
        
    return total

In [8]:
players = pd.DataFrame(columns = ['name', 'link', 'country', 'league_pos', 'MP', 'W', 'D', 'L', 'GF', 'GA', 'PTS',
                                     'xG', 'xGA'])
    
year = 2023

start_year = year-1
end_year = year

url = f'https://fbref.com/en/comps/Big5/{start_year}-{end_year}/{start_year}-{end_year}-Big-5-European-Leagues-Stats'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
result = requests.get(url, headers=headers)
soup = BeautifulSoup(result.content, 'html.parser')
teams = pd.read_html(result.content)

In [26]:
test = get_teams(2023)
test

Unnamed: 0,Rk,Squad,Country,LgRk,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Attendance,Top Team Scorer,Goalkeeper,link
0,1,Napoli,it ITA,1,38,28,6,4,77,28,49,90,2.37,64.7,31.8,33.0,0.87,46173,Victor Osimhen - 26,Alex Meret,/en/squads/d48ad4ff/Napoli-Stats
1,2,Manchester City,eng ENG,1,38,28,5,5,94,33,61,89,2.34,78.7,32.1,46.6,1.23,53249,Erling Haaland - 36,Ederson,/en/squads/b8fd03ef/Manchester-City-Stats
2,3,Barcelona,es ESP,1,38,28,4,6,70,20,50,88,2.32,75.5,33.2,42.3,1.11,83498,Robert Lewandowski - 23,Marc-André ter Stegen,/en/squads/206d90db/Barcelona-Stats
3,4,Paris S-G,fr FRA,1,38,27,4,7,89,40,49,85,2.24,78.2,48.3,29.9,0.79,46334,Kylian Mbappé - 29,Gianluigi Donnarumma,/en/squads/e2d8892c/Paris-Saint-Germain-Stats
4,5,Arsenal,eng ENG,2,38,26,6,6,88,43,45,84,2.21,71.9,42.0,29.9,0.79,60191,"Martin Ødegaard, Martinelli - 15",Aaron Ramsdale,/en/squads/18bb7c10/Arsenal-Stats
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,94,Southampton,eng ENG,20,38,6,7,25,36,73,-37,25,0.66,37.7,61.0,-23.3,-0.61,30440,James Ward-Prowse - 9,Gavin Bazunu,/en/squads/33c895d4/Southampton-Stats
94,95,Elche,es ESP,20,38,5,10,23,30,67,-37,25,0.66,37.5,70.0,-32.6,-0.86,19875,Lucas Boyé - 7,Édgar Badía,/en/squads/6c8b07df/Elche-Stats
95,96,Troyes,fr FRA,19,38,4,12,22,45,81,-36,24,0.63,39.1,78.0,-38.9,-1.02,10004,Mama Samba Baldé - 12,Gauthier Gallon,/en/squads/54195385/Troyes-Stats
96,97,Sampdoria,it ITA,20,38,3,10,25,24,71,-47,19,0.50,34.1,66.3,-32.2,-0.85,20133,Manolo Gabbiadini - 7,Emil Audero,/en/squads/8ff9e3b3/Sampdoria-Stats


In [94]:
all_players = get_players(test)

In [95]:
all_players

Unnamed: 0,Player,Nation,Pos,Age,MP,Starts,Min,90s,Gls,Ast,G+A,G-PK,PK,PKatt,CrdY,CrdR,xG,npxG,xAG,npxG+xAG,PrgC,PrgP,PrgR,Gls.1,Ast.1,G+A.1,G-PK.1,G+A-PK,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1,Matches,link,id
0,Cédric Hountondji,bj BEN,DF,28.0,36,35,3095.0,34.4,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,1.1,1.1,0.4,1.6,5.0,84.0,1.0,0.00,0.00,0.00,0.00,0.00,0.03,0.01,0.05,0.03,0.05,Matches,/en/players/00891164/Cedric-Hountondji,00891164
1,Batista Mendy,fr FRA,"MF,DF",22.0,35,35,2859.0,31.8,0.0,0.0,0.0,0.0,0.0,0.0,8.0,1.0,0.9,0.9,1.2,2.1,27.0,195.0,49.0,0.00,0.00,0.00,0.00,0.00,0.03,0.04,0.07,0.03,0.07,Matches,/en/players/e472a6f6/Batista-Mendy,e472a6f6
2,Yan Valery,tn TUN,DF,23.0,30,30,2497.0,27.7,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.8,0.8,1.9,2.7,72.0,84.0,74.0,0.00,0.00,0.00,0.00,0.00,0.03,0.07,0.10,0.03,0.10,Matches,/en/players/531a4aa8/Yan-Valery,531a4aa8
3,Nabil Bentaleb,dz ALG,MF,27.0,30,29,2489.0,27.7,4.0,4.0,8.0,2.0,2.0,2.0,7.0,1.0,3.2,1.6,2.3,3.9,46.0,176.0,28.0,0.14,0.14,0.29,0.07,0.22,0.11,0.08,0.20,0.06,0.14,Matches,/en/players/3189f61a/Nabil-Bentaleb,3189f61a
4,Adrien Hunou,fr FRA,"MF,FW",28.0,34,28,2234.0,24.8,4.0,1.0,5.0,4.0,0.0,0.0,2.0,0.0,6.5,6.5,2.3,8.7,34.0,62.0,149.0,0.16,0.04,0.20,0.16,0.20,0.26,0.09,0.35,0.26,0.35,Matches,/en/players/d14229f3/Adrien-Hunou,d14229f3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3522,Adam Ounas,dz ALG,FW,25.0,2,0,15.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0,1.0,0.00,0.00,0.00,0.00,0.00,0.23,0.00,0.23,0.23,0.23,Matches,/en/players/ed90babd/Adam-Ounas,ed90babd
3523,Alessandro Zanoli,it ITA,DF,21.0,1,0,12.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.1,1.0,0.0,2.0,0.00,0.00,0.00,0.00,0.00,0.47,0.00,0.47,0.47,0.47,Matches,/en/players/3b12b11f/Alessandro-Zanoli,3b12b11f
3524,Hubert Idasiak,pl POL,GK,20.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,Matches,/en/players/1b70f53a/Hubert-Idasiak,1b70f53a
3525,Davide Marfella,it ITA,GK,22.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,Matches,/en/players/880fcdbb/Davide-Marfella,880fcdbb


In [67]:
url = f'https://fbref.com/en/squads/54195385/Troyes-Stats'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
result = requests.get(url, headers=headers)
#players = pd.read_html(result.content)[0]
soup = BeautifulSoup(result.content, 'html.parser')
extra = soup.find("table", {"id": re.compile('stats_standard*')}).find_all('tr')
id_list = []
link_list = []

for j in range(2, len(extra)-2):
    print(extra[j].find('th'))
    id_list.append(extra[j].find('th'))
    link_list.append(extra[j].find('a'))

AttributeError: 'NoneType' object has no attribute 'find_all'

In [85]:
url = f'https://fbref.com/en/squads/54195385/Troyes-Stats'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
result = requests.get(url, headers=headers)
players = pd.read_html(result.content)[0]
players.columns = players.columns.droplevel(0)
players = players.drop(players.tail(2).index)
soup = BeautifulSoup(result.content, 'html.parser')
extra = soup.find("table", {"id": re.compile('stats_standard*')}).find_all('tr')
id_list = []
link_list = []

for j in range(2, len(extra)-2):
    id_list.append(extra[j].find('th')['data-append-csv'])
    cur_link = extra[j].find('a')['href']
    if cur_link:
        link_list.append(cur_link)

players['link'] = link_list
players['id'] = id_list
players

Unnamed: 0,Player,Nation,Pos,Age,MP,Starts,Min,90s,Gls,Ast,G+A,G-PK,PK,PKatt,CrdY,CrdR,xG,npxG,xAG,npxG+xAG,PrgC,PrgP,PrgR,Gls.1,Ast.1,G+A.1,G-PK.1,G+A-PK,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1,Matches,link,id
0,Erik Palmer-Brown,us USA,DF,25.0,38,36,3191.0,35.5,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,1.1,1.1,0.4,1.5,6.0,75.0,0.0,0.00,0.00,0.00,0.00,0.00,0.03,0.01,0.04,0.03,0.04,Matches,/en/players/e5c1e0ca/Erik-Palmer-Brown,e5c1e0ca
1,Rominigue Kouamé,ml MLI,MF,25.0,32,31,2661.0,29.6,1.0,1.0,2.0,1.0,0.0,0.0,9.0,0.0,1.4,1.4,2.1,3.5,59.0,169.0,36.0,0.03,0.03,0.07,0.03,0.07,0.05,0.07,0.12,0.05,0.12,Matches,/en/players/86a2ba20/Rominigue-Kouame,86a2ba20
2,Yoann Salmier,gf GUF,DF,29.0,32,31,2646.0,29.4,1.0,0.0,1.0,1.0,0.0,0.0,5.0,1.0,1.2,1.2,0.4,1.6,24.0,121.0,9.0,0.03,0.00,0.03,0.03,0.03,0.04,0.02,0.05,0.04,0.05,Matches,/en/players/c3725db2/Yoann-Salmier,c3725db2
3,Gauthier Gallon,fr FRA,GK,29.0,30,30,2667.0,29.6,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,Matches,/en/players/db3164e1/Gauthier-Gallon,db3164e1
4,Mama Samba Baldé,gw GNB,FW,26.0,34,30,2665.0,29.6,12.0,3.0,15.0,10.0,2.0,2.0,6.0,2.0,10.6,9.1,2.1,11.2,51.0,56.0,179.0,0.41,0.10,0.51,0.34,0.44,0.36,0.07,0.43,0.31,0.38,Matches,/en/players/fb14aa28/Mama-Samba-Balde,fb14aa28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,Ryan Fage,fr FRA,MF,18.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,Matches,/en/players/004cce90/Ryan-Fage,004cce90
34,Corentin Michel,be BEL,GK,22.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,Matches,/en/players/4fe0044b/Corentin-Michel,4fe0044b
35,Jessy Moulin,fr FRA,GK,36.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,Matches,/en/players/28a65a2c/Jessy-Moulin,28a65a2c
36,Eric N'Jo,fr FRA,DF,18.0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,Matches,/en/players/b85e57bb/Eric-NJo,b85e57bb
