Note: \
It might be much faster using asynchronous programming and parsel: import asyncio, import aiohttp, from parsel import Selector, lxml was faster too \
Asyncio with Multiprocessing do this for faster return time for later use


In [25]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import random
import concurrent.futures
import time

In [7]:
def getAllClubLinks(url):
    
    #get the response
    response = requests.get(url)
    soup = BeautifulSoup(response.text)
    
    #select the first table and find all anchor tags
    team_table = soup.select("table.stats_table")[0]
    team_links = team_table.find_all("a")
    
    #get if link has href and and "/squads/"
    team_links = [l.get("href") for l in team_links]
    team_links = [l for l in team_links if "/squads/" in l]
    team_links = [f"https://fbref.com{l}" for l in team_links]
    return team_links

[]

In [23]:


def getAllClubData(url):
    # runs only once for all clubs, returns team links
    allClubLinks = getAllClubLinks(url)
    names = {"Comp": "Competition", "Poss": "Possession", "Sh":"Shots", "SoT":"Shots Target", "CrdY":"Yellow", "CrdR":"Red", "Fls":"Fouls","Off":"Offside"}
    
    result = pd.DataFrame()  # Initialize an empty dataframe
    
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit each club processing as a separate task
        club_tasks = [executor.submit(processClubData, link) for link in allClubLinks]
        
        # Process the completed tasks as they finish
        for future in concurrent.futures.as_completed(club_tasks):
            club_data = future.result()
            if club_data is not None:
                club_data.rename(columns=names, inplace=True)  # Rename columns
                result = pd.concat([result, club_data], axis=0)  # Concatenate current club_data with the result dataframe
                print(result)
    
    return result


def getClubData(requestData, param, match, keepList):
    soup = BeautifulSoup(requestData.text)
    all_links = soup.find_all("a")
    links = [l.get("href") for l in all_links]
    links = [l for l in links if l and param in l]
    links = "https://fbref.com" + links[0]
    time.sleep(random.randint(10,30))
    shooting = requests.get(links)
    shooting = pd.read_html(shooting.text, match=match)[0]
    shooting.columns = shooting.columns.droplevel()
    shooting = shooting[keepList]
    return shooting[:-1]

def processClubData(link):
    time.sleep(random.randint(10,30))
    club = requests.get(link)
    clubName = link.split("/")[-1].replace("-Stats","").replace("-", " ")

    # get Scores (on the same page)
    scores = pd.read_html(club.text, match = "Scores & Fixtures")[0]
    scores = scores[['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent', 'Poss']]

    # get shooting and other stat (figure better ways for these two)
    shooting = getClubData(club, "/all_comps/shooting/", "Shooting", ['Date', 'Sh', "SoT"])
    time.sleep(random.randint(10,30))
    misc = getClubData(club, "/all_comps/misc/", "Miscellaneous", ['Date', 'CrdY', 'CrdR', 'Fls', 'Off'])
#     names = {"Comp": "Competition", "Poss": "Possession", "Sh":"Shots", "SoT":"Shots Target", "CrdY":"Yellow", "CrdR":"Red", "Fls":"Fouls","Off":"Offside"}
    finalClubData = scores.merge(shooting, how='left').merge(misc, how='left')
#     finalClubData.rename(columns=names, inplace=True)
    finalClubData = finalClubData[finalClubData["Competition"] == "Premier League"]
    finalClubData["Team"] = clubName
    print(clubName)
    return finalClubData

url = "https://fbref.com/en/comps/9/Premier-League-Stats"
res = getAllClubData(url)

Manchester City
          Date   Time     Competition         Round  Day Venue Result   GF  \
1   2022-08-07  16:30  Premier League   Matchweek 1  Sun  Away      W  2.0   
2   2022-08-13  15:00  Premier League   Matchweek 2  Sat  Home      W  4.0   
3   2022-08-21  16:30  Premier League   Matchweek 3  Sun  Away      D  3.0   
4   2022-08-27  15:00  Premier League   Matchweek 4  Sat  Home      W  4.0   
5   2022-08-31  19:30  Premier League   Matchweek 5  Wed  Home      W  6.0   
6   2022-09-03  17:30  Premier League   Matchweek 6  Sat  Away      D  1.0   
9   2022-09-17  12:30  Premier League   Matchweek 8  Sat  Away      W  3.0   
10  2022-10-02  14:00  Premier League   Matchweek 9  Sun  Home      W  6.0   
12  2022-10-08  15:00  Premier League  Matchweek 10  Sat  Home      W  4.0   
14  2022-10-16  16:30  Premier League  Matchweek 11  Sun  Away      L  0.0   
15  2022-10-22  15:00  Premier League  Matchweek 13  Sat  Home      W  3.0   
17  2022-10-29  12:30  Premier League  Matchweek

Brentford
          Date   Time     Competition         Round  Day Venue Result   GF  \
1   2022-08-07  16:30  Premier League   Matchweek 1  Sun  Away      W  2.0   
2   2022-08-13  15:00  Premier League   Matchweek 2  Sat  Home      W  4.0   
3   2022-08-21  16:30  Premier League   Matchweek 3  Sun  Away      D  3.0   
4   2022-08-27  15:00  Premier League   Matchweek 4  Sat  Home      W  4.0   
5   2022-08-31  19:30  Premier League   Matchweek 5  Wed  Home      W  6.0   
..         ...    ...             ...           ...  ...   ...    ...  ...   
36  2023-04-29  15:00  Premier League  Matchweek 34  Sat  Home      W    2   
37  2023-05-06  17:30  Premier League  Matchweek 35  Sat  Away      L    0   
38  2023-05-14  14:00  Premier League  Matchweek 36  Sun  Home      W    2   
39  2023-05-20  12:30  Premier League  Matchweek 37  Sat  Away      W    3   
40  2023-05-28  16:30  Premier League  Matchweek 38  Sun  Home      W    1   

     GA         Opponent  Possession  Shots  Shots Ta

Crystal Palace
          Date   Time     Competition         Round  Day Venue Result   GF  \
1   2022-08-07  16:30  Premier League   Matchweek 1  Sun  Away      W  2.0   
2   2022-08-13  15:00  Premier League   Matchweek 2  Sat  Home      W  4.0   
3   2022-08-21  16:30  Premier League   Matchweek 3  Sun  Away      D  3.0   
4   2022-08-27  15:00  Premier League   Matchweek 4  Sat  Home      W  4.0   
5   2022-08-31  19:30  Premier League   Matchweek 5  Wed  Home      W  6.0   
..         ...    ...             ...           ...  ...   ...    ...  ...   
36  2023-04-29  12:30  Premier League  Matchweek 34  Sat  Home      W    4   
37  2023-05-06  15:00  Premier League  Matchweek 35  Sat  Away      L    0   
38  2023-05-13  15:00  Premier League  Matchweek 36  Sat  Home      W    2   
39  2023-05-20  15:00  Premier League  Matchweek 37  Sat  Away      D    2   
40  2023-05-28  16:30  Premier League  Matchweek 38  Sun  Home      D    1   

     GA         Opponent  Possession  Shots  Sho

Aston Villa
          Date   Time     Competition         Round  Day Venue Result   GF  \
1   2022-08-07  16:30  Premier League   Matchweek 1  Sun  Away      W  2.0   
2   2022-08-13  15:00  Premier League   Matchweek 2  Sat  Home      W  4.0   
3   2022-08-21  16:30  Premier League   Matchweek 3  Sun  Away      D  3.0   
4   2022-08-27  15:00  Premier League   Matchweek 4  Sat  Home      W  4.0   
5   2022-08-31  19:30  Premier League   Matchweek 5  Wed  Home      W  6.0   
..         ...    ...             ...           ...  ...   ...    ...  ...   
36  2023-04-30  14:00  Premier League  Matchweek 34  Sun  Away      L    0   
37  2023-05-06  15:00  Premier League  Matchweek 35  Sat  Away      L    0   
38  2023-05-13  15:00  Premier League  Matchweek 36  Sat  Home      W    2   
39  2023-05-20  15:00  Premier League  Matchweek 37  Sat  Away      D    1   
40  2023-05-28  16:30  Premier League  Matchweek 38  Sun  Home      W    2   

     GA         Opponent  Possession  Shots  Shots 

Leicester City
          Date   Time     Competition         Round  Day Venue Result   GF  \
1   2022-08-07  16:30  Premier League   Matchweek 1  Sun  Away      W  2.0   
2   2022-08-13  15:00  Premier League   Matchweek 2  Sat  Home      W  4.0   
3   2022-08-21  16:30  Premier League   Matchweek 3  Sun  Away      D  3.0   
4   2022-08-27  15:00  Premier League   Matchweek 4  Sat  Home      W  4.0   
5   2022-08-31  19:30  Premier League   Matchweek 5  Wed  Home      W  6.0   
..         ...    ...             ...           ...  ...   ...    ...  ...   
40  2023-05-01  20:00  Premier League  Matchweek 34  Mon  Home      D    2   
41  2023-05-08  15:00  Premier League  Matchweek 35  Mon  Away      L    3   
42  2023-05-15  20:00  Premier League  Matchweek 36  Mon  Home      L    0   
43  2023-05-22  20:00  Premier League  Matchweek 37  Mon  Away      D    0   
44  2023-05-28  16:30  Premier League  Matchweek 38  Sun  Home      W    2   

     GA         Opponent  Possession  Shots  Sho

Leeds United
          Date   Time     Competition         Round  Day Venue Result   GF  \
1   2022-08-07  16:30  Premier League   Matchweek 1  Sun  Away      W  2.0   
2   2022-08-13  15:00  Premier League   Matchweek 2  Sat  Home      W  4.0   
3   2022-08-21  16:30  Premier League   Matchweek 3  Sun  Away      D  3.0   
4   2022-08-27  15:00  Premier League   Matchweek 4  Sat  Home      W  4.0   
5   2022-08-31  19:30  Premier League   Matchweek 5  Wed  Home      W  6.0   
..         ...    ...             ...           ...  ...   ...    ...  ...   
39  2023-04-30  14:00  Premier League  Matchweek 34  Sun  Away      L    1   
40  2023-05-06  15:00  Premier League  Matchweek 35  Sat  Away      L    1   
41  2023-05-13  12:30  Premier League  Matchweek 36  Sat  Home      D    2   
42  2023-05-21  13:30  Premier League  Matchweek 37  Sun  Away      L    1   
43  2023-05-28  16:30  Premier League  Matchweek 38  Sun  Home      L    1   

     GA         Opponent  Possession  Shots  Shots

In [24]:
res

Unnamed: 0,Date,Time,Competition,Round,Day,Venue,Result,GF,GA,Opponent,Possession,Shots,Shots Target,Yellow,Red,Fouls,Offside,Team
1,2022-08-07,16:30,Premier League,Matchweek 1,Sun,Away,W,2.0,0.0,West Ham,75.0,13.0,1.0,1.0,0.0,4.0,1.0,Manchester City
2,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4.0,0.0,Bournemouth,67.0,19.0,7.0,0.0,0.0,9.0,2.0,Manchester City
3,2022-08-21,16:30,Premier League,Matchweek 3,Sun,Away,D,3.0,3.0,Newcastle Utd,69.0,21.0,10.0,2.0,0.0,7.0,1.0,Manchester City
4,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4.0,2.0,Crystal Palace,74.0,18.0,5.0,1.0,0.0,14.0,2.0,Manchester City
5,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6.0,0.0,Nott'ham Forest,74.0,17.0,9.0,0.0,0.0,9.0,2.0,Manchester City
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,2023-04-30,14:00,Premier League,Matchweek 34,Sun,Away,L,1,4,Bournemouth,58.0,15.0,6.0,0.0,0.0,14.0,1.0,Leeds United
40,2023-05-06,15:00,Premier League,Matchweek 35,Sat,Away,L,1,2,Manchester City,20.0,4.0,2.0,3.0,0.0,16.0,3.0,Leeds United
41,2023-05-13,12:30,Premier League,Matchweek 36,Sat,Home,D,2,2,Newcastle Utd,36.0,8.0,3.0,4.0,1.0,16.0,0.0,Leeds United
42,2023-05-21,13:30,Premier League,Matchweek 37,Sun,Away,L,1,3,West Ham,41.0,12.0,3.0,3.0,0.0,11.0,1.0,Leeds United


<!-- <a href="/en/squads/b8fd03ef/2022-2023/matchlogs/all_comps/keeper/Manchester-City-Match-Logs-All-Competitions">Goalkeeping</a>  /

<a href="/en/squads/b8fd03ef/2022-2023/matchlogs/all_comps/passing/Manchester-City-Match-Logs-All-Competitions">Passing</a>

<a href="/en/squads/b8fd03ef/2022-2023/matchlogs/all_comps/shooting/Manchester-City-Match-Logs-All-Competitions">Shooting</a>
 -->