# Libraries

In [1]:
import re
import time
from io import StringIO

import numpy as np
import pandas as pd

# Web-scraping packages
import requests
from bs4 import BeautifulSoup


In [2]:
# FBref landing page for the World Cup competition; every later cell scrapes from here.
# NOTE(review): the CSS selector used further down targets a 2022-specific table id —
# confirm this URL still serves that season before re-running.
page_url = (
    "https://fbref.com/en/comps/1/World-Cup-Stats"
)

In [3]:
# Parse every table on the competition page whose text matches 'League Table'.
# pd.read_html returns a list of DataFrames; the first match is displayed below.
# NOTE(review): the rendered table contains some all-empty spacer rows —
# consider dropping them (dropna(how='all')) before using the data.
league_table = pd.read_html(io=page_url, match='League Table')
league_table[0]

Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,xG,xGA,xGD,xGD/90,Top Team Scorer,Goalkeeper,Notes
0,1,ar Argentina,7.0,4.0,2.0,1.0,15.0,8.0,7.0,14.0,15.1,4.6,10.4,1.49,Lionel Messi - 7,Emiliano Martínez,
1,,,,,,,,,,,,,,,,,
2,2,fr France,7.0,5.0,1.0,1.0,16.0,8.0,8.0,16.0,13.8,9.9,3.9,0.55,Kylian Mbappé - 8,Hugo Lloris,
3,,,,,,,,,,,,,,,,,
4,3,hr Croatia,7.0,2.0,4.0,1.0,8.0,7.0,1.0,10.0,7.0,11.0,-4.0,-0.58,Andrej Kramarić - 2,Dominik Livaković,
5,,,,,,,,,,,,,,,,,
6,4,ma Morocco,7.0,3.0,2.0,2.0,6.0,5.0,1.0,11.0,6.6,7.4,-0.8,-0.12,Youssef En-Nesyri - 2,Yassine Bounou,
7,,,,,,,,,,,,,,,,,
8,QF,nl Netherlands,5.0,3.0,2.0,0.0,10.0,4.0,6.0,11.0,4.6,6.1,-1.5,-0.29,Cody Gakpo - 3,Andries Noppert,
9,QF,eng England,5.0,3.0,1.0,1.0,13.0,4.0,9.0,10.0,8.7,4.0,4.6,0.92,"Bukayo Saka, Marcus Rashford - 3",Jordan Pickford,


In [4]:
# Download the competition page and parse it so the squad links can be extracted
request = requests.get(page_url, timeout=30)
# Fail loudly on an HTTP error (e.g. rate limiting) instead of parsing an error page
request.raise_for_status()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed and emits a warning, so results can differ by machine
soup = BeautifulSoup(request.text, 'html.parser')

In [5]:
# Collect the relative squad-page URLs: every link found in a left-aligned
# cell of the overall results table
squad_links = soup.select('#results202210_overall td.left a')
teams = [anchor['href'] for anchor in squad_links]
teams

['/en/squads/f9fddd6e/Argentina-Men-Stats',
 '/en/squads/b1b36dcd/2022/France-Men-Stats',
 '/en/squads/7b08e376/2022/Croatia-Men-Stats',
 '/en/squads/af41ccda/2022/Morocco-Men-Stats',
 '/en/squads/5bb5024a/2022/Netherlands-Men-Stats',
 '/en/squads/1862c019/2022/England-Men-Stats',
 '/en/squads/304635c3/Brazil-Men-Stats',
 '/en/squads/4a1b4ea8/2022/Portugal-Men-Stats',
 '/en/squads/ffcf1690/Japan-Men-Stats',
 '/en/squads/9ab5c684/2022/Senegal-Men-Stats',
 '/en/squads/b90bf4f9/Australia-Men-Stats',
 '/en/squads/81021a70/2022/Switzerland-Men-Stats',
 '/en/squads/b561dd30/2022/Spain-Men-Stats',
 '/en/squads/0f66725b/United-States-Men-Stats',
 '/en/squads/8912dcf0/2022/Poland-Men-Stats',
 '/en/squads/473f0fbf/Korea-Republic-Men-Stats',
 '/en/squads/c1e40422/2022/Germany-Men-Stats',
 '/en/squads/123acaf8/Ecuador-Men-Stats',
 '/en/squads/896550da/2022/Cameroon-Men-Stats',
 '/en/squads/870e020f/Uruguay-Men-Stats',
 '/en/squads/a7c7562a/2022/Tunisia-Men-Stats',
 '/en/squads/b009a548/Mexico-Men-

In [6]:
# One list per table type; each entry is a single team's DataFrame
roster_table_list = []
standard_stats_table_list = []
shooting_table_list = []
passing_table_list = []
miscellaneous_stats_table_list = []
team_name = ""

# Seconds to pause after each squad-page download
REQUEST_DELAY_SECONDS = 3

# Text used to locate each table on a squad page -> list that collects it
tables_by_caption = {
    'Roster': roster_table_list,
    'Standard Stats': standard_stats_table_list,
    'Shooting': shooting_table_list,
    'Passing': passing_table_list,
    'Miscellaneous Stats': miscellaneous_stats_table_list,
}

for team in teams:
    # Download the squad page once and parse all five tables from the same
    # HTML, rather than re-downloading the page for every table
    response = requests.get('https://fbref.com' + team, timeout=30)
    response.raise_for_status()
    team_html = response.text

    # Parse everything first so a missing table aborts before any list is
    # extended with partial data for this team
    parsed_tables = {
        caption: pd.read_html(StringIO(team_html), match=caption)[0]
        for caption in tables_by_caption
    }

    # Derive the team's name from the URL slug, e.g.
    # 'United-States-Men-Stats' -> 'United States'. Taking only the text before
    # the first hyphen would truncate multi-word names to 'United', 'Korea', ...
    slug = team.split('/')[-1]
    team_name = re.sub(r'-(?:Men|Women)-Stats$', '', slug).replace('-', ' ')

    # Tag each table with the team's name and store it
    for caption, table in parsed_tables.items():
        table['Team'] = team_name
        tables_by_caption[caption].append(table)

    # Pause after every download, including the last one for a team, so
    # consecutive teams are never requested back-to-back
    time.sleep(REQUEST_DELAY_SECONDS)

In [7]:
def _combine_team_tables(tables, flatten_columns=False):
    """Stack per-team tables into one DataFrame.

    Parameters
    ----------
    tables : list of DataFrame
        One table per team, all of the same table type.
    flatten_columns : bool, default False
        When True, the two-level column header is reduced to its lower level.
        A column whose lower label is empty keeps its upper label instead:
        a column added as ``df['Team']`` to a two-level frame is stored as
        ``('Team', '')``, so simply dropping level 0 would leave it unnamed.

    Returns
    -------
    DataFrame
        All rows of ``tables`` in order, keeping each table's own row index.

    Raises
    ------
    ValueError
        If ``tables`` is empty (raised by ``pd.concat``).
    """
    # Concatenate once; growing a frame with concat inside a loop copies all
    # previously accumulated rows on every iteration
    combined = pd.concat(tables)
    if flatten_columns:
        upper_labels = combined.columns.get_level_values(0)
        lower_labels = combined.columns.droplevel(0)
        combined.columns = [
            upper if lower == '' else lower
            for upper, lower in zip(upper_labels, lower_labels)
        ]
    return combined


# Build one DataFrame per table type from the per-team lists
df_roster_table = _combine_team_tables(roster_table_list)

# The stats tables carry a two-level column header; keep only the lower level
df_standard_stats_table = _combine_team_tables(standard_stats_table_list, flatten_columns=True)
df_shooting_table = _combine_team_tables(shooting_table_list, flatten_columns=True)
df_passing_table = _combine_team_tables(passing_table_list, flatten_columns=True)
df_miscellaneous_stats_table = _combine_team_tables(miscellaneous_stats_table_list, flatten_columns=True)

In [8]:
# Write each collected table to its own CSV file in the working directory
csv_exports = [
    ('team_data.csv', league_table[0]),
    ("roster_table.csv", df_roster_table),
    ("standard_stats_table.csv", df_standard_stats_table),
    ("shooting_table.csv", df_shooting_table),
    ("passing_table.csv", df_passing_table),
    ("miscellaneous_stats_table.csv", df_miscellaneous_stats_table),
]
for csv_path, frame in csv_exports:
    frame.to_csv(csv_path)