## Bundesliga German Football Table Web Scraping

In [2]:
# Import libraries

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [4]:
url = "https://www.bundesliga.com/en/bundesliga/table"

# requests has no default timeout; without one this cell can hang forever
# if the server accepts the connection but never answers.
response = requests.get(url, timeout=30)

# Quick sanity check: 200 means the page was served successfully.
response.status_code

200

## Get Soup

In [6]:
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(response.content, features='html.parser')

In [7]:
soup

<!DOCTYPE html>
<html lang="en"><head><link crossorigin="" href="https://fonts.gstatic.com" rel="preconnect"/><meta charset="utf-8"/><title>Bundesliga | Table | 2021-2022</title><script type="text/javascript">let shouldUseDarkTheme="0";const availibleThemes=["light","dark"];localStorage&&availibleThemes.includes(localStorage.getItem("bl-force-theme"))?shouldUseDarkTheme="dark"===localStorage.getItem("bl-force-theme")?"1":"0":window.matchMedia&&window.matchMedia("(prefers-color-scheme:dark)").matches&&(shouldUseDarkTheme="1"),window.document.documentElement.setAttribute("dark",shouldUseDarkTheme)</script><base href="/"/><link href="assets/favicons/favicon.ico" rel="shortcut icon" type="image/x-icon"/><link href="assets/favicons/favicon-16x16.png" rel="icon" sizes="16x16"/><link href="assets/favicons/favicon-32x32.png" rel="icon" sizes="32x32"/><link href="assets/favicons/android-chrome-192x192.png" rel="icon" sizes="192x192"/><link href="assets/favicons/apple-touch-icon-180x180.png" rel

In [10]:
# Locate the standings table first, then collect every row (<tr>) inside it.
standings_table = soup.find('table', class_='table')
results = standings_table.find_all('tr')

In [11]:
results

[<tr _ngcontent-sc235=""><th _ngcontent-sc235="" class="qual" scope="col"></th><th _ngcontent-sc235="" class="rank"></th><th _ngcontent-sc235="" class="tend"></th><th _ngcontent-sc235="" class="logo"></th><th _ngcontent-sc235="" class="team"></th><th _ngcontent-sc235="" class="live"></th><th _ngcontent-sc235="" class="matches"><span _ngcontent-sc235="" class="d-none d-lg-inline">Played </span><span _ngcontent-sc235="" class="d-lg-none">P</span></th><th _ngcontent-sc235="" class="pts"><span _ngcontent-sc235="" class="d-none d-lg-inline">Points </span><span _ngcontent-sc235="" class="d-lg-none">Pts</span></th><th _ngcontent-sc235="" class="d-none d-lg-table-cell wins">W</th><th _ngcontent-sc235="" class="d-none d-lg-table-cell draws">D</th><th _ngcontent-sc235="" class="d-none d-lg-table-cell looses">L</th><th _ngcontent-sc235="" class="d-none d-md-table-cell goals"><span _ngcontent-sc235="" class="d-none d-md-inline">Goals </span><span _ngcontent-sc235="" class="d-md-none">G</span></th>

In [16]:
# Remove the header row: it only contains <th> cells, while team rows contain <td> cells.
# Filtering (instead of slicing off index 0) keeps this cell idempotent, so
# re-running it never drops a real team row.

results = [row for row in results if row.find('td') is not None]

## Get Team Name

In [49]:
results[0].find('td', {'class': 'team'}).find('span', {'class': 'd-none d-lg-inline'}).get_text()

'FC Bayern München'

In [47]:
results[17].find('td', {'class': 'team'}).find('span', {'class': 'd-none d-lg-inline'}).get_text()

'SpVgg Greuther Fürth'

## Number Of Matches

In [51]:
results[0].find('td', {'class': 'matches'}).get_text()

'20'

## Points

In [52]:
results[0].find('td', {'class': 'pts'}).get_text()

'49'

## Wins, Draws, Losses

In [53]:
# Wins:

results[0].find('td', {'class': 'd-none d-lg-table-cell wins'}).get_text()

'16'

In [54]:
# Draws:

results[0].find('td', {'class': 'd-none d-lg-table-cell draws'}).get_text()

'1'

In [55]:
# Losses

results[0].find('td', {'class': 'd-none d-lg-table-cell looses'}).get_text()

'3'

## Goals

In [57]:
results[0].find('td', {'class': 'd-none d-md-table-cell goals'}).get_text()

'65:19'

## Goal Difference

In [62]:
# Remove plus sign cases

results[0].find('td', {'class': 'difference'}).get_text().replace("+", "")

'46'

In [63]:
results[17].find('td', {'class': 'difference'}).get_text().replace("+", "")

'-35'

## Creating Table

In [76]:
# Scrape the table column by column, using list comprehensions
# instead of the for-loop/append method:

teams = [
    row.find('td', class_='team').find('span', class_='d-none d-lg-inline').get_text()
    for row in results
]

matches = [row.find('td', class_='matches').get_text() for row in results]

points = [row.find('td', class_='pts').get_text() for row in results]

wins = [row.find('td', class_='d-none d-lg-table-cell wins').get_text() for row in results]

draws = [row.find('td', class_='d-none d-lg-table-cell draws').get_text() for row in results]

# 'looses' is the class name used by the website itself, not a typo here.
losses = [row.find('td', class_='d-none d-lg-table-cell looses').get_text() for row in results]

goals = [row.find('td', class_='d-none d-md-table-cell goals').get_text() for row in results]

# Strip the leading "+" so positive and negative differences share one format.
goal_diff = [row.find('td', class_='difference').get_text().replace("+", "") for row in results]

In [104]:
## Make pandas Dataframe:

# Rank follows the scraped order. Its length is taken from `teams` rather than
# hard-coded to 18, so the constructor does not fail with a length mismatch
# if the page ever lists a different number of rows.
bundesliga_df = pd.DataFrame({'Rank': range(1, len(teams) + 1), 'Team': teams, 'Matches': matches,
                              'Points': points, 'Wins': wins, 'Draws': draws,
                              'Losses': losses, 'Goals': goals, 'Goal Difference': goal_diff})

In [105]:
bundesliga_df

Unnamed: 0,Rank,Team,Matches,Points,Wins,Draws,Losses,Goals,Goal Difference
0,1,FC Bayern München,20,49,16,1,3,65:19,46
1,2,Borussia Dortmund,20,43,14,1,5,52:31,21
2,3,Bayer 04 Leverkusen,20,35,10,5,5,49:32,17
3,4,1. FC Union Berlin,20,34,9,7,4,29:25,4
4,5,SC Freiburg,20,33,9,6,5,33:23,10
5,6,RB Leipzig,20,31,9,4,7,38:23,15
6,7,TSG Hoffenheim,20,31,9,4,7,41:32,9
7,8,1. FC Köln,20,29,7,8,5,32:34,-2
8,9,Eintracht Frankfurt,20,28,7,7,6,30:30,0
9,10,1. FSV Mainz 05,20,27,8,3,9,28:23,5


In [106]:
## Split Goals Into Goals For & Goals Against:

# "65:19" -> two columns: the part before ":" and the part after it.
goal_parts = bundesliga_df['Goals'].str.split(":", expand=True)
bundesliga_df[['Goals For', 'Goals Against']] = goal_parts

In [107]:
bundesliga_df

Unnamed: 0,Rank,Team,Matches,Points,Wins,Draws,Losses,Goals,Goal Difference,Goals For,Goals Against
0,1,FC Bayern München,20,49,16,1,3,65:19,46,65,19
1,2,Borussia Dortmund,20,43,14,1,5,52:31,21,52,31
2,3,Bayer 04 Leverkusen,20,35,10,5,5,49:32,17,49,32
3,4,1. FC Union Berlin,20,34,9,7,4,29:25,4,29,25
4,5,SC Freiburg,20,33,9,6,5,33:23,10,33,23
5,6,RB Leipzig,20,31,9,4,7,38:23,15,38,23
6,7,TSG Hoffenheim,20,31,9,4,7,41:32,9,41,32
7,8,1. FC Köln,20,29,7,8,5,32:34,-2,32,34
8,9,Eintracht Frankfurt,20,28,7,7,6,30:30,0,30,30
9,10,1. FSV Mainz 05,20,27,8,3,9,28:23,5,28,23


In [108]:
# Drop the combined Goals column (the columns are rearranged in the next cell).
# Reassigning with errors='ignore' instead of using inplace=True lets this cell
# be re-run without a KeyError once 'Goals' has already been removed.

bundesliga_df = bundesliga_df.drop(columns='Goals', errors='ignore')

bundesliga_df

Unnamed: 0,Rank,Team,Matches,Points,Wins,Draws,Losses,Goal Difference,Goals For,Goals Against
0,1,FC Bayern München,20,49,16,1,3,46,65,19
1,2,Borussia Dortmund,20,43,14,1,5,21,52,31
2,3,Bayer 04 Leverkusen,20,35,10,5,5,17,49,32
3,4,1. FC Union Berlin,20,34,9,7,4,4,29,25
4,5,SC Freiburg,20,33,9,6,5,10,33,23
5,6,RB Leipzig,20,31,9,4,7,15,38,23
6,7,TSG Hoffenheim,20,31,9,4,7,9,41,32
7,8,1. FC Köln,20,29,7,8,5,-2,32,34
8,9,Eintracht Frankfurt,20,28,7,7,6,0,30,30
9,10,1. FSV Mainz 05,20,27,8,3,9,5,28,23


In [109]:
# Final column order: goals for/against sit next to the goal difference.
column_order = [
    'Rank', 'Team', 'Matches', 'Points',
    'Wins', 'Draws', 'Losses',
    'Goals For', 'Goals Against', 'Goal Difference',
]
bundesliga_df = bundesliga_df.reindex(columns=column_order)

In [110]:
bundesliga_df

Unnamed: 0,Rank,Team,Matches,Points,Wins,Draws,Losses,Goals For,Goals Against,Goal Difference
0,1,FC Bayern München,20,49,16,1,3,65,19,46
1,2,Borussia Dortmund,20,43,14,1,5,52,31,21
2,3,Bayer 04 Leverkusen,20,35,10,5,5,49,32,17
3,4,1. FC Union Berlin,20,34,9,7,4,29,25,4
4,5,SC Freiburg,20,33,9,6,5,33,23,10
5,6,RB Leipzig,20,31,9,4,7,38,23,15
6,7,TSG Hoffenheim,20,31,9,4,7,41,32,9
7,8,1. FC Köln,20,29,7,8,5,32,34,-2
8,9,Eintracht Frankfurt,20,28,7,7,6,30,30,0
9,10,1. FSV Mainz 05,20,27,8,3,9,28,23,5


## All In One Function

Putting it all together in one function. Saving the table to a .csv or Excel file is not part of the function. All scraped values are strings (only `Rank` is an integer), so the user has to convert them to numeric types if needed.

In [115]:
def bundesliga_scrape(url="https://www.bundesliga.com/en/bundesliga/table", timeout=30):
    """Scrape the Bundesliga standings table into a pandas DataFrame.

    Parameters
    ----------
    url : str, optional
        Page to scrape. Defaults to the official English table page.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to
        ``requests.get``).

    Returns
    -------
    pandas.DataFrame
        One row per team, in the order the page lists them, with the columns
        'Rank', 'Team', 'Matches', 'Points', 'Wins', 'Draws', 'Losses',
        'Goals For', 'Goals Against' and 'Goal Difference'. 'Rank' is an
        integer starting at 1; every other column holds the scraped text
        (strings), so convert to numeric types if needed.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    ValueError
        If the page has no matching table or the table has no team rows.
    """

    def cell_text(row, css_class):
        # Text of the first <td> in `row` matched by the given class string.
        return row.find('td', {'class': css_class}).get_text()

    # No default timeout exists in requests; set one so the call cannot hang.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find('table', {'class': 'table'})
    if table is None:
        raise ValueError("No <table class='table'> found at " + url)

    # The header row only has <th> cells; keep the rows that carry <td> data.
    rows = [row for row in table.find_all('tr') if row.find('td') is not None]
    if not rows:
        raise ValueError("The standings table at " + url + " contains no team rows")

    # Scrape the table column by column with list comprehensions:

    teams = [row.find('td', {'class': 'team'}).find('span', {'class': 'd-none d-lg-inline'}).get_text()
             for row in rows]
    matches = [cell_text(row, 'matches') for row in rows]
    points = [cell_text(row, 'pts') for row in rows]

    wins = [cell_text(row, 'd-none d-lg-table-cell wins') for row in rows]
    draws = [cell_text(row, 'd-none d-lg-table-cell draws') for row in rows]
    # 'looses' is the class name used by the website itself.
    losses = [cell_text(row, 'd-none d-lg-table-cell looses') for row in rows]

    goals = [cell_text(row, 'd-none d-md-table-cell goals') for row in rows]
    # Strip the leading "+" so positive and negative differences share one format.
    goal_diff = [cell_text(row, 'difference').replace("+", "") for row in rows]

    # Make pandas DataFrame. Rank is sized from the scraped rows rather than
    # hard-coded to 18, so a different row count does not raise.
    df = pd.DataFrame({'Rank': range(1, len(teams) + 1), 'Team': teams, 'Matches': matches,
                       'Points': points, 'Wins': wins, 'Draws': draws,
                       'Losses': losses, 'Goals': goals, 'Goal Difference': goal_diff})

    # Split Goals ("65:19") into Goals For & Goals Against:
    df[['Goals For', 'Goals Against']] = df['Goals'].str.split(":", expand=True)

    # Drop the combined Goals column (no in-place mutation).
    df = df.drop(columns='Goals')

    # Rearrange columns
    df = df.reindex(columns=['Rank', 'Team', 'Matches', 'Points',
                             'Wins', 'Draws', 'Losses', 'Goals For',
                             'Goals Against', 'Goal Difference'])
    return df

In [116]:
# Function Call

bundesliga_df = bundesliga_scrape()

bundesliga_df

Unnamed: 0,Rank,Team,Matches,Points,Wins,Draws,Losses,Goals For,Goals Against,Goal Difference
0,1,FC Bayern München,20,49,16,1,3,65,19,46
1,2,Borussia Dortmund,20,43,14,1,5,52,31,21
2,3,Bayer 04 Leverkusen,20,35,10,5,5,49,32,17
3,4,1. FC Union Berlin,20,34,9,7,4,29,25,4
4,5,SC Freiburg,20,33,9,6,5,33,23,10
5,6,RB Leipzig,20,31,9,4,7,38,23,15
6,7,TSG Hoffenheim,20,31,9,4,7,41,32,9
7,8,1. FC Köln,20,29,7,8,5,32,34,-2
8,9,Eintracht Frankfurt,20,28,7,7,6,30,30,0
9,10,1. FSV Mainz 05,20,27,8,3,9,28,23,5


In [120]:
# Today's date:

from datetime import date

date.today()

2022-01-27


In [123]:
'bundesliga_table_' + str(date.today())

'bundesliga_table_2022-01-27'

In [124]:
# Save to .csv, with today's date in the file name (index column left out):

csv_name = f"bundesliga_table_{date.today()}.csv"
bundesliga_df.to_csv(csv_name, index=False)