# Teams: League Table & Scores

### Current League Table

In [5]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [31]:
def League_Table_Scraper(url):
    # getting soup from the url
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    league_table = soup.find("div", {'class': 'table_outer_container'}).div.table
    # creating empty lists for looping
    rank = []
    club = []
    games = []
    wins = []
    draws = []
    losses = []
    goals_for = []
    goals_against = []
    goal_diff = []
    points = []
    last_5 = []
    attendance_per_g = []
    top_team_scorers = []
    top_keeper = []
    notes = []
    # looping over different teams (rows)
    for team in league_table.find_all("tbody"):
        rows = team.find_all("tr")
        # looping over different variables (columns) and writing into lists
        for row in rows:
            rank.append(row.find("th", {'data-stat': 'rank'}).text)
            club.append(row.find("td", {'data-stat': 'squad'}).text)
            games.append(row.find("td", {'data-stat': 'games'}).text)
            wins.append(row.find("td", {'data-stat': 'wins'}).text)
            draws.append(row.find("td", {'data-stat': 'draws'}).text)
            losses.append(row.find("td", {'data-stat': 'losses'}).text)
            goals_for.append(row.find("td", {'data-stat': 'goals_for'}).text)
            goals_against.append(row.find("td", {'data-stat': 'goals_against'}).text)
            goal_diff.append(row.find("td", {'data-stat': 'goal_diff'}).text)
            points.append(row.find("td", {'data-stat': 'points'}).text)
            last_5.append(row.find("td", {'data-stat': 'last_5'}).text)
            attendance_per_g.append(row.find("td", {'data-stat': 'attendance_per_g'}).text)
            top_team_scorers.append(row.find("td", {'data-stat': 'top_team_scorers'}).text)
            top_keeper.append(row.find("td", {'data-stat': 'top_keeper'}).text)
            notes.append(row.find("td", {'data-stat': 'notes'}).text)
    # creating a dataframe by concatenating the lists
    table = pd.DataFrame({"Rank": rank, "Team": club, "Games": games, "Wins": wins, "Draws": draws, "Losses": losses, "Goals for": goals_for, "Goals against": goals_against, "Goal Difference": goal_diff, "Points": points, "Last 5 Games": last_5, "Attendance per Game": attendance_per_g, "Top Team Scorers": top_team_scorers, "Top Goalkeeper": top_keeper, "Notes": notes})
    return table

In [32]:
url = "https://fbref.com/en/comps/66/Czech-First-League-Stats"
table = League_Table_Scraper(url)
table.to_csv(r"C:\Users\Honza Stuchlík\Documents\IES\Data Processing in Python\Czech-Football-League\league_table.csv", index = False)
table

### Scores & Fixtures

In [7]:
# for regular seasons only (without championship/relegation rounds)
def Scores_Scraper_regular(url):
    
    # getting soup from the url
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    scores_table = soup.find("div", {'class': 'table_outer_container'}).div.table

    gameweek = []
    dayofweek = []
    date = []
    time = []
    squad_a = []
    score = []
    squad_b = []
    attendance = []
    venue = []
    referee = []
    match_report = []
    notes = []

    # looping over different teams (rows)
    for game in scores_table.find_all("tbody"):
        rows = game.find_all("tr")
        
        # looping over different variables (columns) and writing into lists
        for row in rows:
            gameweek.append(row.find("th", {'data-stat': 'gameweek'}).text)
            dayofweek.append(row.find("td", {'data-stat': 'dayofweek'}).text)
            date.append(row.find("td", {'data-stat': 'date'}).text)
            time.append(row.find("td", {'data-stat': 'time'}).text)
            squad_a.append(row.find("td", {'data-stat': 'squad_a'}).text)
            score.append(row.find("td", {'data-stat': 'score'}).text)
            squad_b.append(row.find("td", {'data-stat': 'squad_b'}).text)
            attendance.append(row.find("td", {'data-stat': 'attendance'}).text)
            venue.append(row.find("td", {'data-stat': 'venue'}).text)
            referee.append(row.find("td", {'data-stat': 'referee'}).text)
            match_report.append(row.find("td", {'data-stat': 'match_report'}).text)
            notes.append(row.find("td", {'data-stat': 'notes'}).text)
            
    # creating a dataframe by concatenating the lists
    scores = pd.DataFrame({"Game Week": gameweek, "Weekday": dayofweek, "Date": date, "Time": time, "Home Team": squad_a, "Score": score, "Away Team": squad_b, "Attendance": attendance, "Venue": venue, "Referee": referee, "Match Report": match_report, "Notes": notes})
    
    return scores

In [8]:
url = "https://fbref.com/en/comps/66/schedule/Czech-First-League-Scores-and-Fixtures"
Scores_Scraper_regular(url)

Unnamed: 0,Game Week,Weekday,Date,Time,Home Team,Score,Away Team,Attendance,Venue,Referee,Match Report,Notes
0,1,Fri,2020-08-21,18:00,Viktoria Plzeň,3–1,Opava,2813,Doosan Arena,Alex Denev,Match Report,
1,1,Sat,2020-08-22,17:00,Fastav Zlín,1–2,Slovácko,1282,Stadion Letná,Pavel Královec,Match Report,
2,1,Sat,2020-08-22,17:00,Příbram,1–3,Teplice,1350,Energon Aréna,Ondřej Berka,Match Report,
3,1,Sat,2020-08-22,17:00,Sigma Olomouc,1–0,Slovan Liberec,2216,Andrův stadion,Paval Julínek,Match Report,
4,1,Sat,2020-08-22,19:30,Zbrojovka Brno,1–4,Sparta Prague,2500,Městský fotbalový stadion Srbská,Pavel Franek,Match Report,
...,...,...,...,...,...,...,...,...,...,...,...,...
338,34,Fri,2021-05-28,,Slovácko,,Fastav Zlín,,Městský fotbalový stadion Miroslava Vale...,,Head-to-Head,
339,34,Fri,2021-05-28,,Slavia Prague,,České Budĕjov.,,Sinobo Stadium,,Head-to-Head,
340,34,Fri,2021-05-28,,FK Pardubice,,Jablonec,,,,Head-to-Head,
341,,,,,,,,,,,,


In [3]:
# for seasons with regular rounds and championship/relegation rounds
def Scores_Scraper_irregular(url):
    
    # getting soup from the url
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    scores_table = soup.find("div", {'class': 'table_outer_container'}).div.table

    gameweek = []
    dayofweek = []
    date = []
    time = []
    squad_a = []
    score = []
    squad_b = []
    attendance = []
    venue = []
    referee = []
    match_report = []
    notes = []

    # looping over different teams (rows)
    for game in scores_table.find_all("tbody"):
        rows = game.find_all("tr")
        
        # looping over different variables (columns) and writing into lists
        for row in rows:
            gameweek.append(row.find("td", {'data-stat': 'gameweek'}).text)
            dayofweek.append(row.find("td", {'data-stat': 'dayofweek'}).text)
            date.append(row.find("td", {'data-stat': 'date'}).text)
            time.append(row.find("td", {'data-stat': 'time'}).text)
            squad_a.append(row.find("td", {'data-stat': 'squad_a'}).text)
            score.append(row.find("td", {'data-stat': 'score'}).text)
            squad_b.append(row.find("td", {'data-stat': 'squad_b'}).text)
            attendance.append(row.find("td", {'data-stat': 'attendance'}).text)
            venue.append(row.find("td", {'data-stat': 'venue'}).text)
            referee.append(row.find("td", {'data-stat': 'referee'}).text)
            match_report.append(row.find("td", {'data-stat': 'match_report'}).text)
            notes.append(row.find("td", {'data-stat': 'notes'}).text)
            
    # creating a dataframe by concatenating the lists
    scores = pd.DataFrame({"Game Week": gameweek, "Weekday": dayofweek, "Date": date, "Time": time, "Home Team": squad_a, "Score": score, "Away Team": squad_b, "Attendance": attendance, "Venue": venue, "Referee": referee, "Match Report": match_report, "Notes": notes})
    
    return scores

In [6]:
url = "https://fbref.com/en/comps/66/3226/schedule/2019-2020-Czech-First-League-Scores-and-Fixtures"
Scores_Scraper_irregular(url)

Unnamed: 0,Game Week,Weekday,Date,Time,Home Team,Score,Away Team,Attendance,Venue,Referee,Match Report,Notes
0,1,Fri,2019-07-12,18:00,Jablonec,2–0,Bohemians 1905,2612,Stadion Střelnice,Pavel Franek,Match Report,
1,1,Sat,2019-07-13,17:00,Příbram,1–1,Teplice,2862,Energon Aréna,Paval Julínek,Match Report,
2,1,Sat,2019-07-13,17:00,Baník Ostrava,1–2,Slovan Liberec,7542,Městský stadion - Vítkovice Aréna,Ondřej Berka,Match Report,
3,1,Sat,2019-07-13,19:30,Viktoria Plzeň,3–1,Sigma Olomouc,9611,Doosan Arena,Ondřej Pechanec,Match Report,
4,1,Sun,2019-07-14,16:30,České Budĕjov.,0–1,Opava,4381,Fotbalový stadion Střelecký ostrov,Ondřej Ginzel,Match Report,
...,...,...,...,...,...,...,...,...,...,...,...,...
311,4,Thu,2020-07-23,18:00,Fastav Zlín,,Karviná,,Stadion Letná,,,Match Postponed
312,,,,,,,,,,,,
313,5,Sun,2020-07-26,17:00,Sigma Olomouc,,Fastav Zlín,,Andrův stadion,,,Match Postponed
314,5,Sun,2020-07-26,17:00,Karviná,,Příbram,,Městský stadion,,,Match Postponed


In [22]:
regular_season_urls = ["https://fbref.com/en/comps/66/1459/schedule/2015-2016-Czech-First-League-Scores-and-Fixtures", "https://fbref.com/en/comps/66/1518/schedule/2016-2017-Czech-First-League-Scores-and-Fixtures", "https://fbref.com/en/comps/66/1623/schedule/2017-2018-Czech-First-League-Scores-and-Fixtures"]
irregular_season_urls = ["https://fbref.com/en/comps/66/2427/schedule/2018-2019-Czech-First-League-Scores-and-Fixtures", "https://fbref.com/en/comps/66/3226/schedule/2019-2020-Czech-First-League-Scores-and-Fixtures", "https://fbref.com/en/comps/66/schedule/Czech-First-League-Scores-and-Fixtures"]
scores = pd.DataFrame()
scores_1516 = Scores_Scraper_regular("https://fbref.com/en/comps/66/1459/schedule/2015-2016-Czech-First-League-Scores-and-Fixtures")
scores_1617 = Scores_Scraper_regular("https://fbref.com/en/comps/66/1518/schedule/2016-2017-Czech-First-League-Scores-and-Fixtures")
scores_1718 = Scores_Scraper_regular("https://fbref.com/en/comps/66/1623/schedule/2017-2018-Czech-First-League-Scores-and-Fixtures")
scores_1819 = Scores_Scraper_irregular("https://fbref.com/en/comps/66/2427/schedule/2018-2019-Czech-First-League-Scores-and-Fixtures")
scores_1920 = Scores_Scraper_irregular("https://fbref.com/en/comps/66/3226/schedule/2019-2020-Czech-First-League-Scores-and-Fixtures")
scores_2021 = Scores_Scraper_regular("https://fbref.com/en/comps/66/schedule/Czech-First-League-Scores-and-Fixtures")

In [33]:
scores_dfs = [scores_1516, scores_1617, scores_1718, scores_1819, scores_1920, scores_2021]
for i in scores_dfs:
    scores = scores.append(i, ignore_index = True, sort = False)

scores.to_csv(r"C:\Users\Honza Stuchlík\Documents\IES\Data Processing in Python\Czech-Football-League\scores.csv", index = False)
scores

Unnamed: 0,Game Week,Weekday,Date,Time,Home Team,Score,Away Team,Attendance,Venue,Referee,Match Report,Notes
0,1,Fri,2015-07-24,17:30,Viktoria Plzeň,2–1,Slavia Prague,11233,Doosan Arena,Pavel Franek,Match Report,
1,1,Fri,2015-07-24,19:00,Vysočina Jihlava,0–0,Sparta Prague,3894,Stadion v Jiráskově ulici,Tomas Kocourek,Match Report,
2,1,Sat,2015-07-25,17:00,Příbram,2–3,Jablonec,4182,Energon Aréna,Pavel Královec,Match Report,
3,1,Sat,2015-07-25,17:00,Slovácko,4–3,Dukla Prague,3726,Městský fotbalový stadion Miroslava Vale...,Zbyněk Proske,Match Report,
4,1,Sat,2015-07-25,17:00,Zbrojovka Brno,2–1,Baník Ostrava,5326,Městský fotbalový stadion Srbská,Libor Kovařík,Match Report,
...,...,...,...,...,...,...,...,...,...,...,...,...
8655,34,Fri,2021-05-28,,Slovácko,,Fastav Zlín,,Městský fotbalový stadion Miroslava Vale...,,Head-to-Head,
8656,34,Fri,2021-05-28,,Slavia Prague,,České Budĕjov.,,Sinobo Stadium,,Head-to-Head,
8657,34,Fri,2021-05-28,,FK Pardubice,,Jablonec,,,,Head-to-Head,
8658,,,,,,,,,,,,
