# Web Scraping Project: Euro 2020 Stats

## Import required libraries

In [None]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import time
from typing import List

## URLs for data extraction

In [None]:
def gen_urls(s_dte, e_dte):
    """
    Build one scores-and-fixtures page URL for every calendar day from
    s_dte to e_dte (both ends included).

    The ISO date (YYYY-MM-DD) of each day is appended to the base URL, so the
    returned list is ordered chronologically.
    """
    prefix = 'https://www.bbc.com/sport/football/european-championship/scores-fixtures/'
    page_urls = []
    for day in pd.date_range(s_dte, e_dte):
        # str() of a date object is its ISO form, which is what the f-string emits.
        page_urls.append(f'{prefix}{day.date()}')
    return page_urls

In [None]:
def show_match_result(home_team,home_goals,away_team,away_goals, pens = None):
    """
    Format a single match as one line of text: '<home> <goals> - <goals> <away>'.

    home_team / away_team are the team names and home_goals / away_goals the
    goals each side scored. 'pens' is an optional extra tag (for example the
    penalty shoot-out message of a knockout match); when it is not None it is
    appended to the score line in parentheses, otherwise the plain score line
    is returned.
    """
    score_line = f'{home_team} {home_goals} - {away_goals} {away_team}'
    if pens is None:
        return score_line
    return f'{score_line} ({pens})'

In [None]:
def all_results(urls, timeout=30):
    """
    Download every scores-and-fixtures page in `urls` and return one formatted
    result line per fixture found on those pages.

    Parameters
    ----------
    urls : iterable of str
        Page URLs whose last path segment is the match date (YYYY-MM-DD), as
        produced by gen_urls. That segment decides whether the page belongs to
        the knockout stage.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    list of str
        Lines built by show_match_result, in the order the pages and fixtures
        were visited. For knockout-stage fixtures that carry a win message,
        that message is appended to the line.

    Raises
    ------
    requests.exceptions.RequestException
        If a page cannot be fetched (connection error or timeout).
    """
    # results will contain the results from all the football matches.
    results = []
    # Starting date of the knock out matches: the round of 16 began on
    # 2021-06-26, so pages from that day onwards are checked for a win message.
    KO_START_DATE = pd.Timestamp('2021-06-26').date()
    # Elements every fixture needs, in the order:
    # home team, away team, home goals, away goals.
    required_selectors = (
        '.sp-c-fixture__team-name--home .sp-c-fixture__team-name-trunc',
        '.sp-c-fixture__team-name--away .sp-c-fixture__team-name-trunc',
        '.sp-c-fixture__number--home',
        '.sp-c-fixture__number--away',
    )
    for url in urls:
        # Without a timeout a single stalled connection would hang the whole run.
        r = requests.get(url, timeout=timeout)
        match_date = pd.Timestamp(url.split('/')[-1]).date()
        # Pause between requests so the site is not hit in a tight loop.
        time.sleep(1)
        soup = bs(r.text, 'html.parser')
        # The following line will extract all the matches' data from the requested page
        matches = soup.find_all('article', class_='sp-c-fixture')
        for match in matches:
            nodes = [match.select_one(selector) for selector in required_selectors]
            # A fixture without names or a score (select_one returned None)
            # cannot be formatted; skip it instead of failing on `.text`.
            if any(node is None for node in nodes):
                continue
            home_team, away_team, home_goals, away_goals = (node.text for node in nodes)
            # Only knockout-stage pages are searched for a win message.
            pens = None
            if match_date >= KO_START_DATE:
                win_message = match.select_one('.sp-c-fixture__win-message')
                if win_message is not None:
                    pens = win_message.text
            results.append(show_match_result(home_team, home_goals, away_team, away_goals, pens))
    return results

In [None]:
# First and last day of the date range to scrape (both included).
start_date = '2021-06-11'
end_date = '2021-07-11'
# One page URL per day in the range, then scrape each page for match results.
urls = gen_urls(start_date, end_date)
results = all_results(urls)