### 

### Imports

In [53]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import pickle
import time
from string import ascii_uppercase as alphabet

### Scraping Historical and Fixture Data

In [54]:
pd.set_option("display.max_columns", None)

In [55]:
# function to get all the matches for a given year
def get_matches(year):
    request_url = f"https://en.wikipedia.org/wiki/{year}_Africa_Cup_of_Nations"
    res = requests.get(url=request_url).text

    soup = BeautifulSoup(res, "lxml")
    matches = soup.find_all("div", {"class": "footballbox"})

    data = []
    for m in matches:
        # extract the home, away, and score for each match
        match = {}
        match["home"] = m.find("th", {"class": "fhome"}).get_text()
        match["away"] = m.find("th", {"class": "faway"}).get_text() 
        match["score"] = m.find("th", {"class": "fscore"}).get_text().split(" ")[0]
        match["year"] = year
        data.append(match)

    return pd.DataFrame(data)

In [56]:
years = [1957, 1959, 1962, 1963, 1965, 1968, 1970, 1972, 1974, 1976, 1978, 1980, 1982, 1984, 1986, 1988, 1990, 1992, 1994, 1996, 1998, 2000, 2002, 2004, 2006, 2008, 2010, 2012, 2013, 2015, 2017, 2019, 2021]

begin_loop = time.time()
match_dict = [get_matches(y) for y in years] # loop over all the years and get all the matches
df = pd.concat(match_dict, ignore_index=True)

print(f"Scraping completed! - Total run time: {round((time.time() - begin_loop), 2)}seconds")

Scraping completed! - Total run time: 10.83seconds


##### Exporting to CSV

In [57]:
# historical data
df.to_csv("data/afcon_historical_data.csv", index=False)

# fixture for 2023
fixture = get_matches("2023")
fixture.to_csv("data/afcon_fixture_data.csv", index=False)

OSError: Cannot save file into a non-existent directory: 'data'

In [None]:
df

Unnamed: 0,home,away,score,year
0,Sudan,Egypt,1–2,1957
1,Ethiopia,South Africa,2–0,1957
2,Egypt,Ethiopia,4–0,1957
3,United Arab Republic,Ethiopia,4–0,1959
4,Sudan,Ethiopia,1–0,1959
...,...,...,...,...
741,Senegal,Equatorial Guinea,3–1,2021
742,Burkina Faso,Senegal,1–3,2021
743,Cameroon,Egypt,0–0,2021
744,Burkina Faso,Cameroon,3–3,2021


In [None]:
fixture.sample(10)

Unnamed: 0,home,away,score,year
7,Ghana,Cape Verde,v,2023
26,Tunisia,Mali,v,2023
22,Angola,Burkina Faso,v,2023
48,Winner QF1,Winner QF4,SF1,2023
20,Algeria,Burkina Faso,v,2023
49,Winner QF3,Winner QF2,SF2,2023
21,Mauritania,Angola,v,2023
9,Cape Verde,Mozambique,v,2023
33,Zambia,Tanzania,v,2023
1,Nigeria,Equatorial Guinea,v,2023
