# AFCON 2023 project

In [1]:
import pandas as pd
from string import ascii_uppercase as alphabet
import pickle
from bs4 import BeautifulSoup # to parse HTML or XML documents and extract data 
import requests # to aend requests to the website (wikipedia) that I am going to scrape

# Scraping data from the internet relating to AFCON 2023/24

In [2]:
all_group_tables = pd.read_html('https://en.wikipedia.org/wiki/2023_Africa_Cup_of_Nations')

In [3]:
all_group_tables

[   Coupe d'Afrique des Nations 2023 Coupe d'Afrique des Nations 2023.1
 0                  Official logo[1]                   Official logo[1]
 1                Tournament details                 Tournament details
 2                      Host country                        Ivory Coast
 3                             Dates      13 January – 11 February 2024
 4                             Teams                                 24
 5                          Venue(s)               6 (in 5 host cities)
 6                   Final positions                    Final positions
 7                         Champions            Ivory Coast (3rd title)
 8                        Runners-up                            Nigeria
 9                       Third place                       South Africa
 10                     Fourth place                           DR Congo
 11            Tournament statistics              Tournament statistics
 12                   Matches played                            

As can be seen, the Wikipedia page does not simply contain the AFCON groups/tables only.  There is other information that might not be relevant to me for this particular project.  I will show a few elements of the Wikipedia page in the cells below.

In [4]:
all_group_tables[12]

Unnamed: 0,Nigeria,1–1,Equatorial Guinea
0,Osimhen 38',Report,Salvador 36'


By observing the orignal 'all_group_tables' data that I imported from Wikipedia, I notice that the specific group table relating to the fixture "Ivory Coast	v	Nigeria" thatI found in my random search **all_group_tables[12]** is a few elements before that fixture list.  I *guesstimate* that the group will be found in the 9th element of the page, as seen below.

The table above is the table for group A. Another *guesstimate* shows me that Group B is foundin the 16th element of our data. I will now test this for all the 6 groups.

### Modifying column names

In [5]:
for table_ref in range(8,50,7):
    print(table_ref)

8
15
22
29
36
43


To link my table reference numbers to an actual group name, I imported the **ascii_uppercase library** as **alphabet** to assist me.

In [6]:
alphabet

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [7]:
fix_A =all_group_tables[9:15]
fix_B = all_group_tables[16:22]
fix_C =all_group_tables[23:29]
fix_D =all_group_tables[30:36]
fix_E =all_group_tables[37:43]
fix_F =all_group_tables[44:50]

In [8]:
fix_A

[                                               Pot 1  \
 0  Ivory Coast (50) (hosts) Morocco (13) Senegal ...   
 
                                                Pot 2  \
 0  Nigeria (40) Cameroon (41) Mali (49) Burkina F...   
 
                                                Pot 3  \
 0  South Africa (65) Cape Verde (71) Guinea (81) ...   
 
                                                Pot 4  
 0  Guinea-Bissau (106) Mozambique (113) Namibia (...  ,
    Pos  \
 0    1   
 1    2   
 2    3   
 3    4   
 
   Team.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw

In [9]:
fix_A[0]

Unnamed: 0,Pot 1,Pot 2,Pot 3,Pot 4
0,Ivory Coast (50) (hosts) Morocco (13) Senegal ...,Nigeria (40) Cameroon (41) Mali (49) Burkina F...,South Africa (65) Cape Verde (71) Guinea (81) ...,Guinea-Bissau (106) Mozambique (113) Namibia (...


In [10]:
wiki = 'https://en.wikipedia.org/wiki/2023_Africa_Cup_of_Nations'
response = requests.get(wiki)
wiki_content = response.text
soup = BeautifulSoup(wiki_content, 'lxml') # lxml  parser is responsible for reading the HTML or XML content 

In [11]:
# I used BeautifulSoup to find all HTML elements with the class 'footballbox' in the parsed HTML content and stored them in the "afcon74_games" variable. 
afcon23_games = soup.find_all('table', class_='fevent')

In [12]:
home = []
score = []
away = []

# Extract information from each game element in afcon74_games
    # - 'home' stores the home team name
    # - 'score' stores the match score
    # - 'away' stores the away team name
for game in afcon23_games:
    #  get_text() is used to extract the home team name, score, and away team name from the HTML element
    home.append(game.find('th', class_ = 'fhome').get_text()) 
    score.append(game.find('th', class_ = 'fscore').get_text())
    away.append(game.find('th', class_ = 'faway').get_text())

In [13]:
dict_afcon23 = {'home':home, 'score':score, 'away': away}

In [14]:
afcon23_df= pd.DataFrame(dict_afcon23) 

In [15]:
afcon23_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   home    52 non-null     object
 1   score   52 non-null     object
 2   away    52 non-null     object
dtypes: object(3)
memory usage: 1.3+ KB


In [16]:
import pandas as pd

# Assuming afcon23_df contains your existing fixture data
# afcon23_df = ...

# DataFrame with knockout round fixtures
afcon23_df_KO = pd.DataFrame([['1D', "", '3BEF'], ['2A', "", '2C'], ['1A', "", '3CDE'], ['2B', "", '2F'], ['1B', "", '3ACD'], 
                              ['1C', "", '3ABF'], ['1E', "", '2D'], ['1F', "", '2E']],
                             columns=['home', 'score', 'away'])

# Concatenate the two DataFrames
afcon23_df = pd.concat([afcon23_df, afcon23_df_KO], ignore_index=True)

# Now afcon23_df contains both the existing fixtures and the knockout round fixtures

                                                 
                                                 

In [17]:
afcon23_df

Unnamed: 0,home,score,away
0,Ivory Coast,2–0,Guinea-Bissau
1,Nigeria,1–1,Equatorial Guinea
2,Equatorial Guinea,4–2,Guinea-Bissau
3,Ivory Coast,0–1,Nigeria
4,Equatorial Guinea,4–0,Ivory Coast
5,Guinea-Bissau,0–1,Nigeria
6,Egypt,2–2,Mozambique
7,Ghana,1–2,Cape Verde
8,Egypt,2–2,Ghana
9,Cape Verde,3–0,Mozambique


In [18]:
afcon23_df = afcon23_df.drop([37, 38])


In [19]:
afcon23_df

Unnamed: 0,home,score,away
0,Ivory Coast,2–0,Guinea-Bissau
1,Nigeria,1–1,Equatorial Guinea
2,Equatorial Guinea,4–2,Guinea-Bissau
3,Ivory Coast,0–1,Nigeria
4,Equatorial Guinea,4–0,Ivory Coast
5,Guinea-Bissau,0–1,Nigeria
6,Egypt,2–2,Mozambique
7,Ghana,1–2,Cape Verde
8,Egypt,2–2,Ghana
9,Cape Verde,3–0,Mozambique


In [20]:
afcon23_df['Stage'] = "KO"
afcon23_df

Unnamed: 0,home,score,away,Stage
0,Ivory Coast,2–0,Guinea-Bissau,KO
1,Nigeria,1–1,Equatorial Guinea,KO
2,Equatorial Guinea,4–2,Guinea-Bissau,KO
3,Ivory Coast,0–1,Nigeria,KO
4,Equatorial Guinea,4–0,Ivory Coast,KO
5,Guinea-Bissau,0–1,Nigeria,KO
6,Egypt,2–2,Mozambique,KO
7,Ghana,1–2,Cape Verde,KO
8,Egypt,2–2,Ghana,KO
9,Cape Verde,3–0,Mozambique,KO


In [21]:
afcon23_df.loc[:37, 'stage'] = 'Group'
afcon23_df

Unnamed: 0,home,score,away,Stage,stage
0,Ivory Coast,2–0,Guinea-Bissau,KO,Group
1,Nigeria,1–1,Equatorial Guinea,KO,Group
2,Equatorial Guinea,4–2,Guinea-Bissau,KO,Group
3,Ivory Coast,0–1,Nigeria,KO,Group
4,Equatorial Guinea,4–0,Ivory Coast,KO,Group
5,Guinea-Bissau,0–1,Nigeria,KO,Group
6,Egypt,2–2,Mozambique,KO,Group
7,Ghana,1–2,Cape Verde,KO,Group
8,Egypt,2–2,Ghana,KO,Group
9,Cape Verde,3–0,Mozambique,KO,Group


In [22]:
afcon23_df.loc[39:46, 'stage'] = 'L16'
afcon23_df

Unnamed: 0,home,score,away,Stage,stage
0,Ivory Coast,2–0,Guinea-Bissau,KO,Group
1,Nigeria,1–1,Equatorial Guinea,KO,Group
2,Equatorial Guinea,4–2,Guinea-Bissau,KO,Group
3,Ivory Coast,0–1,Nigeria,KO,Group
4,Equatorial Guinea,4–0,Ivory Coast,KO,Group
5,Guinea-Bissau,0–1,Nigeria,KO,Group
6,Egypt,2–2,Mozambique,KO,Group
7,Ghana,1–2,Cape Verde,KO,Group
8,Egypt,2–2,Ghana,KO,Group
9,Cape Verde,3–0,Mozambique,KO,Group


In [23]:
afcon23_df['ko_label'] = ''
afcon23_df

Unnamed: 0,home,score,away,Stage,stage,ko_label
0,Ivory Coast,2–0,Guinea-Bissau,KO,Group,
1,Nigeria,1–1,Equatorial Guinea,KO,Group,
2,Equatorial Guinea,4–2,Guinea-Bissau,KO,Group,
3,Ivory Coast,0–1,Nigeria,KO,Group,
4,Equatorial Guinea,4–0,Ivory Coast,KO,Group,
5,Guinea-Bissau,0–1,Nigeria,KO,Group,
6,Egypt,2–2,Mozambique,KO,Group,
7,Ghana,1–2,Cape Verde,KO,Group,
8,Egypt,2–2,Ghana,KO,Group,
9,Cape Verde,3–0,Mozambique,KO,Group,


In [24]:
afcon23_df = afcon23_df.drop(36)
afcon23_df
    

Unnamed: 0,home,score,away,Stage,stage,ko_label
0,Ivory Coast,2–0,Guinea-Bissau,KO,Group,
1,Nigeria,1–1,Equatorial Guinea,KO,Group,
2,Equatorial Guinea,4–2,Guinea-Bissau,KO,Group,
3,Ivory Coast,0–1,Nigeria,KO,Group,
4,Equatorial Guinea,4–0,Ivory Coast,KO,Group,
5,Guinea-Bissau,0–1,Nigeria,KO,Group,
6,Egypt,2–2,Mozambique,KO,Group,
7,Ghana,1–2,Cape Verde,KO,Group,
8,Egypt,2–2,Ghana,KO,Group,
9,Cape Verde,3–0,Mozambique,KO,Group,


In [25]:

afcon23_df.to_csv('afcon23_fixtures_group_L16.csv', index=False)
