# AFCON 2023 project

In [4]:
import pandas as pd
from string import ascii_uppercase as alphabet
import pickle
from bs4 import BeautifulSoup # to parse HTML or XML documents and extract data 
import requests # to aend requests to the website (wikipedia) that I am going to scrape

# Scraping data from the internet relating to AFCON 2023/24

In [5]:
all_group_tables = pd.read_html('https://en.wikipedia.org/wiki/2023_Africa_Cup_of_Nations')

In [6]:
all_group_tables

[  Coupe d'Afrique des Nations 2023 Coupe d'Afrique des Nations 2023.1
 0                 Official logo[1]                   Official logo[1]
 1               Tournament details                 Tournament details
 2                     Host country                        Ivory Coast
 3                            Dates      13 January – 11 February 2024
 4                            Teams                                 24
 5                         Venue(s)               6 (in 5 host cities)
 6                    ← 2021 2025 →                      ← 2021 2025 →,
            Title sponsor            Official sponsors Regional sponsors
 0  Total 1XBET Visa Puma  Apsonic[15] Tecno[16] Razzl    Celeste Porteo
 1            Total 1XBET                    Visa Puma               NaN
 2  Apsonic[15] Tecno[16]                        Razzl               NaN,
              0          1
 0  Total 1XBET  Visa Puma,
                        0      1
 0  Apsonic[15] Tecno[16]  Razzl,
                

As can be seen, the Wikipedia page does not simply contain the AFCON groups/tables only.  There is other information that might not be relevant to me for this particular project.  I will show a few elements of the Wikipedia page in the cells below.

In [7]:
all_group_tables[12]

Unnamed: 0,Ivory Coast,v,Nigeria
0,,,


By observing the orignal 'all_group_tables' data that I imported from Wikipedia, I notice that the specific group table relating to the fixture "Ivory Coast	v	Nigeria" thatI found in my random search **all_group_tables[12]** is a few elements before that fixture list.  I *guesstimate* that the group will be found in the 9th element of the page, as seen below.

The table above is the table for group A. Another *guesstimate* shows me that Group B is foundin the 16th element of our data. I will now test this for all the 6 groups.

### Modifying column names

In [8]:
for table_ref in range(8,50,7):
    print(table_ref)

8
15
22
29
36
43


To link my table reference numbers to an actual group name, I imported the **ascii_uppercase library** as **alphabet** to assist me.

In [9]:
alphabet

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [11]:
fix_A =all_group_tables[9:15]
fix_B = all_group_tables[16:22]
fix_C =all_group_tables[23:29]
fix_D =all_group_tables[30:36]
fix_E =all_group_tables[37:43]
fix_F =all_group_tables[44:50]

In [27]:
fix_A

[   Ivory Coast   v  Guinea-Bissau
 0          NaN NaN            NaN,
    Nigeria   v  Equatorial Guinea
 0      NaN NaN                NaN,
    Equatorial Guinea   v  Guinea-Bissau
 0                NaN NaN            NaN,
    Ivory Coast   v  Nigeria
 0          NaN NaN      NaN,
    Equatorial Guinea   v  Ivory Coast
 0                NaN NaN          NaN,
    Guinea-Bissau   v  Nigeria
 0            NaN NaN      NaN]

In [28]:
fix_A[0]

Unnamed: 0,Ivory Coast,v,Guinea-Bissau
0,,,


In [30]:
wiki = 'https://en.wikipedia.org/wiki/2023_Africa_Cup_of_Nations'
response = requests.get(wiki)
wiki_content = response.text
soup = BeautifulSoup(wiki_content, 'lxml') # lxml  parser is responsible for reading the HTML or XML content 

In [31]:
# I used BeautifulSoup to find all HTML elements with the class 'footballbox' in the parsed HTML content and stored them in the "afcon74_games" variable. 
afcon23_games = soup.find_all('table', class_='fevent')

In [34]:
home = []
score = []
away = []

# Extract information from each game element in afcon74_games
    # - 'home' stores the home team name
    # - 'score' stores the match score
    # - 'away' stores the away team name
for game in afcon23_games:
    #  get_text() is used to extract the home team name, score, and away team name from the HTML element
    home.append(game.find('th', class_ = 'fhome').get_text()) 
    score.append(game.find('th', class_ = 'fscore').get_text())
    away.append(game.find('th', class_ = 'faway').get_text())

In [35]:
dict_afcon23 = {'home':home, 'score':score, 'away': away}

In [36]:
afcon23_df= pd.DataFrame(dict_afcon23) 

In [38]:
afcon23_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   home    37 non-null     object
 1   score   37 non-null     object
 2   away    37 non-null     object
dtypes: object(3)
memory usage: 1016.0+ bytes


In [50]:
import pandas as pd

# Assuming afcon23_df contains your existing fixture data
# afcon23_df = ...

# DataFrame with knockout round fixtures
afcon23_df_KO = pd.DataFrame([['1D', "", '3BEF'], ['2A', "", '2C'], ['1A', "", '3CDE'], ['2B', "", '2F'], ['1B', "", '3ACD'], 
                              ['1C', "", '3ABF'], ['1E', "", '2D'], ['1F', "", '2E']],
                             columns=['home', 'score', 'away'])

# Concatenate the two DataFrames
afcon23_df = pd.concat([afcon23_df, afcon23_df_KO], ignore_index=True)

# Now afcon23_df contains both the existing fixtures and the knockout round fixtures

                                                 
                                                 

In [51]:
afcon23_df

Unnamed: 0,home,score,away
0,Ivory Coast,v,Guinea-Bissau
1,Nigeria,v,Equatorial Guinea
2,Equatorial Guinea,v,Guinea-Bissau
3,Ivory Coast,v,Nigeria
4,Equatorial Guinea,v,Ivory Coast
5,Guinea-Bissau,v,Nigeria
6,Egypt,v,Mozambique
7,Ghana,v,Cape Verde
8,Egypt,v,Ghana
9,Cape Verde,v,Mozambique


In [56]:
afcon23_df = afcon23_df.drop([37, 38])


In [57]:
afcon23_df

Unnamed: 0,home,score,away
0,Ivory Coast,v,Guinea-Bissau
1,Nigeria,v,Equatorial Guinea
2,Equatorial Guinea,v,Guinea-Bissau
3,Ivory Coast,v,Nigeria
4,Equatorial Guinea,v,Ivory Coast
5,Guinea-Bissau,v,Nigeria
6,Egypt,v,Mozambique
7,Ghana,v,Cape Verde
8,Egypt,v,Ghana
9,Cape Verde,v,Mozambique


In [58]:
afcon23_df['Stage'] = "KO"
afcon23_df

Unnamed: 0,home,score,away,Stage
0,Ivory Coast,v,Guinea-Bissau,KO
1,Nigeria,v,Equatorial Guinea,KO
2,Equatorial Guinea,v,Guinea-Bissau,KO
3,Ivory Coast,v,Nigeria,KO
4,Equatorial Guinea,v,Ivory Coast,KO
5,Guinea-Bissau,v,Nigeria,KO
6,Egypt,v,Mozambique,KO
7,Ghana,v,Cape Verde,KO
8,Egypt,v,Ghana,KO
9,Cape Verde,v,Mozambique,KO


In [59]:
afcon23_df.loc[:37, 'stage'] = 'Group'
afcon23_df

Unnamed: 0,home,score,away,Stage,stage
0,Ivory Coast,v,Guinea-Bissau,KO,Group
1,Nigeria,v,Equatorial Guinea,KO,Group
2,Equatorial Guinea,v,Guinea-Bissau,KO,Group
3,Ivory Coast,v,Nigeria,KO,Group
4,Equatorial Guinea,v,Ivory Coast,KO,Group
5,Guinea-Bissau,v,Nigeria,KO,Group
6,Egypt,v,Mozambique,KO,Group
7,Ghana,v,Cape Verde,KO,Group
8,Egypt,v,Ghana,KO,Group
9,Cape Verde,v,Mozambique,KO,Group


In [61]:
afcon23_df.loc[39:46, 'stage'] = 'L16'
afcon23_df

Unnamed: 0,home,score,away,Stage,stage
0,Ivory Coast,v,Guinea-Bissau,KO,Group
1,Nigeria,v,Equatorial Guinea,KO,Group
2,Equatorial Guinea,v,Guinea-Bissau,KO,Group
3,Ivory Coast,v,Nigeria,KO,Group
4,Equatorial Guinea,v,Ivory Coast,KO,Group
5,Guinea-Bissau,v,Nigeria,KO,Group
6,Egypt,v,Mozambique,KO,Group
7,Ghana,v,Cape Verde,KO,Group
8,Egypt,v,Ghana,KO,Group
9,Cape Verde,v,Mozambique,KO,Group


In [62]:
afcon23_df['ko_label'] = ''
afcon23_df

Unnamed: 0,home,score,away,Stage,stage,ko_label
0,Ivory Coast,v,Guinea-Bissau,KO,Group,
1,Nigeria,v,Equatorial Guinea,KO,Group,
2,Equatorial Guinea,v,Guinea-Bissau,KO,Group,
3,Ivory Coast,v,Nigeria,KO,Group,
4,Equatorial Guinea,v,Ivory Coast,KO,Group,
5,Guinea-Bissau,v,Nigeria,KO,Group,
6,Egypt,v,Mozambique,KO,Group,
7,Ghana,v,Cape Verde,KO,Group,
8,Egypt,v,Ghana,KO,Group,
9,Cape Verde,v,Mozambique,KO,Group,


In [68]:
afcon23_df = afcon23_df.drop(36)
afcon23_df
    

Unnamed: 0,home,score,away,Stage,stage,ko_label
0,Ivory Coast,v,Guinea-Bissau,KO,Group,
1,Nigeria,v,Equatorial Guinea,KO,Group,
2,Equatorial Guinea,v,Guinea-Bissau,KO,Group,
3,Ivory Coast,v,Nigeria,KO,Group,
4,Equatorial Guinea,v,Ivory Coast,KO,Group,
5,Guinea-Bissau,v,Nigeria,KO,Group,
6,Egypt,v,Mozambique,KO,Group,
7,Ghana,v,Cape Verde,KO,Group,
8,Egypt,v,Ghana,KO,Group,
9,Cape Verde,v,Mozambique,KO,Group,


In [69]:
# Assuming df is your DataFrame
afcon23_df.to_csv('afcon23_fixtures_group_L16.csv', index=False)
