In [2]:
import pandas as pd
import numpy as np

In [3]:
champion = pd.read_csv("dataset/champion.csv")
ranking = pd.read_csv("dataset/fifa_ranking.csv")

In [4]:
# We will select only CAF federations

ranking  = ranking[ranking["confederation"] == "CAF"]
ranking.head()

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
2,32.0,Zambia,ZAM,38.0,0.0,32,CAF,1992-12-31
4,30.0,Algeria,ALG,39.0,0.0,30,CAF,1992-12-31
7,27.0,Côte d'Ivoire,CIV,41.0,0.0,27,CAF,1992-12-31
12,22.0,Cameroon,CMR,43.0,0.0,22,CAF,1992-12-31
13,21.0,Egypt,EGY,45.0,0.0,21,CAF,1992-12-31


In [5]:
champion.head()

Unnamed: 0,year,winner,runner-up
0,1957,Egypt,Ethiopia
1,1959,Egypt,Sudan
2,1962,Ethiopia,Egypt
3,1963,Ghana,Sudan
4,1965,Ghana,Tunisia


In [6]:
# Reindexing ranking's rows
ranking = ranking.reset_index(drop=True)
ranking.head()

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
0,32.0,Zambia,ZAM,38.0,0.0,32,CAF,1992-12-31
1,30.0,Algeria,ALG,39.0,0.0,30,CAF,1992-12-31
2,27.0,Côte d'Ivoire,CIV,41.0,0.0,27,CAF,1992-12-31
3,22.0,Cameroon,CMR,43.0,0.0,22,CAF,1992-12-31
4,21.0,Egypt,EGY,45.0,0.0,21,CAF,1992-12-31


In [7]:
ranking = ranking.drop(["country_abrv","total_points","previous_points","rank_change","rank_date","confederation"],axis=1)
ranking.head()

Unnamed: 0,rank,country_full
0,32.0,Zambia
1,30.0,Algeria
2,27.0,Côte d'Ivoire
3,22.0,Cameroon
4,21.0,Egypt


In [8]:
ranking = ranking.rename(columns={"country_full":"Country"})
ranking.head()

Unnamed: 0,rank,Country
0,32.0,Zambia
1,30.0,Algeria
2,27.0,Côte d'Ivoire
3,22.0,Cameroon
4,21.0,Egypt


In [9]:
# Renaming some African countries with their actual names
ranking = ranking.drop_duplicates(subset='Country')
ranking = ranking.reset_index(drop=True)
ranking

Unnamed: 0,rank,Country
0,32.0,Zambia
1,30.0,Algeria
2,27.0,Côte d'Ivoire
3,22.0,Cameroon
4,21.0,Egypt
5,54.0,Zimbabwe
6,53.0,Malawi
7,51.0,Senegal
8,41.0,Morocco
9,39.0,Ghana


In [10]:
# Dropping Teams who didn't qualify to afcoons

qualified_teams = ["Morocco",
"Burkina Faso",
"Cameroon",
"Algeria",
"DR Congo"
"Senegal",
"Egypt",
"Angola",
"Equatorial Guinea",
"Ivory Coast",
"Uganda"
"South Africa",
"Gabon",
"Tunisia",
"Nigeria",
"Zambia",
"Mali",
"Zimbabwe",
"Comoros",
"Sudan",
"Benin",
"Tanzania",
"Botswana",
"Mozambique"]

replace_name = {
    "Congo DR" : "DR Congo",
    "Zaire" :"DR Congo",
    "Côte d'Ivoire" : "Ivory Coast",
    "Côte d'Ivoire" : "Ivory Coast"
}

ranking["Country"] = ranking["Country"].replace(replace_name)
champion["winner"] = champion["winner"].replace(replace_name)
champion["runner-up"] = champion["runner-up"].replace(replace_name)

In [11]:
# We will keep just the qualified teams to the Afcoon 
# Source : https://www.cafonline.com/caf-africa-cup-of-nations/news/totalenergies-caf-afcon-2025-24-nations-confirmed-for-morocco-finals/

n = len(ranking)
ranking = ranking[ranking["Country"].isin(qualified_teams)]

In [12]:
ranking

Unnamed: 0,rank,Country
0,32.0,Zambia
1,30.0,Algeria
2,27.0,Ivory Coast
3,22.0,Cameroon
4,21.0,Egypt
5,54.0,Zimbabwe
8,41.0,Morocco
10,38.0,Tunisia
11,55.0,Gabon
12,13.0,Nigeria


In [13]:
# Let's update their rankings

updated_rankings = {
    "Morocco": 14,
    "Burkina Faso": 67,
    "Cameroon": 51,
    "Algeria": 46,
    "DR Congo": 60,
    "Senegal": 19,
    "Egypt": 36,
    "Angola": 90,
    "Equatorial Guinea": 88,
    "Ivory Coast": 38,
    "Uganda": 92,
    "South Africa": 57,
    "Gabon": 83,
    "Tunisia": 41,
    "Nigeria": 39,
    "Zambia": 92,
    "Mali": 53,
    "Zimbabwe": 125,
    "Comoros": 129,
    "Sudan": 128,
    "Benin": 91,
    "Tanzania": 130,
    "Botswana": 148,
    "Mozambique": 114
}

def fetch_country(rank):
    result = ranking.loc[ranking["rank"] == rank, "Country"]
    return result.iloc[0] if not result.empty else None
    
ranking["rank"] = ranking["rank"].apply(lambda r: updated_rankings.get(fetch_country(r), r))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ranking["rank"] = ranking["rank"].apply(lambda r: updated_rankings.get(fetch_country(r), r))


In [19]:
ranking = ranking.sort_values(by=["rank"],ascending = True)
ranking.reset_index(drop=True)

Unnamed: 0,rank,Country
0,14,Morocco
1,36,Egypt
2,38,Ivory Coast
3,39,Nigeria
4,41,Tunisia
5,46,Algeria
6,51,Cameroon
7,53,Mali
8,67,Burkina Faso
9,83,Gabon


In [21]:
stats = pd.read_csv("dataset/stats.csv")
stats = stats.drop(["Rank"],axis=1)
stats.head()

Unnamed: 0,Team,Part,Pld,W,D,L,GF,GA,GD,Points
0,Egypt,26,111,60,24,27,175,97,78,204
1,Nigeria,20,104,57,24,23,146,95,50,195
2,Ghana,24,105,54,23,28,138,93,45,185
3,Ivory Coast,25,106,48,30,28,152,111,41,174
4,Cameroon,21,95,46,31,18,142,90,52,169


In [24]:
stats = stats.rename(columns={"Team":"Country"})
data = ranking.merge(stats,how="inner",on="Country")
data.head()

Unnamed: 0,rank,Country,Part,Pld,W,D,L,GF,GA,GD,Points
0,14,Morocco,19,74,29,25,20,87,65,22,112
1,36,Egypt,26,111,60,24,27,175,97,78,204
2,38,Ivory Coast,25,106,48,30,28,152,111,41,174
3,39,Nigeria,20,104,57,24,23,146,95,50,195
4,41,Tunisia,21,83,25,30,27,99,97,2,105


In [26]:
data.to_csv('dataset/test.csv')