In [42]:
# Dependencies
import pycountry
import pycountry_convert as pc
import pandas as pd


In [43]:
# Location of the project's main dataset, relative to this notebook
csv_path = "../Resources/average_air_quality_health.csv"

# Load the CSV contents into a pandas DataFrame
test_df = pd.read_csv(filepath_or_buffer=csv_path)

# Leave the DataFrame as the last expression so the cell renders it
test_df

Unnamed: 0,City,Country,PM2.5,PM10,NO2,O3,SO2,CO,NH3,MortalityRate_per_100k
0,abbeville,FR,4.7,8.8,4.7,36.8,0.8,210.3,1.3,4.1
1,acarau,BR,2.1,9.6,0.2,52.2,0.1,353.8,0.1,9.6
2,agbor,NG,29.9,39.2,5.7,2.5,0.4,894.6,0.6,29.6
3,al jawf,SA,11.1,39.2,1.0,101.6,1.7,193.6,0.5,33.5
4,al qusayr,SY,7.9,10.0,4.7,40.8,2.5,210.3,2.7,26.6
...,...,...,...,...,...,...,...,...,...,...
393,zaragoza,ES,0.5,0.7,0.9,67.2,0.1,203.6,1.0,4.2
394,zavoronezhskoye,RU,1.0,1.1,0.7,75.8,0.1,227.0,0.1,17.4
395,zhangatas,KZ,4.6,15.4,2.0,54.4,3.7,230.3,0.1,30.5
396,zhangjiakou,CN,3.3,9.9,1.9,62.2,0.9,200.3,3.4,26.6


In [44]:
# Convert an alpha-2 country code into a country name
def convert_code_to_name(country_code):
    """Return the pycountry name for an alpha-2 country code, or None.

    Parameters
    ----------
    country_code : str
        Two-letter country code such as "FR". Any non-string value
        (e.g. None, or the float NaN of a missing cell) yields None.

    Returns
    -------
    str or None
        The ``name`` attribute of the matching pycountry record, or None
        when no record matches the code.
    """
    # Nothing to look up for missing / non-text values.
    if not isinstance(country_code, str):
        return None
    try:
        country = pycountry.countries.get(alpha_2=country_code)
    except LookupError:
        # NOTE(review): some pycountry releases appear to raise a
        # LookupError/KeyError for an unknown code instead of returning
        # None -- confirm against the installed version. Only lookup
        # failures are swallowed; any other error is allowed to surface.
        return None
    # An unknown code may also come back as None rather than raising.
    return None if country is None else country.name

In [45]:
# Look up a country name for every row's country code and store it
# in a new CountryName column
test_df['CountryName'] = test_df['Country'].map(convert_code_to_name)

# Render the DataFrame, which now carries the CountryName column
test_df

Unnamed: 0,City,Country,PM2.5,PM10,NO2,O3,SO2,CO,NH3,MortalityRate_per_100k,CountryName
0,abbeville,FR,4.7,8.8,4.7,36.8,0.8,210.3,1.3,4.1,France
1,acarau,BR,2.1,9.6,0.2,52.2,0.1,353.8,0.1,9.6,Brazil
2,agbor,NG,29.9,39.2,5.7,2.5,0.4,894.6,0.6,29.6,Nigeria
3,al jawf,SA,11.1,39.2,1.0,101.6,1.7,193.6,0.5,33.5,Saudi Arabia
4,al qusayr,SY,7.9,10.0,4.7,40.8,2.5,210.3,2.7,26.6,Syrian Arab Republic
...,...,...,...,...,...,...,...,...,...,...,...
393,zaragoza,ES,0.5,0.7,0.9,67.2,0.1,203.6,1.0,4.2,Spain
394,zavoronezhskoye,RU,1.0,1.1,0.7,75.8,0.1,227.0,0.1,17.4,Russian Federation
395,zhangatas,KZ,4.6,15.4,2.0,54.4,3.7,230.3,0.1,30.5,Kazakhstan
396,zhangjiakou,CN,3.3,9.9,1.9,62.2,0.9,200.3,3.4,26.6,China


In [46]:
# Get the continent info based on the new column CountryName
def country_to_continent(country_name):
    """Return the continent name for a country name, or None.

    The lookup chains three pycountry_convert calls:
    country name -> alpha-2 code -> continent code -> continent name.

    Parameters
    ----------
    country_name : str
        Country name as stored in the CountryName column. Any non-string
        value (e.g. None, or the float NaN of a missing cell) yields None.

    Returns
    -------
    str or None
        The continent name (e.g. "Europe"), or None when any step of the
        lookup chain does not recognise its input.
    """
    # Rows whose country code could not be resolved carry no name.
    if not isinstance(country_name, str):
        return None
    try:
        alpha2 = pc.country_name_to_country_alpha2(country_name)
        continent_code = pc.country_alpha2_to_continent_code(alpha2)
        return pc.convert_continent_code_to_continent_name(continent_code)
    except KeyError:
        # NOTE(review): pycountry_convert is understood to signal an
        # unknown name/code with KeyError -- confirm against the installed
        # version. Only that case maps to None; other errors surface.
        return None

# Resolve each row's CountryName to a continent and store the result
# in a new Continent column
test_df['Continent'] = test_df['CountryName'].map(country_to_continent)

# Render the DataFrame, which now carries the Continent column
test_df

Unnamed: 0,City,Country,PM2.5,PM10,NO2,O3,SO2,CO,NH3,MortalityRate_per_100k,CountryName,Continent
0,abbeville,FR,4.7,8.8,4.7,36.8,0.8,210.3,1.3,4.1,France,Europe
1,acarau,BR,2.1,9.6,0.2,52.2,0.1,353.8,0.1,9.6,Brazil,South America
2,agbor,NG,29.9,39.2,5.7,2.5,0.4,894.6,0.6,29.6,Nigeria,Africa
3,al jawf,SA,11.1,39.2,1.0,101.6,1.7,193.6,0.5,33.5,Saudi Arabia,Asia
4,al qusayr,SY,7.9,10.0,4.7,40.8,2.5,210.3,2.7,26.6,Syrian Arab Republic,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...
393,zaragoza,ES,0.5,0.7,0.9,67.2,0.1,203.6,1.0,4.2,Spain,Europe
394,zavoronezhskoye,RU,1.0,1.1,0.7,75.8,0.1,227.0,0.1,17.4,Russian Federation,Europe
395,zhangatas,KZ,4.6,15.4,2.0,54.4,3.7,230.3,0.1,30.5,Kazakhstan,Asia
396,zhangjiakou,CN,3.3,9.9,1.9,62.2,0.9,200.3,3.4,26.6,China,Asia


In [47]:
# Write the enriched DataFrame to a new CSV file, without the row index
test_df.to_csv(
    path_or_buf='average_air_quality_health_continents_included.csv',
    index=False,
)

In [48]:
# Sanity-check the Continent column: report how many distinct values it
# holds and what they are. Rows whose lookup returned None would appear
# here as an extra entry, so an unexpected count points at unmapped rows.
unique_continents = test_df['Continent'].unique()
number_of_continents = len(unique_continents)
print(f'- Number of continents: {number_of_continents}')
print(f'- List of continents: {unique_continents}')

- Number of continents: 6
- List of continents: ['Europe' 'South America' 'Africa' 'Asia' 'North America' 'Oceania']
