In [2]:
import pandas as pd
import wbgapi as wb
import numpy as np
from sklearn.preprocessing import MinMaxScaler

## Population

In [3]:

# Economies that belong to the World Bank 'AFR' region.
afr_economies = wb.region.members('AFR')
# Indicator SP.POP.TOTL for those economies.
pop = wb.data.DataFrame('SP.POP.TOTL', afr_economies)


In [10]:
# UNWTO country workbook; joined onto the population table in a later cell.
unwto_count = pd.read_excel(r'../../datasets/unwto/countryContUNWTO.xlsx')

# Keep only the 2023 and 2022 columns, turn the index into a regular column
# and rename that column from 'economy' to 'iso3_code'.
pop = (
    pop[['YR2023', 'YR2022']]
    .reset_index()
    .rename(columns={'economy': 'iso3_code'})
)

In [12]:
# Left join on 'iso3_code': every population row is kept, and the UNWTO
# 'country' / 'sub_region' columns are attached where a match exists.
unwto_lookup = unwto_count[['iso3_code', 'country', 'sub_region']]
pop = pop.merge(unwto_lookup, how='left', on='iso3_code')

In [13]:
# Persist the merged population table as an Excel workbook.
pop.to_excel(r'../../datasets/world_bank/population.xlsx')

## Economic Data

In [2]:
# Economies that belong to the World Bank 'AFR' region. Looked up once and
# reused for every indicator below instead of repeating the identical lookup
# in each call.
afr_economies = wb.region.members('AFR')

# Use of IMF credit (DOD, current US$)
imf = wb.data.DataFrame('DT.DOD.DIMF.CD', afr_economies)
# Official exchange rate (LCU per US$, period average)
exrate = wb.data.DataFrame('PA.NUS.FCRF', afr_economies)
# Current account balance (% of GDP)
cur_ac = wb.data.DataFrame('BN.CAB.XOKA.GD.ZS', afr_economies)
# GDP per capita (current US$)
gdp = wb.data.DataFrame('NY.GDP.PCAP.CD', afr_economies)
# Inflation, consumer prices (annual %)
inf = wb.data.DataFrame('FP.CPI.TOTL.ZG', afr_economies)
# Foreign direct investment, net inflows (BoP, current US$)
fdi = wb.data.DataFrame('BX.KLT.DINV.CD.WD', afr_economies)
# Domestic credit to private sector (% of GDP)
dom = wb.data.DataFrame('FS.AST.PRVT.GD.ZS', afr_economies)
# Unemployment, total (% of total labor force) (modeled ILO estimate)
ur = wb.data.DataFrame('SL.UEM.TOTL.ZS', afr_economies)


In [3]:
# Display label -> downloaded indicator frame for each economic indicator.
# The pairs are listed in the order in which later cells iterate over them.
econ_dic = dict([
    ('Use of IMF credit (DOD, current US$)', imf),
    ('Official exchange rate (LCU per US$, period average)', exrate),
    ('Inflation, consumer prices (annual %)', inf),
    ('Current account balance (% of GDP)', cur_ac),
    ('GDP per capita (current US$)', gdp),
    ('Foreign direct investment, net inflows (BoP, current US$)', fdi),
    ('Domestic credit to private sector (% of GDP)', dom),
    ('Unemployment, total (% of total labor force) (modeled ILO estimate)', ur),
])

In [4]:
new_econ_dict = dict()


def get_latest_year_and_value_log(row):
    """Return the latest year that has data in ``row`` together with its value.

    Despite the ``_log`` suffix, no logarithm is applied: the value is
    returned exactly as stored in the row.

    Parameters
    ----------
    row : pandas.Series
        A row whose labels are strings with a two-character prefix followed
        by the year (for example ``'YR2022'``).

    Returns
    -------
    pandas.Series
        Two elements: the year parsed from the last label holding a valid
        entry, and the value stored under that label. ``[None, None]`` when
        the row has no valid entry at all.
    """
    newest_label = row.last_valid_index()
    if newest_label is None:
        return pd.Series([None, None])
    # Strip the two-character prefix and read the remainder as the year.
    year = int(newest_label[2:])
    return pd.Series([year, row[newest_label]])
def get_latest_year_and_value(row):
    """Return the latest year that has data in ``row`` together with its value.

    Parameters
    ----------
    row : pandas.Series
        A row whose labels are strings with a two-character prefix followed
        by the year (for example ``'YR2022'``).

    Returns
    -------
    pandas.Series
        ``[year, value]`` for the last label holding a valid entry, or
        ``[None, None]`` when the row has no valid entry.
    """
    label = row.last_valid_index()
    has_data = label is not None
    # label[2:] drops the two-character prefix, leaving the year digits.
    latest = [int(label[2:]), row[label]] if has_data else [None, None]
    return pd.Series(latest)

# Indicators routed through the ``_log`` extractor variant.
# NOTE(review): 'Total reserves (includes gold, current US$)' is not a key of
# econ_dic in this notebook, so only the IMF credit entry can match.
log_variant_indicators = (
    'Use of IMF credit (DOD, current US$)',
    'Total reserves (includes gold, current US$)',
)

for each, raw_frame in econ_dic.items():
    # Choose the row extractor first; the remaining steps are shared.
    if each in log_variant_indicators:
        extract = get_latest_year_and_value_log
    else:
        extract = get_latest_year_and_value

    # One output row per input row: [latest year, value].
    latest_data = raw_frame.apply(extract, axis=1)
    latest_data.columns = ['Latest Year', 'Value']
    new_econ_dict[each] = latest_data

scaler = MinMaxScaler()

# Min-max scale every indicator separately: the scaler is re-fitted on each
# frame's 'Value' column and the column is overwritten with the result.
for df in new_econ_dict.values():
    value_column = df['Value'].values.reshape(-1, 1)  # 2-D column for the scaler
    scaled_column = scaler.fit_transform(value_column)
    df['Value'] = scaled_column.flatten()


In [5]:
# Nested mapping built below:
#   country code -> indicator label -> {'latest_year': ..., 'latest_value': ...}
final_structure = {}

for indicator, df in new_econ_dict.items():
    for country_code in df.index:
        # Create the per-country dict on first sight, reuse it afterwards.
        country_entry = final_structure.setdefault(country_code, {})
        country_entry[indicator] = {
            'latest_year': df.loc[country_code, 'Latest Year'],
            'latest_value': df.loc[country_code, 'Value'],
        }

# The final_structure now has the desired format

In [6]:
# Nested mapping: country -> metric -> {'latest_year', 'latest_value'}
data = final_structure

# Flatten the nested mapping into one record per (country, metric) pair.
data_rows = [
    {
        'Country': country_code,
        'Metric': metric_name,
        'Latest Year': metric_details['latest_year'],
        'Latest Value': metric_details['latest_value'],
    }
    for country_code, metrics in data.items()
    for metric_name, metric_details in metrics.items()
]

# Long-format table built from the records
df_econ = pd.DataFrame(data_rows)

# Preview the first rows
df_econ.head()


Unnamed: 0,Country,Metric,Latest Year,Latest Value
0,AGO,"Use of IMF credit (DOD, current US$)",2022.0,0.255311
1,AGO,"Official exchange rate (LCU per US$, period av...",2022.0,0.019808
2,AGO,"Inflation, consumer prices (annual %)",2021.0,0.222973
3,AGO,Current account balance (% of GDP),2022.0,0.954085
4,AGO,GDP per capita (current US$),2022.0,0.211017


In [8]:
import json

# Write the nested economy structure to disk as indented JSON.
# NOTE(review): any NaN in the structure is emitted as the bare token NaN
# (the json module's default), which strict JSON parsers reject.
json_path = '../../../datasets/dashboards/country_profiles/economy.json'
with open(json_path, 'w') as f:
    json.dump(final_structure, f, indent=4)


## Infrastructure Data

In [9]:
# Economies that belong to the World Bank 'AFR' region. Looked up once and
# reused for every indicator below instead of repeating the identical lookup
# in each call.
afr_economies = wb.region.members('AFR')

# Rail lines (total route-km)
rail = wb.data.DataFrame('IS.RRS.TOTL.KM', afr_economies)
# Fixed broadband subscriptions (per 100 people)
broadband = wb.data.DataFrame('IT.NET.BBND.P2', afr_economies)
# Renewable energy consumption (% of total final energy consumption)
# NOTE: the variable is called `transport` although it holds the renewable
# energy indicator; the name is kept because the next cell refers to it.
transport = wb.data.DataFrame('EG.FEC.RNEW.ZS', afr_economies)
# Mobile cellular subscriptions (per 100 people)
mobile = wb.data.DataFrame('IT.CEL.SETS.P2', afr_economies)
# Air transport, registered carrier departures worldwide
air = wb.data.DataFrame('IS.AIR.DPRT', afr_economies)
# Access to electricity (% of population)
# NOTE: the variable is called `pp_ict` although it holds the electricity
# access indicator; the name is kept because the next cell refers to it.
pp_ict = wb.data.DataFrame('EG.ELC.ACCS.ZS', afr_economies)

In [10]:
# Display label -> downloaded indicator frame for each infrastructure
# indicator, listed in the order in which later cells iterate over them.
infra_dict = dict([
    ('Rail lines (total route-km)', rail),
    ('Fixed broadband subscriptions (per 100 people)', broadband),
    ('Renewable energy consumption (% of total final energy consumption)', transport),
    ('Mobile cellular subscriptions (per 100 people)', mobile),
    ('Air transport, registered carrier departures worldwide', air),
    ('Access to electricity (% of population)', pp_ict),
])

In [11]:
new_infra_dict = {}


def get_latest_year_and_value(row):
    """Return the latest year that has data in ``row`` together with its value.

    Parameters
    ----------
    row : pandas.Series
        A row whose labels are strings with a two-character prefix followed
        by the year (for example ``'YR2022'``).

    Returns
    -------
    pandas.Series
        ``[year, value]`` for the last label holding a valid entry, or
        ``[None, None]`` when the row has no valid entry.
    """
    last_valid_index = row.last_valid_index()
    if last_valid_index is None:
        return pd.Series([None, None])
    # Strip the two-character prefix and read the remainder as the year.
    return pd.Series([int(last_valid_index[2:]), row[last_valid_index]])


def get_latest_year_and_value_log(row):
    """Return the same result as :func:`get_latest_year_and_value`.

    Kept as a separate name because the extraction loop selects it for some
    indicators. Despite the ``_log`` suffix, no logarithm is applied.
    """
    return get_latest_year_and_value(row)

# Indicators routed through the ``_log`` extractor variant.
# NOTE(review): 'Investment in transport with private participation
# (current US$)' is not a key of infra_dict in this notebook, so only the air
# transport entry can match.
log_variant_indicators = (
    'Investment in transport with private participation (current US$)',
    'Air transport, registered carrier departures worldwide',
)

for each, raw_frame in infra_dict.items():
    # Choose the row extractor first; the remaining steps are shared.
    if each in log_variant_indicators:
        extract = get_latest_year_and_value_log
    else:
        extract = get_latest_year_and_value

    # One output row per input row: [latest year, value].
    latest_data = raw_frame.apply(extract, axis=1)
    latest_data.columns = ['Latest Year', 'Value']
    new_infra_dict[each] = latest_data
scaler = MinMaxScaler()

# Min-max scale every indicator separately: the scaler is re-fitted on each
# frame's 'Value' column and the column is overwritten with the result.
for df in new_infra_dict.values():
    value_column = df['Value'].values.reshape(-1, 1)  # 2-D column for the scaler
    scaled_column = scaler.fit_transform(value_column)
    df['Value'] = scaled_column.flatten()


In [12]:
# Nested mapping built below:
#   country code -> indicator label -> {'latest_year': ..., 'latest_value': ...}
final_structure_infra = {}

for indicator, df in new_infra_dict.items():
    for country_code in df.index:
        # Create the per-country dict on first sight, reuse it afterwards.
        country_entry = final_structure_infra.setdefault(country_code, {})
        country_entry[indicator] = {
            'latest_year': df.loc[country_code, 'Latest Year'],
            'latest_value': df.loc[country_code, 'Value'],
        }

# The final_structure now has the desired format

In [13]:
# Write the nested infrastructure structure to disk as indented JSON.
# NOTE(review): any NaN in the structure is emitted as the bare token NaN
# (the json module's default), which strict JSON parsers reject.
json_path = '../../../datasets/dashboards/country_profiles/infra.json'
with open(json_path, 'w') as f:
    json.dump(final_structure_infra, f, indent=4)


In [14]:
# Nested mapping: country -> metric -> {'latest_year', 'latest_value'}
data = final_structure_infra

# Flatten the nested mapping into one record per (country, metric) pair.
data_rows = [
    {
        'Country': country_code,
        'Metric': metric_name,
        'Latest Year': metric_details['latest_year'],
        'Latest Value': metric_details['latest_value'],
    }
    for country_code, metrics in data.items()
    for metric_name, metric_details in metrics.items()
]

# Long-format table built from the records
df_infra = pd.DataFrame(data_rows)

# Preview the first rows
df_infra.head()


Unnamed: 0,Country,Metric,Latest Year,Latest Value
0,AGO,Rail lines (total route-km),,
1,AGO,Fixed broadband subscriptions (per 100 people),2022.0,0.011076
2,AGO,Renewable energy consumption (% of total final...,2020.0,0.633996
3,AGO,Mobile cellular subscriptions (per 100 people),2022.0,0.213926
4,AGO,"Air transport, registered carrier departures w...",2021.0,0.03016


In [15]:
# Export both long-format tables into a single workbook, one sheet per table.
workbook_path = r'../../../datasets/dashboards/country_profiles/country_profiles.xlsx'
sheets = {'economy': df_econ, 'infrastructure': df_infra}
with pd.ExcelWriter(path=workbook_path) as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)

## Plots

In [17]:
import plotly.graph_objs as go

# Economy code of the country to plot.
country = 'DZA'
# Indicator label -> {'latest_year', 'latest_value'} for the selected country.
country_data = final_structure[country]

# Prepare data for the radar chart
categories = list(country_data.keys())  # Indicator names
values = [details['latest_value'] for details in country_data.values()]  # Latest value per indicator

# max() is order-dependent when NaN is present (a leading NaN is returned as
# the maximum) and raises on an empty sequence. Drop missing values before
# computing the axis bound, and fall back to 1 when nothing is available.
present_values = [value for value in values if pd.notna(value)]
radial_max = max(present_values, default=1)

# Create the radar chart
fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    r=values,
    theta=categories,
    fill='toself'
))

# Update the layout
fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0, radial_max]  # Upper bound = largest available value
        )),
    showlegend=False,
    title=f"Radar Chart for {country}"
)

# Show the plot
fig.show()
