In [54]:
import requests
import pandas as pd
import logging
import xmltodict
import json
from tqdm import tqdm

logging.basicConfig(level = logging.INFO)

In [118]:
def create_df(indicator, url):
    page = 1
    all_data = []
 
    # Get total pages first to initialize the progress bar
    response = requests.get(f"{url}")
    if response.status_code != 200:
        logging.error(f"Failed to fetch data for {indicator}: {response.status_code}")
        
    else:
        try:
            data_dict = xmltodict.parse(response.content.decode('utf-8-sig')) 
            data = data_dict.get('wb:data', {}) 
            total_pages = int(data.get('@pages', 1))  # Extract total pages
        except Exception as e:
            logging.error(f"Failed to parse XML for {indicator} on page 1: {e}")
            total_pages = 1  # If parsing fails, assume 1 page

    # Wrap the page loop with tqdm for progress bar
    for page in tqdm(range(1, total_pages ), desc="Fetching Data", unit="page"):
        response = requests.get(f"{url}?page={page}&format=xml")
        
        if response.status_code != 200:
            logging.error(f"Failed to fetch data for {indicator} on page {page}: {response.status_code}")
            break
        
        try:
            data_dict = xmltodict.parse(response.content.decode('utf-8-sig'))  # Parse XML into a dictionary
        except Exception as e:
            logging.error(f"Failed to parse XML for {indicator} on page {page}: {e}")
            break
        
        data = data_dict.get('wb:data', {})  # Adjust the key based on your structure
        all_data.append(data)  # Add the item to the list
        

    # Optionally, handle `all_data` after loop completion
    logging.info(f"Fetched {len(all_data)} records for {indicator}.")

    countries = []
    values = []
    years = []
    country_codes = []

    for entry in all_data:
        entry = entry.get('wb:data', {})[0]
        year = entry.get('wb:date')
        if year and 2015 <= int(year) <= 2025:                   
            country = entry.get('wb:country').get('#text')
            value = entry.get('wb:value')
            country_code = entry.get('wb:countryiso3code')
            countries.append(country)
            years.append(year)
            values.append(value)
            country_codes.append(country_code)


    df = pd.DataFrame({"Country": countries, "Country_code": country_codes, "Year": years, f"{indicator}": values})
    return df

In [128]:
base_url = "http://api.worldbank.org/v2/country/all/indicator/"
indicator_codes = ["NY.GDP.MKTP.CD", "FP.CPI.TOTL.ZG", "SL.UEM.TOTL.ZS", "NE.TRD.GNFS.ZS", "DT.DOD.DECT.CD", "SP.POP.TOTL"]

urls = []

for code in indicator_codes:
    indicator_url = f"{base_url}/{code}"
    urls.append(indicator_url)


indicators = ["gdp (current US$)", "inflation (Consumer Prices, Annual %)", "unemployment (Total, % of Total Labor Force)", "trade (Trade as % of GDP)", \
    "debt (Total External Debt Stocks, USD)", "population (total)"]


indicator_url_dict = dict(zip(indicators,urls))

try:

    for indicator, url in indicator_url_dict.items():
        logging.info(f"Fetching data for {indicator}")
        df_name = f"{indicator.split()[0]}_df"
        df = create_df(indicator, url)
        globals()[df_name] = df
        print(f"Data for {indicator}:\n", df.head(), "\n")
except Exception as e:
    logging.error(f"Error fetching data for {indicator}")

INFO:root:Fetching data for gdp (current US$)
Fetching Data: 100%|██████████| 340/340 [01:45<00:00,  3.22page/s]
INFO:root:Fetched 340 records for gdp (current US$).
INFO:root:Fetching data for inflation (Consumer Prices, Annual %)


Data for gdp (current US$):
                                        Country Country_code  Year  \
0                  Africa Eastern and Southern          AFE  2023   
1                       Caribbean small states          CSS  2015   
2  East Asia & Pacific (excluding high income)          EAP  2021   
3     Fragile and conflict affected situations          FCS  2019   
4                                    IDA total          IDA  2017   

  gdp (current US$)  
0  1245472471675.95  
1  35085992997.3327  
2  20849863760041.9  
3  1842632114099.19  
4  2268542522550.63   



Fetching Data: 100%|██████████| 340/340 [01:44<00:00,  3.24page/s]
INFO:root:Fetched 340 records for inflation (Consumer Prices, Annual %).
INFO:root:Fetching data for unemployment (Total, % of Total Labor Force)


Data for inflation (Consumer Prices, Annual %):
                                        Country Country_code  Year  \
0                  Africa Eastern and Southern          AFE  2023   
1                       Caribbean small states          CSS  2015   
2  East Asia & Pacific (excluding high income)          EAP  2021   
3     Fragile and conflict affected situations          FCS  2019   
4                                    IDA total          IDA  2017   

  inflation (Consumer Prices, Annual %)  
0                      7.12697482131851  
1                    -0.861584009427748  
2                      2.69891859529198  
3                      2.45280214062732  
4                      3.89251625363385   



Fetching Data: 100%|██████████| 340/340 [02:05<00:00,  2.70page/s]
INFO:root:Fetched 340 records for unemployment (Total, % of Total Labor Force).
INFO:root:Fetching data for trade (Trade as % of GDP)


Data for unemployment (Total, % of Total Labor Force):
                                        Country Country_code  Year  \
0                  Africa Eastern and Southern          AFE  2023   
1                       Caribbean small states          CSS  2015   
2  East Asia & Pacific (excluding high income)          EAP  2021   
3     Fragile and conflict affected situations          FCS  2019   
4                                    IDA total          IDA  2017   

  unemployment (Total, % of Total Labor Force)  
0                             7.80636542181196  
1                             11.5050757703248  
2                             4.12248591657027  
3                             5.87910736649221  
4                             4.55218158153548   



Fetching Data: 100%|██████████| 340/340 [02:42<00:00,  2.09page/s]
INFO:root:Fetched 340 records for trade (Trade as % of GDP).
INFO:root:Fetching data for debt (Total External Debt Stocks, USD)


Data for trade (Trade as % of GDP):
                                        Country Country_code  Year  \
0                  Africa Eastern and Southern          AFE  2023   
1                       Caribbean small states          CSS  2015   
2  East Asia & Pacific (excluding high income)          EAP  2021   
3     Fragile and conflict affected situations          FCS  2019   
4                                    IDA total          IDA  2017   

  trade (Trade as % of GDP)  
0          56.8723924609556  
1                      None  
2          45.3150442539075  
3                      None  
4          46.3845208927936   



Fetching Data: 100%|██████████| 340/340 [03:40<00:00,  1.54page/s]
INFO:root:Fetched 340 records for debt (Total External Debt Stocks, USD).
INFO:root:Fetching data for population (total)


Data for debt (Total External Debt Stocks, USD):
                                        Country Country_code  Year  \
0                  Africa Eastern and Southern          AFE  2023   
1                       Caribbean small states          CSS  2015   
2  East Asia & Pacific (excluding high income)          EAP  2021   
3     Fragile and conflict affected situations          FCS  2019   
4                                    IDA total          IDA  2017   

  debt (Total External Debt Stocks, USD)  
0                                   None  
1                                   None  
2                        3697087990175.4  
3                                   None  
4                                   None   



Fetching Data: 100%|██████████| 340/340 [03:41<00:00,  1.54page/s]
INFO:root:Fetched 340 records for population (total).


Data for population (total):
                                        Country Country_code  Year  \
0                  Africa Eastern and Southern          AFE  2023   
1                       Caribbean small states          CSS  2015   
2  East Asia & Pacific (excluding high income)          EAP  2021   
3     Fragile and conflict affected situations          FCS  2019   
4                                    IDA total          IDA  2017   

  population (total)  
0          750503764  
1            2964524  
2         2128553744  
3          974824119  
4       1674229409.5   



In [129]:
gdp_df.head()

Unnamed: 0,Country,Country_code,Year,gdp (current US$)
0,Africa Eastern and Southern,AFE,2023,1245472471675.95
1,Caribbean small states,CSS,2015,35085992997.3327
2,East Asia & Pacific (excluding high income),EAP,2021,20849863760041.9
3,Fragile and conflict affected situations,FCS,2019,1842632114099.19
4,IDA total,IDA,2017,2268542522550.63


In [130]:
gdp_df.describe()

Unnamed: 0,Country,Country_code,Year,gdp (current US$)
count,54,53,54,54.0
unique,54,53,5,54.0
top,Africa Eastern and Southern,AFE,2023,1245472471675.95
freq,1,1,11,1.0


In [131]:
inflation_df.head()

Unnamed: 0,Country,Country_code,Year,"inflation (Consumer Prices, Annual %)"
0,Africa Eastern and Southern,AFE,2023,7.12697482131851
1,Caribbean small states,CSS,2015,-0.861584009427748
2,East Asia & Pacific (excluding high income),EAP,2021,2.69891859529198
3,Fragile and conflict affected situations,FCS,2019,2.45280214062732
4,IDA total,IDA,2017,3.89251625363385


In [132]:
trade_df.head()

Unnamed: 0,Country,Country_code,Year,trade (Trade as % of GDP)
0,Africa Eastern and Southern,AFE,2023,56.8723924609556
1,Caribbean small states,CSS,2015,
2,East Asia & Pacific (excluding high income),EAP,2021,45.3150442539075
3,Fragile and conflict affected situations,FCS,2019,
4,IDA total,IDA,2017,46.3845208927936


In [133]:
unemployment_df.head()

Unnamed: 0,Country,Country_code,Year,"unemployment (Total, % of Total Labor Force)"
0,Africa Eastern and Southern,AFE,2023,7.80636542181196
1,Caribbean small states,CSS,2015,11.5050757703248
2,East Asia & Pacific (excluding high income),EAP,2021,4.12248591657027
3,Fragile and conflict affected situations,FCS,2019,5.87910736649221
4,IDA total,IDA,2017,4.55218158153548


In [134]:
debt_df.head()

Unnamed: 0,Country,Country_code,Year,"debt (Total External Debt Stocks, USD)"
0,Africa Eastern and Southern,AFE,2023,
1,Caribbean small states,CSS,2015,
2,East Asia & Pacific (excluding high income),EAP,2021,3697087990175.4
3,Fragile and conflict affected situations,FCS,2019,
4,IDA total,IDA,2017,


In [135]:
population_df.head()

Unnamed: 0,Country,Country_code,Year,population (total)
0,Africa Eastern and Southern,AFE,2023,750503764.0
1,Caribbean small states,CSS,2015,2964524.0
2,East Asia & Pacific (excluding high income),EAP,2021,2128553744.0
3,Fragile and conflict affected situations,FCS,2019,974824119.0
4,IDA total,IDA,2017,1674229409.5


In [136]:
import joblib

In [138]:
joblib.dump(gdp_df, 'data/gdp_df.joblib')
joblib.dump(inflation_df, 'data/inflation_df.joblib')
joblib.dump(population_df, 'data/population_df.joblib')
joblib.dump(debt_df, 'data/debt_df.joblib')
joblib.dump(unemployment_df, 'data/unemployment_df.joblib')

['data/unemployment_df.joblib']