In [61]:
# pip install beautifulsoup4


In [67]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import csv

In [68]:
# Define the url for world bank's country page
base_url = "https://data.worldbank.org/country"
response = requests.get(base_url)
soup = BeautifulSoup(response.content, 'html.parser')
print(soup)

<!DOCTYPE html>

<html data-react-checksum="1861656260" data-reactid="1" data-reactroot=""><head data-reactid="2"><meta charset="utf-8" data-reactid="3"/><title data-react-helmet="true" data-reactid="4">Countries | Data</title><meta content="width=device-width, initial-scale=1, minimal-ui" data-reactid="5" name="viewport"/><meta content="IE=Edge" data-reactid="6" http-equiv="X-UA-Compatible"/><meta content="Countries from The World Bank: Data" data-react-helmet="true" data-reactid="7" name="description"/><link data-reactid="8" href="/favicon.ico?v=1.1" rel="shortcut icon"/><meta content="ByFDZmo3VoJURCHrA3WHjth6IAISYQEbe20bfzTPCPo" data-reactid="9" name="google-site-verification"/><meta content="World Bank Open Data" data-reactid="10" property="og:title"/><meta content="Free and open access to global development data" data-reactid="11" property="og:description"/><meta content="https://data.worldbank.org/assets/images/logo-wb-header-en.svg" data-reactid="12" property="og:image"/><meta c

In [69]:
#Extract country name and links

#find all anchor tags with links to country
links = soup.find_all('a')

#filter and store unique country entries
countries = []
for link in links:
    href = link.get('href')
    if href and href.startswith("/country/"):
        country_name = link.text.strip()
        full_url =  'https://data.worldbank.org/' + href
        countries.append((country_name,full_url))
        

In [70]:
#print out the first countries and their links
print(countries[:5])

[('Afghanistan', 'https://data.worldbank.org//country/afghanistan?view=chart'), ('Albania', 'https://data.worldbank.org//country/albania?view=chart'), ('Algeria', 'https://data.worldbank.org//country/algeria?view=chart'), ('American Samoa', 'https://data.worldbank.org//country/american-samoa?view=chart'), ('Andorra', 'https://data.worldbank.org//country/andorra?view=chart')]


In [71]:
#saving to csv file
with open('worldbank_countries.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Country', 'URL'])
    writer.writerows(countries)
print('csv saved')


csv saved


In [72]:
#Load the csv and preview the data
data = pd.read_csv("worldbank_countries.csv")
data.head()
            

Unnamed: 0,Country,URL
0,Afghanistan,https://data.worldbank.org//country/afghanista...
1,Albania,https://data.worldbank.org//country/albania?vi...
2,Algeria,https://data.worldbank.org//country/algeria?vi...
3,American Samoa,https://data.worldbank.org//country/american-s...
4,Andorra,https://data.worldbank.org//country/andorra?vi...


In [73]:
# Function that fetch metadata using country code
def get_country_metadata(country_code):
    url = f"https://api.worldbank.org/v2/country/{country_code}?format=json"
    response = requests.get(url)
    data = response.json()
    #extract main metadata dictionary
    return data[1][0] 


In [74]:
#calling the function with country code
info = get_country_metadata("NGA")
print(info)

{'id': 'NGA', 'iso2Code': 'NG', 'name': 'Nigeria', 'region': {'id': 'SSF', 'iso2code': 'ZG', 'value': 'Sub-Saharan Africa '}, 'adminregion': {'id': 'SSA', 'iso2code': 'ZF', 'value': 'Sub-Saharan Africa (excluding high income)'}, 'incomeLevel': {'id': 'LMC', 'iso2code': 'XN', 'value': 'Lower middle income'}, 'lendingType': {'id': 'IDB', 'iso2code': 'XH', 'value': 'Blend'}, 'capitalCity': 'Abuja', 'longitude': '7.48906', 'latitude': '9.05804'}


In [75]:
#Extracting specific fields (name, capital city,region,income level and lending type
print("Country:", info['name'])
print("Capital:", info['capitalCity'])
print("Region:", info['region']['value'])
print("Income level:", info['incomeLevel']['value'])
print("Lending Type:", info['lendingType']['value'])

Country: Nigeria
Capital: Abuja
Region: Sub-Saharan Africa 
Income level: Lower middle income
Lending Type: Blend
