### Dataset source: https://data.worldbank.org/indicator/SP.POP.TOTL

## Comparison of Population Growth Across Selected Countries Over Time.

### Download dataset

In [12]:
import os
import urllib.request
import zipfile
from io import BytesIO
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact

# Download the World Bank population archive into memory.
# NOTE(review): the `_gl=...` query parameter looks like an analytics tag copied
# from a browser link and is probably not required — confirm before removing it.
url = "https://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=csv&_gl=1*fpn8cm*_gcl_au*MTQyNjIwNTQ2LjE3MjUzNjYxOTg."
# The context manager closes the HTTP connection as soon as the payload is read.
with urllib.request.urlopen(url) as response:
    package = BytesIO(response.read())

# Files are extracted into the current working directory.
pwd = os.path.abspath(os.curdir)

# List to store the full paths of the extracted files
downloaded_files = []

# `archive` (rather than `zip`) keeps the builtin zip() usable in later cells,
# and the `with` block closes the archive once every member is written.
with zipfile.ZipFile(package, 'r') as archive:
    for filename in archive.namelist():
        # ZipFile.extract sanitises member names (absolute paths, '..' components)
        # before writing, and returns the path it actually created.
        csv_path = archive.extract(filename, pwd)
        downloaded_files.append(csv_path)
        print(filename, 'downloaded successfully')
    

Metadata_Indicator_API_SP.POP.TOTL_DS2_en_csv_v2_3401680.csv downloaded successfully
API_SP.POP.TOTL_DS2_en_csv_v2_3401680.csv downloaded successfully
Metadata_Country_API_SP.POP.TOTL_DS2_en_csv_v2_3401680.csv downloaded successfully


In [3]:
# Show every path recorded by the download step, one per line, under a heading.
report_lines = ["\nFiles downloaded:"]
report_lines.extend(map(str, downloaded_files))
print("\n".join(report_lines))


Files downloaded:
C:\Users\user\Documents\Projects_Info_Tech\Metadata_Indicator_API_SP.POP.TOTL_DS2_en_csv_v2_3401680.csv
C:\Users\user\Documents\Projects_Info_Tech\API_SP.POP.TOTL_DS2_en_csv_v2_3401680.csv
C:\Users\user\Documents\Projects_Info_Tech\Metadata_Country_API_SP.POP.TOTL_DS2_en_csv_v2_3401680.csv


### Read the CSV file containing the countries' populations

In [5]:
# The download cell saves the data file under whatever name the archive contains,
# and that name carries a numeric suffix (3401680 in this run) that presumably
# changes between releases. Look the file up among the extracted paths instead of
# depending on one fixed name; the two metadata files start with "Metadata_", so
# the "API_SP.POP.TOTL" prefix matches only the population table.
# Falls back to the original file name when the download cell has not been run
# in this kernel (i.e. `downloaded_files` does not exist).
population_csv = next(
    (
        path
        for path in globals().get("downloaded_files", [])
        if os.path.basename(path).startswith("API_SP.POP.TOTL")
    ),
    r"API_SP.POP.TOTL_DS2_en_csv_v2_3401680.csv",
)

# skiprows=3 skips the first three lines of the file — presumably the export's
# preamble that precedes the header row; verify against the raw CSV.
dataFrame = pd.read_csv(population_csv, skiprows=3)

In [6]:
# Work on an independent (deep) copy so the frame loaded from the CSV stays untouched.
df = dataFrame.copy(deep=True)
# Display (rows, columns) of the working copy.
df.shape

(266, 69)

In [7]:
# Preview the first five rows of the working copy before cleaning.
df.head(n=5)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,Unnamed: 68
0,Aruba,ABW,"Population, total",SP.POP.TOTL,54608.0,55811.0,56682.0,57475.0,58178.0,58782.0,...,104257.0,104874.0,105439.0,105962.0,106442.0,106585.0,106537.0,106445.0,106277.0,
1,Africa Eastern and Southern,AFE,"Population, total",SP.POP.TOTL,130692579.0,134169237.0,137835590.0,141630546.0,145605995.0,149742351.0,...,600008424.0,616377605.0,632746570.0,649757148.0,667242986.0,685112979.0,702977106.0,720859132.0,739108306.0,
2,Afghanistan,AFG,"Population, total",SP.POP.TOTL,8622466.0,8790140.0,8969047.0,9157465.0,9355514.0,9565147.0,...,33753499.0,34636207.0,35643418.0,36686784.0,37769499.0,38972230.0,40099462.0,41128771.0,42239854.0,
3,Africa Western and Central,AFW,"Population, total",SP.POP.TOTL,97256290.0,99314028.0,101445032.0,103667517.0,105959979.0,108336203.0,...,408690375.0,419778384.0,431138704.0,442646825.0,454306063.0,466189102.0,478185907.0,490330870.0,502789511.0,
4,Angola,AGO,"Population, total",SP.POP.TOTL,5357195.0,5441333.0,5521400.0,5599827.0,5673199.0,5736582.0,...,28127721.0,29154746.0,30208628.0,31273533.0,32353588.0,33428486.0,34503774.0,35588987.0,36684202.0,


### Data cleaning 

In [8]:
# Rows removed from the analysis, identified by their integer row label.
# NOTE(review): which entries labels 196 and 110 correspond to is not visible
# here — confirm, and consider dropping by 'Country Name' instead so the
# selection survives a reordering of the source file.
rows_to_drop = [196, 110]

# Descriptive columns that are not needed once only the yearly values are plotted.
metadata_columns = ['Country Code', 'Indicator Name', 'Indicator Code']

# The CSV yields an empty trailing column that pandas names 'Unnamed: N'
# ('Unnamed: 68' in this download). N depends on the number of columns, so match
# it by prefix rather than by its exact name.
unnamed_columns = [col for col in df.columns if str(col).startswith('Unnamed')]

# A single non-inplace drop with errors='ignore' makes the cell safe to re-run:
# labels that were already removed are skipped instead of raising KeyError.
df = df.drop(
    index=rows_to_drop,
    columns=metadata_columns + unnamed_columns,
    errors='ignore',
)
df.shape

(264, 65)

In [14]:
# Index the table by country name so rows can be selected with df.loc[<name>].
# The guard makes the cell re-runnable: once 'Country Name' is the index it is no
# longer a column, and calling set_index again would raise KeyError.
if df.index.name != 'Country Name':
    df = df.set_index('Country Name')

In [15]:
# Preview the first five rows now that the table is indexed by country name.
df.head(n=5)

Unnamed: 0_level_0,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aruba,54608.0,55811.0,56682.0,57475.0,58178.0,58782.0,59291.0,59522.0,59471.0,59330.0,...,103594.0,104257.0,104874.0,105439.0,105962.0,106442.0,106585.0,106537.0,106445.0,106277.0
Africa Eastern and Southern,130692579.0,134169237.0,137835590.0,141630546.0,145605995.0,149742351.0,153955516.0,158313235.0,162875171.0,167596160.0,...,583651101.0,600008424.0,616377605.0,632746570.0,649757148.0,667242986.0,685112979.0,702977106.0,720859132.0,739108306.0
Afghanistan,8622466.0,8790140.0,8969047.0,9157465.0,9355514.0,9565147.0,9783147.0,10010030.0,10247780.0,10494489.0,...,32716210.0,33753499.0,34636207.0,35643418.0,36686784.0,37769499.0,38972230.0,40099462.0,41128771.0,42239854.0
Africa Western and Central,97256290.0,99314028.0,101445032.0,103667517.0,105959979.0,108336203.0,110798486.0,113319950.0,115921723.0,118615741.0,...,397855507.0,408690375.0,419778384.0,431138704.0,442646825.0,454306063.0,466189102.0,478185907.0,490330870.0,502789511.0
Angola,5357195.0,5441333.0,5521400.0,5599827.0,5673199.0,5736582.0,5787044.0,5827503.0,5868203.0,5928386.0,...,27128337.0,28127721.0,29154746.0,30208628.0,31273533.0,32353588.0,33428486.0,34503774.0,35588987.0,36684202.0


### Prepare the DataFrame for visualization

In [18]:
# Collect the index labels (country names) into a plain Python list;
# the plotting widget uses it as its dropdown options.
countries = df.index.tolist()
countries

['Aruba',
 'Africa Eastern and Southern',
 'Afghanistan',
 'Africa Western and Central',
 'Angola',
 'Albania',
 'Andorra',
 'Arab World',
 'United Arab Emirates',
 'Argentina',
 'Armenia',
 'American Samoa',
 'Antigua and Barbuda',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Burundi',
 'Belgium',
 'Benin',
 'Burkina Faso',
 'Bangladesh',
 'Bulgaria',
 'Bahrain',
 'Bahamas, The',
 'Bosnia and Herzegovina',
 'Belarus',
 'Belize',
 'Bermuda',
 'Bolivia',
 'Brazil',
 'Barbados',
 'Brunei Darussalam',
 'Bhutan',
 'Botswana',
 'Central African Republic',
 'Canada',
 'Central Europe and the Baltics',
 'Switzerland',
 'Channel Islands',
 'Chile',
 'China',
 "Cote d'Ivoire",
 'Cameroon',
 'Congo, Dem. Rep.',
 'Congo, Rep.',
 'Colombia',
 'Comoros',
 'Cabo Verde',
 'Costa Rica',
 'Caribbean small states',
 'Cuba',
 'Curacao',
 'Cayman Islands',
 'Cyprus',
 'Czechia',
 'Germany',
 'Djibouti',
 'Dominica',
 'Denmark',
 'Dominican Republic',
 'Algeria',
 'East Asia & Pacific (excluding high income)

### Plotting (bar)

In [23]:
@interact(country1 = countries, country2 = countries)
def plot_population_two_countries(country1, country2):
    """Draw a grouped bar chart comparing two rows of ``df`` column by column.

    Parameters
    ----------
    country1, country2 : label
        Index labels of ``df`` (chosen from the ``countries`` dropdowns) whose
        values are plotted. Each must select a single row.

    Notes
    -----
    The two series are drawn side by side. Drawing both at the same x positions
    makes the second series cover the first wherever its values are larger,
    which hides one of the two countries being compared.
    """
    years = df.columns
    positions = list(range(len(years)))
    bar_width = 0.4  # two bars per year occupy 0.8 of each unit-wide slot

    fig, ax = plt.subplots(figsize=(16, 8))
    # Shift each series half a bar width left/right of the year's tick position.
    ax.bar([pos - bar_width / 2 for pos in positions], df.loc[country1],
           width=bar_width, label=country1)
    ax.bar([pos + bar_width / 2 for pos in positions], df.loc[country2],
           width=bar_width, label=country2)

    ax.set_title(f'Population Evolution: {country1} vs {country2}')
    ax.set_xlabel('Year')
    # Bars sit on numeric positions, so the column labels are attached explicitly.
    ax.set_xticks(positions)
    ax.set_xticklabels(years, rotation=90)
    ax.set_ylabel('Population')
    ax.legend(title='Country')
    ax.grid(True)
    plt.show()

interactive(children=(Dropdown(description='country1', options=('Aruba', 'Africa Eastern and Southern', 'Afgha…