# Import libraries

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import world_bank_data as wb

# Load data:

In [2]:
# World Bank indicator codes. Each one is passed to `wb.get_series` below and
# is also the name of the value column that gets renamed after the fetch.

# Environment
CO2_EMISSION_DATA = "EN.ATM.CO2E.KT"

# Country size
COUNTRY_POPULATION_SIZE_DATA = "SP.POP.TOTL"
COUNTRY_LAND_AREA_DATA = "AG.LND.TOTL.K2"

# Economy
COUNTRY_GDP_DATA = "NY.GDP.MKTP.CD"
POVERTY_HEADCOUNT_RATIO_DATA = "SI.POV.DDAY"

## CO2 emissions data:

In [3]:
# Fetch every available year of CO2 emissions, then move the Country/Year
# index levels into ordinary columns and give the value column a readable
# name. After `reset_index()` the object is a DataFrame despite its name.
co2_emissions_series = wb.get_series(CO2_EMISSION_DATA, simplify_index=True)
co2_emissions_series = co2_emissions_series.reset_index()
co2_emissions_series = co2_emissions_series.rename(
    columns={CO2_EMISSION_DATA: "co2 emissions"}
)

In [4]:
# NOTE(review): `co2_emissions_series` is the result of `reset_index()`, so it
# already appears to be a DataFrame and this wrapper looks redundant. Confirm
# before removing it, since later cells may rely on both names existing.
co2_emissions_df = pd.DataFrame(co2_emissions_series)

In [5]:
# Preview the first five rows to check the column layout.
co2_emissions_df.head(n=5)

Unnamed: 0,Country,Year,co2 emissions
0,Arab World,1960,56005.299798
1,Arab World,1961,62578.60559
2,Arab World,1962,70562.050726
3,Arab World,1963,85085.751513
4,Arab World,1964,99693.913771


## Country population data:

In [66]:
# Only 2019 is requested because it is the most recent year with non-missing
# data. With a single year selected, the flattened frame has just the
# `Country` column plus the renamed value column.
country_population_series = wb.get_series(
    COUNTRY_POPULATION_SIZE_DATA, simplify_index=True, date="2019"
)
country_population_series = country_population_series.reset_index()
country_population_series = country_population_series.rename(
    columns={COUNTRY_POPULATION_SIZE_DATA: "population size"}
)

In [63]:
# NOTE(review): `country_population_series` is the result of `reset_index()`,
# so it already appears to be a DataFrame and this wrapper looks redundant.
# Confirm before removing it, since later cells may rely on both names.
country_population_df = pd.DataFrame(country_population_series)

In [64]:
# Preview the first five rows to check the column layout.
country_population_df.head(n=5)

Unnamed: 0,Country,population size
0,Arab World,427870300.0
1,Caribbean small states,7401381.0
2,Central Europe and the Baltics,102378600.0
3,Early-demographic dividend,3290291000.0
4,East Asia & Pacific,2340628000.0


## Country poverty percentage data:

In [9]:
# Fetch every available year of the poverty headcount ratio, then move the
# Country/Year index levels into ordinary columns and give the value column a
# readable name. Missing observations stay as NaN at this stage.
poverty_percentages_series = wb.get_series(
    POVERTY_HEADCOUNT_RATIO_DATA, simplify_index=True
)
poverty_percentages_series = poverty_percentages_series.reset_index()
poverty_percentages_series = poverty_percentages_series.rename(
    columns={POVERTY_HEADCOUNT_RATIO_DATA: "poverty percentage"}
)

In [10]:
# NOTE(review): `poverty_percentages_series` is the result of `reset_index()`,
# so it already appears to be a DataFrame and this wrapper looks redundant.
# Confirm before removing it, since later cells may rely on both names.
poverty_percentages_df = pd.DataFrame(poverty_percentages_series)

In [11]:
# Preview the first five rows; the earliest years have no poverty data (NaN).
poverty_percentages_df.head(n=5)

Unnamed: 0,Country,Year,poverty percentage
0,Arab World,1960,
1,Arab World,1961,
2,Arab World,1962,
3,Arab World,1963,
4,Arab World,1964,


## Country land area data:

In [67]:
# Only 2017 is requested because it is the most recent year with non-missing
# data. With a single year selected, the flattened frame has just the
# `Country` column plus the renamed value column.
country_land_area_series = wb.get_series(
    COUNTRY_LAND_AREA_DATA, simplify_index=True, date="2017"
)
country_land_area_series = country_land_area_series.reset_index()
country_land_area_series = country_land_area_series.rename(
    columns={COUNTRY_LAND_AREA_DATA: "land area"}
)

In [68]:
# NOTE(review): `country_land_area_series` is the result of `reset_index()`,
# so it already appears to be a DataFrame and this wrapper looks redundant.
# Confirm before removing it, since later cells may rely on both names.
country_land_area_df = pd.DataFrame(country_land_area_series)

In [69]:
# Preview the first five rows to check the column layout.
country_land_area_df.head(n=5)

Unnamed: 0,Country,land area
0,Arab World,11232650.0
1,Caribbean small states,404850.0
2,Central Europe and the Baltics,1105054.0
3,Early-demographic dividend,33107750.0
4,East Asia & Pacific,24396750.0


## Country GDP data:

In [15]:
# Fetch every available year of GDP, then move the Country/Year index levels
# into ordinary columns and give the value column a readable name. Missing
# observations stay as NaN at this stage.
country_gdp_series = wb.get_series(COUNTRY_GDP_DATA, simplify_index=True)
country_gdp_series = country_gdp_series.reset_index()
country_gdp_series = country_gdp_series.rename(columns={COUNTRY_GDP_DATA: "GDP"})

In [16]:
# NOTE(review): `country_gdp_series` is the result of `reset_index()`, so it
# already appears to be a DataFrame and this wrapper looks redundant. Confirm
# before removing it, since later cells may rely on both names existing.
country_gdp_df = pd.DataFrame(country_gdp_series)

In [17]:
# Preview the first five rows; the earliest years have no GDP data (NaN).
country_gdp_df.head(n=5)

Unnamed: 0,Country,Year,GDP
0,Arab World,1960,
1,Arab World,1961,
2,Arab World,1962,
3,Arab World,1963,
4,Arab World,1964,


## Country income group data:

In [18]:
# Income-group classification comes from a local CSV, not the World Bank API.
# The path is relative to the notebook's working directory.
country_income_group_df = pd.read_csv(
    filepath_or_buffer="worldbank_classification.csv"
)

In [19]:
# Preview the first five rows to check the classification columns.
country_income_group_df.head(n=5)

Unnamed: 0,x,Economy,Code,Region,Income group,Lending category,Other
0,1,Afghanistan,AFG,South Asia,Low income,IDA,HIPC
1,2,Albania,ALB,Europe & Central Asia,Upper middle income,IBRD,
2,3,Algeria,DZA,Middle East & North Africa,Upper middle income,IBRD,
3,4,American Samoa,ASM,East Asia & Pacific,Upper middle income,,
4,5,Andorra,AND,Europe & Central Asia,High income,,


# Preprocess data: