# World Economic Report Analysis

## Preparing and QA Data

In [1]:
# Importing Libraries

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns

# Setting Up style

plt.style.use("my_dark_creative.mplstyle")

In [2]:
# Importing Dataset world Bank Excel File

path = '../Data/World+Economic+Indicators/WorldBank.xlsx'


# Creating new column and renaming existing columns for better maneuverability

world_bank = pd.read_excel(path, parse_dates=['Year']).assign(
    population_millions = lambda x : x['GDP (USD)'] / (x['GDP per capita (USD)'] * 1000000)
).rename(columns = {
    'Country Name' : 'country',
    'Country Code' : 'country_code',
    'Region' : 'region',
    'IncomeGroup' : 'income_group',
    'Year' : 'year',
    'Birth rate, crude (per 1,000 people)' : 'birth_rate_1000_people',
    'Death rate, crude (per 1,000 people)' : 'death_rate_1000_people',
    'Electric power consumption (kWh per capita)' : 'electric_power_consumption_kwh',
    "individuals using the Internet (% of population)" : 'individuals_using_internet',
    'Infant mortality rate (per 1,000 live births)' : 'infant_mortality_rate',
    'Life expectancy at birth (years)' : 'life_expectancy',
    'Population density (people per sq. km of land area)' : 'population_density',
    'Unemployment (% of total labor force) (modeled ILO estimate)' : 'unemployment_ratio'
})

world_bank.head()

Unnamed: 0,country,country_code,region,income_group,year,birth_rate_1000_people,death_rate_1000_people,electric_power_consumption_kwh,GDP (USD),GDP per capita (USD),Individuals using the Internet (% of population),infant_mortality_rate,life_expectancy,population_density,unemployment_ratio,population_millions
0,Afghanistan,AFG,South Asia,Low income,2018-01-01,,,,19363000000.0,520.897,,47.9,,56.9378,1.542,37.172416
1,Afghanistan,AFG,South Asia,Low income,2017-01-01,33.211,6.575,,20191800000.0,556.302,13.5,49.5,64.13,55.596,1.559,36.296472
2,Afghanistan,AFG,South Asia,Low income,2016-01-01,33.981,6.742,,19362600000.0,547.228,11.2,51.2,63.763,54.1971,1.634,35.383058
3,Afghanistan,AFG,South Asia,Low income,2015-01-01,34.809,6.929,,19907100000.0,578.466,8.26,53.1,63.377,52.7121,1.679,34.413604
4,Afghanistan,AFG,South Asia,Low income,2014-01-01,35.706,7.141,,20484900000.0,613.856,7.0,55.1,62.966,51.1148,1.735,33.370856


In [3]:
world_bank.describe()

Unnamed: 0,year,birth_rate_1000_people,death_rate_1000_people,electric_power_consumption_kwh,GDP (USD),GDP per capita (USD),Individuals using the Internet (% of population),infant_mortality_rate,life_expectancy,population_density,unemployment_ratio,population_millions
count,12449,11440.0,11416.0,5848.0,9578.0,9575.0,5064.0,9984.0,11176.0,11845.0,5208.0,9575.0
mean,1988-12-31 15:03:03.050847488,28.643276,10.588539,3175.294686,170074000000.0,8231.812259,23.334471,51.704437,64.044692,318.86137,8.295079,29.956728
min,1960-01-01 00:00:00,6.9,1.127,0.0,8824450.0,34.7906,0.0,1.4,18.907,0.098625,0.14,0.008913
25%,1974-01-01 00:00:00,16.6,6.86375,390.38575,1393010000.0,513.1455,0.594949,14.475,55.91775,19.7834,3.687,1.051937
50%,1989-01-01 00:00:00,27.5455,9.2,1541.895,7275305000.0,1852.81,8.406225,37.0,67.276,64.0075,6.775,5.390515
75%,2004-01-01 00:00:00,40.88125,12.687,4313.7675,48577820000.0,7774.565,41.29595,78.2,72.69225,144.823,11.21225,17.075158
max,2018-01-01 00:00:00,58.227,54.444,54799.2,20500000000000.0,189171.0,100.0,279.4,85.4171,21389.1,37.94,1391.89528
std,,13.131893,5.489382,4467.139298,897986600000.0,16173.539954,28.319388,46.131039,11.491087,1593.406041,6.290703,115.803824


In [4]:
# Changing Datatypes

convert_dict = {
    'birth_rate_1000_people' : 'float16',
    'death_rate_1000_people' : 'float16',
    'income_group' : 'category',
    'electric_power_consumption_kwh' : 'float32',
    'infant_mortality_rate' : 'float16',
    'life_expectancy' : 'float16',
    'population_density' : 'float32',
    'unemployment_ratio' : 'float16',
    'population_millions' : 'float32',
}

world_bank = world_bank.astype(convert_dict)

In [5]:
# Checking Memory Usage and Data type that we have changed
world_bank.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12449 entries, 0 to 12448
Data columns (total 16 columns):
 #   Column                                            Non-Null Count  Dtype         
---  ------                                            --------------  -----         
 0   country                                           12449 non-null  object        
 1   country_code                                      12449 non-null  object        
 2   region                                            12449 non-null  object        
 3   income_group                                      12449 non-null  category      
 4   year                                              12449 non-null  datetime64[ns]
 5   birth_rate_1000_people                            11440 non-null  float16       
 6   death_rate_1000_people                            11416 non-null  float16       
 7   electric_power_consumption_kwh                    5848 non-null   float32       
 8   GDP (USD)                 

In [6]:
# Importing Human Development Index Data on year 2014

hdi_2014 = pd.read_csv('../Data/World+Economic+Indicators/HDI.csv', usecols=['iso3', 'hdi_2014'])

hdi_2014.head()

Unnamed: 0,iso3,hdi_2014
0,AFG,0.479
1,AGO,0.563
2,ALB,0.792
3,AND,0.871
4,ARE,0.859


In [7]:
# Filtering World Bank Data on Year 2014 and joining this on HDI csv file on country code

wb_hdi_2014 = pd.merge(
                world_bank.query("year.dt.year == 2014"), hdi_2014,
                left_on = 'country_code', right_on = 'iso3',
                how = 'left'
            )


wb_hdi_2014.head()

Unnamed: 0,country,country_code,region,income_group,year,birth_rate_1000_people,death_rate_1000_people,electric_power_consumption_kwh,GDP (USD),GDP per capita (USD),Individuals using the Internet (% of population),infant_mortality_rate,life_expectancy,population_density,unemployment_ratio,population_millions,iso3,hdi_2014
0,Afghanistan,AFG,South Asia,Low income,2014-01-01,35.71875,7.140625,,20484900000.0,613.856,7.0,55.09375,62.96875,51.114799,1.735352,33.370857,AFG,0.479
1,Albania,ALB,Europe & Central Asia,Upper middle income,2014-01-01,12.257812,7.21875,2309.370117,13228200000.0,4578.67,60.1,8.898438,77.8125,105.442001,17.484375,2.889092,ALB,0.792
2,Algeria,DZA,Middle East & North Africa,Upper middle income,2014-01-01,25.53125,4.710938,1362.869995,214000000000.0,5493.06,29.5,21.796875,75.875,16.342501,10.203125,38.958248,DZA,0.735
3,American Samoa,ASM,East Asia & Pacific,Upper middle income,2014-01-01,17.5,4.199219,,643000000.0,11525.2,,,,278.954987,,0.055791,,
4,Andorra,AND,Europe & Central Asia,High income: nonOECD,2014-01-01,,,,3350740000.0,42300.3,95.9,3.199219,,168.537994,,0.079213,AND,0.871


In [8]:
# Checking Summary

wb_hdi_2014.describe()

Unnamed: 0,year,birth_rate_1000_people,death_rate_1000_people,electric_power_consumption_kwh,GDP (USD),GDP per capita (USD),Individuals using the Internet (% of population),infant_mortality_rate,life_expectancy,population_density,unemployment_ratio,population_millions,hdi_2014
count,211,204.0,204.0,139.0,201.0,201.0,199.0,190.0,197.0,208.0,186.0,201.0,188.0
mean,2014-01-01 00:00:00,20.859375,7.691406,4270.600586,390777900000.0,17779.147856,45.738525,23.859375,71.8125,424.474243,8.078125,35.668407,0.709527
min,2014-01-01 00:00:00,7.898438,1.126953,39.055801,37291800.0,248.845,0.99,1.799805,49.90625,0.137154,0.189941,0.010972,0.37
25%,2014-01-01 00:00:00,11.976562,5.835938,858.1745,6047810000.0,2163.16,17.73,6.800781,66.375,36.550049,3.767578,1.174332,0.58125
50%,2014-01-01 00:00:00,17.8125,7.5,2588.300049,31335000000.0,6684.8,46.16,15.046875,73.4375,88.684647,6.515625,7.130571,0.732
75%,2014-01-01 00:00:00,28.65625,9.21875,5478.100098,206000000000.0,20258.0,69.89,36.78125,77.5625,210.380005,10.804688,23.589827,0.82475
max,2014-01-01 00:00:00,48.0,16.4375,53832.5,17500000000000.0,189171.0,98.16,93.0,84.0,19478.800781,28.03125,1359.233765,0.952
std,,10.28125,2.707031,5981.463867,1546117000000.0,27113.375249,28.934518,21.75,8.132812,1986.298096,6.019531,136.95726,0.152831


## Preparing Data for Visualization