# A Visual Analytics Exploration of Development, Energy Use, Health and Migration Data

---

## Introduction

Using data from the World Bank I explore the spatial and temporal relationships between development, energy use, health and migration data. 

The indicators used are as follows:
- GDP Growth
- Energy use (kg of oil equivalent per capita)
- Annual deforestation (% of change)	
- Refugee population by country or territory
- Life expectancy at birth, total (years)

In [73]:
import pandas as pd
import numpy as np
import seaborn as sns
import folium 
import mplleaflet
from pandas.io import wb
import matplotlib.pyplot as plt
%matplotlib inline

In [74]:
df = wb.download(indicator=['BG.GSR.NFSV.GD.ZS','AG.LND.FRST.ZS','EG.USE.PCAP.KG.OE','SM.POP.REFG','SP.DYN.LE00.IN'],
                    country='all',
                    start=2000,end=2013)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,BG.GSR.NFSV.GD.ZS,AG.LND.FRST.ZS,EG.USE.PCAP.KG.OE,SM.POP.REFG,SP.DYN.LE00.IN
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arab World,2013,14.502663,2.838442,,7794808.0,70.631305
Arab World,2012,13.977559,2.856322,1843.498772,6638911.0,70.428991
Arab World,2011,14.969959,2.874204,1747.429223,6657656.0,70.219347
Arab World,2010,17.142639,3.417508,1786.126353,7077529.0,70.003017
Arab World,2009,19.115962,3.425092,1715.097489,6955072.0,69.773806


In [76]:
df.columns

Index(['BG.GSR.NFSV.GD.ZS', 'AG.LND.FRST.ZS', 'EG.USE.PCAP.KG.OE',
       'SM.POP.REFG', 'SP.DYN.LE00.IN'],
      dtype='object')

In [77]:
df.columns= ['ServicesPercGDP', 'Deforestation','EnergyUse','RefugeePop','LifeExpTotal']

In [78]:
df.reset_index(inplace=True)
df.fillna(0);

In [79]:
df.head()

Unnamed: 0,country,year,ServicesPercGDP,Deforestation,EnergyUse,RefugeePop,LifeExpTotal
0,Arab World,2013,14.502663,2.838442,,7794808.0,70.631305
1,Arab World,2012,13.977559,2.856322,1843.498772,6638911.0,70.428991
2,Arab World,2011,14.969959,2.874204,1747.429223,6657656.0,70.219347
3,Arab World,2010,17.142639,3.417508,1786.126353,7077529.0,70.003017
4,Arab World,2009,19.115962,3.425092,1715.097489,6955072.0,69.773806


### Get the country names from the dataframe and drop the aggregates

In [139]:
country_names = df['country'].unique()
len(country_names)

248

In [140]:
keep_ctry = country_names[34:]
keep_ctry[:10]

array(['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
       'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba'], dtype=object)

In [82]:
countries=wb.get_countries()
countries.head()

Unnamed: 0,adminregion,capitalCity,iso3c,incomeLevel,iso2c,latitude,lendingType,longitude,name,region
0,,Oranjestad,ABW,High income: nonOECD,AW,12.5167,Not classified,-70.0167,Aruba,Latin America & Caribbean (all income levels)
1,South Asia,Kabul,AFG,Low income,AF,34.5228,IDA,69.1761,Afghanistan,South Asia
2,,,AFR,Aggregates,A9,,Aggregates,,Africa,Aggregates
3,Sub-Saharan Africa (developing only),Luanda,AGO,Upper middle income,AO,-8.81155,IBRD,13.242,Angola,Sub-Saharan Africa (all income levels)
4,Europe & Central Asia (developing only),Tirane,ALB,Upper middle income,AL,41.3317,IBRD,19.8172,Albania,Europe & Central Asia (all income levels)


### Get Lat, Long Data for countries

In [90]:
country_lonlat_df = pd.DataFrame( {'Longitude': countries['longitude'], 
                                   'Latitude': countries['latitude']}
                                 )
country_lonlat_df.index= countries['name']
country_lonlat_df.reset_index(inplace=True)
country_lonlat_df.dropna(inplace=True)
country_lonlat_df.head()

Unnamed: 0,name,Latitude,Longitude
0,Aruba,12.5167,-70.0167
1,Afghanistan,34.5228,69.1761
2,Africa,,
3,Angola,-8.81155,13.242
4,Albania,41.3317,19.8172


In [121]:
country_lonlat_df.to_excel('country_latlong.xlsx')

In [122]:
lat_long = pd.read_excel('country_latlong_mod.xlsx')

In [125]:
lat_long.head()

Unnamed: 0,name,Latitude,Longitude
0,Aruba,12.5167,-70.0167
1,Afghanistan,34.5228,69.1761
2,Angola,-8.81155,13.242
3,Albania,41.3317,19.8172
4,Andorra,42.5075,1.5218


In [126]:
import vincent
vincent.core.initialize_notebook()

In [None]:
df[35:].plot(x='country',subplots=True,figsize=(15,25), use_index=True, rot=90, colormap=plt.cm.viridis, kind='line')

In [None]:
len(df['country'].unique())

In [None]:
df.to_excel('wbdata.xlsx')