# Data-oriented Programming Paradigms
### Exercise 3 - Group 32

Question 14: What is the most accurate overview of flows of refugees between countries that can be obtained? Are there typical characteristics of refugee origin and destination countries? Are there typical characteristics of large flows of refugees? Can countries that will produce large numbers of refugees be predicted? Can refugee flows be predicted?

In [19]:
import pandas as pd
import matplotlib as plt
import numpy as np

pd.options.mode.chained_assignment = None  # default='warn'

In [53]:
'''
Load & format World Governance Index data
'''

def format_wgi(df, name):
    df.columns = df.iloc[3].values
    df = df.iloc[4:]
    df['country'] = name
    df.index.name = 'year'
    df.reset_index(level=0, inplace=True)

    return df

wgi = pd.read_csv('data/WGIData.csv')
wgi.drop(['Unnamed: 24'], axis=1, inplace = True)

names = list(set(wgi['Country Name'].values))
codes = list(set(wgi['Indicator Code'].values))
wgi_new = pd.DataFrame(columns=codes)

for name in names:
    sel = wgi[wgi['Country Name'] == name].T
    
    if wgi_new.empty:
        wgi_new = format_wgi(sel, name)
    else:
        wgi_new = wgi_new.append(format_wgi(sel, name))

df_wgi = wgi_new.set_index(['country', 'year'])
df_wgi

Unnamed: 0_level_0,Unnamed: 1_level_0,CC.EST,CC.NO.SRC,CC.PER.RNK,CC.PER.RNK.LOWER,CC.PER.RNK.UPPER,CC.STD.ERR,GE.EST,GE.NO.SRC,GE.PER.RNK,GE.PER.RNK.LOWER,...,RL.PER.RNK,RL.PER.RNK.LOWER,RL.PER.RNK.UPPER,RL.STD.ERR,VA.EST,VA.NO.SRC,VA.PER.RNK,VA.PER.RNK.LOWER,VA.PER.RNK.UPPER,VA.STD.ERR
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Nepal,1996,-0.639209,2,31.7204,7.52688,53.7634,0.340507,-0.380564,1,41.5301,25.6831,...,51.2563,34.1709,63.3166,0.320348,-0.064157,4,49.5,35,62,0.261457
Nepal,1998,-0.738918,3,29.8969,7.2165,46.9072,0.295625,-0.621126,2,25.3886,11.399,...,51.5,35.5,67.5,0.283259,-0.19328,4,44.2786,32.3383,56.2189,0.25609
Nepal,2000,-0.666684,4,30.9645,10.1523,48.2234,0.270204,-0.458549,3,38.9744,18.9744,...,42.5743,29.703,55.9406,0.238717,-0.280599,4,42.7861,28.3582,53.7313,0.254043
Nepal,2002,-0.357501,4,43.9394,24.7475,60.6061,0.287161,-0.456825,4,38.2653,23.9796,...,38.1188,22.7723,50,0.227856,-0.79816,6,24.8756,15.9204,34.3284,0.189453
Nepal,2003,-0.458296,6,40.404,21.2121,56.5656,0.236859,-0.515905,4,35.7143,19.3878,...,32.6733,17.8218,47.0297,0.224429,-0.920003,7,22.3881,11.9403,29.8508,0.181176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Taiwan, China",2014,0.807123,11,76.9231,70.6731,81.7308,0.13995,1.36783,8,87.9808,79.3269,...,85.5769,78.3654,87.9808,0.169037,0.881724,10,72.9064,68.9655,83.2512,0.138243
"Taiwan, China",2015,0.773747,11,76.9231,68.2692,80.2885,0.137437,1.39576,8,87.9808,80.2885,...,84.6154,78.8461,88.4615,0.163673,0.89849,10,74.8769,70.936,84.2365,0.137359
"Taiwan, China",2016,0.8802,11,79.3269,71.1539,81.7308,0.146799,1.36144,8,89.4231,79.3269,...,85.5769,77.8846,89.4231,0.181762,1.00929,10,79.803,71.9212,89.6552,0.1325
"Taiwan, China",2017,0.96478,11,81.25,77.4039,83.6539,0.13301,1.26361,8,86.5385,78.8461,...,84.6154,77.8846,88.4615,0.171213,1.01469,10,80.7882,73.399,89.6552,0.133297


In [54]:
'''
Load Human Development Index data
'''

def format_hdi(df, name):
    df.columns = ['HDI']
    df = df.iloc[1:]
    df['country'] = name
    df.index.name = 'year'
    df.reset_index(level=0, inplace=True)

    return df

hdi = pd.read_csv('data/HDI.csv', skiprows=1, encoding='latin-1', usecols = [i for i in range(1,31)])

names = list(set(hdi['Country'].values))
hdi_new = pd.DataFrame(columns=['HDI'])

for name in names:
    sel = hdi[hdi['Country'] == name].T
    
    if hdi_new.empty:
        hdi_new = format_hdi(sel, name)
        #display(hdi_new)
    else:
        hdi_new = hdi_new.append(format_hdi(sel, name))

df_hdi = hdi_new.set_index(['country', 'year'])
df_hdi

Unnamed: 0_level_0,Unnamed: 1_level_0,HDI
country,year,Unnamed: 2_level_1
Congo,1990,0.531
Congo,1991,0.530
Congo,1992,0.530
Congo,1993,0.524
Congo,1994,0.521
...,...,...
"Palestine, State of",2014,0.682
"Palestine, State of",2015,0.685
"Palestine, State of",2016,0.687
"Palestine, State of",2017,0.689


In [95]:
'''
Load Gross Domestic Product data
'''

def format_gdp(df, name):
    df.columns = ['GDP (current US$)']
    df = df.iloc[4:]
    df['country'] = name
    df.index.name = 'year'
    df.reset_index(level=0, inplace=True)
    df['year'] = df['year'].str[:4]

    return df

gdp = pd.read_csv('data/GDPData.csv', usecols = [i for i in range(0,28)])
#drop footer rows
gdp = gdp[:-5]

names = list(set(gdp['Country Name'].values))
gdp_new = pd.DataFrame(columns=['GDP (current US$)'])


for name in names:
    sel = gdp[gdp['Country Name'] == name].T

    if gdp_new.empty:
        gdp_new = format_gdp(sel, name)
        #display(gdp_new.year)
    else:
        gdp_new = gdp_new.append(format_gdp(sel, name))

df_gdp = gdp_new.set_index(['country', 'year'])
df_gdp

Unnamed: 0_level_0,Unnamed: 1_level_0,GDP (current US$)
country,year,Unnamed: 2_level_1
Nepal,1995,4401104417.67068
Nepal,1996,4521580381.47139
Nepal,1997,4918691916.53516
Nepal,1998,4856255044.39064
Nepal,1999,5033642384.10596
...,...,...
Ghana,2014,53601126663.9079
Ghana,2015,49181854798.2552
Ghana,2016,55009730600.0307
Ghana,2017,58996776237.7603
