In [1]:
from IPython.display import HTML
from IPython.display import display
# HTML payload injected once at the top of the notebook: the styling for the
# "Show/Hide Code" buttons and the JavaScript helper those buttons call.
baseCodeHide="""
<style>
.button {
    background-color: #008CBA;
    border: none;
    color: white;
    padding: 8px 22px;
    text-align: center;
    text-decoration: none;
    display: inline-block;
    font-size: 16px;
    margin: 4px 2px;
    cursor: pointer;
}
</style>
 <script>
   // Hide the first input area as soon as this output is rendered.
   var divTag0 = document.getElementsByClassName("input")[0]
   if (divTag0) {
      divTag0.style.display = 'none';
   }

    // Toggle the i-th (0-based) element with class "input".
    function toggleInput(i) {
      var divTag = document.getElementsByClassName("input")[i]
      if (!divTag) {
         return;
      }
      // An empty inline display value means the element was never hidden, so
      // only an explicit 'none' is treated as "currently hidden".
      if (divTag.style.display == 'none') {
         divTag.style.display = 'block';
       }
      else {
         divTag.style.display = 'none';
       }
  }
  </script>
  <!-- <button onclick="javascript:toggleInput(0)" class="button">Show Code</button> -->
"""
h=HTML(baseCodeHide)


display(h)

## Introduction

For the locations in which Kiva has active loans, our objective is to pair Kiva's data with additional data sources to estimate the welfare level of borrowers in specific regions, based on shared economic and demographic characteristics.

Kiva would like to be able to disaggregate these regional averages by:
* gender,
* sector,
* borrowing behavior 

in order to estimate a Kiva borrower’s level of welfare using all of the relevant information about them. Strong submissions will attempt to map vaguely described locations to more accurate geocodes.

## Loading Data

In [2]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode()

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Input directories, relative to the notebook's working directory.
# f1: Kiva files (kiva_loans.csv and kiva_mpi_region_locations.csv are read from here).
f1 = '../input/data-science-for-good-kiva-crowdfunding'
# f2: currency exchange rates.
# NOTE(review): "excahnge" looks misspelled — presumably it matches the real
# folder name of the dataset; confirm before correcting it.
f2 = '../input/currency-excahnge-rate'
# f3: ISO country-code lookup table.
f3 = '../input/countries-iso-codes'
# List what is available in the Kiva directory.
print(os.listdir(f1))


In [3]:
# ISO country codes: provides the 'Alpha-2 code' and 'Alpha-3 code' columns
# used as join keys further down.
codes = pd.read_csv(f3+'/wikipedia-iso-country-codes.csv')
# Currency exchange rates: joined further down on its LOCATION and TIME columns
# (matched against the alpha-3 country code and the loan year).
cer = pd.read_csv(f2+'/currency_exchange_rate.csv')

In [4]:
# Read the MPI region table and the loans table; each loan also gets a `year`
# column taken from its `date`.
mpi = pd.read_csv(f1 + '/kiva_mpi_region_locations.csv')
loans = pd.read_csv(f1 + '/kiva_loans.csv').assign(
    year=lambda frame: pd.to_datetime(frame['date']).dt.year
)
loans.head()

In [5]:
# Preview the first rows of the MPI region table.
mpi.head()

## Data Overview

In [6]:
import seaborn as sns
import matplotlib.pyplot as plt
# Horizontal bar chart with the number of distinct values in four loan columns.
fig, ax = plt.subplots(1, 1, figsize=(15, 6))
labelled_columns = [
    ('Activities', 'activity'),
    ('Sectors', 'sector'),
    ('Countries', 'country'),
    ('Currencies', 'currency'),
]
data = {label: len(loans[column].unique()) for label, column in labelled_columns}
sns.barplot(y=list(data.keys()), x=list(data.values()), orient='h')
plt.show()

Now we merge the loans with the country-codes dataset to obtain the alpha-3 codes, which the Plotly choropleth requires. We also use the currency exchange-rate dataset to convert amounts in different currencies into dollars.

In [7]:
# Attach the ISO codes to every loan (alpha-2 join), then the exchange rate for
# the loan's country and year. Both merges use pandas' default inner join, so
# loans without a match in either table are dropped.
# NOTE(review): the product below assumes `Value` converts `loan_amount` into
# dollars — confirm the units of both columns.
df = (
    loans
    .merge(codes, left_on='country_code', right_on='Alpha-2 code')
    .merge(cer, left_on=['Alpha-3 code', 'year'], right_on=['LOCATION', 'TIME'])
)
loan_dollars = df['loan_amount'] * df['Value']
df['loan_dollars'] = loan_dollars
df.head()

In [8]:
# Per-country mean of every numeric column. numeric_only=True keeps the
# aggregation from raising on the text columns with recent pandas releases.
gdf = df.groupby(['Alpha-3 code'], as_index=False).mean(numeric_only=True)
fig, axa = plt.subplots(1, 1, figsize=(15, 6))
# Distribution of the per-country mean loan amount, drawn on the axes created above.
sns.distplot(gdf.loan_dollars, ax=axa)

In [9]:
# Button calling toggleInput(9), i.e. toggling the input area at index 9.
display(HTML(
    '<button onclick="javascript:toggleInput(9)" class="button">Show/Hide Code</button>'
))

In [10]:
# World choropleth of the mean loan amount per country.
# numeric_only=True keeps the aggregation from raising on text columns with
# recent pandas releases.
gdf = df.groupby(['Alpha-3 code'], as_index=False).mean(numeric_only=True)
data = [dict(
    type='choropleth',
    colorscale='Jet',
    autocolorscale=False,
    locations=gdf['Alpha-3 code'],
    z=gdf.loan_dollars,
    text=gdf['Alpha-3 code'],
    marker=dict(
        line=dict(
            color='rgb(255,255,255)',
            width=2,
        ),
    ),
    # z is the per-country mean of loan_dollars, so the colorbar is labelled as such.
    colorbar=dict(title="Mean loan amount"),
)]

layout = dict(
    title='Loan Amounts',
    geo=dict(
        scope='world',
        # Plotly projection type names are lowercase.
        projection=dict(type='mercator'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

fig = dict(data=data, layout=layout)
iplot(fig)

In [11]:
# Script that hides the elements with class "input" at indices 8, 9 and 10.
# NOTE(review): these presumably are the button cell, the plot cell and this
# cell itself — the indices depend on the cell order, so confirm after any
# cell is added or removed above.
js = """<script>
 var divTag1 = document.getElementsByClassName("input")[8]
   divTag1.style.display = 'none';
  var divTag1 = document.getElementsByClassName("input")[9]
   divTag1.style.display = 'none'; 
   var divTag1 = document.getElementsByClassName("input")[10]
   divTag1.style.display = 'none'; 
   </script>
"""
display(HTML(js))

In [12]:
# Button calling toggleInput(12), i.e. toggling the input area at index 12.
display(HTML(
    '<button onclick="javascript:toggleInput(12)" class="button">Show/Hide Code</button>'
))

In [13]:
# World choropleth of the number of loans per country.
# count() gives, per country, the number of non-null entries in each column;
# the loan_dollars count is what gets plotted.
gdf = df.groupby(['Alpha-3 code'], as_index=False).count()
data = [dict(
    type='choropleth',
    colorscale='Jet',
    autocolorscale=False,
    locations=gdf['Alpha-3 code'],
    z=gdf.loan_dollars,
    text=gdf['Alpha-3 code'],
    marker=dict(
        line=dict(
            color='rgb(255,255,255)',
            width=2,
        ),
    ),
    colorbar=dict(title="Count"),
)]

layout = dict(
    title='Number of Loans',
    geo=dict(
        scope='world',
        # Plotly projection type names are lowercase.
        projection=dict(type='mercator'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

fig = dict(data=data, layout=layout)
iplot(fig)

In [14]:
# Script that hides the elements with class "input" at indices 11, 12 and 13.
# NOTE(review): these presumably are the button cell, the plot cell and this
# cell itself — the indices depend on the cell order, so confirm after any
# cell is added or removed above.
js = """<script>
 var divTag1 = document.getElementsByClassName("input")[11]
   divTag1.style.display = 'none';
  var divTag1 = document.getElementsByClassName("input")[12]
   divTag1.style.display = 'none'; 
   var divTag1 = document.getElementsByClassName("input")[13]
   divTag1.style.display = 'none'; 
   </script>
"""
display(HTML(js))

## Sectors

In [15]:
# Button calling toggleInput(15), i.e. toggling the input area at index 15.
display(HTML(
    '<button onclick="javascript:toggleInput(15)" class="button">Show/Hide Code</button>'
))

In [16]:
# Distinct sectors in order of first appearance; later cells reuse this array.
sectors = df.sector.unique()

# Min / max / mean / std of loan_dollars per sector, computed in one grouped
# pass instead of re-filtering the whole frame for every sector and statistic.
# reindex keeps the rows in the same order as `sectors`.
sector_stats = (
    df.groupby('sector')['loan_dollars']
    .agg(['min', 'max', 'mean', 'std'])
    .reindex(sectors)
)
min_loans = sector_stats['min'].tolist()
max_loans = sector_stats['max'].tolist()
mean_loans = sector_stats['mean'].tolist()
std_loans = sector_stats['std'].tolist()

# 2x2 grid: one horizontal bar chart per statistic.
fig, axa = plt.subplots(2, 2, figsize=(18, 15))
panels = [
    (axa[0][0], 'min loan amounts', min_loans),
    (axa[0][1], 'max loan amounts', max_loans),
    (axa[1][0], 'mean loan amounts', mean_loans),
    (axa[1][1], 'std of loan amounts', std_loans),
]
for panel_ax, panel_title, panel_values in panels:
    panel_ax.set_title(panel_title)
    sns.barplot(y=sectors, x=panel_values, orient='h', ax=panel_ax)

In [17]:
# Script that hides the elements with class "input" at indices 14, 15 and 16,
# following the same three-consecutive-indices pattern as the earlier hide
# scripts (8-10 and 11-13).
# NOTE(review): these presumably are the button cell, the plot cell and this
# cell itself — the indices depend on the cell order, so confirm after any
# cell is added or removed above.
js = """<script>
 var divTag1 = document.getElementsByClassName("input")[14]
   divTag1.style.display = 'none';
  var divTag1 = document.getElementsByClassName("input")[15]
   divTag1.style.display = 'none';
   var divTag1 = document.getElementsByClassName("input")[16]
   divTag1.style.display = 'none';
   </script>
"""
display(HTML(js))

In [18]:
# Loans whose alpha-3 code is 'COL', joined to the MPI table on the region name
# and averaged per region.
coldf = df[df['Alpha-3 code']=='COL']
coldf = pd.merge(coldf,mpi,on='region')
# numeric_only=True keeps the aggregation from raising on the text columns
# with recent pandas releases.
coldf.groupby(['region'],as_index=False).mean(numeric_only=True)

In [19]:
# One indicator column per sector, summed per region (i.e. loans per sector and
# region), then joined to the MPI table on the region name.
sector_flags = pd.get_dummies(df['sector'])
tmp = (
    pd.concat([sector_flags, df[['region']]], axis=1)
    .groupby(['region'], as_index=False)
    .sum()
    .merge(mpi, on='region')
)
tmp.head()

In [21]:
import math
import matplotlib
# Small multiples: one world map per sector, arranged on a grid. Each map marks
# the regions that have loans in that sector; marker size grows with the
# logarithm of the loan count.
COLS = 3
# As many rows as the sectors actually need, rather than a fixed grid size.
ROWS = int(math.ceil(len(sectors) / float(COLS)))

data = []
layout = go.Layout(
    title='Sectors',
    showlegend=False,
    width=1000, height=1000,
    geo=dict(
        scope='world',
        showframe=False,
        #projection=dict( type = 'Mercator'),
        showland=True,
        landcolor='rgb(217, 217, 217)',
        subunitwidth=1,
        countrywidth=1,
        subunitcolor="rgb(255, 255, 255)",
        countrycolor="rgb(255, 255, 255)"
    ),)

for i, sector in enumerate(sectors):
    # The first map uses the key 'geo', the following ones 'geo2', 'geo3', ...
    geo_key = 'geo' + str(i + 1) if i != 0 else 'geo'

    # Grid cell of this sector: filled left to right, starting from the top row
    # (domain y grows upwards, hence the flipped row index).
    grid_row, grid_col = divmod(i, COLS)
    y = ROWS - 1 - grid_row
    layout[geo_key] = dict(
        scope='world',
        showland=True,
        showframe=False,
        landcolor='rgb(229, 229, 229)',
        showcountries=False,
        domain=dict(
            x=[float(grid_col) / float(COLS), float(grid_col + 1) / float(COLS)],
            y=[float(y) / float(ROWS), float(y + 1) / float(ROWS)],
        ),
        subunitcolor="rgb(255, 255, 255)",
    )

    # Regions with at least one loan in this sector, drawn as a single trace
    # (one point per region) instead of one trace per region.
    active = tmp[tmp[sector] > 0]
    counts = active[sector]
    if len(active) > 0:
        data.append(
            dict(
                type='scattergeo',
                geo=geo_key,
                lon=active['lon'].tolist(),
                lat=active['lat'].tolist(),
                text=[sector + str(count) for count in counts],
                marker=dict(
                    size=[math.log(count) * 3 for count in counts],
                    color='rgb(0,0,200,0.5)',
                    opacity=0.5,
                    line=dict(width=0.5, color='rgb(40,40,40)'),
                    sizemode='diameter',
                ),
            )
        )

    # Text label with the sector name, placed at a fixed position on the map.
    data.append(
        dict(
            type='scattergeo',
            showlegend=False,
            lon=[28],
            lat=[-55],
            geo=geo_key,
            text=[sector],
            mode='text',
        )
    )

fig = {'data': data, 'layout': layout}
iplot(fig)

## Multidimensional Poverty Index

The Global Multidimensional Poverty Index (MPI) was developed in 2010 by the Oxford Poverty & Human Development Initiative (OPHI) and the United Nations Development Programme and uses different factors to determine poverty beyond income-based lists: https://en.wikipedia.org/wiki/Multidimensional_Poverty_Index

We use another scattergeo plot to depict, through colors, the different MPI values in the different areas of our dataset.

In [22]:
import matplotlib

# Map of the MPI locations, each point coloured by its MPI value through
# matplotlib's 'magma' colormap.
cmap = plt.get_cmap('magma')

# Rows lacking coordinates cannot be placed and rows lacking an MPI value
# cannot be coloured, so both are left out.
mpi_points = mpi.dropna(subset=['lon', 'lat', 'MPI'])

# A single trace holding every point. The colormap returns RGBA float tuples,
# which are not Plotly colours, so each one is converted to a hex string.
mpis = [
    dict(
        type='scattergeo',
        lon=mpi_points['lon'].tolist(),
        lat=mpi_points['lat'].tolist(),
        text=['MPI:' + str(value) for value in mpi_points['MPI']],
        marker=dict(
            size=9,
            color=[matplotlib.colors.to_hex(cmap(value)) for value in mpi_points['MPI']],
            line=dict(width=0.5, color='rgb(40,40,40)'),
            sizemode='diameter',
        ),
    )
]

layout = go.Layout(
    title='MPI',
    showlegend=False,
    geo=dict(
        scope='world',
        #projection=dict( type = 'Mercator'),
        showland=True,
        landcolor='rgb(217, 217, 217)',
        subunitwidth=1,
        countrywidth=1,
        subunitcolor="rgb(255, 255, 255)",
        countrycolor="rgb(255, 255, 255)"
    ),)

fig = dict(data=mpis, layout=layout)
# The figure now only contains valid Plotly values, so validation stays on.
iplot(fig)

In [50]:
# Heatmap of the correlation between each sector column of `tmp` and MPI
# (the MPI row itself is dropped, leaving one row per sector).
cols = list(sectors) + ['MPI']
f, axa = plt.subplots(1, 1, figsize=(15, 10))
mpi_correlations = tmp[cols].corr()[['MPI']].drop('MPI', axis=0)
sns.heatmap(mpi_correlations)