In [21]:
import pandas as pd
import numpy as np
from scipy.stats.stats import pearsonr
import matplotlib.pyplot as plt
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.figsize'] = 9.5,9.5
def applyPlotStyle():
    plt.xlim([0,1])
    plt.ylim([0,1])



cdr_path = '/Users/JackShipway/Desktop/UCLProject/Data/IvoryCoast/CDR/StaticMetrics'
dhs_path = '/Users/JackShipway/Desktop/UCLProject/Data/IvoryCoast/DHS/Metrics'
pop_path = '/Users/JackShipway/Desktop/UCLProject/Data/IvoryCoast/Population'

First I'm going to read all of the necessary data sources

In [22]:
''' DHS Data '''
dhs = pd.DataFrame(pd.read_csv(dhs_path+'/DHSData.csv'))
malaria_1 = dhs.groupby('Adm_1')['MalariaPerPop'].mean().reset_index()
malaria_2 = dhs.groupby('Adm_2')['MalariaPerPop'].mean().reset_index()
malaria_3 = dhs.groupby('Adm_3')['MalariaPerPop'].mean().reset_index()
malaria_4 = dhs.groupby('Adm_4')['MalariaPerPop'].mean().reset_index()

''' CDR Data '''
# Activity
activity_1 = pd.DataFrame(pd.read_csv(cdr_path+'/Activity/activity_adm1.csv'))
activity_2 = pd.DataFrame(pd.read_csv(cdr_path+'/Activity/activity_adm2.csv'))
activity_3 = pd.DataFrame(pd.read_csv(cdr_path+'/Activity/activity_adm3.csv'))
activity_4 = pd.DataFrame(pd.read_csv(cdr_path+'/Activity/activity_adm4.csv'))
# Entropy
entropy_1 = pd.DataFrame(pd.read_csv(cdr_path+'/Entropy/entropy_adm1.csv'))
entropy_2 = pd.DataFrame(pd.read_csv(cdr_path+'/Entropy/entropy_adm2.csv'))
entropy_3 = pd.DataFrame(pd.read_csv(cdr_path+'/Entropy/entropy_adm3.csv'))
entropy_4 = pd.DataFrame(pd.read_csv(cdr_path+'/Entropy/entropy_adm4.csv'))
# Median Degree
degree_1 = pd.DataFrame(pd.read_csv(cdr_path+'/MedianDegree/Degree/total_deg_adm1.csv'))
degree_2 = pd.DataFrame(pd.read_csv(cdr_path+'/MedianDegree/Degree/total_deg_adm2.csv'))
degree_3 = pd.DataFrame(pd.read_csv(cdr_path+'/MedianDegree/Degree/total_deg_adm3.csv'))
degree_4 = pd.DataFrame(pd.read_csv(cdr_path+'/MedianDegree/Degree/total_deg_adm4.csv'))
# Introversion
introversion_1 = pd.DataFrame(pd.read_csv(cdr_path+'/Introversion/introversion_adm1.csv'))
introversion_2 = pd.DataFrame(pd.read_csv(cdr_path+'/Introversion/introversion_adm2.csv'))
introversion_3 = pd.DataFrame(pd.read_csv(cdr_path+'/Introversion/introversion_adm3.csv'))
introversion_4 = pd.DataFrame(pd.read_csv(cdr_path+'/Introversion/introversion_adm4.csv'))

''' Population Data '''
pop = pd.DataFrame(pd.read_csv(pop_path+'/DHS_pop_Adm_1234.csv'))

# At level 3 and 4, there are a small number of missing data points, so we remove them
for i in np.setdiff1d(activity_3['Adm_3'], malaria_3['Adm_3']):
    activity_3 = activity_3[activity_3['Adm_3'] != i]
    entropy_3 = entropy_3[entropy_3['Adm_3'] != i]
    introversion_3 = introversion_3[introversion_3['Adm_3'] != i]
    degree_3 = degree_3[degree_3['Adm_3'] != i]
    
for j in np.setdiff1d(activity_4['Adm_4'], malaria_4['Adm_4']):
    activity_4 = activity_4[activity_4['Adm_4'] != j]
    entropy_4 = entropy_4[entropy_4['Adm_4'] != j]
    introversion_4 = introversion_4[introversion_4['Adm_4'] != j]
    degree_4 = degree_4[degree_4['Adm_4'] != j]
for k in np.setdiff1d(malaria_4['Adm_4'], activity_4['Adm_4']):
    malaria_4 = malaria_4[malaria_4['Adm_4'] != k]
    entropy_4 = entropy_4[entropy_4['Adm_4'] != k]
    introversion_4 = introversion_4[introversion_4['Adm_4'] != k]
    degree_4 = degree_4[degree_4['Adm_4'] != k]
    
for l in np.setdiff1d(activity_4['Adm_4'], entropy_4['Adm_4']):
    activity_4 = activity_4[activity_4['Adm_4'] != l]
    entropy_4 = entropy_4[entropy_4['Adm_4'] != l]
    malaria_4 = malaria_4[malaria_4['Adm_4'] != l]
    introversion_4 = introversion_4[introversion_4['Adm_4'] != l]
    degree_4 = degree_4[degree_4['Adm_4'] != l]
    
for m in np.setdiff1d(entropy_4['Adm_4'], activity_4['Adm_4']):
    activity_4 = activity_4[activity_4['Adm_4'] != m]
    malaria_4 = malaria_4[malaria_4['Adm_4'] != m]
    entropy_4 = entropy_4[entropy_4['Adm_4'] != m]
    introversion_4 = introversion_4[introversion_4['Adm_4'] != m]
    degree_4 = degree_4[degree_4['Adm_4'] != m]

Now I perform a Pearson's Moment Correlation Coefficient test, and plot the scatter diagrams for each pair of variables, starting with activity

In [11]:
print 'Activity PMCC:'
print pearsonr(activity_1['Vol']/max(activity_1['Vol']), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.scatter(activity_1['Vol']/max(activity_1['Vol']), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.xlabel('Activity Adm_1')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(activity_2['Vol']/max(activity_2['Vol']), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.scatter(activity_2['Vol']/max(activity_2['Vol']), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.xlabel('Activity Adm_2')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(activity_3['Vol']/max(activity_3['Vol']), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.scatter(activity_3['Vol']/max(activity_3['Vol']), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.xlabel('Activity Adm_3')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(activity_4['Vol']/max(activity_4['Vol']), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.scatter(activity_4['Vol']/max(activity_4['Vol']), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.xlabel('Activity Adm_4')
plt.ylabel('Malaria Cases Per Person')
plt.show()

Activity PMCC:
(0.26877891802419518, 0.35279831979446102)


(0.17517441103018944, 0.32952744556659719)


(-0.027176865531348054, 0.78733390183220153)


(-0.0082807147730384976, 0.92697400160122712)


Now the same for Entropy

In [12]:
print 'Entropy PMCC:'
print pearsonr(entropy_1['Entropy']/max(entropy_1['Entropy']), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.scatter(entropy_1['Entropy']/max(entropy_1['Entropy']), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Entropy Adm_1')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(entropy_2['Entropy']/max(entropy_2['Entropy']), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.scatter(entropy_2['Entropy']/max(entropy_2['Entropy']), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Entropy Adm_2')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(entropy_3['Entropy']/max(entropy_3['Entropy']), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.scatter(entropy_3['Entropy']/max(entropy_3['Entropy']), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Entropy Adm_3')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(entropy_4['Entropy']/max(entropy_4['Entropy']), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.scatter(entropy_4['Entropy']/max(entropy_4['Entropy']), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Entropy Adm_4')
plt.ylabel('Malaria Cases Per Person')
plt.show()

Entropy PMCC:
(-0.45508068532854151, 0.10203841599860207)


(-0.30147342233042052, 0.088197890678931121)


(-0.077745685626964364, 0.43965278761527449)


(-0.055988195216819692, 0.53514907776183085)


Now Introversion

In [13]:
print 'Introversion PMCC:'
print pearsonr(introversion_1['Introversion']/max(introversion_1['Introversion']), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.scatter(introversion_1['Introversion']/max(introversion_1['Introversion']), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.xlabel('Median Degree Adm_1')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(introversion_2['Introversion']/max(introversion_1['Introversion']), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.scatter(introversion_2['Introversion']/max(introversion_1['Introversion']), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.xlabel('Median Degree Adm_2')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(introversion_3['Introversion']/max(introversion_1['Introversion']), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.scatter(introversion_3['Introversion']/max(introversion_1['Introversion']), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.xlabel('Median Degree Adm_3')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(introversion_4['Introversion']/max(introversion_1['Introversion']), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.scatter(introversion_4['Introversion']/max(introversion_1['Introversion']), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.xlabel('Median Degree Adm_4')
plt.ylabel('Malaria Cases Per Person')
plt.show()

Introversion PMCC:
(-0.11527126666192086, 0.6947583650387037)


(-0.31614070614078105, 0.073076718352043227)


(-0.10189667219181617, 0.31060933779570499)
(-0.072328783944782427, 0.42279192305101321)


And finally, median degree

In [14]:
print 'Median_degree PMCC:'
print pearsonr(degree_1['Degree']/max(degree_1['Degree']), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.scatter(degree_1['Degree']/max(degree_1['Degree']), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.xlabel('Median Degree Adm_1')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(degree_2['Degree']/max(degree_2['Degree']), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.scatter(degree_2['Degree']/max(degree_2['Degree']), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.xlabel('Median Degree Adm_2')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(degree_3['Degree']/max(degree_3['Degree']), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.scatter(degree_3['Degree']/max(degree_3['Degree']), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.xlabel('Median Degree Adm_3')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(degree_4['Degree']/max(degree_4['Degree']), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.scatter(degree_4['Degree']/max(degree_4['Degree']), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.xlabel('Median Degree Adm_4')
plt.ylabel('Malaria Cases Per Person')
plt.show()

Median_degree PMCC:
(-0.45759842154165192, 0.099917824541556038)


(-0.30959743365760128, 0.079551803270562702)


(-0.08213644926841239, 0.41417572424476412)


(-0.053663806270072063, 0.5522576178370755)


In [15]:
print 'Activity PMCC:'
print pearsonr(np.log(activity_1['Vol'])/max(np.log(activity_1['Vol'])), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.scatter(np.log(activity_1['Vol'])/max(np.log(activity_1['Vol'])), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.xlabel('Log(Activity Adm_1)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(activity_2['Vol'])/max(np.log(activity_2['Vol'])), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.scatter(np.log(activity_2['Vol'])/max(np.log(activity_2['Vol'])), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.xlabel('Log(Activity Adm_2)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(activity_3['Vol'])/max(np.log(activity_3['Vol'])), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.scatter(np.log(activity_3['Vol'])/max(np.log(activity_3['Vol'])), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.xlabel('Log(Activity Adm_3)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(activity_4['Vol'])/max(np.log(activity_4['Vol'])), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.scatter(np.log(activity_4['Vol'])/max(np.log(activity_4['Vol'])), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.xlabel('Log(Activity Adm_4)')
plt.ylabel('Malaria Cases Per Person')
plt.show()

Activity PMCC:
(-0.01440579972490522, 0.9610165348092965)


(-0.036914564805404421, 0.83839199861773772)


(-0.16419245174310126, 0.10085350048710254)


(-0.12754993340824822, 0.15633039778732305)


In [16]:
print 'Entropy PMCC:'
print pearsonr(np.log(entropy_1['Entropy'])/max(np.log(entropy_1['Entropy'])), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.scatter(np.log(entropy_1['Entropy'])/max(np.log(entropy_1['Entropy'])), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Log(Entropy Adm_1)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(entropy_2['Entropy'])/max(np.log(entropy_2['Entropy'])), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.scatter(np.log(entropy_2['Entropy'])/max(np.log(entropy_2['Entropy'])), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Log(Entropy Adm_2)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(entropy_3['Entropy'])/max(np.log(entropy_3['Entropy'])), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.scatter(np.log(entropy_3['Entropy'])/max(np.log(entropy_3['Entropy'])), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Log(Entropy Adm_3)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(entropy_4['Entropy'])/max(np.log(entropy_4['Entropy'])), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.scatter(np.log(entropy_4['Entropy'])/max(np.log(entropy_4['Entropy'])), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Log(Entropy Adm_4)')
plt.ylabel('Malaria Cases Per Person')
plt.show()

Entropy PMCC:
(-0.38554593538110421, 0.17337973632510248)


(-0.48977003602533792, 0.0038156052097724821)


(-0.20277697139337564, 0.04198217008174978)
(-0.093091514501827302, 0.3017991555610946)


In [36]:
print 'Median_degree PMCC:'
print pearsonr(np.log(degree_1['Degree'])/max(np.log(degree_1['Degree'])), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.scatter(np.log(degree_1['Degree'])/max(np.log(degree_1['Degree'])), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Log(Median Degree Adm_1)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(degree_2['Degree'])/max(np.log(degree_2['Degree'])), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.scatter(np.log(degree_2['Degree'])/max(np.log(degree_2['Degree'])), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Log(Median Degree Adm_2)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(degree_3['Degree'])/max(np.log(degree_3['Degree'])), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.scatter(np.log(degree_3['Degree'])/max(np.log(degree_3['Degree'])), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Log(Median Degree Adm_3)')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(degree_4['Degree'])/max(np.log(degree_4['Degree'])), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.scatter(np.log(degree_4['Degree'])/max(np.log(degree_4['Degree'])), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Log(Median Degree Adm_4)')
plt.ylabel('Malaria Cases Per Person')
plt.show()

Median_degree PMCC:
(-0.36831739615117176, 0.19505689696966622)


(-0.46017103056806058, 0.0070476464805924281)


(-0.231422447644787, 0.019884401829600218)


(-0.041270498923244171, 0.64768726285487732)


In [17]:
print 'Introversion PMCC:'
print pearsonr(np.log(introversion_1['Introversion'])/max(np.log(introversion_1['Introversion'])), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
plt.scatter(np.log(introversion_1['Introversion'])/max(np.log(introversion_1['Introversion'])), malaria_1['MalariaPerPop']/max(malaria_1['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Median Degree Adm_1')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(introversion_2['Introversion'])/max(np.log(introversion_1['Introversion'])), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
plt.scatter(np.log(introversion_2['Introversion'])/max(np.log(introversion_1['Introversion'])), malaria_2['MalariaPerPop']/max(malaria_2['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Median Degree Adm_2')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(introversion_3['Introversion'])/max(np.log(introversion_1['Introversion'])), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
plt.scatter(np.log(introversion_3['Introversion'])/max(np.log(introversion_1['Introversion'])), malaria_3['MalariaPerPop']/max(malaria_3['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Median Degree Adm_3')
plt.ylabel('Malaria Cases Per Person')
plt.show()
print pearsonr(np.log(introversion_4['Introversion'])/max(np.log(introversion_1['Introversion'])), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
plt.scatter(np.log(introversion_4['Introversion'])/max(np.log(introversion_1['Introversion'])), malaria_4['MalariaPerPop']/max(malaria_4['MalariaPerPop']))
applyPlotStyle()
plt.xlabel('Median Degree Adm_4')
plt.ylabel('Malaria Cases Per Person')
plt.show()

Introversion PMCC:
(0.049153981932537331, 0.86747290303742475)


(-0.25906873295369331, 0.14543637834752671)


(-0.096982665426900663, 0.33464073261074434)
(-0.019414519833606318, 0.82984487990726485)


In [23]:
print 'Activity PMCC:'
print pearsonr(activity_1['Vol']/max(activity_1['Vol']), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
plt.scatter(activity_1['Vol']/max(activity_1['Vol']), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Activity Adm_1')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(activity_2['Vol']/max(activity_2['Vol']), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
plt.scatter(activity_2['Vol']/max(activity_2['Vol']), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Activity Adm_2')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(activity_3['Vol']/max(activity_3['Vol']), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
plt.scatter(activity_3['Vol']/max(activity_3['Vol']), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Activity Adm_3')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(activity_4['Vol']/max(activity_4['Vol']), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
plt.scatter(activity_4['Vol']/max(activity_4['Vol']), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Activity Adm_4')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()

Activity PMCC:
(0.25696791453148754, 0.37514837138734447)


(0.072647759159746916, 0.68785354212323613)


(nan, 1.0)


(nan, 1.0)


In [24]:
print 'Entropy PMCC:'
print pearsonr(entropy_1['Entropy']/max(entropy_1['Entropy']), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
plt.scatter(entropy_1['Entropy']/max(entropy_1['Entropy']), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Entropy Adm_1')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(entropy_2['Entropy']/max(entropy_2['Entropy']), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
plt.scatter(entropy_2['Entropy']/max(entropy_2['Entropy']), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Entropy Adm_2')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(entropy_3['Entropy']/max(entropy_3['Entropy']), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
plt.scatter(entropy_3['Entropy']/max(entropy_3['Entropy']), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Entropy Adm_3')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(entropy_4['Entropy']/max(entropy_4['Entropy']), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
plt.scatter(entropy_4['Entropy']/max(entropy_4['Entropy']), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Entropy Adm_4')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()

Entropy PMCC:
(0.71878245344235203, 0.003772351648806422)


(0.71804927098820925, 2.5451212672970398e-06)


(nan, 1.0)


(nan, 1.0)


In [25]:
print 'Median Degree PMCC:'
print pearsonr(degree_1['Degree']/max(degree_1['Degree']), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
plt.scatter(degree_1['Degree']/max(degree_1['Degree']), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Degree Adm_1')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(degree_2['Degree']/max(degree_2['Degree']), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
plt.scatter(degree_2['Degree']/max(degree_2['Degree']), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Degree Adm_2')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(degree_3['Degree']/max(degree_3['Degree']), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
plt.scatter(degree_3['Degree']/max(degree_3['Degree']), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Degree Adm_3')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(degree_4['Degree']/max(degree_4['Degree']), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
plt.scatter(degree_4['Degree']/max(degree_4['Degree']), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Degree Adm_4')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()

Median Degree PMCC:
(0.71012585481340718, 0.0044316051760970622)


(0.714498545380848, 3.0057534432462201e-06)


(nan, 1.0)


(nan, 1.0)


In [26]:
print 'Introversion PMCC:'
print pearsonr(introversion_1['Introversion']/max(introversion_1['Introversion']), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
plt.scatter(introversion_1['Introversion']/max(introversion_1['Introversion']), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Introversion Adm_1')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(introversion_2['Introversion']/max(introversion_2['Introversion']), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
plt.scatter(introversion_2['Introversion']/max(introversion_2['Introversion']), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Introversion Adm_2')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(introversion_3['Introversion']/max(introversion_3['Introversion']), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
plt.scatter(introversion_3['Introversion']/max(introversion_3['Introversion']), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Introversion Adm_3')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(introversion_4['Introversion']/max(introversion_4['Introversion']), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
plt.scatter(introversion_4['Introversion']/max(introversion_4['Introversion']), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Introversion Adm_4')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()

Median Introversion PMCC:
(-0.18633096519339559, 0.52359747998134387)


(0.37487116711723434, 0.031593532723505423)


(nan, 1.0)
(nan, 1.0)


Log-Log transformation 

In [27]:
print 'Activity PMCC:'
print pearsonr(np.log(activity_1['Vol'])/max(np.log(activity_1['Vol'])), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
plt.scatter(np.log(activity_1['Vol'])/max(np.log(activity_1['Vol'])), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Activity Adm_1')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(activity_2['Vol'])/max(np.log(activity_2['Vol'])), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
plt.scatter(np.log(activity_2['Vol'])/max(np.log(activity_2['Vol'])), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Activity Adm_2')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(activity_3['Vol'])/max(np.log(activity_3['Vol'])), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
plt.scatter(np.log(activity_3['Vol'])/max(np.log(activity_3['Vol'])), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Activity Adm_3')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(activity_4['Vol'])/max(np.log(activity_4['Vol'])), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
plt.scatter(np.log(activity_4['Vol'])/max(np.log(activity_4['Vol'])), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Activity Adm_4')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()

Activity PMCC:
(0.41963579912400961, 0.13524944537797359)


(0.24709115053048164, 0.16565369567899704)


(nan, 1.0)


(nan, 1.0)


In [59]:
print 'Entropy PMCC:'
print pearsonr(np.log(entropy_1['Entropy'])/max(np.log(entropy_1['Entropy'])), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
plt.scatter(np.log(entropy_1['Entropy'])/max(np.log(entropy_1['Entropy'])), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Entropy Adm_1')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(entropy_2['Entropy'])/max(np.log(entropy_2['Entropy'])), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
plt.scatter(np.log(entropy_2['Entropy'])/max(np.log(entropy_2['Entropy'])), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Entropy Adm_2')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(entropy_3['Entropy'])/max(np.log(entropy_3['Entropy'])), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
plt.scatter(np.log(entropy_3['Entropy'])/max(np.log(entropy_3['Entropy'])), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Entropy Adm_3')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(entropy_4['Entropy'])/max(np.log(entropy_4['Entropy'])), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
plt.scatter(np.log(entropy_4['Entropy'])/max(np.log(entropy_4['Entropy'])), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Entropy Adm_4')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()

Entropy PMCC:
(0.42153681458552972, 0.1333051538513256)


(0.63768346827672129, 6.5622634705414978e-05)


(nan, 1.0)


(nan, 1.0)


In [28]:
print 'Degree PMCC:'
print pearsonr(np.log(degree_1['Degree'])/max(np.log(degree_1['Degree'])), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
plt.scatter(np.log(degree_1['Degree'])/max(np.log(degree_1['Degree'])), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Degree Adm_1')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(degree_2['Degree'])/max(np.log(degree_2['Degree'])), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
plt.scatter(np.log(degree_2['Degree'])/max(np.log(degree_2['Degree'])), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Degree Adm_2')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(degree_3['Degree'])/max(np.log(degree_3['Degree'])), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
plt.scatter(np.log(degree_3['Degree'])/max(np.log(degree_3['Degree'])), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Degree Adm_3')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(degree_4['Degree'])/max(np.log(degree_4['Degree'])), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
plt.scatter(np.log(degree_4['Degree'])/max(np.log(degree_4['Degree'])), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Degree Adm_4')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()

Degree PMCC:
(0.42020567040861184, 0.13466461851947076)


(0.6059391849140261, 0.00018618831544909911)


(nan, 1.0)


(nan, 1.0)


In [30]:
print 'Introversion PMCC:'
print pearsonr(np.log(introversion_1['Introversion'])/max(np.log(introversion_1['Introversion'])), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
plt.scatter(np.log(introversion_1['Introversion'])/max(np.log(introversion_1['Introversion'])), np.log(malaria_1['MalariaPerPop'])/min(np.log(malaria_1['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Introversion Adm_1')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(introversion_2['Introversion'])/max(np.log(introversion_2['Introversion'])), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
plt.scatter(np.log(introversion_2['Introversion'])/max(np.log(introversion_2['Introversion'])), np.log(malaria_2['MalariaPerPop'])/min(np.log(malaria_2['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Introversion Adm_2')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(introversion_3['Introversion'])/max(np.log(introversion_3['Introversion'])), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
plt.scatter(np.log(introversion_3['Introversion'])/max(np.log(introversion_3['Introversion'])), np.log(malaria_3['MalariaPerPop'])/min(np.log(malaria_3['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Introversion Adm_3')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()
print pearsonr(np.log(introversion_4['Introversion'])/max(np.log(introversion_4['Introversion'])), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
plt.scatter(np.log(introversion_4['Introversion'])/max(np.log(introversion_4['Introversion'])), np.log(malaria_4['MalariaPerPop'])/min(np.log(malaria_4['MalariaPerPop'])))
applyPlotStyle()
plt.xlabel('Introversion Adm_4')
plt.ylabel('Log(Malaria Cases Per Person)')
plt.show()

Introversion PMCC:
(-0.25437910614447956, 0.38014634136979469)


(0.26319088138989205, 0.13891247985512747)


(nan, 1.0)


(nan, 1.0)
