In [1]:

import pandas as pd
import matplotlib as plt
import os.path
import re
from scipy.stats import linregress

In [2]:
disasterPath = os.path.join("disaster-data", "disaster_data.xlsx")
disasterData = pd.read_excel(disasterPath, sheet_name="EM-DAT Data")

hdiPath = os.path.join("disaster-data", "hdi_data.csv")
hdiData = pd.read_csv(hdiPath)
hdiData

Unnamed: 0,iso3,country,hdicode,region,hdi_rank_2021,hdi_1990,hdi_1991,hdi_1992,hdi_1993,hdi_1994,...,mf_2012,mf_2013,mf_2014,mf_2015,mf_2016,mf_2017,mf_2018,mf_2019,mf_2020,mf_2021
0,AFG,Afghanistan,Low,SA,180.0,0.273,0.279,0.287,0.297,0.292,...,1.860000,1.880000,1.660000,1.620000,1.660000,1.410000,1.320000,1.380000,1.380000,1.380000
1,AGO,Angola,Medium,SSA,148.0,,,,,,...,4.090000,4.530000,3.970000,3.590000,2.790000,2.640000,2.280000,2.180000,2.180000,2.180000
2,ALB,Albania,High,ECA,67.0,0.647,0.629,0.614,0.617,0.624,...,12.440000,11.490000,13.140000,12.610000,14.390000,14.460000,12.850000,12.960000,12.960000,12.960000
3,AND,Andorra,Very High,,40.0,,,,,,...,,,,,,,,,,
4,ARE,United Arab Emirates,Very High,AS,26.0,0.728,0.739,0.742,0.748,0.755,...,49.560000,49.680000,55.490000,59.760000,64.950000,75.610000,65.970000,68.950000,68.950000,68.950000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,ZZG.ECA,Europe and Central Asia,,,,0.664,0.661,0.654,0.652,0.647,...,13.107171,14.172082,14.189127,14.523162,13.791770,13.618078,13.914030,14.087293,14.091826,14.096719
202,ZZH.LAC,Latin America and the Caribbean,,,,0.633,0.638,0.643,0.648,0.654,...,14.114108,14.493145,14.080904,14.146026,13.325144,13.604555,13.151315,13.296195,13.290914,13.284745
203,ZZI.SA,South Asia,,,,0.442,0.447,0.453,0.457,0.462,...,4.699407,4.584935,4.486500,4.610546,4.624886,4.740263,5.000024,5.110230,5.106447,5.102134
204,ZZJ.SSA,Sub-Saharan Africa,,,,0.407,0.410,0.410,0.411,0.413,...,3.437760,3.530479,3.491150,3.494860,3.415511,3.256156,3.145930,3.151076,3.146092,3.140740


In [3]:
yearList = [x for x in range(2000, 2022)]
iterables = [hdiData['iso3'], yearList]
hdiFrame = pd.MultiIndex.from_product(iterables, names=["iso3", "year"])
hdiFrame

MultiIndex([(      'AFG', 2000),
            (      'AFG', 2001),
            (      'AFG', 2002),
            (      'AFG', 2003),
            (      'AFG', 2004),
            (      'AFG', 2005),
            (      'AFG', 2006),
            (      'AFG', 2007),
            (      'AFG', 2008),
            (      'AFG', 2009),
            ...
            ('ZZK.WORLD', 2012),
            ('ZZK.WORLD', 2013),
            ('ZZK.WORLD', 2014),
            ('ZZK.WORLD', 2015),
            ('ZZK.WORLD', 2016),
            ('ZZK.WORLD', 2017),
            ('ZZK.WORLD', 2018),
            ('ZZK.WORLD', 2019),
            ('ZZK.WORLD', 2020),
            ('ZZK.WORLD', 2021)],
           names=['iso3', 'year'], length=4532)

In [4]:
hdiCleanFrame = pd.DataFrame(index=hdiFrame, columns=["hdi", "le", "gnipc", "ihdi", "ineq_le", "ineq_inc"])
# hdiCleanFrame.loc["AFG", "hdi"] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
hdiCleanFrame

Unnamed: 0_level_0,Unnamed: 1_level_0,hdi,le,gnipc,ihdi,ineq_le,ineq_inc
iso3,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AFG,2000,,,,,,
AFG,2001,,,,,,
AFG,2002,,,,,,
AFG,2003,,,,,,
AFG,2004,,,,,,
...,...,...,...,...,...,...,...
ZZK.WORLD,2017,,,,,,
ZZK.WORLD,2018,,,,,,
ZZK.WORLD,2019,,,,,,
ZZK.WORLD,2020,,,,,,


In [7]:
hdiYear = hdiData.filter(regex="[0-2][0-9]$")

filterHDI = hdiYear.filter(regex="^hdi.[0-9]", axis=1)
filterLE = hdiYear.filter(regex="^le.[0-9]", axis=1)
filterGNIPC = hdiYear.filter(regex="^gnipc", axis=1)
filterIHDI = hdiYear.filter(regex="^ihdi", axis=1)
filterILE = hdiYear.filter(regex="^ineq_le", axis=1)
filterIINC = hdiYear.filter(regex="^ineq_inc", axis=1)

# hdiReduced = pd.concat([hdiData["iso3"], hdiData["country"], filterHDI, filterLE, filterGNIPC, filterIHDI, filterILE, filterIINC], axis=1)
# hdiReduced.drop("hdi_rank_2021", axis=1)
filterHDI

countryList = list(hdiData["iso3"])
dataList = ["hdi", "le", "gnipc", "ihdi", "ineq_le", "indeq_inc"]

hdiList = [filterHDI.iloc[x, :].values.flatten().tolist() for x in range(0, len(filterHDI))]
leList = [filterLE.iloc[x, :].values.flatten().tolist() for x in range(0, len(filterLE))]
gnipcList = [filterGNIPC.iloc[x, :].values.flatten().tolist() for x in range(0, len(filterGNIPC))]
ihdiList = [filterIHDI.iloc[x, :].values.flatten().tolist() for x in range(0, len(filterIHDI))]
ileList = [filterILE.iloc[x, :].values.flatten().tolist() for x in range(0, len(filterILE))]
iincList = [filterIINC.iloc[x, :].values.flatten().tolist() for x in range(0, len(filterIINC))]

totalList = [hdiList, leList, gnipcList, ihdiList, ileList, iincList]

In [16]:
i = 0
j = 0

for bigList in totalList:
    for smallList in bigList:
        hdiCleanFrame.loc[countryList[i], dataList[j]] = smallList
        i += 1
    j += 1

hdiCleanFrame



IndexError: list index out of range