# GDIS / EM-DAT information merge
This script assigns each GDIS entry the corresponding disaster information from EM-DAT.

In [None]:
# Import modules
import os
import pandas as pd
import geopandas as gpd

# Create paths
def mkdir(dir):
    """Create the directory ``dir`` if it does not exist yet.

    Missing parent directories are created as well, so a nested output
    folder can be set up in a single call. An existing path is left
    untouched.

    Parameters
    ----------
    dir : str
        Path of the directory to create. (The name shadows the builtin but
        is kept so keyword callers keep working.)

    Returns
    -------
    None
    """
    if not os.path.exists(dir):
        # makedirs also builds missing parents; exist_ok guards against the
        # directory appearing between the check above and the creation.
        os.makedirs(dir, exist_ok=True)
        
# All paths are derived from the current working directory and end with '/'
path_run = f"{os.getcwd()}/"
path_data = f"{path_run}data/"
path_data_processed = f"{path_run}data_processed/"
path_GDIS_processed = f"{path_data_processed}GDIS_processed/"
# Ensure the output folder exists; mkdir has no return value, so this name
# simply holds None after the call.
path_GDIS_processed_creator = mkdir(path_GDIS_processed)

In [None]:
# Load and prepare data
GDIS = gpd.read_file(path_data + 'GDIS/GDIS.shp')
# Keep flood entries only; copy so that the 'year' column below is written
# to an independent frame instead of a slice of the full table.
GDIS = GDIS.loc[GDIS.disasterty == 'flood'].copy()
# The token before the first '-' of disasterno is stored as integer 'year'
GDIS['year'] = GDIS.disasterno.str.split('-').str[0].astype(int)
GDIS = GDIS[(GDIS.year >= 2000) & (GDIS.year <= 2018)]
EMDAT = pd.read_csv(path_data + 'EM-DAT/emdat_public_2022_08_17_query_uid-W0XDn2.csv')
# EM-DAT, database accessed on 2022-08-17 ("https://public.emdat.be/")
# Split 'Dis No' once: the first two tokens form the key shared with GDIS
# (disasterno), the third token is stored as the ISO3 code.
dis_no_parts = EMDAT['Dis No'].str.split('-')
EMDAT['disasterno'] = dis_no_parts.str[0] + '-' + dis_no_parts.str[1]
EMDAT['ISO3'] = dis_no_parts.str[2]

# EM-DAT columns of interest for FLODIS; their values are copied onto the
# matching GDIS entry.
column_list = ['Start Year', 'Start Month', 'Start Day', 'End Year', 'End Month',
       'End Day', 'Total Deaths', 'No Injured', 'No Affected', 'No Homeless',
       'Total Affected',"Total Damages ('000 US$)"]

# Only columns that are actually present in the EM-DAT table can be copied;
# absent ones are skipped.
columns_available = [column for column in column_list if column in EMDAT.columns]

# Number of (disaster, country) GDIS entries without a matching EM-DAT record
counter_error = 0
for disasterno_temp in GDIS.disasterno.unique():
    print(disasterno_temp)
    # Merge all GDIS geometries of this disaster into one row per country.
    # NOTE(review): aggfunc='mean' is kept from the original; confirm it still
    # handles the non-numeric attribute columns with the installed
    # pandas/geopandas versions.
    GDIS_temp = GDIS.loc[GDIS.disasterno == disasterno_temp]
    GDIS_temp = GDIS_temp.dissolve(by='iso3', aggfunc='mean').reset_index(level=0)
    GDIS_temp = GDIS_temp.drop(['fid','geo_id','historical'], axis=1)
    GDIS_temp['disasterno'] = disasterno_temp
    EMDAT_temp = EMDAT.loc[EMDAT.disasterno == disasterno_temp]

    # counter numbers the per-country output files of one disaster
    for counter, ISO3_temp in enumerate(GDIS_temp.iso3):
        # Work on a copy so that the new columns are not written to a slice
        GDIS_mod = GDIS_temp.loc[GDIS_temp.iso3 == ISO3_temp].copy()
        EMDAT_ISO3_temp = EMDAT_temp.loc[EMDAT_temp.ISO3 == ISO3_temp]

        # Copy the values of the first matching EM-DAT record, if there is one
        if not EMDAT_ISO3_temp.empty:
            for column in columns_available:
                GDIS_mod[column] = EMDAT_ISO3_temp[column].values[0]

        # 'Start Year' is present only if EM-DAT information was transferred
        if 'Start Year' in GDIS_mod:
            GDIS_mod.to_file(path_GDIS_processed + f"GDIS_mod_{disasterno_temp}_{counter}.shp")
        else:
            counter_error += 1
            print(f"Error: GDIS entry has no disaster information ({ISO3_temp})", counter_error)

print("Total number of GDIS entries with no disaster information:",counter_error)