# Postprocessing
Load the matched disaster datasets, clean them, and merge entries that were matched to the same GFD flood.

In [1]:
# Import modules
import os
import numpy as np
import pandas as pd 
import geopandas as gpd
from datetime import datetime
import itertools
import glob
          
# Create paths

def mkdir(dir):
    """Create the directory *dir* if it does not exist yet.

    Missing parent directories are created as well, and an already existing
    directory is not an error, so the call is safe to repeat.

    Parameters
    ----------
    dir : str
        Path of the directory to create. The name shadows the builtin but is
        kept so that existing keyword callers keep working.

    Returns
    -------
    None
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
    # check-then-create race and also works when the parent is missing.
    os.makedirs(dir, exist_ok=True)
        
# All locations are built relative to the current working directory and end
# with a trailing slash so that file names can be appended directly.
path_local = '{}/'.format(os.getcwd())
path_data = '{}data/'.format(path_local)
path_data_processed = '{}data_processed/'.format(path_local)
path_FLODIS_final = '{}FLODIS_final/'.format(path_data_processed)
# The call is made for its side effect (creating the output directory).
path_FLODIS_final_creator = mkdir(path_FLODIS_final)

  shapely_geos_version, geos_capi_version_string


# IDMC functions

In [2]:
# Delete unnecessary rows and columns.
# Check for national vs. subnational entries.
# Search for entries with the same GFD ID. 

def drop_columns_rows(df):
    """Remove helper columns from the IDMC match table and mask ``-inf``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that contains every column listed in ``obsolete`` below.

    Returns
    -------
    pandas.DataFrame
        Copy of *df* without the columns whose name matches ``'Unnamed:'``,
        without the ``obsolete`` columns, and with ``-inf`` replaced by NaN.

    Raises
    ------
    KeyError
        If one of the ``obsolete`` columns is missing from *df*.
    """
    # 'num_provinces' and 'num_districts' are not part of this list, so they
    # are kept in the result.
    obsolete = [
        'event_name',
        'Hazard Category', 'Hazard Type',
        'country_only_info', 'provinces', 'districts',
        'start_date', 'Name',
        'idx',
        'countries_forUKonly',
    ]

    unnamed = df.filter(regex='Unnamed:').columns
    kept = df.columns.drop(unnamed)
    cleaned = df[kept].drop(columns=obsolete)

    return cleaned.replace(-np.inf, np.nan)

def matches_filter(df):
    """Keep only the IDMC events that have at least one GFD match.

    Prints the number of rows before and after filtering.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a numeric ``GFD_matches`` column.

    Returns
    -------
    pandas.DataFrame
        Rows with ``GFD_matches > 0``, re-indexed from 0.
    """
    total = len(df)
    print('Number of IDMC flood events (2008-2018):', total)

    matched = df.loc[df['GFD_matches'] > 0]
    print('Number of succesfully matched IDMC events:', len(matched), '\n\n')

    return matched.reset_index(drop=True)


def _join_ids(values):
    """Return the non-missing entries of *values* as one comma-separated string."""
    return ','.join(str(value) for value in values if str(value) != 'nan')


def _count_ids(joined):
    """Return how many comma-separated IDs *joined* holds, or NaN if it is empty."""
    joined = str(joined)
    return len(joined.split(',')) if joined else np.nan


def _merge_idmc_rows(rows, columns, sum_columns, max_columns, mean_columns,
                     matching_type, gfd_nr, gid_1, gid_2):
    """Collapse *rows* into a single-row DataFrame laid out like *columns*."""
    reduced = []
    if sum_columns:
        # min_count=1 keeps a total NaN when every input is missing, so that
        # "no number reported" is not turned into a reported 0.
        reduced.append(rows[sum_columns].sum(min_count=1))
    if max_columns:
        reduced.append(rows[max_columns].max())
    if mean_columns:
        reduced.append(rows[mean_columns].mean())

    merged = pd.concat(reduced).to_frame().T
    merged['ISO3'] = rows.ISO3.iloc[0]
    merged['GFD_matches'] = rows.GFD_matches.max()
    merged['GFD_matches_nr'] = gfd_nr
    merged['matching_type'] = matching_type
    merged['GID_1'] = gid_1
    merged['GID_2'] = gid_2
    merged['num_provinces'] = _count_ids(gid_1)
    merged['num_districts'] = _count_ids(gid_2)
    merged['year'] = np.round(rows.year.mean(), 0)
    # Columns that exist in the table but were not aggregated become NaN;
    # columns set above that the table does not have are discarded.
    return merged.reindex(columns=columns)


def nat_vs_subnat(df):
    """Reconcile national and subnational IDMC entries of the same flood.

    Rows are grouped by ``GFD_matches_nr`` and ``ISO3``. A national row is a
    row whose ``GID_1`` and ``GID_2`` are both missing. For every group with
    more than one row and at least one national row:

    * if the national rows report more displacements than the subnational
      rows (or there are no subnational rows at all), the whole group is
      collapsed into one row with ``matching_type`` 3: displacements are
      summed, the ``*_affected_sum`` columns take the maximum, the remaining
      indicator columns are averaged, and the GADM IDs are joined with commas;
    * otherwise the national rows are dropped.

    Changes compared with the previous implementation:

    * ``DataFrame.append`` (removed in pandas 2.0) is replaced by ``pd.concat``;
    * all drops and merged rows are applied once after the scan, so merged
      rows can no longer share an index label with rows still to be dropped;
    * a group that only consists of national rows is merged instead of being
      deleted when its displacement total is 0 or missing;
    * groups without any national row are skipped silently instead of
      printing "nat. entry dropped";
    * a merged displacement total stays NaN when every input is missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Matched IDMC table. It must contain ``GFD_matches_nr``, ``ISO3``,
        ``GID_1``, ``GID_2``, ``GFD_matches``, ``year``, ``displacements`` and
        every indicator column aggregated below.

    Returns
    -------
    pandas.DataFrame
        Reconciled table, re-indexed from 0. Merged rows are placed after
        the untouched rows.

    Raises
    ------
    KeyError
        If a group has to be merged and an aggregated column is missing.
    """
    infrastructure = ['cable', 'plant', 'power_pole', 'power_tower', 'line', 'mast',
                      'communication_tower', 'doctors', 'hospital', 'pharmacy',
                      'primary_road', 'tertiary_road', 'reservoir', 'school', 'university']

    sum_columns = ['displacements']

    max_columns = (['pop_affected_sum_GHSL', 'pop_affected_sum_GPW', 'GDP_affected_sum']
                   + [name + '_affected_sum' for name in infrastructure])

    mean_columns = (['GFD_matches_time_dif', 'GFD_duration',
                     'pop_density_GHSL', 'pop_density_GPW',
                     'pop_affected_mean_GHSL', 'pop_affected_mean_GPW', 'GDP_affected_mean',
                     'education_affected_mean', 'energy_affected_mean', 'health_affected_mean',
                     'telecommunication_affected_mean', 'transportation_affected_mean',
                     'water_affected_mean', 'CISI_global_affected_mean']
                    + [name + '_affected_mean' for name in infrastructure]
                    + ['GDPpc_mean', 'HDI_mean', 'urbanization_mean', 'landuse_total_mean',
                       'elevation_mean', 'roughness_mean', 'slope_mean', 'female_mean',
                       'pop_0_14_mean', 'pop_65_plus_mean', 'FLOPROS_merged_mean',
                       'FLOPROS_modeled_mean', 'forest_cover_mean'])

    labels_to_drop = []
    merged_rows = []

    for ID in df.GFD_matches_nr.unique():
        same_flood = df[df['GFD_matches_nr'] == ID]

        for ISO3 in same_flood.ISO3.unique():
            group = same_flood[same_flood.ISO3 == ISO3]
            if len(group) < 2:
                continue

            is_national = group.GID_1.isnull() & group.GID_2.isnull()
            df_nat = group[is_national]
            df_non_nat = group[~is_national]
            if df_nat.empty:
                # Only subnational entries: nothing to reconcile here.
                continue

            national_wins = (
                df_non_nat.empty
                or df_nat.displacements.sum() > df_non_nat.displacements.sum()
            )
            if national_wins:
                print('nat. entry > subnat. entry')
                print(ISO3)
                print(ID)
                print(group.GFD_matches_nr)
                print(len(group.GFD_matches_nr))
                print("New nr:", ID)

                row_single = _merge_idmc_rows(
                    group, df.columns, sum_columns, max_columns, mean_columns,
                    3, ID, _join_ids(group.GID_1), _join_ids(group.GID_2))
                labels_to_drop.extend(group.index.to_list())
                merged_rows.append(row_single)

                print(row_single.GFD_matches_nr)
            else:
                print('nat. entry dropped')
                labels_to_drop.extend(df_nat.index.to_list())

    df = pd.concat([df.drop(labels_to_drop)] + merged_rows)

    print('Number of succesfully matched IDMC events:', len(df))
    print("Number of individual GFD events:", len(df.GFD_matches_nr.unique()), '\n\n')
    df = df.reset_index(drop=True)

    return df

def _join_ids(values):
    """Return the non-missing entries of *values* as one comma-separated string."""
    return ','.join(str(value) for value in values if str(value) != 'nan')


def _count_ids(joined):
    """Return how many comma-separated IDs *joined* holds, or NaN if it is empty."""
    joined = str(joined)
    return len(joined.split(',')) if joined else np.nan


def _merge_idmc_rows(rows, columns, sum_columns, max_columns, mean_columns,
                     matching_type, gfd_nr, gid_1, gid_2):
    """Collapse *rows* into a single-row DataFrame laid out like *columns*."""
    reduced = []
    if sum_columns:
        # min_count=1 keeps a total NaN when every input is missing, so that
        # "no number reported" is not turned into a reported 0.
        reduced.append(rows[sum_columns].sum(min_count=1))
    if max_columns:
        reduced.append(rows[max_columns].max())
    if mean_columns:
        reduced.append(rows[mean_columns].mean())

    merged = pd.concat(reduced).to_frame().T
    merged['ISO3'] = rows.ISO3.iloc[0]
    merged['GFD_matches'] = rows.GFD_matches.max()
    merged['GFD_matches_nr'] = gfd_nr
    merged['matching_type'] = matching_type
    merged['GID_1'] = gid_1
    merged['GID_2'] = gid_2
    merged['num_provinces'] = _count_ids(gid_1)
    merged['num_districts'] = _count_ids(gid_2)
    merged['year'] = np.round(rows.year.mean(), 0)
    # Columns that exist in the table but were not aggregated become NaN;
    # columns set above that the table does not have are discarded.
    return merged.reindex(columns=columns)


def province_duplicate_merger(df):
    """Merge IDMC rows that name the same province for the same flood.

    Rows are grouped by ``GFD_matches_nr`` and ``ISO3``. Within a group, all
    rows that share a non-missing ``GID_1`` are collapsed into one row with
    ``matching_type`` 4: displacements are summed, every other indicator
    column is averaged, and the ``GID_2`` values are joined with commas.

    Changes compared with the previous implementation:

    * ``DataFrame.append`` (removed in pandas 2.0) is replaced by ``pd.concat``;
    * the aggregates are computed from the duplicate rows that get replaced,
      not from the whole country group, whose other rows stay in the table
      and were therefore counted twice;
    * every duplicated province of a group is merged, not only the first;
    * the population means are read from ``pop_affected_mean_GHSL`` /
      ``pop_affected_mean_GPW``, the names the other aggregators of this file
      use (previously ``affected_mean_GHSL`` / ``affected_mean_GPW``);
    * all drops and merged rows are applied once after the scan, so merged
      rows can no longer share an index label with rows still to be dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Matched IDMC table. It must contain ``GFD_matches_nr``, ``ISO3``,
        ``GID_1``, ``GID_2``, ``GFD_matches``, ``year``, ``displacements`` and
        every indicator column aggregated below.

    Returns
    -------
    pandas.DataFrame
        Table with province duplicates merged, re-indexed from 0. Merged
        rows are placed after the untouched rows.

    Raises
    ------
    KeyError
        If a duplicate has to be merged and an aggregated column is missing.
    """
    infrastructure = ['cable', 'plant', 'power_pole', 'power_tower', 'line', 'mast',
                      'communication_tower', 'doctors', 'hospital', 'pharmacy',
                      'primary_road', 'tertiary_road', 'reservoir', 'school', 'university']

    sum_columns = ['displacements']

    mean_columns = (['GFD_matches_time_dif', 'GFD_duration',
                     'pop_density_GHSL', 'pop_density_GPW',
                     'pop_affected_sum_GHSL', 'pop_affected_sum_GPW', 'GDP_affected_sum']
                    + [name + '_affected_sum' for name in infrastructure]
                    + ['pop_affected_mean_GHSL', 'pop_affected_mean_GPW', 'GDP_affected_mean',
                       'education_affected_mean', 'energy_affected_mean', 'health_affected_mean',
                       'telecommunication_affected_mean', 'transportation_affected_mean',
                       'water_affected_mean', 'CISI_global_affected_mean']
                    + [name + '_affected_mean' for name in infrastructure]
                    + ['GDPpc_mean', 'HDI_mean', 'urbanization_mean', 'landuse_total_mean',
                       'elevation_mean', 'roughness_mean', 'slope_mean', 'female_mean',
                       'pop_0_14_mean', 'pop_65_plus_mean', 'FLOPROS_merged_mean',
                       'FLOPROS_modeled_mean', 'forest_cover_mean'])

    max_columns = []

    labels_to_drop = []
    merged_rows = []

    for ID in df.GFD_matches_nr.unique():
        same_flood = df[df['GFD_matches_nr'] == ID]

        for ISO3 in same_flood.ISO3.unique():
            group = same_flood[same_flood.ISO3 == ISO3]
            with_province = group[group.GID_1.notnull()]
            repeated = with_province.loc[with_province.GID_1.duplicated(), 'GID_1'].unique()

            for province in repeated:
                print('provinces duplicate found')

                duplicates = with_province[with_province.GID_1 == province]
                merged_rows.append(_merge_idmc_rows(
                    duplicates, df.columns, sum_columns, max_columns, mean_columns,
                    4, ID, province, _join_ids(duplicates.GID_2)))
                labels_to_drop.extend(duplicates.index.to_list())

    df = pd.concat([df.drop(labels_to_drop)] + merged_rows)
    df = df.reset_index(drop=True)

    print('Number of succesfully matched IDMC events:', len(df))
    print("Number of individual GFD events:", len(df.GFD_matches_nr.unique()), '\n\n')

    return df

def _join_ids(values):
    """Return the non-missing entries of *values* as one comma-separated string."""
    return ','.join(str(value) for value in values if str(value) != 'nan')


def _count_ids(joined):
    """Return how many comma-separated IDs *joined* holds, or NaN if it is empty."""
    joined = str(joined)
    return len(joined.split(',')) if joined else np.nan


def _merge_idmc_rows(rows, columns, sum_columns, max_columns, mean_columns,
                     matching_type, gfd_nr, gid_1, gid_2):
    """Collapse *rows* into a single-row DataFrame laid out like *columns*."""
    reduced = []
    if sum_columns:
        # min_count=1 keeps a total NaN when every input is missing, so that
        # "no number reported" is not turned into a reported 0.
        reduced.append(rows[sum_columns].sum(min_count=1))
    if max_columns:
        reduced.append(rows[max_columns].max())
    if mean_columns:
        reduced.append(rows[mean_columns].mean())

    merged = pd.concat(reduced).to_frame().T
    merged['ISO3'] = rows.ISO3.iloc[0]
    merged['GFD_matches'] = rows.GFD_matches.max()
    merged['GFD_matches_nr'] = gfd_nr
    merged['matching_type'] = matching_type
    merged['GID_1'] = gid_1
    merged['GID_2'] = gid_2
    merged['num_provinces'] = _count_ids(gid_1)
    merged['num_districts'] = _count_ids(gid_2)
    merged['year'] = np.round(rows.year.mean(), 0)
    # Columns that exist in the table but were not aggregated become NaN;
    # columns set above that the table does not have are discarded.
    return merged.reindex(columns=columns)


def district_duplicate_merger(df):
    """Merge IDMC rows that name the same district for the same flood.

    Rows are grouped by ``GFD_matches_nr`` and ``ISO3``. Within a group, all
    rows that share a non-missing ``GID_2`` are collapsed into one row with
    ``matching_type`` 5: displacements are summed, every other indicator
    column is averaged, and the ``GID_1`` values are joined with commas.

    Changes compared with the previous implementation:

    * ``DataFrame.append`` (removed in pandas 2.0) is replaced by ``pd.concat``;
    * the aggregates are computed from the duplicate rows that get replaced,
      not from the whole country group, whose other rows stay in the table
      and were therefore counted twice;
    * every duplicated district of a group is merged, not only the first;
    * the population means are read from ``pop_affected_mean_GHSL`` /
      ``pop_affected_mean_GPW``, the names the other aggregators of this file
      use (previously ``affected_mean_GHSL`` / ``affected_mean_GPW``);
    * all drops and merged rows are applied once after the scan, so merged
      rows can no longer share an index label with rows still to be dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Matched IDMC table. It must contain ``GFD_matches_nr``, ``ISO3``,
        ``GID_1``, ``GID_2``, ``GFD_matches``, ``year``, ``displacements`` and
        every indicator column aggregated below.

    Returns
    -------
    pandas.DataFrame
        Table with district duplicates merged, re-indexed from 0. Merged
        rows are placed after the untouched rows.

    Raises
    ------
    KeyError
        If a duplicate has to be merged and an aggregated column is missing.
    """
    infrastructure = ['cable', 'plant', 'power_pole', 'power_tower', 'line', 'mast',
                      'communication_tower', 'doctors', 'hospital', 'pharmacy',
                      'primary_road', 'tertiary_road', 'reservoir', 'school', 'university']

    sum_columns = ['displacements']

    mean_columns = (['GFD_matches_time_dif', 'GFD_duration',
                     'pop_density_GHSL', 'pop_density_GPW',
                     'pop_affected_sum_GHSL', 'pop_affected_sum_GPW', 'GDP_affected_sum']
                    + [name + '_affected_sum' for name in infrastructure]
                    + ['pop_affected_mean_GHSL', 'pop_affected_mean_GPW', 'GDP_affected_mean',
                       'education_affected_mean', 'energy_affected_mean', 'health_affected_mean',
                       'telecommunication_affected_mean', 'transportation_affected_mean',
                       'water_affected_mean', 'CISI_global_affected_mean']
                    + [name + '_affected_mean' for name in infrastructure]
                    + ['GDPpc_mean', 'HDI_mean', 'urbanization_mean', 'landuse_total_mean',
                       'elevation_mean', 'roughness_mean', 'slope_mean', 'female_mean',
                       'pop_0_14_mean', 'pop_65_plus_mean', 'FLOPROS_merged_mean',
                       'FLOPROS_modeled_mean', 'forest_cover_mean'])

    max_columns = []

    labels_to_drop = []
    merged_rows = []

    for ID in df.GFD_matches_nr.unique():
        same_flood = df[df['GFD_matches_nr'] == ID]

        for ISO3 in same_flood.ISO3.unique():
            group = same_flood[same_flood.ISO3 == ISO3]
            with_district = group[group.GID_2.notnull()]
            repeated = with_district.loc[with_district.GID_2.duplicated(), 'GID_2'].unique()

            for district in repeated:
                print('districts duplicate found')

                duplicates = with_district[with_district.GID_2 == district]
                merged_rows.append(_merge_idmc_rows(
                    duplicates, df.columns, sum_columns, max_columns, mean_columns,
                    5, ID, _join_ids(duplicates.GID_1), district))
                labels_to_drop.extend(duplicates.index.to_list())

    df = pd.concat([df.drop(labels_to_drop)] + merged_rows)
    df = df.reset_index(drop=True)
    print('Number of succesfully matched IDMC events:', len(df))
    print("Number of individual GFD events:", len(df.GFD_matches_nr.unique()), '\n\n')

    return df

# Launch (IDMC)

In [16]:
# Load the IDMC-GFD match table and give the columns used below short names.
df = pd.read_csv(path_data_processed + "IDMC_EMDAT_GFD_match/IDMC_GFD_connect_results_FL_merged2.csv")
df = df.rename(columns={'Internal Displacements': 'displacements', 'Year': 'year'})

# Short bracketed IDs (at most 6 characters, e.g. a single number in
# brackets) lose their brackets; longer entries are left untouched.
for i, ID in enumerate(df['GFD_matches_nr'].tolist()):
    if "[" in str(ID) and len(ID) <= 6:
        df.loc[i, 'GFD_matches_nr'] = ID.replace('[', '').replace(']', '')

# Cleaning and merging steps, applied in this order.
df = drop_columns_rows(df)
df = matches_filter(df)
df = nat_vs_subnat(df)
df = province_duplicate_merger(df)
df = district_duplicate_merger(df)

print("FINAL:")
print('Number of succesfully matched IDMC events:', len(df))
without_numbers = df[df.displacements.isnull()]
print('Number of events without displacement numbers:', len(without_numbers))

df.to_csv(path_FLODIS_final + 'FLODIS_displacements.csv')

ParserError: Error tokenizing data. C error: Expected 1 fields in line 14, saw 2


# EM-DAT functions

In [5]:
# Delete unnecessary rows and columns.
# Check for national vs. subnational entries.
# Search for entries with the same GFD ID. 

def drop_columns_rows(df):
    """Remove helper and date-part columns from the EM-DAT match table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that contains every column listed in ``date_parts`` below.

    Returns
    -------
    pandas.DataFrame
        Copy of *df* without the columns whose name matches ``'Unnamed:'``,
        without the ``date_parts`` columns, and with ``-inf`` replaced by NaN.

    Raises
    ------
    KeyError
        If one of the ``date_parts`` columns is missing from *df*.
    """
    date_parts = [
        'Start Year', 'Start Mont', 'Start Day',
        'End Year', 'End Month', 'End Day',
    ]

    unnamed = df.filter(regex='Unnamed:').columns
    kept = df.columns.drop(unnamed)
    cleaned = df[kept].drop(columns=date_parts)

    return cleaned.replace(-np.inf, np.nan)

def matches_filter(df):
    """Keep only the EM-DAT events that have at least one GFD match.

    Prints the number of rows before and after filtering.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a numeric ``GFD_matches`` column.

    Returns
    -------
    pandas.DataFrame
        Rows with ``GFD_matches > 0``, re-indexed from 0.
    """
    total = len(df)
    print('Number of EM-DAT flood events (2000-2018):', total)

    matched = df.loc[df['GFD_matches'] > 0]
    print('Number of succesfully matched EM-DAT events:', len(matched), '\n\n')

    return matched.reset_index(drop=True)

def _join_ids(values):
    """Return the non-missing entries of *values* as one comma-separated string."""
    return ','.join(str(value) for value in values if str(value) != 'nan')


def GDIS_duplicates(df):
    """Merge EM-DAT rows that were matched to the same flood in one country.

    Rows are grouped by ``GFD_matches_nr`` and ``ISO3``. Every group with more
    than one row is collapsed into a single row with ``matching_type`` 3:
    impact figures are summed, the ``*_affected_sum`` columns take the
    maximum, the remaining indicator columns are averaged, and the
    ``disasterno`` values are joined with commas.

    Changes compared with the previous implementation:

    * ``DataFrame.append`` (removed in pandas 2.0) is replaced by ``pd.concat``;
    * a summed impact figure stays NaN when every input is missing, instead of
      becoming 0 and looking like a reported value;
    * all drops and merged rows are applied once after the scan, so merged
      rows no longer share index label 0 while the table is being modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Matched EM-DAT table. It must contain ``GFD_matches_nr``, ``ISO3``,
        ``GFD_matches``, ``disasterno``, ``year`` and every column aggregated
        below.

    Returns
    -------
    pandas.DataFrame
        Table with one row per flood and country, re-indexed from 0. Merged
        rows are placed after the untouched rows.

    Raises
    ------
    KeyError
        If a group has to be merged and an aggregated column is missing.
    """
    infrastructure = ['cable', 'plant', 'power_pole', 'power_tower', 'line', 'mast',
                      'communication_tower', 'doctors', 'hospital', 'pharmacy',
                      'primary_road', 'tertiary_road', 'reservoir', 'school', 'university']

    sum_columns = ['total_deaths', 'no_injured', 'no_affected_EMDAT', 'no_homeles',
                   'total_affected_EMDAT', 'total_damages_(000_USD)']

    max_columns = (['pop_affected_sum_GHSL', 'pop_affected_sum_GPW', 'GDP_affected_sum']
                   + [name + '_affected_sum' for name in infrastructure])

    mean_columns = (['GFD_matches_time_dif', 'GFD_duration',
                     'pop_density_GHSL', 'pop_density_GPW',
                     'pop_affected_mean_GHSL', 'pop_affected_mean_GPW', 'GDP_affected_mean',
                     'education_affected_mean', 'energy_affected_mean', 'health_affected_mean',
                     'telecommunication_affected_mean', 'transportation_affected_mean',
                     'water_affected_mean', 'CISI_global_affected_mean']
                    + [name + '_affected_mean' for name in infrastructure]
                    + ['GDPpc_mean', 'HDI_mean', 'urbanization_mean', 'landuse_total_mean',
                       'elevation_mean', 'roughness_mean', 'slope_mean', 'female_mean',
                       'pop_0_14_mean', 'pop_65_plus_mean', 'FLOPROS_merged_mean',
                       'FLOPROS_modeled_mean', 'forest_cover_mean'])

    labels_to_drop = []
    merged_rows = []

    for ID in df.GFD_matches_nr.unique():
        same_flood = df[df['GFD_matches_nr'] == ID]

        for ISO3 in same_flood.ISO3.unique():
            group = same_flood[same_flood.ISO3 == ISO3]
            if len(group) < 2:
                continue

            print(ISO3)
            print(ID)
            print(group.GFD_matches_nr)
            print(len(group.GFD_matches_nr))
            print("New nr:", ID)

            row_single = pd.concat([
                # min_count=1: no reported figure at all must stay NaN.
                group[sum_columns].sum(min_count=1),
                group[max_columns].max(),
                group[mean_columns].mean(),
            ]).to_frame().T
            row_single['ISO3'] = group.ISO3.iloc[0]
            row_single['GFD_matches'] = group.GFD_matches.max()
            row_single['GFD_matches_nr'] = ID
            row_single['matching_type'] = 3
            row_single['disasterno'] = _join_ids(group.disasterno)
            row_single['year'] = np.round(group.year.mean(), 0)
            # Columns of the table that were not aggregated become NaN.
            row_single = row_single.reindex(columns=df.columns)

            labels_to_drop.extend(group.index.to_list())
            merged_rows.append(row_single)

            print(row_single.GFD_matches_nr)

    df = pd.concat([df.drop(labels_to_drop)] + merged_rows)

    print('Number of succesfully matched EM-DAT events:', len(df))
    print("Number of individual GFD events:", len(df.GFD_matches_nr.unique()), '\n\n')
    df = df.reset_index(drop=True)

    return df

# Launch (EM-DAT)

In [6]:
# Load the EM-DAT-GFD match table and give the truncated source column names
# the names used by the functions above.
df = pd.read_csv(path_data_processed + "IDMC_EMDAT_GFD_match/EMDAT_GFD_connect_results_FL_merged.csv")
df = df.rename(columns={
    'iso3': 'ISO3',
    'Total Deat': 'total_deaths',
    'No Injured': 'no_injured',
    'No Affecte': 'no_affected_EMDAT',
    'No Homeles': 'no_homeles',
    'Total Affe': 'total_affected_EMDAT',
    'Total Dama': 'total_damages_(000_USD)',
})

# Short bracketed IDs (at most 6 characters, e.g. a single number in
# brackets) lose their brackets; longer entries are left untouched.
for i, ID in enumerate(df['GFD_matches_nr'].tolist()):
    if "[" in str(ID) and len(ID) <= 6:
        df.loc[i, 'GFD_matches_nr'] = ID.replace('[', '').replace(']', '')

# Cleaning and merging steps, applied in this order.
df = drop_columns_rows(df)
df = matches_filter(df)
df = GDIS_duplicates(df)

print("FINAL:")
print('Number of succesfully matched EM-DAT events:', len(df))
no_impact_numbers = df[df['total_damages_(000_USD)'].isnull() & df.total_deaths.isnull()]
print('Number of events without fatalities and damages numbers:', len(no_impact_numbers))

df.to_csv(path_FLODIS_final + 'FLODIS_fatalities_damages.csv')

Number of EM-DAT flood events (2000-2018): 2390
Number of succesfully matched EM-DAT events: 820 


nat. entry > subnat. entry
VNM
2368
1      2368
556    2368
Name: DFO_matches_nr, dtype: object
2
New nr: 2368
0    2368
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
MWI
2119
3      2119
522    2119
Name: DFO_matches_nr, dtype: object
2
New nr: 2119
0    2119
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
IND
2859
6      2859
252    2859
Name: DFO_matches_nr, dtype: object
2
New nr: 2859
0    2859
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
AFG
3061
12     3061
666    3061
Name: DFO_matches_nr, dtype: object
2
New nr: 3061
0    3061
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
IND
1789
14     1789
443    1789
622    1789
Name: DFO_matches_nr, dtype: object
3
New nr: 1789
0    1789
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
MNE
3756
55     3756
211    3756
Name: DFO_matches_nr, dtype: object
2
New n

0    2099
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
GBR
4009
181    4009
404    4009
Name: DFO_matches_nr, dtype: object
2
New nr: 4009
0    4009
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
PER
4117
435    4117
441    4117
Name: DFO_matches_nr, dtype: object
2
New nr: 4117
0    4117
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
IDN
2890
208    2890
769    2890
Name: DFO_matches_nr, dtype: object
2
New nr: 2890
0    2890
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
IND
[3716, 3717]
216    [3716, 3717]
253    [3716, 3717]
Name: DFO_matches_nr, dtype: object
2
New nr: [3716, 3717]
0    [3716, 3717]
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
CHN
4390
225    4390
601    4390
Name: DFO_matches_nr, dtype: object
2
New nr: 4390
0    4390
Name: DFO_matches_nr, dtype: object
nat. entry > subnat. entry
DOM
3915
229    3915
416    3915
Name: DFO_matches_nr, dtype: object
2
New nr: 3915
0    3915
Name: DF