## Fuction def to merge WBI dataset with cyclone dataset
*@XavierTorres*

In [1]:
import numpy as np
import pandas as pd

In [2]:
#Importing WBI Dataset (the final and cleaned one, called OUTPUT....)
df_WBI = pd.read_csv('C:/Users/torre/Documents/Local Omdena WFP files/WorldBank/Part1/OUTPUT_WBI_Selection_Country_Year.csv', sep=';', low_memory = False)

In [3]:
#Importing cyclones dataset... we use here a sample, as the final dataset is work in process
df_emdat = pd.read_csv('C:/Users/torre/Documents/Local Omdena WFP files/WorldBank/Part1/emdat_names.csv', sep=';', low_memory = False)

> Note: Function below only works if country names from both datasets are the same and there are no countries in the cyclones dataset missing in the WBI dataset

In [4]:
df_WBI.head()

Unnamed: 0,Country Name,Country Code,Series Name,1960,1961,1962,1963,1964,1965,1966,...,2012,2013,2014,2015,2016,2017,2018,2019,Income_level,Indicator_CAT
0,Afghanistan,AFG,Access to electricity (% of population),,,,,,,,...,69.1,70.153481,89.5,71.5,97.7,97.7,,,Low,Infrastructure
1,Albania,ALB,Access to electricity (% of population),,,,,,,,...,100.0,100.0,100.0,100.0,100.0,100.0,,,High_Middle,Infrastructure
2,Algeria,DZA,Access to electricity (% of population),,,,,,,,...,98.76466,99.580971,99.877052,99.943069,99.992317,100.0,,,High_Middle,Infrastructure
3,American Samoa,ASM,Access to electricity (% of population),,,,,,,,...,,,,,,,,,High_Middle,Infrastructure
4,Andorra,AND,Access to electricity (% of population),,,,,,,,...,100.0,100.0,100.0,100.0,100.0,100.0,,,High,Infrastructure


In [5]:
df_emdat.head()

Unnamed: 0,Unnamed,0,Year,Seq,ISO,Group,Subgroup,Type,Subtype,Subsubtype,...,End day,Total deaths,Num injured,Num affected,Num homeless,Total affected,Reconstruction cost ('000$),Insured losses ('000$),Total damage ('000$),CPI
0,392,1827,1973,2,ARG,Natural,Meteorological,Storm,Convective storm,Tornado,...,1.0,60.0,300.0,,,300.0,,,,17.366986
1,703,3460,1986,137,ARG,Natural,Meteorological,Storm,Convective storm,Tornado,...,,1.0,,1000.0,,1000.0,,,,42.87313
2,1639,10791,2000,839,ARG,Natural,Meteorological,Storm,Convective storm,Lightning/Thunderstorms,...,27.0,1.0,,,430.0,430.0,,,,67.355759
3,1644,10796,2000,607,ARG,Natural,Meteorological,Storm,Convective storm,Winter storm/Blizzard,...,,,,,,,,,,67.355759
4,209,1000,1966,117,ASM,Natural,Meteorological,Storm,Tropical cyclone,,...,,90.0,,,,,,,,12.696028


### Defining the function

In [12]:
# Creating a function that merges both datasets, WBI and cyclones

def merge_WBI_cyclones(df_WBI, df_emdat):
    
    # Let's add columns without numbers on the right of the cyclone dataset
    series_list = df_WBI['Series Name'].unique()
    for item in series_list:
        df_emdat[item] = np.nan
    
    # Let's also add the column 'Income_level' inherited from WBI dataset
    df_emdat['Income_level'] = np.nan
    
    # let's iterate row by row in cyclones dataset and assign corresponding value to each series columns, 
    # depending on the year and country of that row 
    for i in range(len(df_emdat)):
        curr_country = df_emdat.loc[i,'ISO']
        curr_year = str(df_emdat.loc[i,'Year'])
        
        for serie in series_list:
            #Step 1. selecting row in WBI dataset
            item_selected = df_WBI.loc[(df_WBI['Country Code'] == curr_country) & (df_WBI['Series Name'] == serie)]
            
            #Step 2. Selecting the value for the 'curr_year' column and assigning to current row in cyclone dataset
            data1 = float(item_selected[curr_year])
            data2 = str(item_selected['Income_level'].values)
            df_emdat.loc[i,serie] = data1
            df_emdat.loc[i,'Income_level'] = data2
    
    return
    

In [13]:
# Running the function. It lasts about 8-10 minutes to complete
merge_WBI_cyclones(df_WBI, df_emdat)

In [14]:
df_emdat.head()

Unnamed: 0,Unnamed,0,Year,Seq,ISO,Group,Subgroup,Type,Subtype,Subsubtype,...,Rural population (% of total population),"School enrollment, primary (% gross)","School enrollment, secondary (% gross)","School enrollment, tertiary (% gross)","Secondary education, duration (years)","Tuberculosis case detection rate (%, all forms)","Unemployment, total (% of total labor force) (modeled ILO estimate)",Urban population (% of total population),"Vulnerable employment, total (% of total employment) (modeled ILO estimate)",Income_level
0,392,1827,1973,2,ARG,Natural,Meteorological,Storm,Convective storm,Tornado,...,19.842,105.59223,49.91177,19.61808,5.0,,,80.158,,['High_Middle']
1,703,3460,1986,137,ARG,Natural,Meteorological,Storm,Convective storm,Tornado,...,14.555,107.02452,70.09199,37.42499,5.0,,,85.445,,['High_Middle']
2,1639,10791,2000,839,ARG,Natural,Meteorological,Storm,Convective storm,Lightning/Thunderstorms,...,10.858,115.69413,95.93661,53.96481,6.0,87.0,15.0,89.142,22.819001,['High_Middle']
3,1644,10796,2000,607,ARG,Natural,Meteorological,Storm,Convective storm,Winter storm/Blizzard,...,10.858,115.69413,95.93661,53.96481,6.0,87.0,15.0,89.142,22.819001,['High_Middle']
4,209,1000,1966,117,ASM,Natural,Meteorological,Storm,Tropical cyclone,,...,31.25,,,,,,,68.75,,['High_Middle']


In [15]:
# Writing to csv the final Dataset to be merged with other dataset from cyclones...

df_emdat.to_csv(r'C:/Users/torre/Documents/Local Omdena WFP files/WorldBank/Part1/OUTPUT_WBI_cyclones_merged.csv', index = False, sep = ';')