In [1]:
import pandas as pd

In [2]:
def fill_missing_columns_with_default(df, columns, columns_to_fill, values):
    """Expand ``df`` to the full cartesian grid of its key values and fill the gaps.

    Parameters
    ----------
    df : pd.DataFrame
        Input frame; it is copied, never modified.
    columns : list of str
        Key columns. Every combination of their distinct values found in
        ``df`` gets at least one row in the result.
    columns_to_fill : sequence of str
        Columns whose missing entries (on newly created rows as well as on
        existing ones) are replaced by a default.
    values : sequence
        Defaults, matched to ``columns_to_fill`` by position.

    Returns
    -------
    pd.DataFrame
        Right-merge of ``df`` onto the key grid. Besides the columns of
        ``df`` it carries the grid's placeholder column ``count``, which
        holds no data.
    """
    source = df.copy()

    # One array of distinct values per key column; their product is the grid.
    distinct_per_key = []
    for key in columns:
        distinct_per_key.append(source[key].unique())
    grid_index = pd.MultiIndex.from_product(distinct_per_key, names=columns)

    # The grid frame only exists to provide the index to merge against.
    grid = pd.DataFrame(index=grid_index, columns=['count'])
    completed = source.merge(grid, how='right', left_on=columns, right_index=True)

    for position, target in enumerate(columns_to_fill):
        completed[target] = completed[target].fillna(values[position])
    return completed

def get_gdp_data(df_gdp: pd.DataFrame, df_disaster: pd.DataFrame, years, country = None, categories = False):
    """Aggregate disaster damages per year and express them as a percentage of GDP.

    Parameters
    ----------
    df_gdp : pd.DataFrame
        Needs a 'Country Code' column and one column per requested year,
        named by the year as a string (e.g. '1990').
    df_disaster : pd.DataFrame
        Needs 'Start Year', 'ISO' and "Total Damages, Adjusted ('000 US$)";
        also 'Disaster Subgroup' and 'Disaster Type' when ``categories`` is
        true.
    years : iterable of int or str
        Years whose GDP columns are used (any iterable, e.g. a ``range``).
    country : str, optional
        Country code to restrict both inputs to. When falsy, disasters of all
        countries are aggregated under ISO 'WORLD' and the GDP reference is the
        mean of each year column over all rows of ``df_gdp``.
        NOTE(review): a mean (not a sum) is used as the world GDP - confirm
        this is intended.
    categories : bool
        When true, aggregate additionally by disaster subgroup and type and
        pass the result through ``fill_missing_columns_with_default`` so that
        missing key combinations get rows with damages and share set to 0.

    Returns
    -------
    pd.DataFrame
        Summed numeric disaster columns per group plus a 'share' column:
        damages (converted from thousands of US$) divided by GDP, times 100.
        'share' is 0 for rows whose year is not among ``years`` and for every
        row when no GDP row matches ``country``.

    Raises
    ------
    KeyError
        If a requested year column or 'Country Code' is missing in ``df_gdp``.
    """
    damage_column = "Total Damages, Adjusted ('000 US$)"
    year_columns = [str(year) for year in years]
    gdp_data_filtered = df_gdp[year_columns + ['Country Code']]

    group_columns = ['Start Year', 'ISO'] if country else ['Start Year']
    if categories:
        group_columns += ['Disaster Subgroup', 'Disaster Type']
    disaster_data_by_year = df_disaster.groupby(group_columns, as_index=False).sum(numeric_only=True)

    if country:
        gdp_data_reduced = gdp_data_filtered[gdp_data_filtered['Country Code'] == country]
        # Copy the slice so adding the 'share' column below never writes to a view.
        disaster_data_by_year = disaster_data_by_year[disaster_data_by_year['ISO'] == country].copy()
    else:
        gdp_data_reduced = pd.DataFrame([gdp_data_filtered.mean(numeric_only=True)], columns=year_columns)
        disaster_data_by_year['ISO'] = 'WORLD'

    if disaster_data_by_year.empty or gdp_data_reduced.empty:
        disaster_data_by_year['share'] = 0
    else:
        # Use the first matching GDP row as a plain year -> value lookup so each
        # share is a scalar rather than a one-element Series.
        gdp_by_year = gdp_data_reduced[year_columns].iloc[0].to_dict()
        year_keys = disaster_data_by_year['Start Year'].astype(int).astype(str)
        gdp_per_row = year_keys.map(gdp_by_year)
        # Damages are stored in thousands of US$.
        damages = disaster_data_by_year[damage_column] * 1000
        share = (damages / gdp_per_row) * 100
        # Years without a GDP column contribute a share of 0.
        disaster_data_by_year['share'] = share.where(year_keys.isin(list(gdp_by_year)), 0)

    if categories:
        key_columns = ['Start Year', 'ISO', 'Disaster Subgroup', 'Disaster Type']
        return fill_missing_columns_with_default(disaster_data_by_year, key_columns, [damage_column, 'share'], [0, 0])
    return disaster_data_by_year

In [3]:
def fill_years(df,years,country, disaster_classification): 
    """Return one row per (year, subgroup, type) combination for ``country``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing the columns 'Start Year', 'ISO',
        'Disaster Subgroup' and 'Disaster Type'.
    years : iterable
        Years that must appear for every disaster type.
    country : str
        Value placed in the 'ISO' column of every generated combination.
    disaster_classification : dict
        Maps each disaster subgroup to an iterable of its disaster types.

    Returns
    -------
    pd.DataFrame
        Right-merge of ``df`` onto the generated combinations: rows of ``df``
        that match no combination are dropped, and combinations absent from
        ``df`` appear with NaN in the non-key columns.
    """
    key_columns = ['Start Year', 'ISO', 'Disaster Subgroup', 'Disaster Type']

    grid_rows = []
    for subgroup, types_in_subgroup in disaster_classification.items():
        for disaster_type in types_in_subgroup:
            for year in years:
                grid_rows.append((year, country, subgroup, disaster_type))

    grid = pd.DataFrame(grid_rows, columns=key_columns)
    return df.merge(grid, on=key_columns, how="right")

In [8]:
# Factor every yearly GDP column is multiplied by after loading.
# NOTE(review): presumably rebases the constant-dollar GDP series to another
# price year - confirm where 1.14 comes from.
GDP_SCALE_FACTOR = 1.14
# Year columns of the GDP file that get scaled (1960 through 2023).
GDP_YEAR_COLUMNS = [str(gdp_year) for gdp_year in range(1960, 2024)]

df_gdp = pd.read_csv('../gdp_data_constant.csv')
df_disaster = pd.read_csv('../Preprocessed-Natural-Disasters.csv', delimiter=';')

# Scale all year columns in one vectorised assignment; replacing the columns
# (rather than writing through .loc) lets integer columns become float cleanly.
df_gdp[GDP_YEAR_COLUMNS] = df_gdp[GDP_YEAR_COLUMNS] * GDP_SCALE_FACTOR

In [9]:
# Distinct subgroup and type labels present in the disaster data.
disaster_subgroups = df_disaster['Disaster Subgroup'].unique()
disaster_types = df_disaster['Disaster Type'].unique()

# Map every disaster subgroup to the disaster types observed under it.
disaster_classification = {
    dis_subgroup: list(
        df_disaster.loc[df_disaster['Disaster Subgroup'] == dis_subgroup, 'Disaster Type'].unique()
    )
    for dis_subgroup in disaster_subgroups
}

isos = df_disaster['ISO'].unique()

years = list(range(1960,2023))

share_columns = ['Start Year', 'ISO', 'Disaster Subgroup', 'Disaster Type', 'share']

# Collect the per-country frames and concatenate once at the end; calling
# pd.concat inside the loop would re-copy the accumulated frame every time.
gdp_frames = []
for iso in isos:
    gdp_data = get_gdp_data(df_gdp, df_disaster, years, country=iso, categories=True)
    gdp_data = fill_years(gdp_data[share_columns], years, iso, disaster_classification)
    # Combinations without recorded damages get a share of 0.
    gdp_data = gdp_data.fillna(0)
    gdp_frames.append(gdp_data)

# pd.concat raises on an empty list, so fall back to an empty frame.
total_gdp_df = pd.concat(gdp_frames) if gdp_frames else pd.DataFrame()

In [None]:
# Write the combined per-country share table to disk, without the row index.
total_gdp_df.to_csv(path_or_buf='../gdp_data.csv', index=False)