In [2]:
import polars as pl
import polars.selectors as cs

In [19]:
# Load the 1970-2021 disaster records and give the type column the
# identifier-friendly name used throughout the rest of the notebook.
# ignore_errors=True is passed to the reader, so parsing problems do not abort the load.
disasters = (
    pl.read_csv("1970-2021_DISASTERS.csv", ignore_errors=True)
    .rename({"Disaster Type": "disasterType"})
)

# pl.count(<column>) reports how many non-null values the column holds, so
# these two numbers show how many records actually carry a deaths / damages
# figure (despite the word "Columns" in the labels).
deaths_non_null = disasters.select(pl.count("Total Deaths"))
damages_non_null = disasters.select(pl.count("Total Damages ('000 US$)"))

print("Total death Columns:", deaths_non_null)
print("Total Damages Columns:", damages_non_null)


Total death Columns: shape: (1, 1)
┌──────────────┐
│ Total Deaths │
│ ---          │
│ u32          │
╞══════════════╡
│ 10199        │
└──────────────┘
Total Damages Columns: shape: (1, 1)
┌──────────────────────────┐
│ Total Damages ('000 US$) │
│ ---                      │
│ u32                      │
╞══════════════════════════╡
│ 4863                     │
└──────────────────────────┘


In [4]:
# Event counts per (Year, Country, disasterType), with the rarest disaster
# types folded into a single "Other" category.

# How many of the least frequent disaster types are merged into "Other".
n_rare_types = 6

# Rank disaster types by their total number of events. Only the count is
# aggregated (no summing of Year / Country), and the type name is used as a
# secondary sort key so the cutoff is deterministic when counts tie.
type_totals = (
    disasters.group_by("disasterType")
    .len()
    .sort(["len", "disasterType"])
)
least_freq = type_totals.head(n_rare_types).get_column("disasterType").to_list()

# Relabel BEFORE grouping so that several rare types occurring in the same
# year and country collapse into one "Other" row instead of leaving duplicate
# (Year, Country, "Other") keys behind. The replacement list is sized from
# least_freq so it always matches, whatever the cutoff is.
annual_country_freq = (
    disasters.with_columns(
        disasterType=pl.col("disasterType").replace(
            least_freq, ["Other"] * len(least_freq)
        )
    )
    .group_by(["Year", "Country", "disasterType"], maintain_order=True)
    .len()
)

# Sanity check: show the rows that ended up in the "Other" category.
annual_country_freq.filter(pl.col("disasterType") == "Other")


Year,Country,disasterType,len
i64,str,str,u32
1971,"""Canada""","""Other""",1
1975,"""Costa Rica""","""Other""",1
1975,"""Ecuador""","""Other""",2
1976,"""Costa Rica""","""Other""",1
1976,"""Ecuador""","""Other""",1
…,…,…,…
2021,"""Philippines (the)""","""Other""",1
2021,"""Canary Is""","""Other""",1
2019,"""Uganda""","""Other""",1
2021,"""Congo (the Democratic Republic…","""Other""",1


In [15]:
# Consolidate the least frequent disaster types into an "Other" category,
# then count events per (Year, disasterType).
# `least_freq` is the list of rare type names built in the earlier cell; the
# replacement list is sized from it rather than hard-coded, so changing the
# cutoff there cannot cause a length mismatch here.
annual_frequencies = (
    disasters.with_columns(
        disasterType=pl.col("disasterType").replace(
            least_freq, ["Other"] * len(least_freq)
        )
    )
    .select(["Year", "disasterType"])
    .group_by(["Year", "disasterType"], maintain_order=True)
    .len()
    # compkey is Year immediately followed by disasterType, e.g. "1970Flood".
    .with_columns(
        pl.concat_str([pl.col("Year"), pl.col("disasterType")]).alias("compkey")
    )
)

annual_frequencies


Year,disasterType,len,compkey
i64,str,u32,str
1970,"""Flood""",31,"""1970Flood"""
1970,"""Storm""",24,"""1970Storm"""
1970,"""Drought""",2,"""1970Drought"""
1970,"""Landslide""",6,"""1970Landslide"""
1970,"""Earthquake""",12,"""1970Earthquake"""
…,…,…,…
2021,"""Landslide""",10,"""2021Landslide"""
2021,"""Earthquake""",19,"""2021Earthquake"""
2021,"""Other""",10,"""2021Other"""
2020,"""Epidemic""",6,"""2020Epidemic"""


In [34]:
# Load economic damages per disaster type ("Entity" in the CSV), apply the same
# "Other" consolidation used for the frequency tables, and keep 1970-2021.
# `least_freq` is the list of rare type names built in an earlier cell; the
# replacement list is sized from it instead of a hard-coded 6.
# NOTE(review): relabelled rows are not re-aggregated here, so several "Other"
# rows can share a Year. The file also carries an "All disasters" aggregate
# entity, which should be excluded before summing across types.
disaster_costs = (
    pl.read_csv("disaster_costs.csv")
    .rename({"Entity": "disasterType"})
    .with_columns(
        disasterType=pl.col("disasterType").replace(
            least_freq, ["Other"] * len(least_freq)
        )
    )
    # is_between is inclusive on both ends by default: 1970 <= Year <= 2021.
    .filter(pl.col("Year").is_between(1970, 2021))
)

disaster_costs

disasterType,Code,Year,Total economic damages
str,str,i64,i64
"""All disasters""",,1970,3117534486
"""All disasters""",,1971,1747369467
"""All disasters""",,1972,4334055447
"""All disasters""",,1973,2426476263
"""All disasters""",,1974,4064924487
…,…,…,…
"""Wildfire""",,2017,16921999488
"""Wildfire""",,2018,22801999744
"""Wildfire""",,2019,3071659340
"""Wildfire""",,2020,11831341172


In [45]:
# Load the full EM-DAT export and keep events that started in 1970-2021.
# ignore_errors=True is passed to the reader, so parsing problems do not abort the load.
full_data = pl.read_csv("full_emdat.csv", ignore_errors=True).filter(
    # is_between is inclusive on both ends by default.
    pl.col("Start Year").is_between(1970, 2021)
)

# Per-event adjusted damages with shorter column names.
# NOTE: this rebinds `disaster_costs`, replacing the frame that was built from
# disaster_costs.csv with a per-event table that has a different schema.
disaster_costs = (
    full_data.select(
        [
            "Start Year",
            "Disaster Type",
            "Region",
            "Country",
            "Total Damage, Adjusted ('000 US$)",
        ]
    )
    # Keep only events with a recorded damage figure. The subset is given as a
    # column name, which is the documented input for drop_nulls.
    .drop_nulls(subset="Total Damage, Adjusted ('000 US$)")
    .rename(
        {
            "Disaster Type": "disasterType",
            "Start Year": "Year",
            "Total Damage, Adjusted ('000 US$)": "Cost ($K)",
        }
    )
)

disaster_costs

Year,disasterType,Region,Country,Cost ($K)
i64,str,str,str,i64
1971,"""Drought""","""Africa""","""Gambia""",1505
1970,"""Flood""","""Africa""","""Morocco""",235442
1970,"""Flood""","""Americas""","""Peru""",40810
1970,"""Flood""","""Americas""","""Argentina""",196202
1970,"""Storm""","""Africa""","""Madagascar""",89468
…,…,…,…,…
2021,"""Wildfire""","""Americas""","""United States of America""",3710805
2021,"""Flood""","""Asia""","""Pakistan""",11245
2021,"""Drought""","""Asia""","""China""",3485908
2021,"""Drought""","""Americas""","""Brazil""",3373459
