In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the yearly cost table. Every column except "year" is parsed from text:
# the first character is dropped (presumably a currency symbol - TODO confirm)
# and the thousands separators are removed before converting to float.
cost_df = pd.read_excel("data/input/FIRE/fire_cost.xlsx")

money_columns = [name for name in cost_df.columns if name != "year"]
for name in money_columns:
    cost_df[name] = cost_df[name].str[1:].str.replace(',', '').astype(np.float64)

# Use the year as index, leave out the 'Total' column and keep the last five rows.
cost_df = cost_df.set_index("year")
class_columns = [name for name in cost_df.columns if name != 'Total']
cost_df = cost_df[class_columns][-5:]

# Store integer values rounded to the nearest thousand.
cost_df = cost_df.astype(np.int64).round(-3)
# cost_df.loc[2017, ['C', 'E']] -= 2*10**9
cost_df.to_csv("data/input/FIRE/fire_cost.csv")

## Filter the Fire data

In [None]:
import os
import json
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely import geometry

In [None]:
state_boundary_file_path = r"data/input/us-state-boundaries.json"

# Read the state-boundary records; the first record is used as the state
# outline (presumably California - TODO confirm against the data file).
with open(state_boundary_file_path) as file:
    data = json.load(file)

# Only the first ring of the first polygon of that record is turned into the outline.
first_record_geometry = data[0]['st_asgeojson']['geometry']
outline_coordinates = first_record_geometry['coordinates'][0][0]
CA = geometry.Polygon(outline_coordinates)
CA

In [None]:
# Read the fire-perimeter geodatabase into a GeoDataFrame and preview its first rows.
gdf = gpd.read_file("data/input/FIRE_DATABASE/S_USA.FinalFirePerimeter.gdb")

gdf.head()

In [None]:
# Collect the index labels of every fire whose geometry intersects the CA outline.
isin_CA = set()

for fire_id, fire_geometry in gdf["geometry"].items():
    # Records without a geometry cannot be located, so they are skipped.
    if fire_geometry is None:
        continue
    # Multi-part geometries expose `.geoms`; a single-part geometry is handled
    # as a one-element collection instead of raising AttributeError.
    parts = getattr(fire_geometry, "geoms", [fire_geometry])
    # `any` stops at the first intersecting part; one hit is enough to keep the fire.
    if any(CA.intersects(part) for part in parts):
        isin_CA.add(fire_id)

In [None]:
# Restrict the perimeter table to the fires flagged as intersecting the CA outline.
in_california = gdf.loc[list(isin_CA)]

# Keep fires from 2015 onwards that have a discovery timestamp.
is_recent = in_california["FIREYEAR"] >= 2015
has_discovery = in_california["DISCOVERYDATETIME"].notna()
indices = in_california.index[is_recent & has_discovery]

df = in_california.loc[indices]
# Reduce the discovery timestamp to a calendar date.
df["DISCOVERYDATETIME"] = pd.to_datetime(df["DISCOVERYDATETIME"]).dt.date

In [None]:
import matplotlib.pyplot as plt

# Grid of points (lon/lat columns) that the plotting cell tests against the fire polygons.
static_df = pd.read_csv("data/input/static_variables.csv", index_col=[0])[["lon", "lat"]]

# Colour per year; in the visible code it is only referenced from a commented-out plot argument.
c = {2015: 'blue', 2016: 'orange', 2017: 'red', 2018: 'purple', 2019: 'brown', 2020: 'red', 2021: 'gray'}

# Pre-seeded so the years 2015-2021 keep this order when the dict is iterated.
fires_by_year = {2015: [], 2016: [], 2017: [], 2018: [], 2019: [], 2020: [], 2021: []}

for i, row in df.iterrows():
    year = int(row["FIREYEAR"])
    fire_geometry = row["geometry"]
    # Multi-part geometries expose `.geoms`; a single-part geometry is stored as is.
    parts = getattr(fire_geometry, "geoms", [fire_geometry])
    # `setdefault` keeps the loop from failing on years outside the pre-seeded range
    # (the upstream filter is only `FIREYEAR >= 2015`, with no upper bound).
    fires_by_year.setdefault(year, []).extend(parts)

In [None]:
# Marker size of the plotted grid points and the size they should have in the legend.
POINT_MARKER_SIZE = 0.4
LEGEND_MARKER_SIZE = 12

fig, ax = plt.subplots(figsize=(10, 10))

ax.set_axis_off()
ax.set_title("Célváltozó évenkénti lebontásban", fontsize=24)
ax.plot(*CA.exterior.xy, c='black')

for year, fires in fires_by_year.items():
    plot_x = []
    plot_y = []
    for fire in fires:
        # Pre-filter the grid with the polygon's bounding box so the exact
        # containment test only runs on nearby points.
        lon_min, lat_min, lon_max, lat_max = fire.bounds
        in_bbox = (static_df["lon"].between(lon_min, lon_max) &
                   static_df["lat"].between(lat_min, lat_max))
        candidates = static_df[in_bbox]
        for lon, lat in zip(candidates["lon"], candidates["lat"]):
            if fire.contains(geometry.Point(lon, lat)):
                plot_x.append(lon)
                plot_y.append(lat)
    ax.plot(plot_x, plot_y, 'o', markersize=POINT_MARKER_SIZE, label=year) # c=c[year]

    # Years after 2019 are not drawn.
    if year == 2019:
        break

# `markerscale` enlarges the legend markers through the public API; the previous
# approach relied on `legend.legendHandles[i]._legmarker`, a private attribute,
# and on exactly five legend entries being present.
legend = ax.legend(fontsize=14, markerscale=LEGEND_MARKER_SIZE / POINT_MARKER_SIZE)
plt.savefig("data/output/fire_var_plot.png")
plt.show()

In [None]:
from pyproj import Geod
from shapely import wkt

# Unit conversions used below.
SQUARE_METERS_PER_KM2 = 10**6
ACRES_PER_KM2 = 247.105381

geod = Geod(ellps="WGS84")

# `geometry_area_perimeter` returns an (area, perimeter) pair. Only the area
# (first element) belongs in the area column; adding the perimeter, as the
# previous version did, mixes metres into square metres. The area is signed
# according to ring orientation, hence the absolute value.
df["TOTALKM^2"] = df.apply(
    lambda x: abs(geod.geometry_area_perimeter(x["geometry"])[0]) / SQUARE_METERS_PER_KM2,
    axis=1,
)

df["TOTALACRES"] = ACRES_PER_KM2 * df["TOTALKM^2"]

df.to_csv(r"data/input/FIRE/fire_database.csv")

## Estimating the cost per fire

In [None]:
# Reload the fire table from the CSV written by the area-computation cell;
# the first CSV column becomes the index. The last line displays the frame.
fire_data = pd.read_csv("data/input/FIRE/fire_database.csv", index_col=[0])
fire_data

In [None]:
# Size-class table (semicolon separated); the bounds use a decimal comma,
# so both bound columns are converted to floats after swapping it for a dot.
fire_class_map = pd.read_csv("data/input/FIRE/fire_class_map.csv", sep=";")
for bound in ("lower", "upper"):
    fire_class_map[bound] = fire_class_map[bound].str.replace(",", ".").astype(float)
fire_class_map

In [None]:
# Assign each fire the size class whose [lower, upper) acreage interval contains it.
# One boolean mask per class replaces the nested row-by-row loop; classes are applied
# in table order, so a later matching class still overrides an earlier one.
# The loop variable is deliberately not called `c`, which would overwrite the
# colour dictionary of that name defined for the map plot.
acres = fire_data["TOTALACRES"]
for size_class, lower, upper in fire_class_map.itertuples(index=False, name=None):
    in_class = (acres >= lower) & (acres < upper)
    fire_data.loc[in_class, "SIZECLASS"] = size_class

# Number of fires per size class, used later to spread class totals over fires.
class_count_dict = fire_data["SIZECLASS"].value_counts().to_dict()
class_count_dict

In [None]:
# Sum the cost table over its rows to get one integer total per column, keyed by column name.
yearly_costs = pd.read_csv("data/input/FIRE/fire_cost.csv", index_col=[0])
column_totals = yearly_costs.sum()

fire_cost = column_totals.astype(np.int64).to_dict()
fire_cost

In [None]:
# Average cost per fire for each class (class total / number of fires),
# rounded to the nearest thousand and shown for classes C-G.
per_fire_cost = {
    size_class: [fire_cost[size_class] / class_count_dict[size_class]]
    for size_class in fire_cost
}
per_fire_table = pd.DataFrame(per_fire_cost, index=["kárbecslés / tűz ($)"])
per_fire_table.round(-3).astype(np.int64)[["C", "D", "E", "F", "G"]]

In [None]:
# Only these size classes are kept for the cost estimate.
COST_CLASSES = ["C", "D", "E", "F", "G"]

# Filter first: looking up the cost of a row whose class is outside this list
# (or missing) would otherwise fail before the row is dropped.
fire_data = fire_data[fire_data["SIZECLASS"].isin(COST_CLASSES)].copy()

# Each fire receives an equal share of its class total.
cost_per_fire = {
    size_class: fire_cost[size_class] / class_count_dict[size_class]
    for size_class in fire_data["SIZECLASS"].unique()
}
# Create COST directly as a float column instead of filling an integer column cell by cell.
fire_data["COST"] = fire_data["SIZECLASS"].map(cost_per_fire).astype(np.float64)

fire_data

In [None]:
# Write the current fire_data frame to CSV so later cells can reload it from disk.
fire_data.to_csv("data/input/FIRE/fire_df.csv")

In [None]:
import random

# Reload the per-fire table from the location the notebook writes it to
# ("data/input/FIRE/fire_df.csv"); the previous "fire/fire_df.csv" path is not
# produced by any visible cell.
fire_data = pd.read_csv("data/input/FIRE/fire_df.csv", index_col=[0])

# NOTE(review): this file is not produced by any visible cell - confirm that
# "fire/" is its intended location.
# The first two rows are skipped and the table is transposed, so the remaining
# row labels become the columns.
fire_distribution_df = pd.read_csv("fire/fire_distribution.csv", index_col=[0])[2:].T
fire_distribution_df

In [None]:
# Index labels of the fires belonging to each size class.
index_dict = {
    size_class: list(fire_data[fire_data["SIZECLASS"] == size_class].index)
    for size_class in fire_data["SIZECLASS"].unique()
}

# Sampling pool: every class name is repeated as many times as the count
# stored for it in row '0' of the distribution table.
distribution = []

for cls in fire_distribution_df.columns:
    repetitions = fire_distribution_df.loc['0', cls]
    distribution.extend([cls] * repetitions)
    
def calc_damage_estimate(profile, n, seed=None):
    """Simulate `n` damage totals for a burned-area budget of `profile`.

    Each simulation repeatedly draws a size class from the module-level
    `distribution` list, then a fire of that class from `index_dict`, and adds
    that fire's "COST" (looked up in `fire_data`) until the accumulated
    "TOTALKM^2" reaches `profile`. The fire that crosses the budget contributes
    only the share of its cost matching the area that was still missing.

    Parameters
    ----------
    profile : float
        Total area to fill, in the units of the "TOTALKM^2" column.
    n : int
        Number of independent simulations.
    seed : int, optional
        When given, a private generator seeded with this value is used so the
        result is reproducible. When omitted, the global `random` state is used.

    Returns
    -------
    list
        The `n` simulated damage totals, in the units of the "COST" column.

    Raises
    ------
    IndexError
        If `distribution` (or the fire list of a drawn class) is empty.
    KeyError
        If a drawn class has no entry in `index_dict`.
    """
    rng = random if seed is None else random.Random(seed)
    estimates = []

    for _ in range(n):
        damage_in_dollar = 0
        total_area = 0

        while total_area < profile:
            # `choice` draws every entry with equal probability. The previous
            # `randint(0, len(distribution)) - 1` produced -1..len-1, which hit
            # the last entry twice as often as any other.
            sample_class = rng.choice(distribution)
            i = rng.choice(index_dict[sample_class])

            cost = fire_data.loc[i, "COST"]
            km_squared = fire_data.loc[i, "TOTALKM^2"]

            if (total_area + km_squared) > profile:
                # Last fire: charge only the fraction needed to reach the budget.
                damage_in_dollar += (profile - total_area) / km_squared * cost
            else:
                damage_in_dollar += cost
            total_area += km_squared

        estimates.append(damage_in_dollar)
    return estimates

In [None]:
# Area budget passed to the simulation and shown in the legend.
probability_profile = 19254.8926013686
N_SIMULATIONS = 500
Y_MAX = 60

# Simulated totals expressed in billions.
x = [i/10**9 for i in calc_damage_estimate(probability_profile, N_SIMULATIONS)]
mean_x = np.mean(x)
std_x = np.std(x)

fig, ax = plt.subplots(figsize=(12, 5))

ax.hist(x, bins=25, alpha=0.75)
ax.set_ylim(0, Y_MAX)
# Raw strings keep the LaTeX backslashes without relying on invalid escape
# sequences such as "\o" or "\p"; the rendered text is unchanged.
ax.vlines([mean_x], 0, Y_MAX, colors=['green'], label=r"$\overline{x}$", lw=2.5)
ax.vlines([mean_x - std_x, mean_x + std_x], 0, Y_MAX, colors=["red", "red"],
          label=r"$\overline{x}\pm \hat{\sigma}$", lw=2.5)

# Reference value drawn for comparison (labelled as the official estimate).
ax.vlines([12.079], 0, Y_MAX, colors=['black'], label='hivatalos becslés', lw=2.5)

# Invisible artists: they only add text rows to the legend.
ax.plot([], [], ' ', label="\n" + r"$\overline{x}=$" + f"{mean_x:.3f}")
ax.plot([], [], ' ', label=r"$\hat{\sigma}=$" + f"{std_x:.3f}")
ax.plot([], [], ' ', label=f"valószínűségi-\nprofil = {probability_profile:.2f}")

ax.set_xlabel("kárbecslés $mrd")
ax.set_title("Éves becsült összkárok eloszlása, 2020", fontsize=24)
plt.legend(fontsize=12)
plt.savefig("data/output/valószínűségi-profileloszlás.png")
plt.show()