In [4]:
import os
os.chdir(r"C:\Users\i-ryb\Desktop\fire_forecast_kurgan")


In [5]:
import os
os.listdir("data_raw/fires_viirs")


['VIIRS_NOAA20.csv', 'VIIRS_S_NPP.csv']

In [6]:
import pandas as pd
import geopandas as gpd
import numpy as np


In [8]:
grid = gpd.read_file("data_processed/grid_with_y_kurgan.geojson")

fires_snp = pd.read_csv("data_raw/fires_viirs/VIIRS_NOAA20.csv")
fires_noaa = pd.read_csv("data_raw/fires_viirs/VIIRS_S_NPP.csv")
fires_all = pd.concat([fires_snp, fires_noaa], ignore_index=True)
fires_all["acq_date"] = pd.to_datetime(fires_all["acq_date"])


In [9]:
fires_gdf = gpd.GeoDataFrame(
    fires_all,
    geometry=gpd.points_from_xy(
        fires_all.longitude,
        fires_all.latitude
    ),
    crs="EPSG:4326"
)

fires_gdf = fires_gdf.to_crs(grid.crs)


In [10]:
fires_cells = gpd.sjoin(
    fires_gdf,
    grid[["cell_id", "geometry"]],
    how="left",
    predicate="within"
)

fires_cells = fires_cells.dropna(subset=["cell_id"])
fires_cells["cell_id"] = fires_cells["cell_id"].astype(int)


In [11]:
fires_cells["date"] = fires_cells["acq_date"].dt.date

fires_daily = (
    fires_cells
    .groupby(["cell_id", "date"])
    .size()
    .rename("fire_today")
    .reset_index()
)


In [12]:
fires_daily["date"] = pd.to_datetime(fires_daily["date"])
fires_daily["date_t_plus_1"] = fires_daily["date"] - pd.Timedelta(days=1)

y_t_plus_1 = fires_daily[["cell_id", "date_t_plus_1"]]
y_t_plus_1 = y_t_plus_1.rename(columns={"date_t_plus_1": "date"})
y_t_plus_1["y_t_plus_1"] = 1


In [13]:
dates = pd.date_range(
    fires_daily["date"].min(),
    fires_daily["date"].max(),
    freq="D"
)

cells = grid["cell_id"].unique()

dataset = (
    pd.MultiIndex
    .from_product([cells, dates], names=["cell_id", "date"])
    .to_frame(index=False)
)

dataset = dataset.merge(
    y_t_plus_1,
    on=["cell_id", "date"],
    how="left"
)

dataset["y_t_plus_1"] = dataset["y_t_plus_1"].fillna(0).astype(np.int8)


In [14]:
dataset["y_t_plus_1"].value_counts()


y_t_plus_1
0    101493384
1          723
Name: count, dtype: int64

In [15]:
dataset.to_parquet(
    "data_processed/dataset_y_t_plus_1.parquet",
    index=False
)


In [16]:
import geopandas as gpd

grid = gpd.read_file("data_processed/grid_with_y_kurgan.geojson")

grid.to_file(
    "data_processed/kurgan_grid_shp",
    driver="ESRI Shapefile"
)

  ogr_write(
  ogr_write(
