In [8]:
import pandas as pd
import glob
import os

# папка с пожарами
folder = "../data_raw/fires_viirs/"

# ищем все CSV
files = glob.glob(os.path.join(folder, "*.csv"))
print("Found files:", files)

dfs = []

for f in files:
    print("Loading:", f)
    df = pd.read_csv(f, usecols=["latitude", "longitude", "acq_date"])
    df.rename(columns={"acq_date": "date"}, inplace=True)
    dfs.append(df)

# объединить
fires_all = pd.concat(dfs, ignore_index=True)
print("Combined shape:", fires_all.shape)

# привести дату
fires_all["date"] = pd.to_datetime(fires_all["date"])

fires_all.head()


Found files: ['../data_raw/fires_viirs\\fire_archive_J1V-C2_701141.csv', '../data_raw/fires_viirs\\fire_archive_SV-C2_701142.csv', '../data_raw/fires_viirs\\VIIRS_NOAA20.csv', '../data_raw/fires_viirs\\VIIRS_S_NPP.csv']
Loading: ../data_raw/fires_viirs\fire_archive_J1V-C2_701141.csv
Loading: ../data_raw/fires_viirs\fire_archive_SV-C2_701142.csv
Loading: ../data_raw/fires_viirs\VIIRS_NOAA20.csv
Loading: ../data_raw/fires_viirs\VIIRS_S_NPP.csv
Combined shape: (15082126, 3)


Unnamed: 0,latitude,longitude,date
0,66.20341,70.98833,2019-01-01
1,66.19938,70.99498,2019-01-01
2,66.17555,71.01565,2019-01-01
3,61.21028,62.81031,2019-01-01
4,61.21498,62.82108,2019-01-01


In [9]:
fires_all = fires_all[
    (fires_all.date >= "2019-01-01") &
    (fires_all.date <= "2023-12-31")
]

print("Filtered shape:", fires_all.shape)


Filtered shape: (12900360, 3)


In [10]:
print(fires_all.date.min(), "→", fires_all.date.max())


2019-01-01 00:00:00 → 2023-12-31 00:00:00


In [11]:
import geopandas as gpd

# сетка
grid = gpd.read_file("../data_processed/grid_with_y_kurgan.geojson")

# fires как гео
fires_gdf = gpd.GeoDataFrame(
    fires_all,
    geometry=gpd.points_from_xy(fires_all.longitude, fires_all.latitude),
    crs="EPSG:4326"
)

# привести CRS
fires_gdf = fires_gdf.to_crs(grid.crs)

# spatial join
fires_joined = gpd.sjoin(
    fires_gdf,
    grid,
    how="inner",
    predicate="within"
)

fires_joined = fires_joined[["cell_id", "date"]].drop_duplicates()
print("Fires matched to grid:", len(fires_joined))


Fires matched to grid: 40393


In [12]:
fires_daily = (
    fires_joined
    .groupby(["cell_id", "date"])
    .size()
    .reset_index(name="fire")
)

fires_daily["fire"] = 1


In [13]:
fires_daily.to_parquet(
    "../data_processed/fires_by_grid_daily.parquet",
    index=False
)


In [14]:
y = pd.read_parquet("../data_processed/fires_by_grid_daily.parquet")

print("Rows:", len(y))
print("Fire share:", y.fire.mean())
print("Date range:", y.date.min(), "→", y.date.max())


Rows: 40393
Fire share: 1.0
Date range: 2019-02-06 00:00:00 → 2023-12-25 00:00:00
