In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd

import rasterio
from rasterio.sample import sample_gen

# Merge Fire Data with Land Cover

In [None]:
# Attach the nearest land-cover polygon's attributes to every fire detection.
algeria_lc = gpd.read_file('..\\Land Cover dataset\\merged_DZA_TUN_cleaned.shp')
df_csv = pd.read_csv('..\\fire_test\\fire_alg_tun.csv')

# Build point geometries from the lon/lat columns (declared as EPSG:4326),
# then reproject them to the polygon layer's CRS so both sides of the join
# are expressed in the same coordinate system.
gdf_points = gpd.GeoDataFrame(
    df_csv,
    geometry=gpd.points_from_xy(df_csv['longitude'], df_csv['latitude']),
    crs="EPSG:4326"
)
gdf_points = gdf_points.to_crs(algeria_lc.crs)

merged = gpd.sjoin_nearest(
    gdf_points,
    algeria_lc,
    how="inner",
    distance_col="dist_to_poly"
)

# sjoin_nearest returns one row per equally-near polygon, so a point that is
# tied between several polygons would otherwise appear several times in the
# output. Keep exactly one row per original fire record (the result keeps the
# left frame's index, which is unique because it comes from read_csv).
merged = merged[~merged.index.duplicated(keep="first")]

# Drop the join bookkeeping columns; only the tabular attributes are exported.
merged = merged.drop(columns=['geometry', 'index_right',"dist_to_poly"])
merged.to_csv('..\\fire_test\\viirs-jpss1_2024_enriched_approx.csv', index=False)

# Add Elevation Data

In [None]:
# Add an "elevation" column by reading the raster cell under each point.
df = pd.read_csv("..\\fire_test\\viirs-jpss1_2024_enriched_approx.csv")

# Open the raster in a context manager so the file handle is always released.
# NOTE(review): lon/lat are passed straight to the raster transform, which
# assumes the raster is georeferenced in lon/lat degrees — TODO confirm its CRS.
with rasterio.open("..\\elevation_algeria_tunisia_cleaned.tif") as src:
    # Convert lon/lat to raster row/column indices
    rows, cols = rasterio.transform.rowcol(
        src.transform,
        df["longitude"].values,
        df["latitude"].values
    )
    # Read the values of the first band
    band = src.read(1)

rows = np.asarray(rows)
cols = np.asarray(cols)

# Points outside the raster extent produce negative or too-large indices.
# Negative ones would silently wrap around to the opposite edge and return a
# wrong elevation; too-large ones would raise IndexError. Mask them instead.
inside = (
    (rows >= 0) & (rows < band.shape[0])
    & (cols >= 0) & (cols < band.shape[1])
)

if inside.all():
    # Every point is covered: same result (and dtype) as direct indexing.
    df["elevation"] = band[rows, cols]
else:
    elevation = np.full(len(df), np.nan)
    elevation[inside] = band[rows[inside], cols[inside]]
    df["elevation"] = elevation

df.to_csv("..\\fire_test\\merged_fire_landcover_elevation.csv", index=False)

# Filter No-Fire Points by Distance to Fire Points

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist


# Load the grid and split it by label: class 1 rows are fire points,
# class 0 rows are no-fire points.
df = pd.read_csv('..\\northern_grid_add_elevation_land.csv')

is_fire = df['class'] == 1
is_no_fire = df['class'] == 0

# Fire coordinates as an (n, 2) array ordered [latitude, longitude].
fire_points = df.loc[is_fire, ['latitude', 'longitude']].values
# Independent copy so later filtering never touches `df` itself.
no_fire_points = df.loc[is_no_fire].copy()
def haversine_vectorized(lat1, lon1, lat2, lon2):
    """Great-circle distance between points given in decimal degrees.

    All four arguments may be scalars or array-likes; they are combined with
    NumPy broadcasting, so one point can be compared against many at once.

    Returns the distance scaled by R = 6371 (kilometres, taking R as the
    Earth's mean radius), as a NumPy scalar or array.
    """
    R = 6371

    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Floating-point rounding can push `a` marginally above 1 for
    # (near-)antipodal points, which would make sqrt/arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    return R * c


# Minimum distance a no-fire point must keep from every fire point, in the
# unit returned by haversine_vectorized (kilometres for R = 6371).
distance_seuil = 5

no_fire_lat = no_fire_points['latitude'].to_numpy()
no_fire_lon = no_fire_points['longitude'].to_numpy()

if len(fire_points) == 0:
    # No fire points at all: nothing can be "too close", so keep everything.
    # (np.min on an empty distance array would raise ValueError.)
    masque_a_garder = np.ones(len(no_fire_points), dtype=bool)
else:
    fire_lat = fire_points[:, 0]
    fire_lon = fire_points[:, 1]
    # One vectorised distance computation per no-fire point, iterating over
    # plain arrays rather than DataFrame.iterrows().
    masque_a_garder = np.fromiter(
        (
            np.min(haversine_vectorized(lat_nf, lon_nf, fire_lat, fire_lon)) >= distance_seuil
            for lat_nf, lon_nf in zip(no_fire_lat, no_fire_lon)
        ),
        dtype=bool,
        count=len(no_fire_points),
    )

indices_a_garder = no_fire_points.index[masque_a_garder].tolist()

# Build the filtered dataset
no_fire_filtered = no_fire_points.loc[indices_a_garder]
fire_df = df[df['class'] == 1]
df_final = pd.concat([fire_df, no_fire_filtered], ignore_index=True)

df_final.to_csv('..\\donnees_filtrees.csv', index=False)

print(f"Points fire originaux: {len(fire_df)}")
print(f"Points no fire originaux: {len(no_fire_points)}")
print(f"Points no fire après filtrage: {len(no_fire_filtered)}")
print(f"Points no fire supprimés: {len(no_fire_points) - len(no_fire_filtered)}")
print(f"Total points finaux: {len(df_final)}")

# Add Soil Data

In [None]:
# Quick look at the fire + land-cover + elevation table before adding soil.
# NOTE(review): this reads from the working directory, while the elevation
# cell writes the same file name under ..\fire_test — confirm the path.
merged_data = pd.read_csv(
    "merged_fire_landcover_elevation.csv"
)

merged_data.head(5)

In [None]:
# Load the fire / land-cover / elevation table.
# NOTE(review): the file name ends in "1", unlike the preview cell above —
# confirm which file is intended.
merged_data = pd.read_csv("merged_fire_landcover_elevation1.csv")

# Load the soil properties table and align its key name with the raster's
# soil mapping unit id so the merge below can use a single column.
soil_df = pd.read_csv("soil_properties.csv")   # <-- replace with the real file
soil_df = soil_df.rename(columns={"HWSD2_SMU_ID": "SMU"})   # rename so merge is easy

# Sample the SMU raster once for all points (in row order) instead of issuing
# one sample() call per row through iterrows().
point_coords = list(zip(merged_data["longitude"], merged_data["latitude"]))
with rasterio.open("datasets/soil/HWSD2_Algeria_Tunisia.tif") as src:
    # Each sampled item holds one value per band; the first band is the SMU id.
    smu_values = [int(pixel[0]) for pixel in src.sample(point_coords)]

# Add SMU to dataframe
merged_data["SMU"] = smu_values

# Left-merge so every point is kept even when its SMU has no soil record.
# NOTE(review): if soil_df holds several rows per SMU, this merge multiplies
# the point rows — verify the key is unique in soil_properties.csv.
final_df = merged_data.merge(soil_df, on="SMU", how="left")

print(final_df.head())

# Save result
final_df.to_csv("merged_data_with_soil.csv", index=False)

# Add Climate Data

## Precipitation

In [None]:
# (longitude, latitude) pairs used to sample the climate rasters.
# NOTE(review): `df` here is whatever an earlier cell left in the kernel, not
# the table loaded by the climate cells below — confirm the rows line up.
coords = [*zip(df["longitude"].values, df["latitude"].values)]

In [None]:
# ---------------------------------------
# 1. Load Data
# ---------------------------------------
df = pd.read_csv("merged_data_with_soil.csv")

# Sample at the coordinates of the rows just loaded, so every sampled value
# lines up with its own row instead of depending on a `coords` list built
# from a different, earlier `df`.
coords = list(zip(df["longitude"].values, df["latitude"].values))

# Output column: accumulated precipitation
df["prec"] = 0.0


# ---------------------------------------
# 2-3. Open each monthly precipitation raster and accumulate it per point
# ---------------------------------------
for month in range(1, 13):
    path = f"datasets/climate/climate_5m_prec_2024_DZD_TUN/prec_2024-{month:02d}.tif"
    # The context manager closes each raster as soon as it has been sampled.
    with rasterio.open(path) as src:
        sampled = list(sample_gen(src, coords))
    # One value per point is expected (single-band raster); flatten to 1-D.
    sampled = np.array(sampled).reshape(-1)

    # Add monthly precipitation to annual total
    df["prec"] += sampled


# ---------------------------------------
# 4. Save result
# ---------------------------------------
df.to_csv("sheet_with_precipitation.csv", index=False)

print("Finished! Annual precipitation added.")


## Tmax

In [None]:
# ---------------------------------------
# 1. Load Data
# ---------------------------------------
df = pd.read_csv("sheet_with_precipitation.csv")

# Sample at the coordinates of the rows just loaded so values and rows match,
# independent of any `coords` left over from another cell.
coords = list(zip(df["longitude"].values, df["latitude"].values))

# The output tmax column
df["tmax"] = 0.0


# ---------------------------------------
# 2-3. Open each monthly tmax raster and build the 12-month average
# ---------------------------------------
for month in range(1, 13):
    path = f"datasets/climate/climate_5m_tmax_2024_DZD_TUN/tmax_2024-{month:02d}.tif"
    # The context manager closes each raster as soon as it has been sampled.
    with rasterio.open(path) as src:
        sampled = list(sample_gen(src, coords))
    # One value per point is expected (single-band raster); flatten to 1-D.
    sampled = np.array(sampled).reshape(-1)

    # Each month contributes one twelfth to the annual mean tmax
    df["tmax"] += sampled / 12

# ---------------------------------------
# 4. Save updated sheet
# ---------------------------------------
df.to_csv("sheet_with_tmax.csv", index=False)

print("Finished! Output saved to sheet_with_tmax.csv")

## Tmin

In [None]:
# ---------------------------------------
# 1. Load Data
# ---------------------------------------
df = pd.read_csv("sheet_with_tmax.csv")

# Sample at the coordinates of the rows just loaded so values and rows match,
# independent of any `coords` left over from another cell.
coords = list(zip(df["longitude"].values, df["latitude"].values))

# The output tmin column
df["tmin"] = 0.0


# ---------------------------------------
# 2-3. Open each monthly tmin raster and build the 12-month average
# ---------------------------------------
for month in range(1, 13):
    path = f"datasets/climate/climate_5m_tmin_2024_DZD_TUN/tmin_2024-{month:02d}.tif"
    # The context manager closes each raster as soon as it has been sampled.
    with rasterio.open(path) as src:
        sampled = list(sample_gen(src, coords))
    # One value per point is expected (single-band raster); flatten to 1-D.
    sampled = np.array(sampled).reshape(-1)

    # Each month contributes one twelfth to the annual mean tmin
    df["tmin"] += sampled / 12

# ---------------------------------------
# 4. Save updated sheet
# ---------------------------------------
# NOTE(review): this overwrites the cell's own input file and the message
# still says "tmax"; it looks like a copy-paste leftover. The file name is
# kept unchanged because later steps may read it — confirm before renaming.
df.to_csv("sheet_with_tmax.csv", index=False)

print("Finished! Output saved to sheet_with_tmax.csv")