In [14]:
import pyodbc
import pandas as pd
import geopandas as gpd
import rioxarray
import xarray
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns



print("Loading elevation dataset...")
elevation_dataset = xarray.open_dataset(r"../local_dataset/dataset/elevation_full.nc")
elevation_dataset = elevation_dataset.to_dataframe().reset_index(drop=True)
print("Elevation dataset shape:", elevation_dataset.shape)
print(elevation_dataset.dtypes)

# print("Loading soil dataset...")
# soil_dataset = xarray.open_dataset(r"../local_dataset/dataset/soil_full.nc")
# soil_dataset = soil_dataset.to_dataframe().reset_index(drop=True)
# print("Soil dataset shape:", soil_dataset.shape)

fire_dataset = pd.read_csv(r"../local_dataset/raw/fire/algeria_tunisia.csv").rename(columns={"latitude": "lat", "longitude": "lon"})
fire_dataset["lat"] = fire_dataset["lat"].astype(np.float64)
fire_dataset["lon"] = fire_dataset["lon"].astype(np.float64)
print("Fire dataset shape:", fire_dataset.shape)
print(fire_dataset.dtypes)



Loading elevation dataset...
Elevation dataset shape: (13183993, 3)
lon          float64
lat          float64
elevation    float64
dtype: object
Fire dataset shape: (90250, 15)
lat           float64
lon           float64
bright_ti4    float64
scan          float64
track         float64
acq_date       object
acq_time        int64
satellite      object
instrument     object
confidence     object
version         int64
bright_ti5    float64
frp           float64
daynight       object
type            int64
dtype: object


In [13]:
precision = 3
soil_dataset["lon"] = soil_dataset["lon"].round(precision)
soil_dataset["lat"] = soil_dataset["lat"].round(precision)
elevation_dataset["lon"] = elevation_dataset["lon"].round(precision)
elevation_dataset["lat"] = elevation_dataset["lat"].round(precision)

NameError: name 'soil_dataset' is not defined

In [15]:
print("Merging datasets on 'lon' and 'lat' columns...")
dataset = pd.merge(fire_dataset, elevation_dataset, on=["lon", "lat"], how="inner")
print("Merge complete.")

Merging datasets on 'lon' and 'lat' columns...
Merge complete.


In [16]:
print("Merged dataset shape:", dataset.shape)
dataset.describe()

Merged dataset shape: (0, 16)


Unnamed: 0,lat,lon,bright_ti4,scan,track,acq_time,version,bright_ti5,frp,type,elevation
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,,,,,,,,,,,
std,,,,,,,,,,,
min,,,,,,,,,,,
25%,,,,,,,,,,,
50%,,,,,,,,,,,
75%,,,,,,,,,,,
max,,,,,,,,,,,


# Using Geopandas for spatial operations

In [18]:
import geopandas as gpd
from shapely.geometry import Point

# Convert each dataset to a GeoDataFrame
soil_gdf = gpd.GeoDataFrame(
    fire_dataset,
    geometry=gpd.points_from_xy(fire_dataset.lon, fire_dataset.lat),
    crs="EPSG:4326"
)
elev_gdf = gpd.GeoDataFrame(
    elevation_dataset,
    geometry=gpd.points_from_xy(elevation_dataset.lon, elevation_dataset.lat),
    crs="EPSG:4326"
)

# Spatial join (nearest)
merged = gpd.sjoin_nearest(soil_gdf, elev_gdf, distance_col="dist", max_distance=0.1)

print(merged.shape)




(88966, 21)


In [19]:
import geopandas as gpd

# Convert each dataset to GeoDataFrame in geographic coordinates (degrees)
soil_gdf = gpd.GeoDataFrame(
    fire_dataset,  # <-- you probably meant "soil_dataset" here, right?
    geometry=gpd.points_from_xy(fire_dataset.lon, fire_dataset.lat),
    crs="EPSG:4326"  # WGS84 latitude/longitude
)

elev_gdf = gpd.GeoDataFrame(
    elevation_dataset,
    geometry=gpd.points_from_xy(elevation_dataset.lon, elevation_dataset.lat),
    crs="EPSG:4326"
)

# ✅ STEP 1: Project both to a suitable metric CRS (meters)
# Option 1: Automatic local UTM
utm_crs = soil_gdf.estimate_utm_crs()
soil_gdf = soil_gdf.to_crs(utm_crs)
elev_gdf = elev_gdf.to_crs(utm_crs)

# Option 2 (manual alternative): global projection
# soil_gdf = soil_gdf.to_crs(epsg=3857)
# elev_gdf = elev_gdf.to_crs(epsg=3857)

# ✅ STEP 2: Spatial join with distance in METERS
merged = gpd.sjoin_nearest(
    soil_gdf,
    elev_gdf,
    distance_col="dist",
    max_distance=100  # 100 meters
)

print("Merged shape:", merged.shape)
print("Projection used:", merged.crs)


Merged shape: (15031, 21)
Projection used: EPSG:32631


In [20]:
print(merged.head())

    lat_left  lon_left  bright_ti4  scan  track    acq_date  acq_time  \
1   31.49290   9.48947      302.32  0.56   0.51  2024-01-01       142   
5   31.49449   9.49368      302.95  0.47   0.40  2024-01-02       124   
9   33.36476   8.54684      308.95  0.41   0.37  2024-01-03       104   
13  35.77696   9.88955      300.36  0.41   0.45  2024-01-04        45   
14  33.36478   8.54716      300.52  0.45   0.47  2024-01-04        45   

   satellite instrument confidence  ...  bright_ti5   frp  daynight type  \
1        N20      VIIRS          n  ...      276.10  1.23         N    0   
5        N20      VIIRS          n  ...      271.83  0.96         N    0   
9        N20      VIIRS          n  ...      278.03  0.68         N    2   
13       N20      VIIRS          n  ...      282.89  1.06         N    2   
14       N20      VIIRS          n  ...      277.54  0.85         N    2   

                           geometry index_right  lon_right  lat_right  \
1       POINT (1116917.147 3502