In [1]:
import pandas as pd
import geopandas as gpd
import rasterio
import matplotlib.pyplot as plt
import glob
import geopandas as gpd
import os
from rasterio.enums import Resampling # ✅ Import Resampling
from shapely.geometry import Point
import numpy as np
from shapely.ops import unary_union


from scripts.explore_raster import show_raster , describe_raster
from scripts.clip_raster_with_shape import clip_raster_with_shape
from scripts.plots import plot_fire_map
from scripts.concat_shapefiles import concat_shapefiles
from scripts.explore_shapefiles import describe_shapefile, show_landcover_separated

alg_tun_shp = gpd.read_file("../data/shapefiles/combined/alg_tun.shp")
alg_tun_gpd = gpd.GeoDataFrame(geometry=[unary_union(alg_tun_shp.geometry)],
                                crs=alg_tun_shp.crs)


from scripts.raster_to_df import raster_to_dataframe_filter_background , raster_to_dataframe
from eda_scripts.eda_utils import check_missing_values


## Handeling Missing data

### Climat Dataset

Check if there is any missing data

In [2]:
tmax_paths = sorted(glob.glob("../data/climate_dataset/5min/max/*.tif"))
df_maxs = []
df_mins = []
df_precs = []

for tmax_path in tmax_paths : 
 tmax_image, tmax_transform, tmax_meta = clip_raster_with_shape(
    tmax_path,
    alg_tun_gpd
 )

 #  Get CRS from original raster
 with rasterio.open(tmax_path) as src:
        crs = src.crs


 df_maxs.append(raster_to_dataframe_filter_background(
    tmax_image, tmax_transform, alg_tun_shp , crs , value_name = "tmax"
 ))



tmin_paths = sorted(glob.glob("../data/climate_dataset/5min/min/*.tif"))

for tmin_path in tmin_paths : 
 tmin_image, tmin_transform, tmin_meta = clip_raster_with_shape(
    tmin_path,
    alg_tun_gpd
 )

 #  Get CRS from original raster
 with rasterio.open(tmin_path) as src:
        crs = src.crs

 df_mins.append(raster_to_dataframe_filter_background(
    tmin_image, tmin_transform, alg_tun_shp,crs , value_name = "tmin"
 ))


tprec_paths = sorted(glob.glob("../data/climate_dataset/5min/prec/*.tif"))


for tprec_path in tprec_paths : 
 tprec_image, tprec_transform, tprec_meta = clip_raster_with_shape(
    tprec_path,
    alg_tun_gpd
 )
 with rasterio.open(tmin_path) as src:
        crs = src.crs
 df_precs.append(raster_to_dataframe_filter_background(
    tprec_image, tprec_transform, alg_tun_shp , value_name = "tprec"
 ))

TypeError: raster_to_dataframe_filter_background() missing 1 required positional argument: 'src_crs'

In [None]:
for df_max in df_maxs:
 check_missing_values(df_max)

for df_min in df_mins:
 check_missing_values(df_min)

for df_prec in df_precs:
 check_missing_values(df_prec)

In [None]:
def save_dataframe(df, folder_path, name="dataframe.csv"):
    os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, name)
    df.to_csv(file_path, index=False)
    print(f"✅ Saved: {file_path}")


i=0
for df_max in df_maxs : 
    save_dataframe(df_max, "../data/preprocessed/climat/tmax/", name=f"tmax_{i}.csv")
    i+=1

i=0
for df_min in df_mins : 
    save_dataframe(df_max, "../data/preprocessed/climat/tmin/", name=f"tmin_{i}.csv")
    i+=1

i=0
for df_prec in df_precs : 
    save_dataframe(df_max, "../data/preprocessed/climat/tprec/", name=f"tprec_{i}.csv")
    i+=1

## Elevation Dataset

In [None]:
elevation_files = sorted(glob.glob("../data/elevation_dataset/be15_grd/*.adf")) 
elevation_dfs = []
for elevation_file in elevation_files : 
 elevation_image, elevation_transform, elevation_meta = clip_raster_with_shape(
    elevation_file,
    alg_tun_gpd
 )

 elevation_dfs.append(raster_to_dataframe(elevation_image, elevation_transform) )

In [None]:
i=0
for  elevation_df in  elevation_dfs : 
    save_dataframe(elevation_df, "../data/preprocessed/elevation/", name=f"elevation_{i}.csv")
    i+=1

 ## LandCover dataset