In [None]:
import pandas as pd
import joblib
import time
import re

import numpy as np
from numpy import nan
import geopandas as gpd
from shapely.geometry import Point, box

import os
# Project root used to resolve the relative data paths below. Set the
# DATA_CHALLENGE_DIR environment variable to run the notebook from another
# location; when it is unset the original directory is used.
os.chdir(os.environ.get("DATA_CHALLENGE_DIR", r"E:\Data challenge"))

# Import Data

In [2]:
# Input files, relative to the current working directory.
drias_csv = r"data\Drias_data\RCP_2.6.csv"
flood_csv = r"data\Flood\flood_risk_results.csv"
hydro_shp = r"data\hydro\TronconHydrogra\TronconHydrograElt_FXX.shp"
coast_shp = r"data\hydro\LimiteTerreMer_FXX-shp\LimiteTerreMer_FXX.shp"

# Tabular inputs; for the DRIAS file the column names are taken from row 31
# (0-based) and the rows above it are not parsed as data.
data_drias = pd.read_csv(drias_csv, sep=";", header=31)
data_Flood = pd.read_csv(flood_csv)

# Vector layers read from shapefiles.
data_hydro = gpd.read_file(hydro_shp)
data_ocean = gpd.read_file(coast_shp)

# Tools

In [3]:
def classify_water(name):
    """Map a watercourse name to a coarse category label.

    The name is matched as-is (case-sensitive, no accent handling) against an
    ordered list of whole-word patterns; the first pattern that matches wins.

    Parameters
    ----------
    name : str or missing value
        Watercourse name. Missing values (anything ``pd.isna`` accepts as
        missing) are labelled ``"inconnu"``.

    Returns
    -------
    str
        One of ``"inconnu"``, ``"fleuve"``, ``"riviere"``, ``"canal"``,
        ``"estuaire"``, ``"petit_cours_eau"`` or ``"autre"`` (no pattern matched).
    """
    if pd.isna(name):
        return "inconnu"

    # Order matters: earlier rules take precedence over later ones.
    ordered_rules = (
        (r"\bfleuve\b", "fleuve"),
        (r"\briviere\b", "riviere"),
        (r"\bcanal\b|\bchenal\b", "canal"),
        (r"\bestuaire\b", "estuaire"),
        # small watercourses typical of France
        (r"\b(rec|ruisseau|ru|vallat|torrent|ravin)\b", "petit_cours_eau"),
    )
    for pattern, label in ordered_rules:
        if re.search(pattern, name):
            return label

    return "autre"


# Classify water sources

In [4]:
# Lower-case the raw names first.
hydro_names_lower = data_hydro["NomEntiteH"].str.lower()

# Strip accents: NFKD normalisation followed by an ASCII encode that ignores
# whatever cannot be represented, then decode back to text.
hydro_names_ascii = (
    hydro_names_lower
    .str.normalize("NFKD")
    .str.encode("ascii", errors="ignore")
    .str.decode("utf-8")
)
data_hydro["NomEntiteH_clean"] = hydro_names_ascii

# Label every cleaned name, then display how many rows fall in each category.
data_hydro["type_cours_eau"] = hydro_names_ascii.map(classify_water)
data_hydro["type_cours_eau"].value_counts()


type_cours_eau
inconnu            225617
petit_cours_eau    151057
riviere             80536
autre               33210
fleuve              13424
canal               11375
Name: count, dtype: int64

# Aggregate the different classes of water sources

In [5]:
# Split the hydrography layer by the category assigned to each row.
water_type = data_hydro["type_cours_eau"]

fleuves = data_hydro.loc[water_type == "fleuve"]
# "rivieres" groups both the river and the small-watercourse categories.
rivieres = data_hydro.loc[water_type.isin(["riviere", "petit_cours_eau"])]
estuaire = data_hydro.loc[water_type == "estuaire"]


# Process data

In [6]:
# Build one point per row from the Longitude/Latitude columns, then wrap the
# table in a GeoDataFrame declared as EPSG:4326.
drias_points = gpd.points_from_xy(data_drias["Longitude"], data_drias["Latitude"])
gdf_drias = gpd.GeoDataFrame(data_drias, geometry=drias_points, crs="EPSG:4326")


In [7]:
# Reproject every layer to the same CRS so distances between them are
# comparable (the distance columns computed later are named in metres).
target_crs = "EPSG:2154"

gdf_drias_proj, fleuves_proj, rivieres_proj, ocean_proj = (
    layer.to_crs(target_crs)
    for layer in (gdf_drias, fleuves, rivieres, data_ocean)
)


In [8]:
# 2. Optimised helper computing the minimum distance
def calculate_min_distance(point_gdf, line_gdf):
    """Return, for each point, its distance to the union of all line geometries.

    Parameters
    ----------
    point_gdf : GeoDataFrame-like
        Must support ``len()`` and expose ``.geometry.distance(geom)``.
    line_gdf : GeoDataFrame-like
        Must support ``len()`` and expose ``union_all()`` (GeoPandas >= 1.0)
        or the older ``unary_union`` attribute.

    Returns
    -------
    numpy.ndarray or the result of ``point_gdf.geometry.distance``
        An array of NaN with one entry per point when ``line_gdf`` is empty;
        otherwise the distances returned by ``point_gdf.geometry.distance``.
    """
    if len(line_gdf) == 0:
        return np.full(len(point_gdf), np.nan)

    # Merge every geometry into one so a single distance call covers them all.
    # `unary_union` is deprecated since GeoPandas 1.0 in favour of
    # `union_all()`; keep the old attribute as a fallback for older versions.
    union_all = getattr(line_gdf, "union_all", None)
    if callable(union_all):
        union_geom = union_all()
    else:
        union_geom = line_gdf.unary_union

    # Distance from each point to the merged geometry
    return point_gdf.geometry.distance(union_geom)



In [9]:
# Renumber the rows from 0 and keep that position in a `temp_id` column, which
# is used later to collapse join results back to one row per point.
gdf_drias_proj = (
    gdf_drias_proj
    .reset_index(drop=True)
    .assign(temp_id=lambda frame: frame.index)
)

In [10]:
def _nearest_per_point(points, targets, distance_col):
    """Join each point to its nearest target and keep exactly one row per point.

    Parameters
    ----------
    points : GeoDataFrame
        Point layer carrying a ``temp_id`` column that identifies each row.
    targets : GeoDataFrame
        Layer to measure the distance to.
    distance_col : str
        Name of the column that receives the computed distance.

    Returns
    -------
    DataFrame
        One row per ``temp_id``, ordered by ``temp_id``, including
        ``distance_col``.
    """
    joined = points.sjoin_nearest(targets, distance_col=distance_col, how="left")
    # The join can yield several rows for one point (e.g. equidistant targets);
    # keep the first per `temp_id`. Grouping also sorts by `temp_id`, which is
    # what makes the positional assignments below line up with `data_drias`.
    return joined.groupby('temp_id').first().reset_index()


print("calculating distance to river ...")
nearest_fleuve = _nearest_per_point(gdf_drias_proj, fleuves_proj, "dist_fleuve_m")
data_drias['dist_fleuve_m'] = nearest_fleuve['dist_fleuve_m'].values

print("calculating distance to stream ...")
nearest_riviere = _nearest_per_point(gdf_drias_proj, rivieres_proj, "dist_riviere_m")
data_drias['dist_riviere_m'] = nearest_riviere['dist_riviere_m'].values

print("calculating distance to ocean/sea ...")
nearest_ocean = _nearest_per_point(gdf_drias_proj, ocean_proj, "dist_cote_m")
data_drias['dist_cote_m'] = nearest_ocean['dist_cote_m'].values

calculating distance to river ...
calculating distance to stream ...
calculating distance to ocean/sea ...


In [11]:
# 4. Optional: convert to kilometres for readability
METRES_PER_KM = 1000

for feature in ("fleuve", "riviere", "cote"):
    metres_col = f"dist_{feature}_m"
    km_col = f"dist_{feature}_km"
    if metres_col in data_drias.columns:
        # `pop` removes the metre column while handing back its values, so the
        # table ends up with only the kilometre column.
        data_drias[km_col] = data_drias.pop(metres_col) / METRES_PER_KM
    elif km_col not in data_drias.columns:
        # Neither form exists: the distance cell has not been run.
        raise KeyError(f"{metres_col!r} not found in data_drias; compute the distances first")
    # Otherwise the conversion already happened on a previous run of this cell:
    # nothing to do, which keeps the cell safe to re-execute.

In [12]:
# 5. Afficher un aperçu
print("\nAperçu des distances calculées :")
print(data_drias[['Longitude', 'Latitude', 'dist_fleuve_km', 'dist_riviere_km', 'dist_cote_km']].head(10))

print(f"\nStatistiques :")
print(f"Distance moyenne au fleuve : {data_drias['dist_fleuve_km'].mean():.2f} km")
print(f"Distance moyenne à la rivière : {data_drias['dist_riviere_km'].mean():.2f} km")
print(f"Distance moyenne à la côte : {data_drias['dist_cote_km'].mean():.2f} km")

# 6. Sauvegarder si besoin
data_drias.to_csv(r"data\Drias_data\RCP_2.6_with_distance.csv", sep=";", index=False)
print("\nThe file is saved")


Aperçu des distances calculées :
   Longitude  Latitude  dist_fleuve_km  dist_riviere_km  dist_cote_km
0     9.1495   41.3373       49.508247         5.648592      3.955963
1     9.0628   41.4149       38.326724         5.089265      2.651321
2     9.1577   41.4087       43.462308         0.538889      1.831904
3     9.2527   41.4025       49.359392         1.719596      0.168025
4     8.8808   41.4985       24.003419         2.192640      0.674574
5     8.9759   41.4925       27.235670         0.159483      0.840079
6     9.0710   41.4864       32.164237         0.508547      0.860754
7     9.1660   41.4802       38.133460         0.001389      5.578398
8     9.2610   41.4739       44.503590         2.142794      0.607571
9     8.7935   41.5759       15.162831         0.881845      0.116599

Statistiques :
Distance moyenne au fleuve : 30.96 km
Distance moyenne à la rivière : 1.44 km
Distance moyenne à la côte : 150.55 km

The file is saved
