# Import Libraries

In [1]:
import time
import joblib
import re

import pandas as pd
import numpy as np
from numpy import nan

import geopandas as gpd
from shapely.geometry import Point, box
from collections import defaultdict

import os
# Move to the project root so that the relative data paths used in the cells
# below resolve. The location can be overridden with the DATA_CHALLENGE_DIR
# environment variable; when it is unset, the original directory is used.
os.chdir(os.environ.get("DATA_CHALLENGE_DIR", r"E:\Data challenge"))


In [2]:
from classes.my_classes import Format_Flood_Data, Format_Clay_Data, Format_Hydrography_Data

# Import Data

In [3]:
# Flood shapefile names, keyed by flood scenario label ("high", "mid", "low").
# Each label maps to three files numbered 01..03 that share one suffix tag.
Flood_file = {
    label: [f"n_iso_ht_{part:02d}_{tag}_s.shp" for part in (1, 2, 3)]
    for label, tag in (("high", "01for"), ("mid", "02moy"), ("low", "04fai"))
}

# RCP scenario names; they are also the file stems of the Drias CSV files.
scenarios = ["RCP_" + level for level in ("2.6", "4.5", "8.5")]

### Loading Drias Data

In [4]:
# Drias table from the RCP_4.5 file: semicolon-separated, with the column
# names on row 31 (0-based); pandas skips the lines above the header row.
data_drias = pd.read_csv(
    r"data\Drias_data\RCP_4.5.csv",
    sep=";",
    header=31,
)

### Loading Clay Data

In [5]:
print("Loading clay data...")

# Read the clay shapefile and keep only the geometry together with the
# ALEA and NIVEAU attribute columns.
data_clay = gpd.read_file(r"data\sol_argileux\ExpoArgile_Fxx_L93.shp")[
    ['geometry', 'ALEA', 'NIVEAU']
]

# Report the number of features and the coordinate reference system.
print(
    f"Clay data loaded: {len(data_clay)} features",
    f"CRS: {data_clay.crs}",
    sep="\n",
)

Loading clay data...
Clay data loaded: 122222 features
CRS: EPSG:2154


### Loading Hydrography Map

In [6]:
# Load the two hydrography layers, in this order:
#   data_hydro - stream/river map
#   data_ocean - ocean/sea frontier
data_hydro, data_ocean = map(
    gpd.read_file,
    (
        r"data\hydro\TronconHydrogra\TronconHydrograElt_FXX.shp",
        r"data\hydro\LimiteTerreMer_FXX-shp\LimiteTerreMer_FXX.shp",
    ),
)

### Loading and Concatenating Flood File

In [7]:
print("Loading flood data...")

# Folder holding the flood shapefiles listed in Flood_file. Paths are built
# with os.path.join so no doubled separator ends up in the file name.
flood_dir = r'data\map\tri_2020_sig_di'
all_flood_data = []

for scenario, files in Flood_file.items():
    # Read every shapefile of this scenario, keeping only the geometry and
    # the ht_min / ht_max columns.
    scenario_data = [
        gpd.read_file(os.path.join(flood_dir, file))[['geometry', 'ht_min', 'ht_max']]
        for file in files
    ]

    # Stack the scenario's files and tag each row with its scenario label.
    scenario_gdf = pd.concat(scenario_data, ignore_index=True)
    scenario_gdf['scenario'] = scenario
    all_flood_data.append(scenario_gdf)

# Single table holding all scenarios, with a fresh 0..n-1 index.
flood_gdf = pd.concat(all_flood_data, ignore_index=True)

print(f"Flood data loaded: {len(flood_gdf)} features worldwide")
print(f"CRS: {flood_gdf.crs}")


Loading flood data...


  return ogr_read(
  return ogr_read(
  return ogr_read(
  return ogr_read(
  return ogr_read(
  return ogr_read(
  return ogr_read(


Flood data loaded: 4154115 features worldwide
CRS: EPSG:4326


# Format Data

### Format Flood Data

In [8]:
# Build the flood formatter from the combined flood layer and the Drias
# table, with a buffer distance of 4000 meters, then run it.
processor = Format_Flood_Data(
    flood_gdf,
    data_drias,
    buffer_distance_meters=4000,
)
result_Flood_df = processor.process()

FloodDataProcessor initialized with buffer: 4000m (0.036036 degrees)

Creating point geometries...
Points created: 26943

Filtering flood data to region...
Point extent: X=[-4.96, 9.57], Y=[41.34, 51.05]
Filtering to: X=[-5.46, 10.07], Y=[40.84, 51.55]
Filtered flood data: 4010228 features (reduced by 3.5%)

Creating spatial index on filtered data...
Spatial index created!

Processing 26943 points...

First point found 0 candidates


  return lib.distance(a, b, **kwargs)


Point 100/26943 (0.4%) | Rate: 204.64 pts/sec | Elapsed: 0.5s | Est. remaining: 2.2min
Point 200/26943 (0.7%) | Rate: 64.75 pts/sec | Elapsed: 3.1s | Est. remaining: 6.9min
Point 300/26943 (1.1%) | Rate: 34.38 pts/sec | Elapsed: 8.7s | Est. remaining: 12.9min
Point 400/26943 (1.5%) | Rate: 42.92 pts/sec | Elapsed: 9.3s | Est. remaining: 10.3min
Point 500/26943 (1.9%) | Rate: 43.76 pts/sec | Elapsed: 11.4s | Est. remaining: 10.1min
Point 600/26943 (2.2%) | Rate: 44.77 pts/sec | Elapsed: 13.4s | Est. remaining: 9.8min
Point 700/26943 (2.6%) | Rate: 44.33 pts/sec | Elapsed: 15.8s | Est. remaining: 9.9min
Point 800/26943 (3.0%) | Rate: 33.97 pts/sec | Elapsed: 23.5s | Est. remaining: 12.8min
Point 900/26943 (3.3%) | Rate: 30.26 pts/sec | Elapsed: 29.7s | Est. remaining: 14.3min
Point 1000/26943 (3.7%) | Rate: 27.36 pts/sec | Elapsed: 36.5s | Est. remaining: 15.8min
Point 1100/26943 (4.1%) | Rate: 26.40 pts/sec | Elapsed: 41.7s | Est. remaining: 16.3min
Point 1200/26943 (4.5%) | Rate: 26.89

### Format Clay Data

In [9]:
# Build the clay formatter from the Drias table and the clay layer, with a
# buffer distance of 1 meter.
processor = Format_Clay_Data(
    data_drias,
    data_clay,
    buffer_distance_meters=1,
)

# Run the formatter.
# NOTE(review): buffer_margin is presumably in meters as well - confirm in Format_Clay_Data.
result_Clay_df = processor.process(buffer_margin=1000)


Format_Clay_Data initialized with buffer: 1m

Creating point geometries...
Points created: 26943

Transforming points from EPSG:4326 to EPSG:2154...

Filtering clay data to points region...
Filtered clay data: 122216 features
Creating spatial index...
Spatial index created!

Processing 26943 points with buffer_distance=1m...

First point found 0 candidates
Point 100/26943 (0.4%) | Rate: 926.45 pts/sec | Elapsed: 0.1s | Est. remaining: 0.5min
Point 200/26943 (0.7%) | Rate: 1032.47 pts/sec | Elapsed: 0.2s | Est. remaining: 0.4min
Point 300/26943 (1.1%) | Rate: 980.74 pts/sec | Elapsed: 0.3s | Est. remaining: 0.5min
Point 400/26943 (1.5%) | Rate: 711.46 pts/sec | Elapsed: 0.6s | Est. remaining: 0.6min
Point 500/26943 (1.9%) | Rate: 608.07 pts/sec | Elapsed: 0.8s | Est. remaining: 0.7min
Point 600/26943 (2.2%) | Rate: 544.85 pts/sec | Elapsed: 1.1s | Est. remaining: 0.8min
Point 700/26943 (2.6%) | Rate: 495.54 pts/sec | Elapsed: 1.4s | Est. remaining: 0.9min
Point 800/26943 (3.0%) | Rate: 

### Format Hydrography Data

In [10]:
# For every RCP scenario, load its Drias table and run the hydrography
# formatter on it; the processed tables are collected per scenario name.
modified_drias = {}
for scenario in scenarios:
    # Raw f-string: the backslashes are literal path separators, not escape
    # sequences (a plain f-string triggers invalid-escape warnings here).
    url_drias = rf"data\Drias_data\{scenario}.csv"
    my_drias = pd.read_csv(url_drias, sep=";", header=31)

    processor = Format_Hydrography_Data(my_drias, data_hydro, data_ocean)

    # Process the data and store the result under the scenario name
    modified_drias[scenario] = processor.process()



Format_Hydrography_Data initialized

Classifying water bodies...
Water body classification:
type_cours_eau
inconnu            225617
petit_cours_eau    151057
riviere             80536
autre               33210
fleuve              13424
canal               11375
Name: count, dtype: int64

Fleuves: 13424
Rivières/petits cours d'eau: 231593

Creating point geometries...
Points created: 26943

Projecting data to EPSG:2154...
Projection complete

Calculating distances...
Calculating distance to fleuves...
Calculating distance to rivières...
Calculating distance to ocean/sea...
Distance calculations complete

Converting distances to kilometers...
Conversion complete

PROCESSING COMPLETE!
Added columns: dist_fleuve_km, dist_riviere_km, dist_cote_km

First few rows:
   Longitude  Latitude  dist_fleuve_km  dist_riviere_km  dist_cote_km
0     9.1495   41.3373       49.508247         5.648592      3.955963
1     9.0628   41.4149       38.326724         5.089265      2.651321
2     9.1577   41.40

# Export Formatted Data

### Export Flood Data

In [11]:
# Write the formatted flood table to CSV. The path is defined once as a raw
# string and reused in the message, so "\f" is no longer read as a form feed
# (which used to garble the printed path).
flood_output_path = r'data\Flood\flood_risk_results.csv'
result_Flood_df.to_csv(flood_output_path)
print(f"Results saved to '{flood_output_path}'")

Results saved to 'data\Floodlood_risk_results.csv'


### Export Clay Data

In [12]:
# Write the formatted clay table to CSV. The path is defined once as a raw
# string and reused in the message, so the backslashes are never parsed as
# escape sequences.
# NOTE(review): the clay results are written under data\Flood - confirm this is intended.
clay_output_path = r'data\Flood\clay_risk_results.csv'
result_Clay_df.to_csv(clay_output_path)
print(f"Results saved to '{clay_output_path}'")

Results saved to 'data\Flood\clay_risk_results.csv'


### Export Drias_Hydrography Data

In [13]:
# Save each scenario's processed Drias table next to its source file, as a
# semicolon-separated CSV without the index column.
for scenario in scenarios:
    # Raw f-string: the backslashes are literal path separators, not escape
    # sequences (a plain f-string triggers invalid-escape warnings here).
    url_output = rf"data\Drias_data\{scenario}_with_distance.csv"
    modified_drias[scenario].to_csv(url_output, sep=";", index=False)
    print(f"Results saved to {url_output}")

Results saved to data\Drias_data\RCP_2.6_with_distance.csv
Results saved to data\Drias_data\RCP_4.5_with_distance.csv
Results saved to data\Drias_data\RCP_8.5_with_distance.csv
