In [9]:
import rasterio
from rasterio.enums import Resampling
import numpy as np
import pandas as pd
from rasterio.warp import reproject
from rasterio.transform import xy

# Build a per-pixel training table: the suitability raster defines the
# reference grid, every parameter raster is resampled onto that grid, pixels
# without usable data are dropped, and the remaining pixels are written to a
# CSV together with their pixel-centre coordinates.


def _not_nodata(values, nodata):
    """Return a boolean array that is False wherever ``values`` is NoData.

    ``nodata`` is the dataset's declared NoData value; ``None`` means the
    dataset declares none, in which case every element counts as valid.
    """
    if nodata is None:
        return np.ones(values.shape, dtype=bool)
    if np.isnan(nodata):
        # NaN never compares equal to itself, so it needs an explicit test.
        return ~np.isnan(values)
    return values != nodata


# Step 1: Open Suitability Map (2014) as Reference
suitability_path = "D:/project 2/Model datas/Apr/woa_apr.tif"
with rasterio.open(suitability_path) as src:
    suitability_data = src.read(1)  # Read first band
    transform = src.transform
    crs = src.crs
    meta = src.meta  # Not used in this cell; kept in case later cells read it
    height, width = src.shape  # Get reference dimensions
    suitability_nodata = src.nodata  # Declared NoData value, or None

# Step 2: Define Parameter Layers
parameter_paths = {
    "Wave_Power": "D:/project 2/Model datas/Apr/raster_wp_apr.tif",
    "Bathymetry": "D:/project 2/Model datas/Apr/raster_bathy.tif",
    "Distance_to_Shore": "D:/project 2/Model datas/Apr/dist_reclass.tif",
    # NOTE(review): this file is named "feb" while every other input is an
    # April layer -- confirm it is the intended chlorophyll raster.
    "Chlorophyll": "D:/project 2/Model datas/Apr/raster_feb.tif",
    "Salinity": "D:/project 2/Model datas/Apr/raster_sal_april.tif",
}

# Initialize Data Dictionary
flat_suitability = suitability_data.flatten()
data_dict = {"Suitability_Class": flat_suitability}

# A pixel is kept only if its suitability class is positive and is not the
# raster's declared NoData value (which may itself be a positive number).
valid_mask = (flat_suitability > 0) & _not_nodata(
    flat_suitability, suitability_nodata
)

# Step 3: Resample Parameter Layers to Match Suitability Map
for param_name, param_path in parameter_paths.items():
    with rasterio.open(param_path) as param_src:
        param_data = param_src.read(1)
        param_nodata = param_src.nodata

        # Resample to match suitability map. Passing the NoData value makes
        # reproject() treat source fill pixels as missing and initialise
        # uncovered destination pixels with it instead of a plausible 0.
        param_resampled = np.zeros((height, width), dtype=param_data.dtype)
        reproject(
            source=param_data,
            destination=param_resampled,
            src_transform=param_src.transform,
            src_crs=param_src.crs,
            src_nodata=param_nodata,
            dst_transform=transform,
            dst_crs=crs,
            dst_nodata=param_nodata,
            resampling=Resampling.nearest,  # Nearest neighbor resampling
        )

    flat_param = param_resampled.flatten()  # Flatten for DataFrame storage
    data_dict[param_name] = flat_param
    # Drop pixels where this predictor has no data so fill values never end
    # up in the training table as if they were measurements.
    valid_mask &= _not_nodata(flat_param, param_nodata)

# Step 4: Remove NoData Pixels (apply the combined mask to all layers)
data_dict = {key: values[valid_mask] for key, values in data_dict.items()}

# Step 5: Convert Pixel Indices to Latitude & Longitude
# np.nonzero walks the 2D mask in row-major order, the same order in which the
# flattened layers were filtered above, so coordinates line up with the rows.
rows, cols = np.nonzero(valid_mask.reshape(height, width))
# xy() returns (x, y); these are longitude/latitude only if the reference CRS
# is geographic -- TODO confirm for this raster.
longitudes, latitudes = xy(transform, rows, cols)

# Step 6: Add Coordinates to Data Dictionary
data_dict["Latitude"] = latitudes
data_dict["Longitude"] = longitudes

# Step 7: Save as CSV
df_training = pd.DataFrame(data_dict)
df_training.to_csv("D:/project 2/Model datas/Apr/training_data_apr_2014.csv", index=False)

print(f"✅ Training dataset created successfully with {len(df_training)} samples!")


✅ Training dataset created successfully with 831 samples!
