In [53]:
import pandas as pd
import numpy as np
import rasterio
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib  # For saving and loading the trained model

# -----------------------------
# STEP 1: Load Training Data
# -----------------------------
train_data_path = "D:/project 2/Model datas/Apr/training_data_apr_2014.csv"
df = pd.read_csv(train_data_path)

# -----------------------------
# STEP 2: Prepare Training Data
# -----------------------------
# Define feature columns and target
feature_cols = ["Wave_Power", "Bathymetry", "Distance_to_Shore", "Chlorophyll", "Salinity"]
target_col = "Suitability_Class"

X = df[feature_cols]  # Features
y = df[target_col]  # Target

# Split into Training (80%) and Testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -----------------------------
# STEP 3: Train Random Forest Model
# -----------------------------
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42, class_weight="balanced")
rf_model.fit(X_train, y_train)

# -----------------------------
# STEP 4: Model Evaluation
# -----------------------------
y_pred = rf_model.predict(X_test)

# Print Accuracy & Classification Report
print("✅ Model Accuracy:", accuracy_score(y_test, y_pred) * 100)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print(confusion_matrix(y_pred, y_pred))

# Save the trained model
joblib.dump(rf_model, "D:/project 2/Model datas/Apr/RF_model_2014.pkl")
print("✅ Model saved successfully!")

✅ Model Accuracy: 98.20359281437125

Classification Report:
               precision    recall  f1-score   support

           3       1.00      0.98      0.99       128
           4       0.93      1.00      0.96        39

    accuracy                           0.98       167
   macro avg       0.96      0.99      0.98       167
weighted avg       0.98      0.98      0.98       167

[[125   0]
 [  0  42]]
✅ Model saved successfully!


In [55]:
import rasterio
import numpy as np
import pandas as pd
from rasterio.warp import calculate_default_transform, reproject, Resampling
from sklearn.ensemble import RandomForestClassifier
import joblib  # For saving/loading models

# Load the trained model
rf_model = joblib.load( "D:/project 2/Model datas/Apr/RF_model_2014.pkl")

# Paths for 2024 parameter rasters
parameter_paths_2024 = {
    "Wave_Power": "D:/project 2/Model prediction/Apr 2024/raster_wp_apr.tif",
    "Bathymetry":"D:/project 2/Model prediction/Apr 2024/raster_bathy.tif",
    "Distance_to_Shore": "D:/project 2/Model prediction/Apr 2024/dist_reclass.tif",
    "Chlorophyll": "D:/project 2/Model prediction/Apr 2024/raster_feb24.tif",
    "Salinity": "D:/project 2/Model prediction/Apr 2024/raster_sal_april24.tif"
}

# Choose a reference raster (Wave Power) for resampling
ref_raster_path = parameter_paths_2024["Wave_Power"]
with rasterio.open(ref_raster_path) as ref_src:
    ref_transform = ref_src.transform
    ref_crs = ref_src.crs
    ref_shape = ref_src.shape  # (height, width)

# Function to resample a raster to match the reference raster
def resample_raster(input_path, ref_transform, ref_crs, ref_shape):
    with rasterio.open(input_path) as src:
        transform, width, height = calculate_default_transform(
            src.crs, ref_crs, src.width, src.height, *src.bounds
        )

        # Create an array with the correct shape
        resampled_data = np.empty(ref_shape, dtype=src.dtypes[0])

        # Reproject the raster
        reproject(
            source=rasterio.band(src, 1),
            destination=resampled_data,
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=ref_transform,
            dst_crs=ref_crs,
            resampling=Resampling.bilinear  # Bilinear interpolation
        )
        
        return resampled_data

# Create a dictionary to store resampled data
data_2024 = {}

# Resample all rasters
for param_name, param_path in parameter_paths_2024.items():
    data_2024[param_name] = resample_raster(param_path, ref_transform, ref_crs, ref_shape).flatten()

# Convert the data dictionary to a DataFrame
df_2024 = pd.DataFrame(data_2024)

# Define feature columns (same as used during training)
feature_cols = ["Wave_Power", "Bathymetry", "Distance_to_Shore", "Chlorophyll", "Salinity"]

# Predict suitability for 2024
df_2024["Predicted_Suitability"] = rf_model.predict(df_2024[feature_cols])

# Save the predictions as a CSV file
df_2024.to_csv("D:/project 2/Model prediction/Apr 2024/predicted_suitability_apr_2024.csv", index=False)

print("Predictions for 2024 suitability successfully generated and saved!")


Predictions for 2024 suitability successfully generated and saved!


In [None]:
import rasterio
import numpy as np
import pandas as pd

# Paths
reference_tiff = "D:/project 2/Model prediction/Apr 2024/raster_wp_apr.tif" # Use a reference raster for metadata
output_tiff = "D:/project 2/Model prediction/Apr 2024/predicted_suitability_apr_2024.tif"

# Step 1: Read Predicted CSV
predicted_csv = "D:/project 2/Model prediction/Apr 2024/predicted_suitability_apr_2024.csv"
df = pd.read_csv(predicted_csv)

# Check for value range (Debugging)
print(f"Min: {df['Predicted_Suitability'].min()}, Max: {df['Predicted_Suitability'].max()}")

# Clip values to 1-5 range (assuming 1 to 5 are valid classes)
df["Predicted_Suitability"] = df["Predicted_Suitability"].clip(1, 5).astype(np.int32)

# Step 2: Open Reference TIFF for Shape and Metadata
with rasterio.open(reference_tiff) as ref_src:
    ref_meta = ref_src.meta.copy()
    ref_shape = ref_src.shape  # Get (rows, cols)
    ref_transform = ref_src.transform  # Get spatial transform
    ref_crs = ref_src.crs  # Get CRS

# Step 3: Convert Prediction to 2D Raster
try:
    predicted_suitability = df["Predicted_Suitability"].values.reshape(ref_shape)
except ValueError as e:
    print("Error in reshaping! Check CSV size matching with raster dimensions.")
    raise e

# Step 4: Save as New TIFF with Correct Settings
ref_meta.update({
    "dtype": "int32",  # Ensure correct data type
    "count": 1,  # Single-band raster
    "compress": "lzw",  # Apply compression (optional)
    "nodata": 0  # Define NoData value
})

# Write to TIFF
with rasterio.open(output_tiff, "w", **ref_meta) as dst:
    dst.write(predicted_suitability, 1)  # Write data to band 1

print("Predicted suitability map saved successfully!")


Min: 3, Max: 4
Predicted suitability map saved successfully!


In [59]:
import rasterio
import numpy as np
from rasterio.enums import Resampling

# File paths
reference_tiff = "D:/project 2/Model datas/Apr/woa_apr.tif" # The reference mask (manual weighted overlay)
predicted_tiff = "D:/project 2/Model prediction/Apr 2024/predicted_suitability_apr_2024.tif" # ML predicted TIFF
output_tiff = "D:/project 2/Model prediction/Apr 2024/apr_2024_final.tif"  # Output TIFF

# Step 1: Open Reference (Manual Weighted Overlay) TIFF
with rasterio.open(reference_tiff) as ref_src:
    ref_mask = ref_src.read(1)  # Read first band (manual suitability map)
    ref_meta = ref_src.meta.copy()  # Copy metadata
    ref_transform = ref_src.transform
    ref_crs = ref_src.crs
    ref_nodata = ref_src.nodata  # NoData value of reference TIFF
    ref_shape = ref_src.shape  # Shape of the reference raster (rows, cols)

# Step 2: Open Predicted Suitability TIFF
with rasterio.open(predicted_tiff) as pred_src:
    predicted_suitability = pred_src.read(1)  # Read predicted suitability map

    # Step 3: Resample Predicted Suitability to match Reference TIFF
    predicted_suitability_resampled = np.zeros(ref_shape, dtype=pred_src.dtypes[0])  # Create empty array
    rasterio.warp.reproject(
        source=predicted_suitability,
        destination=predicted_suitability_resampled,
        src_transform=pred_src.transform,
        src_crs=pred_src.crs,
        dst_transform=ref_transform,
        dst_crs=ref_crs,
        resampling=Resampling.nearest
    )

# Step 4: Apply Mask
# Retain only pixels where the reference has valid values (non-NoData)
masked_suitability = np.where(ref_mask != ref_nodata, predicted_suitability_resampled, 0)  # Set land pixels to 0

# Step 5: Save the Masked TIFF
ref_meta.update({
    "dtype": "int32",
    "count": 1,
    "nodata": 0,  # Set NoData as 0 to mask land areas
    "compress": "lzw"
})

with rasterio.open(output_tiff, "w", **ref_meta) as dst:
    dst.write(masked_suitability, 1)

print("Masked suitability map saved successfully!")


Masked suitability map saved successfully!


In [None]:
import rasterio
import numpy as np
from rasterio.plot import reshape_as_raster

# Define the color map for suitability scores (1 to 5)
suitability_colors = {
    1: (0, 0, 255),      # Blue
    2: (0, 255, 255),    # Cyan
    3: (255, 255, 0),    # Yellow
    4: (255, 165, 0),    # Orange
    5: (255, 0, 0)       # Red
}

# Convert color dictionary to an array
color_array = np.array([suitability_colors[i] for i in range(1, 6)], dtype=np.uint8)

# File paths
predicted_tiff_path = "predicted_suitability.tif"  # ML-predicted suitability map
reference_tiff_path = "reference_water_mask.tif"  # Reference mask (water-only)
output_tiff_path = "colorized_suitability_map.tif"  # Output file path

# Load the predicted suitability TIFF
with rasterio.open(predicted_tiff_path) as src:
    suitability_data = src.read(1)  # Read the first band
    meta = src.meta.copy()  # Copy metadata

# Load the reference water mask
with rasterio.open(reference_tiff_path) as ref_src:
    water_mask = ref_src.read(1)  # Read reference mask
    ref_nodata = ref_src.nodata  # Get NoData value

# Create an RGB image (default black)
rgb_image = np.zeros((suitability_data.shape[0], suitability_data.shape[1], 3), dtype=np.uint8)

# Apply colors only to water pixels
for i in range(1, 6):  # Assign colors to suitability classes
    mask = (suitability_data == i) & (water_mask != ref_nodata)  # Ensure valid water pixels
    rgb_image[mask] = color_array[i - 1]

# Update metadata for RGB output
meta.update({
    "count": 3,  # 3 bands for RGB
    "dtype": "uint8",  # Data type for RGB images
    "nodata": None  # No NoData value
})

# Save the colorized TIFF
with rasterio.open(output_tiff_path, "w", **meta) as dst:
    dst.write(reshape_as_raster(rgb_image))  # Write RGB bands

print(f"Colorized suitability map saved as {output_tiff_path}")
