In [1]:
import os
import sys

current_dir = os.getcwd()

parent_dir = os.path.abspath(os.path.join(current_dir, ".."))

if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
import configargparse

parser = configargparse.ArgParser(default_config_files=['../configs/config.ini'])

parser.add('--config', is_config_file=True, help='Path to config file')
parser.add('--data-folder', type=str, required=True, help='Root data directory containing Images and CHM folders')
parser.add('--hdf5-file', type=str, default='Finland_CHM.h5', help='HDF5 file containing CHM dataset')
parser.add('--output-dir', type=str, default='output/chmpredict', help='Directory to save output models and logs')
parser.add('--learning-rate', type=float, default=1e-4, help='Learning rate for optimizer')
parser.add('--batch-size', type=int, default=16, help='Batch size for DataLoader')
parser.add('--epochs', type=int, default=50, help='Number of training epochs')
parser.add('--patience', type=int, default=5, help='Patience for early stopping')

config, _ = parser.parse_known_args()

config.data_folder = '/Users/anisr/Documents/Finland_CHM'
config.output_dir = '../output/chmpredict'
config.batch_size = 4
config.eval_only = True

print(config)

Namespace(config=None, data_folder='/Users/anisr/Documents/Finland_CHM', hdf5_file='Finland_CHM.h5', output_dir='../output/chmpredict', learning_rate=0.0001, batch_size=4, epochs=50, patience=5, eval_only=True)


In [3]:
from chmpredict.main import main

main(config)

Starting CHM Predictor Process...
Calculated CHM Mean: 8.0351
Calculated CHM Std: 8.6017
HDF5 file /Users/anisr/Documents/Finland_CHM/Finland_CHM.h5 already exists. Skipping creation.
Using device: cpu
Loading data...
Data loaded successfully: 160 test samples
Building model and optimizer...
Best model loaded from ../output/chmpredict/best_model.pth
Model and optimizer built successfully.
Evaluating on test data...
Best model loaded from ../output/chmpredict/best_model.pth


Evaluating: 100%|██████████| 40/40 [02:15<00:00,  3.39s/batch]

mse: 48.0436
mae: 5.1897
rmse: 6.9314
mape: 43.1373
smape: 26.4647
r2: 0.1576
corr_coeff: 0.6570
Test evaluation completed.
CHM Predictor Process Completed.





In [23]:
import torch

from chmpredict.data.utils import calculate_chm_mean_std
from chmpredict.data.loader import load_fn
from chmpredict.model.build import build_fn, load_best_model

rgb_dir = os.path.join(config.data_folder, "Images")
chm_dir = os.path.join(config.data_folder, "CHM")

hdf5_path = os.path.join(config.data_folder, config.hdf5_file)

mean_chm, std_chm = calculate_chm_mean_std(chm_dir, num_threads=8, nan_value=-9999)
print(f"Calculated CHM Mean: {mean_chm:.4f}")
print(f"Calculated CHM Std: {std_chm:.4f}")
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

print("Loading data...")
_, _, test_loader = load_fn(hdf5_path, config.batch_size)
print(f"Data loaded successfully: {len(test_loader.dataset)} test samples")

print("Building model and optimizer...")
model, criterion, optimizer = build_fn(config.learning_rate, config.output_dir, device)
print("Model and optimizer built successfully.")

load_best_model(model, config.output_dir, device)

nan_value = -9999

mean_chm = 5.9495
std_chm = 6.4516

Calculated CHM Mean: 8.0351
Calculated CHM Std: 8.6017
Using device: cpu
Loading data...
Data loaded successfully: 160 test samples
Building model and optimizer...
Best model loaded from ../output/chmpredict/best_model.pth
Model and optimizer built successfully.
Best model loaded from ../output/chmpredict/best_model.pth


In [24]:
import torch
import numpy as np

from tqdm import tqdm

model.eval()
total_loss, total_mae, total_rmse = 0, 0, 0
total_mape, total_smape = 0, 0
epsilon = 1e-6  # To handle divide-by-zero
n_samples = 0

y_true_sum, y_pred_sum = 0, 0
y_true_sq_sum, y_pred_sq_sum = 0, 0
y_pred_y_true_sum = 0

with torch.no_grad():
    for data, targets in tqdm(test_loader, desc="Evaluating", unit="batch"):
        data, targets = data.to(device), targets.to(device)
        predictions = model(data)
        
        # Reverse normalization on predictions and targets
        predictions = predictions * std_chm + mean_chm
        targets = targets * std_chm + mean_chm
        
        # Mask out `nan_value` from targets and corresponding predictions
        mask = targets != nan_value
        targets = targets[mask]
        predictions = predictions[mask]
        
        batch_size = targets.size(0)
        total_loss += criterion(predictions, targets).item() * batch_size
        n_samples += batch_size

        targets_np = targets.cpu().numpy().flatten()
        predictions_np = predictions.cpu().numpy().flatten()

        # Calculate metrics while masking out the `nan_value` locations
        total_mae += np.sum(np.abs(predictions_np - targets_np))
        total_rmse += np.sum((predictions_np - targets_np) ** 2)

        # Calculate MAPE and SMAPE with threshold to filter near-zero targets
        min_height_threshold = 1.0  # Threshold for filtering near-zero target values
        valid_mape_smape = targets_np > min_height_threshold
        total_mape += np.sum(np.abs((predictions_np[valid_mape_smape] - targets_np[valid_mape_smape]) 
                                    / targets_np[valid_mape_smape])) * 100
        total_smape += np.sum(2 * np.abs(predictions_np[valid_mape_smape] - targets_np[valid_mape_smape]) 
                                / (np.abs(targets_np[valid_mape_smape]) + np.abs(predictions_np[valid_mape_smape]) + epsilon)) * 100

        # Accumulate values for R² and correlation coefficient calculations
        y_true_sum += np.sum(targets_np)
        y_pred_sum += np.sum(predictions_np)
        y_true_sq_sum += np.sum(targets_np ** 2)
        y_pred_sq_sum += np.sum(predictions_np ** 2)
        y_pred_y_true_sum += np.sum(predictions_np * targets_np)

Evaluating: 100%|██████████| 40/40 [02:34<00:00,  3.86s/batch]


In [53]:
# Final aggregated metrics
avg_loss = total_loss / n_samples
mae = total_mae / n_samples

# Corrected RMSE
rmse = np.sqrt(total_rmse / n_samples)

# Corrected MAPE and SMAPE
mape = total_mape / n_samples
smape = total_smape / n_samples

ss_res = np.sum((predictions_np - targets_np) ** 2)  # Correctly calculated as squared residuals

ss_tot = np.sum((targets_np - np.mean(targets_np)) ** 2)  # Based on mean of target values

r2 = 1 - (ss_res / (ss_tot + epsilon))  # Corrected R² formula

# Correlation Coefficient Calculation
numerator = n_samples * y_pred_y_true_sum - y_pred_sum * y_true_sum
denominator = np.sqrt((n_samples * y_pred_sq_sum - y_pred_sum ** 2) * 
                    (n_samples * y_true_sq_sum - y_true_sum ** 2))
corr_coeff = numerator / (denominator + epsilon)

print(f"mse: {avg_loss:.3f}, mae: {mae:.3f}, rmse: {rmse:.3f}, mape: {mape:.3f}, smape: {smape:.3f}, r2: {r2:.3f}, corr_coeff: {corr_coeff:.3f}")

mse: 26.570, mae: 3.868, rmse: 5.155, mape: 40.530, smape: 25.957, r2: 0.102, corr_coeff: 0.665


In [52]:
# Calculate SS_res as the sum of squared residuals
ss_res = np.sum((predictions_np - targets_np) ** 2)

# Calculate SS_tot as the total sum of squares
ss_tot = np.sum((targets_np - np.mean(targets_np)) ** 2)

# Calculate R^2
r2 = 1 - (ss_res / (ss_tot + epsilon))

r2

np.float32(0.10218257)

In [45]:
import rasterio

canopy_heights = []

for chm_file in os.listdir(chm_dir):
    if chm_file.endswith(".tif"):
        chm_path = os.path.join(chm_dir, chm_file)
        print(chm_path)
        with rasterio.open(chm_path) as chm_src:
            chm_data = chm_src.read(1).astype(np.float32)
            canopy_data = chm_data[chm_data > 1]  # Filter out ground-level values
            canopy_heights.extend(canopy_data)

mean_canopy = np.mean(canopy_heights)
std_canopy = np.std(canopy_heights)

mean_canopy, std_canopy

/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4331C_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4331B_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4423G_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4423F_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4313F_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4313G_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4442D_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4442E_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4311E_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4421E_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4414D_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4432B_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4441A_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4421H_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4442C_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4442B_2016.tif
/Users/anisr/Documents/Finland_CHM/CHM/CHM_M4414H_2016.t

(np.float32(13.933638), np.float32(6.8285794))