In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import geopandas as gpd
import os
import random
import pickle
from imblearn.over_sampling import ADASYN
from sklearn.metrics import roc_curve, auc, confusion_matrix, ConfusionMatrixDisplay, classification_report
from utilities import plot_prediction_area_curves, get_pa_intersection
import time
import psutil

In [4]:
class MCDropoutNet(nn.Module):
    """Small fully connected binary classifier with dropout after each hidden layer.

    Architecture: ``input_dim -> 32 -> 32 -> 1`` with ReLU activations and an
    ``nn.Dropout`` layer following each hidden activation. The network returns
    raw logits (no sigmoid is applied inside ``forward``).

    Args:
        input_dim: Number of input features per sample.
        dropout_rate: Drop probability shared by both dropout layers.
    """

    def __init__(self, input_dim: int, dropout_rate: float = 0.5):
        super().__init__()
        self.input_dim, self.dropout_rate = input_dim, dropout_rate
        hidden_units = 32

        # Hidden stage 1: linear projection, non-linearity, dropout.
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_rate)

        # Hidden stage 2: same layout, constant width.
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=dropout_rate)

        # Output head: a single logit per sample.
        self.fc_out = nn.Linear(hidden_units, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the logits produced by passing ``x`` through both hidden stages and the head."""
        hidden = self.dropout1(self.relu1(self.fc1(x)))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.fc_out(hidden)

In [5]:
def set_dropout_active(model, active: bool):
    """Switch every ``nn.Dropout`` submodule of ``model`` into train or eval mode.

    Only ``nn.Dropout`` instances are touched; all other submodules keep their
    current mode.

    Args:
        model: Module whose submodules are scanned via ``model.modules()``.
        active: Truthy to put dropout layers in train mode, falsy for eval mode.
    """
    dropout_layers = (layer for layer in model.modules() if isinstance(layer, nn.Dropout))
    for layer in dropout_layers:
        # train(False) is what eval() does; bool() keeps truthy/falsy inputs working.
        layer.train(bool(active))

In [6]:
def get_mc_dropout_predictions(
    model: MCDropoutNet, 
    data_loader: DataLoader, 
    num_mc_samples: int, 
    device: str
):
    """Run Monte Carlo dropout inference and summarise the sampled probabilities.

    Dropout layers are switched on, every batch is pushed through the model
    ``num_mc_samples`` times, and the sigmoid of the sampled logits is
    aggregated over the sample axis.

    Args:
        model: Network to evaluate; it is moved to ``device``.
        data_loader: Loader yielding ``(features, labels)`` pairs; labels are ignored.
        num_mc_samples: Number of stochastic forward passes per batch (>= 1).
            With a single pass the variance is undefined and comes back as NaN.
        device: Target device passed to ``.to()`` (the notebook passes a
            ``torch.device``).

    Returns:
        Tuple ``(mean_probs, pred_variances, pred_labels)`` of NumPy arrays:
        per-sample mean probability, per-sample variance across the MC passes,
        and integer labels obtained by thresholding the mean at 0.5. A trailing
        singleton output dimension is dropped, so a one-logit model yields
        1-D arrays of length ``n_samples`` even when ``n_samples == 1``.

    Raises:
        ValueError: If ``num_mc_samples`` is below 1 or the loader yields no batches.
    """
    if num_mc_samples < 1:
        raise ValueError(f"num_mc_samples must be >= 1, got {num_mc_samples}")

    model.to(device)
    set_dropout_active(model, True)

    all_mc_probs_stacked_batches = []
    try:
        with torch.no_grad():
            for x_batch, _ in tqdm(data_loader, desc="MC Dropout Inference"):
                x_batch = x_batch.to(device)

                # Shape: (num_mc_samples, batch, n_outputs).
                batch_mc_logits_stacked = torch.stack(
                    [model(x_batch) for _ in range(num_mc_samples)]
                )
                batch_mc_probs_stacked = torch.sigmoid(batch_mc_logits_stacked)

                all_mc_probs_stacked_batches.append(batch_mc_probs_stacked.cpu())
    finally:
        # Always leave dropout switched off, even when inference fails midway.
        set_dropout_active(model, False)

    if not all_mc_probs_stacked_batches:
        raise ValueError("data_loader yielded no batches; nothing to predict")

    # Concatenate along the sample axis (dim 1); dim 0 stays the MC-pass axis.
    all_mc_probs_stacked_tensor = torch.cat(all_mc_probs_stacked_batches, dim=1)

    mean_tensor = all_mc_probs_stacked_tensor.mean(dim=0)
    var_tensor = all_mc_probs_stacked_tensor.var(dim=0)

    # Drop only the trailing singleton output axis. A bare squeeze() would also
    # collapse the sample axis for a one-row dataset and return 0-d arrays.
    if mean_tensor.dim() > 1 and mean_tensor.shape[-1] == 1:
        mean_tensor = mean_tensor.squeeze(-1)
        var_tensor = var_tensor.squeeze(-1)

    mean_probs = mean_tensor.numpy()
    pred_variances = var_tensor.numpy()
    pred_labels = (mean_probs > 0.5).astype(int)

    return mean_probs, pred_variances, pred_labels

In [7]:
# Dataset locations and loader settings.
# Feature tensors.
train_features_path: str = './data/dataset_train.pt'
test_features_path: str = './data/dataset_test.pt'
# Label tensors.
train_labels_path: str = './data/mineral_train.pt'
test_labels_path: str = './data/mineral_test.pt'
# Fitted scaler pickle and the directory that receives this run's artefacts.
scaler_path: str = './data/scaler.pkl'
output_dir_for_saving: str = "./mc_dropout_mineral_outputs_run"

# Reproducibility seed and mini-batch size used by the train/test loaders.
batch_size: int = 32
random_state: int = 42

In [8]:
# Seed every random number generator the notebook relies on with one value.
RANDOM_SEED = random_state

random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RANDOM_SEED)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Create the artefact directory on first use and report which case applied.
if os.path.exists(output_dir_for_saving):
    print(f"Output directory already exists: {output_dir_for_saving}")
else:
    os.makedirs(output_dir_for_saving)
    print(f"Created output directory: {output_dir_for_saving}")

In [10]:
# Load the saved tensors and convert them to NumPy arrays.
# Features are used as stored; labels are flattened to 1-D vectors.
X_train_np, X_test_np = (
    torch.load(features_file).numpy()
    for features_file in (train_features_path, test_features_path)
)
y_train_np, y_test_np = (
    torch.load(labels_file).numpy().ravel()
    for labels_file in (train_labels_path, test_labels_path)
)

In [None]:
# Oversample the training split with ADASYN (seeded for reproducibility).
# NOTE: this overwrites X_train_np / y_train_np, so re-running the cell resamples again.
adasyn = ADASYN(random_state=RANDOM_SEED)
X_train_np, y_train_np = adasyn.fit_resample(X_train_np, y_train_np)

resampled_class_counts = np.bincount(y_train_np.astype(int))
print(f"After ADASYN - training set size: {X_train_np.shape[0]}")
print(f"After ADASYN - training class distribution: {resampled_class_counts}")

In [None]:
# Loader options shared by both splits: pinned memory and worker processes
# are only enabled when running on CUDA.
shared_loader_options = dict(
    batch_size=batch_size,
    pin_memory=device.type == 'cuda',
    num_workers=2 if device.type == 'cuda' else 0,
)

# Labels get a trailing axis so their shape matches the (batch, 1) logits.
train_dataset = TensorDataset(
    torch.from_numpy(X_train_np),
    torch.from_numpy(y_train_np).unsqueeze(1),
)
test_dataset = TensorDataset(
    torch.from_numpy(X_test_np),
    torch.from_numpy(y_test_np).unsqueeze(1),
)

# Only the training split is shuffled; test order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **shared_loader_options)

input_dim = X_train_np.shape[1]

print(f"Data preparation complete. Input dimension: {input_dim}")
print(f"Train loader size: {len(train_loader)} batches")
print(f"Test loader size: {len(test_loader)} batches")

In [13]:
# Hyperparameters
# Optimisation schedule.
learning_rate_train: float = 1e-3
num_epochs_train: int = 100
print_every_epoch_train: int = 1
# Dropout probability used by the network and number of stochastic
# forward passes drawn for the final MC-dropout evaluation.
dropout_rate_model: float = 0.1
num_mc_final_eval_train: int = 100

In [14]:
# Build the network on the target device, then attach Adam and the
# logit-based binary cross-entropy loss.
model_mc_dropout = MCDropoutNet(input_dim, dropout_rate=dropout_rate_model)
model_mc_dropout = model_mc_dropout.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model_mc_dropout.parameters(), lr=learning_rate_train)

In [15]:
# Count the scalar entries of every parameter tensor that receives gradients.
total_params = sum(
    weights.numel()
    for weights in filter(lambda tensor: tensor.requires_grad, model_mc_dropout.parameters())
)
print(f"MC Dropout - Total trainable parameters: {total_params:,}")

MC Dropout - Total trainable parameters: 6,657


In [None]:
### TRAINING
# Per-epoch histories consumed by the error-rate plot further down.
train_losses_epoch, train_errors_epoch, test_errors_epoch, epochs_list = [], [], [], []
epoch_durations_list = []

process = psutil.Process(os.getpid())
overall_peak_ram_mb = 0  # Tracks the peak resident memory across the entire run

if device.type == 'cuda':
    # Start the peak-VRAM counter from the current allocation so the figure
    # reported below reflects this training run only.
    torch.cuda.reset_peak_memory_stats()

overall_training_start_time = time.time()
for epoch in range(num_epochs_train):
    epoch_start_time = time.time()
    model_mc_dropout.train()

    # Reset the peak tracker for each epoch
    peak_ram_in_epoch_mb = 0

    epoch_total_loss, correct_train, total_train = 0, 0, 0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_train}")

    for x_batch, y_batch in pbar:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        logits = model_mc_dropout(x_batch)
        loss = criterion(logits, y_batch)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        epoch_total_loss += batch_loss

        # Training accuracy is measured on the same dropout-perturbed forward pass.
        predicted_train = (torch.sigmoid(logits) > 0.5).float()
        total_train += y_batch.size(0)
        correct_train += (predicted_train == y_batch).sum().item()
        pbar.set_postfix({"Loss": batch_loss})

        # RAM after each batch; update both the epoch and the overall peak
        current_ram_mb = process.memory_info().rss / (1024 ** 2)
        peak_ram_in_epoch_mb = max(peak_ram_in_epoch_mb, current_ram_mb)
        overall_peak_ram_mb = max(overall_peak_ram_mb, current_ram_mb)

    num_batches = len(train_loader)
    avg_epoch_loss = epoch_total_loss / num_batches if num_batches > 0 else 0.0
    train_accuracy = correct_train / total_train if total_train > 0 else 0
    train_error = (1 - train_accuracy) * 100
    print(f"Epoch {epoch+1} - Avg Loss: {avg_epoch_loss:.4f}, Train Err: {train_error:.2f}%")

    # Record the epoch so the plotting cell has data to draw. No test pass is
    # run during training (it would perturb the timing and RNG stream), so the
    # test history holds NaN placeholders, which the plot cell filters out.
    epochs_list.append(epoch + 1)
    train_losses_epoch.append(avg_epoch_loss)
    train_errors_epoch.append(train_error)
    test_errors_epoch.append(float('nan'))

    current_epoch_duration = time.time() - epoch_start_time
    epoch_durations_list.append(current_epoch_duration)

    print(f"Epoch {epoch+1} completed in {current_epoch_duration:.2f} seconds. Peak RAM in Epoch: {peak_ram_in_epoch_mb:.2f} MB")

print(f"\n--- MC Dropout Model Training Loop Complete ---")
print(f"Total training duration for {num_epochs_train} epochs: {time.time() - overall_training_start_time:.2f} seconds.")

if epoch_durations_list:
    avg_time_per_epoch = np.mean(epoch_durations_list)
    std_time_per_epoch = np.std(epoch_durations_list)
    print(f"MC Dropout - Average time per epoch: {avg_time_per_epoch:.2f} ± {std_time_per_epoch:.2f} seconds")

if device.type == 'cuda':
    peak_vram_mb = torch.cuda.max_memory_allocated() / (1024**2)
    print(f"MC Dropout - Peak VRAM (GPU) used during training: {peak_vram_mb:.2f} MB")
else:
    print(f"MC Dropout - Overall Peak RAM (System) used during training: {overall_peak_ram_mb:.2f} MB")

In [None]:
# Error-rate curves per epoch, drawn with the explicit figure/axes interface.
fig_err, ax_err = plt.subplots(figsize=(8, 5))

ax_err.plot(epochs_list, train_errors_epoch, linestyle='-', marker='', label='Training Error')

# Epochs without a test measurement are stored as NaN; plot only the real values.
valid_test = ~np.isnan(test_errors_epoch)
if valid_test.any():
    test_epochs = np.array(epochs_list)[valid_test]
    test_errors = np.array(test_errors_epoch)[valid_test]
    ax_err.plot(test_epochs, test_errors, linestyle='-', marker='', label='Test Error')

ax_err.set_xlabel('Epoch')
ax_err.set_ylabel('Error Rate (%)')
ax_err.set_title('MC Dropout Error Rate vs Epoch')
ax_err.legend()
ax_err.grid(True)

outfile = os.path.join(output_dir_for_saving, 'mc_dropout_error_rate_vs_epoch.png')
fig_err.tight_layout()
fig_err.savefig(outfile)
plt.show()

In [17]:
# Persist the trained weights; the file name encodes epoch count and seed.
model_file_name = f'mc_dropout_mineral_model_ep{num_epochs_train}_seed{RANDOM_SEED}.pth'
model_save_path = os.path.join(output_dir_for_saving, model_file_name)
torch.save(model_mc_dropout.state_dict(), model_save_path)

In [None]:
# Gather the ground-truth test labels in loader order as one flat array.
true_labels_test_list = [
    labels_batch.cpu().numpy().flatten() for _, labels_batch in test_loader
]
true_labels_np = np.concatenate(true_labels_test_list)

# Final MC-dropout pass over the test set.
mean_probabilities, predictive_variances, predicted_labels = get_mc_dropout_predictions(
    model_mc_dropout,
    test_loader,
    num_mc_final_eval_train,
    device,
)

In [19]:
# Write each evaluation array to its own .npy file in the run directory.
for array_file_name, array_values in (
    ('mc_dropout_true_labels.npy', true_labels_np),
    ('mc_dropout_mean_probs.npy', mean_probabilities),
    ('mc_dropout_pred_labels.npy', predicted_labels),
    ('mc_dropout_pred_variances.npy', predictive_variances),
):
    np.save(os.path.join(output_dir_for_saving, array_file_name), array_values)

In [None]:
# Misclassification rate in percent; an empty test set reports 0.0.
num_test_samples = len(true_labels_np)
if num_test_samples > 0:
    num_correct = np.sum(predicted_labels == true_labels_np)
    final_test_error_rate = (1.0 - num_correct / num_test_samples) * 100
else:
    final_test_error_rate = 0.0

# ROC AUC computed from the MC-averaged probabilities.
fpr, tpr, _ = roc_curve(true_labels_np, mean_probabilities)
if len(fpr) > 1 and len(tpr) > 1:
    roc_auc_score = auc(fpr, tpr)
else:
    roc_auc_score = 0.0

print(f"\nSummary from Final MC Dropout Test Outputs:")
print(f"  Final Test Error Rate: {final_test_error_rate:.2f}%")
print(f"  Final ROC AUC Score: {roc_auc_score:.4f}")

In [21]:
# Directory that receives the figures rendered below. It is distinct from
# output_dir_for_saving and is not created anywhere else, so create it here;
# otherwise the savefig calls fail with FileNotFoundError.
output_dir = "./MC_mineral_outputs" 
os.makedirs(output_dir, exist_ok=True)

# Reload the evaluation arrays written to the run directory.
true_labels_loaded = np.load(os.path.join(output_dir_for_saving, 'mc_dropout_true_labels.npy'))
mean_probs_loaded = np.load(os.path.join(output_dir_for_saving, 'mc_dropout_mean_probs.npy'))
pred_labels_loaded = np.load(os.path.join(output_dir_for_saving, 'mc_dropout_pred_labels.npy'))
pred_variances_loaded = np.load(os.path.join(output_dir_for_saving, 'mc_dropout_pred_variances.npy'))

In [None]:
# ROC curve of the MC-averaged probabilities, with the chance diagonal for reference.
font_size = 14

fpr_loaded, tpr_loaded, _ = roc_curve(true_labels_loaded, mean_probs_loaded)
roc_auc_loaded = auc(fpr_loaded, tpr_loaded)

fig_roc, ax_roc = plt.subplots(figsize=(8, 6))
ax_roc.plot(fpr_loaded, tpr_loaded, color='Orange', lw=2,
            label=f'MC Dropout ROC (AUC = {roc_auc_loaded:.2f})')
ax_roc.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')

# Small headroom above 1.0 keeps the top of the curve visible.
ax_roc.set_xlim([0.0, 1.0])
ax_roc.set_ylim([0.0, 1.05])

ax_roc.set_xlabel('False positive rate', fontsize=font_size, labelpad=10)
ax_roc.set_ylabel('True positive rate', fontsize=font_size, labelpad=10)
ax_roc.tick_params(axis='both', which='major', labelsize=font_size)
ax_roc.legend(loc="lower right", fontsize=font_size)

fig_roc.tight_layout()
fig_roc.savefig(os.path.join(output_dir, 'mc_dropout_roc_curve_final.pdf'), dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Confusion matrix of the thresholded MC-dropout predictions.
font_size = 14
cm_loaded = confusion_matrix(true_labels_loaded, pred_labels_loaded)

fig_cm_loaded, ax_cm_loaded = plt.subplots(figsize=(8, 6))
display_loaded = ConfusionMatrixDisplay(confusion_matrix=cm_loaded,
                                        display_labels=['Barren', 'Mineral'])
# 'd' renders the cell counts as plain integers.
display_loaded.plot(ax=ax_cm_loaded, cmap=plt.cm.Blues, values_format='d')

# Apply one font size to axis labels, ticks, colour bar and cell annotations.
ax_cm_loaded.set_xlabel('Predicted label', fontsize=font_size, labelpad=10)
ax_cm_loaded.set_ylabel('True label', fontsize=font_size, labelpad=10)
ax_cm_loaded.tick_params(axis='both', which='major', labelsize=font_size)

cbar = ax_cm_loaded.images[0].colorbar
if cbar is not None:
    cbar.ax.ticklabel_format(style='plain')
    cbar.ax.tick_params(labelsize=font_size)

for cell_annotation in ax_cm_loaded.texts:
    cell_annotation.set_fontsize(font_size)

fig_cm_loaded.tight_layout()
fig_cm_loaded.savefig(
    os.path.join(output_dir, 'mc_dropout_confusion_matrix_final.pdf'),
    dpi=300,
    bbox_inches='tight',
)
plt.show()


In [None]:
# Per-class precision/recall/F1; undefined ratios are reported as 0 instead of warning.
report_text = classification_report(
    true_labels_loaded,
    pred_labels_loaded,
    target_names=['Class 0', 'Class 1'],
    zero_division=0,
)
print(report_text)

### Evaluation on Noisy Data

In [None]:
# Robustness check: compare ROC AUC on the clean test set with the same set
# after adding zero-mean Gaussian noise to every feature.
test_features_path = './data/dataset_test.pt'
test_labels_path = './data/mineral_test.pt'
scaler_path = './data/scaler.pkl'

X_test_np = torch.load(test_features_path).numpy()
true_labels_np = torch.load(test_labels_path).numpy().ravel()

# --- Clean baseline ---
clean_test_dataset = TensorDataset(torch.from_numpy(X_test_np), torch.from_numpy(true_labels_np))
clean_test_loader = DataLoader(clean_test_dataset, batch_size=128, shuffle=False)
mean_probs_clean, _, _ = get_mc_dropout_predictions(
    model_mc_dropout,
    clean_test_loader,
    num_mc_final_eval_train,
    device,
)
fpr_clean, tpr_clean, _ = roc_curve(true_labels_np, mean_probs_clean)
roc_auc_score = auc(fpr_clean, tpr_clean)  # clean-data reference AUC

# --- Noisy copy of the features ---
np.random.seed(42)
noise_level = 0.1  # standard deviation of the added noise
noise = np.random.normal(loc=0, scale=noise_level, size=X_test_np.shape).astype(np.float32)
X_test_noisy = np.add(X_test_np, noise)

noisy_test_dataset = TensorDataset(torch.from_numpy(X_test_noisy), torch.from_numpy(true_labels_np))
noisy_test_loader = DataLoader(noisy_test_dataset, batch_size=128, shuffle=False)

mean_probs_noisy, _, _ = get_mc_dropout_predictions(
    model_mc_dropout,
    noisy_test_loader,
    num_mc_final_eval_train,
    device,
)

fpr_noisy, tpr_noisy, _ = roc_curve(true_labels_np, mean_probs_noisy)
roc_auc_noisy = auc(fpr_noisy, tpr_noisy)

auc_drop = roc_auc_score - roc_auc_noisy
print(f"\nOriginal ROC AUC on clean data: {roc_auc_score:.4f}")
print(f"ROC AUC on noisy data:         {roc_auc_noisy:.4f}")
print(f"Performance Drop:               {auc_drop:.4f}")

### Total Data

In [25]:
# Inputs for predicting over the full ("total") feature set.
TOTAL_FEATURES_PATH_MC   = "./data/total_train.pt" 
SCALER_PATH_MC           = "./data/scaler.pkl"     

# Same checkpoint name pattern as the one written after training.
MODEL_SAVE_PATH_MC      = os.path.join(output_dir_for_saving, f'mc_dropout_mineral_model_ep{num_epochs_train}_seed{RANDOM_SEED}.pth')
PRED_BATCH_SIZE_MC       = 1024
NUM_MC_SAMPLES_TOTAL_MC  = 20 
# Reuse the training dropout rate instead of a separate literal. MC-dropout
# sampling is only meaningful at the rate the weights were trained with, and
# a copied constant would drift silently if the hyperparameter changes.
DROPOUT_RATE_FOR_TOTAL_EVAL = dropout_rate_model

In [26]:
# Output files for the full-dataset predictions, all inside the run directory:
# mean probabilities, predictive variances, and the GeoPackage export.
MC_TOTAL_MEAN_PROBS_PATH, MC_TOTAL_PRED_VAR_PATH, MC_GEOPACKAGE_OUTPUT_PATH = (
    os.path.join(output_dir_for_saving, output_file_name)
    for output_file_name in (
        "mc_dropout_total_mean_probs.npy",
        "mc_dropout_total_pred_variances.npy",
        "mc_dropout_total_predictions_mpm.gpkg",
    )
)

In [27]:
# Load the full feature tensor and hand it on as a float32 NumPy array.
total_features_tensor_mc = torch.load(TOTAL_FEATURES_PATH_MC)
X_total_mc = total_features_tensor_mc.numpy().astype(np.float32)
# NOTE(review): the scaler at SCALER_PATH_MC is deliberately not applied here;
# presumably the stored tensor is already scaled -- confirm before re-enabling:
# scaler_mc  = pickle.load(open(SCALER_PATH_MC, "rb"))
# X_total_mc = scaler_mc.transform(X_total_mc).astype(np.float32)

In [28]:
# The prediction helper expects (features, labels) batches, so pair the
# features with an all-zero placeholder label column that is never read.
num_total_rows_mc = X_total_mc.shape[0]
dummy_y_mc = np.zeros(num_total_rows_mc, dtype=np.float32)

full_ds_mc = TensorDataset(
    torch.from_numpy(X_total_mc),
    torch.from_numpy(dummy_y_mc).unsqueeze(1),
)

# Keep row order (no shuffling) so predictions line up with the input rows.
on_cuda_mc = device.type == 'cuda'
loader_full_mc = DataLoader(
    full_ds_mc,
    shuffle=False,
    batch_size=PRED_BATCH_SIZE_MC,
    num_workers=2 if on_cuda_mc else 0,
    pin_memory=on_cuda_mc,
)

In [29]:
# Rebuild the architecture, then restore the trained weights from disk.
model_total_eval = MCDropoutNet(input_dim, dropout_rate=DROPOUT_RATE_FOR_TOTAL_EVAL)
model_total_eval = model_total_eval.to(device)

saved_weights_mc = torch.load(MODEL_SAVE_PATH_MC, map_location=device)
# Left as the last expression so the notebook shows the key-matching report.
model_total_eval.load_state_dict(saved_weights_mc)

<All keys matched successfully>

In [None]:
# MC-dropout pass over the full dataset; the thresholded labels are not needed here.
mean_probs_full_mc, var_probs_full_mc, _ = get_mc_dropout_predictions(
    model=model_total_eval,
    data_loader=loader_full_mc,
    num_mc_samples=NUM_MC_SAMPLES_TOTAL_MC,
    device=device,
)

In [31]:
# Persist the full-dataset mean probabilities and predictive variances.
for total_output_path, total_output_values in (
    (MC_TOTAL_MEAN_PROBS_PATH, mean_probs_full_mc),
    (MC_TOTAL_PRED_VAR_PATH, var_probs_full_mc),
):
    np.save(total_output_path, total_output_values)

In [32]:
# Attach the MC-dropout statistics to the spatial datacube, row by row.
gdf_mc = gpd.read_file("data/datacube_mpm.gpkg")

# Predictions are matched to geometries purely by position. A length mismatch
# means the two sources may not describe the same rows, so report it instead
# of truncating silently.
if len(gdf_mc) != len(mean_probs_full_mc):
    print(
        f"WARNING: datacube has {len(gdf_mc)} rows but {len(mean_probs_full_mc)} "
        "predictions were produced; truncating both to the shorter length. "
        "Verify that rows and predictions are aligned."
    )
n_mc = min(len(gdf_mc), len(mean_probs_full_mc)) 
gdf_mc = gdf_mc.iloc[:n_mc].copy() 

gdf_mc["mc_mean_prob"] = mean_probs_full_mc[:n_mc]
gdf_mc["mc_pred_var"]  = var_probs_full_mc[:n_mc]

# Relative uncertainty = standard deviation / mean; eps avoids division by zero.
std_dev_full_mc = np.sqrt(var_probs_full_mc[:n_mc])
eps_mc = 1e-10 
rel_unc_full_mc = std_dev_full_mc / (mean_probs_full_mc[:n_mc] + eps_mc)
gdf_mc["mc_std_dev"] = std_dev_full_mc
gdf_mc["mc_rel_uncertainty"] = rel_unc_full_mc

# Export only the identifying columns that exist in the file, plus the new statistics.
out_cols_mc = ['geometry', 'longitude_left', 'latitude_left', 'CV'] 
out_cols_mc = [col for col in out_cols_mc if col in gdf_mc.columns] 
out_cols_mc.extend(["mc_mean_prob", "mc_pred_var", "mc_std_dev", "mc_rel_uncertainty"])

In [33]:
# Write the selected columns as a single layer of the output GeoPackage.
mc_export_frame = gdf_mc[out_cols_mc]
mc_export_frame.to_file(
    MC_GEOPACKAGE_OUTPUT_PATH,
    layer="mc_dropout_total_predictions",
    driver="GPKG",
)