In [77]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np 
import pandas as pd 
import scipy as sp
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import LinearRegression
from pykrige.ok import OrdinaryKriging


In [78]:
deposit_data = pd.read_csv("../../Curated_data/final_dataset.csv", low_memory=False)


In [79]:
total_columns = ['CP_Total','PO_Total', 'PY_Total']

#all covariates
covariates = total_columns[:3] + ['RQD_Pct', 'Cr_ppm'] 

In [80]:
# Assuming deposit_data is your DataFrame
# Extract the names of the first 98 columns
phi_columns = deposit_data.columns[10:].tolist()
phi_columns[-1]


'phi_6402'

In [81]:
# Function to print evaluation metrics
def print_metrics(actual, predicted, set_name, num_predictors):
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    n = len(actual)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - num_predictors - 1))

    print(f"Metrics for {set_name} set:")
    print(f"  MSE: {mse:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    print(f"  MAE: {mae:.4f}")
    print(f"  R^2: {r2:.4f}")
    print(f"  Adjusted R^2: {adjusted_r2:.4f}\n")


# Deepkriging covariates

In [102]:
phi_columns = deposit_data.columns[10:].tolist()
p = len(covariates) +len(phi_columns)

torch.manual_seed(42)
np.random.seed(42)

# Assuming deposit_data, covariates, and other necessary variables are defined

# Create an array to store metrics for each fold
test_mse_list = []
test_rmse_list = []
test_mae_list = []
test_r2_list = []
test_adjusted_r2_list = []


# Define the number of folds for cross-validation
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(deposit_data)):
    train_data, test_data = deposit_data.iloc[train_index], deposit_data.iloc[test_index]

    x_train = train_data[phi_columns + covariates].values
    y_train = train_data['Density_gcm3'].values

    x_test = test_data[phi_columns + covariates].values
    y_test = test_data['Density_gcm3'].values

    # Define your neural network
    model = nn.Sequential(
        nn.Linear(in_features=p, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5) ,
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=1))


    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    train_losses = []  # To store training losses during training
    test_losses = []   # To store test losses during training

    # Training loop
    for step in range(601):
        pre = model(torch.tensor(x_train, dtype=torch.float32))
        mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
        cost = mse

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pre_test = model(torch.tensor(x_test, dtype=torch.float32))
        mse_test = mse_loss(pre_test, torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
        test_losses.append(mse_test.item())

    # Store metrics for this fold
    test_predictions_fold = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse_list.append(mean_squared_error(y_test, test_predictions_fold))
    test_mae_list.append(mean_absolute_error(y_test, test_predictions_fold))
    test_r2_list.append(r2_score(y_test, test_predictions_fold))

     # Calculate adjusted R-squared
    n = len(y_test)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    ssr = np.sum((test_predictions_fold - y_test) ** 2)
    r2 = 1 - (ssr / sst)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
    test_adjusted_r2_list.append(adjusted_r2)

   # Print metrics for the current fold
    print(f"\nMetrics for Fold {fold + 1}:")
    print_metrics(y_test, test_predictions_fold, "Test", p)

# Print average metrics across folds
print("\nAverage Metrics Across Folds:")
print(f"  Average MSE: {np.mean(test_mse_list):.4f}")
print(f"  Average MAE: {np.mean(test_mae_list):.4f}")
print(f"  Average Adjusted R2: {np.mean(test_adjusted_r2_list):.4f}")
print(f"  STD MSE: {np.std(test_mse_list):.4f}")
print(f"  STD MAE: {np.std(test_mae_list):.4f}")
print(f"  STD Adjusted R2: {np.std(test_adjusted_r2_list):.4f}")


Metrics for Fold 1:
Metrics for Test set:
  MSE: 0.0024
  RMSE: 0.0485
  MAE: 0.0355
  R^2: 0.8524
  Adjusted R^2: 1.0063


Metrics for Fold 2:
Metrics for Test set:
  MSE: 0.0031
  RMSE: 0.0559
  MAE: 0.0372
  R^2: 0.7618
  Adjusted R^2: 1.0101


Metrics for Fold 3:
Metrics for Test set:
  MSE: 0.0029
  RMSE: 0.0540
  MAE: 0.0384
  R^2: 0.8020
  Adjusted R^2: 1.0084


Metrics for Fold 4:
Metrics for Test set:
  MSE: 0.0018
  RMSE: 0.0427
  MAE: 0.0312
  R^2: 0.8648
  Adjusted R^2: 1.0057


Metrics for Fold 5:
Metrics for Test set:
  MSE: 0.0030
  RMSE: 0.0552
  MAE: 0.0393
  R^2: 0.8306
  Adjusted R^2: 1.0072


Metrics for Fold 6:
Metrics for Test set:
  MSE: 0.0028
  RMSE: 0.0525
  MAE: 0.0367
  R^2: 0.7834
  Adjusted R^2: 1.0092


Metrics for Fold 7:
Metrics for Test set:
  MSE: 0.0025
  RMSE: 0.0505
  MAE: 0.0383
  R^2: 0.8590
  Adjusted R^2: 1.0060


Metrics for Fold 8:
Metrics for Test set:
  MSE: 0.0021
  RMSE: 0.0456
  MAE: 0.0338
  R^2: 0.8844
  Adjusted R^2: 1.0049


Metrics

# NN Covariates

In [106]:
torch.manual_seed(42)
np.random.seed(42)

phi_columns = ['X','Y','Z']
p = len(covariates) + len(phi_columns)

# Assuming deposit_data, covariates, and other necessary variables are defined

# Create an array to store metrics for each fold
test_mse_list = []
test_rmse_list = []
test_mae_list = []
test_r2_list = []
test_adjusted_r2_list = []


# Define the number of folds for cross-validation
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(deposit_data)):
    train_data, test_data = deposit_data.iloc[train_index], deposit_data.iloc[test_index]

    x_train = train_data[phi_columns + covariates].values
    y_train = train_data['Density_gcm3'].values

    x_test = test_data[phi_columns + covariates].values
    y_test = test_data['Density_gcm3'].values

    # Define your neural network
    model = nn.Sequential(
        nn.Linear(in_features=p, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5) ,
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=1))
    

    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    train_losses = []  # To store training losses during training
    test_losses = []   # To store test losses during training

    # Training loop
    for step in range(601):
        pre = model(torch.tensor(x_train, dtype=torch.float32))
        mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
        cost = mse

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pre_test = model(torch.tensor(x_test, dtype=torch.float32))
        mse_test = mse_loss(pre_test, torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
        test_losses.append(mse_test.item())
        train_losses.append(mse.item())

    # Store metrics for this fold
    test_predictions_fold = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse_list.append(mean_squared_error(y_test, test_predictions_fold))
    test_mae_list.append(mean_absolute_error(y_test, test_predictions_fold))
    test_r2_list.append(r2_score(y_test, test_predictions_fold))



     # Calculate adjusted R-squared
    n = len(y_test)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    ssr = np.sum((test_predictions_fold - y_test) ** 2)
    r2 = 1 - (ssr / sst)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
    test_adjusted_r2_list.append(adjusted_r2)

   # Print metrics for the current fold
    print(f"\nMetrics for Fold {fold + 1}:")
    print_metrics(y_test, test_predictions_fold, "Test", p)

# Print average metrics across folds
print("\nAverage Metrics Across Folds:")
print(f"  Average MSE: {np.mean(test_mse_list):.4f}")
print(f"  Average MAE: {np.mean(test_mae_list):.4f}")
print(f"  Average Adjusted R2: {np.mean(test_adjusted_r2_list):.4f}")
print(f"  STD MSE: {np.std(test_mse_list):.4f}")
print(f"  STD MAE: {np.std(test_mae_list):.4f}")
print(f"  STD Adjusted R2: {np.std(test_adjusted_r2_list):.4f}")


Metrics for Fold 1:
Metrics for Test set:
  MSE: 0.0067
  RMSE: 0.0819
  MAE: 0.0547
  R^2: 0.5797
  Adjusted R^2: 0.5664


Metrics for Fold 2:
Metrics for Test set:
  MSE: 0.0074
  RMSE: 0.0863
  MAE: 0.0504
  R^2: 0.4317
  Adjusted R^2: 0.4137


Metrics for Fold 3:
Metrics for Test set:
  MSE: 0.0059
  RMSE: 0.0768
  MAE: 0.0514
  R^2: 0.5995
  Adjusted R^2: 0.5868


Metrics for Fold 4:
Metrics for Test set:
  MSE: 0.0070
  RMSE: 0.0834
  MAE: 0.0570
  R^2: 0.4847
  Adjusted R^2: 0.4684


Metrics for Fold 5:
Metrics for Test set:
  MSE: 0.0061
  RMSE: 0.0779
  MAE: 0.0552
  R^2: 0.6624
  Adjusted R^2: 0.6517


Metrics for Fold 6:
Metrics for Test set:
  MSE: 0.0060
  RMSE: 0.0777
  MAE: 0.0503
  R^2: 0.5260
  Adjusted R^2: 0.5110


Metrics for Fold 7:
Metrics for Test set:
  MSE: 0.0076
  RMSE: 0.0872
  MAE: 0.0564
  R^2: 0.5784
  Adjusted R^2: 0.5651


Metrics for Fold 8:
Metrics for Test set:
  MSE: 0.0083
  RMSE: 0.0912
  MAE: 0.0549
  R^2: 0.5386
  Adjusted R^2: 0.5241


Metrics

# Reg kriging covariates

In [84]:
np.random.seed(42)

y = deposit_data['Density_gcm3'].values[:, np.newaxis]  # Keep variable as the output
x = deposit_data[['X', 'Y', 'Z','CP_Total', 'PO_Total', 'PY_Total', 'RQD_Pct', 'Cr_ppm']].values
x = x.reshape(len(deposit_data), 8)

num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

mse_list, mae_list, adjusted_r2_list = [], [], []
for train_index, test_index in kf.split(x):
    X_cv_train, X_cv_test = x[train_index], x[test_index]
    y_cv_train, y_cv_test = y[train_index], y[test_index]

    # Fit a linear regression model
    regression_model = LinearRegression()
    regression_model.fit(X_cv_train, y_cv_train)

    # Predictions from the regression model
    y_cv_pred = regression_model.predict(X_cv_test)

    # Residuals (the difference between actual and predicted values)
    residuals = y_cv_test - y_cv_pred

    # Ordinary Kriging on residuals
    ok = OrdinaryKriging(X_cv_test[:, 0], X_cv_test[:, 1], residuals, variogram_model='linear', verbose=False)
    kriging_pred, _ = ok.execute('grid', X_cv_test[:, 0], X_cv_test[:, 1])

    # Use Kriging predictions directly
    final_predictions = kriging_pred

    # Combine regression predictions with kriging predictions
    final_cv_predictions = y_cv_pred + kriging_pred

    # Calculate and store metrics
    mse = np.mean((y_cv_test - final_cv_predictions) ** 2)
    mae = np.mean(np.abs(y_cv_test - final_cv_predictions))
    sst = np.mean((y_cv_test - np.mean(y_cv_test)) ** 2) * len(y_cv_test)
    ssr = np.mean((final_cv_predictions - y_cv_test) ** 2) * len(y_cv_test)
    r2 = 1 - (ssr / sst)
    
    # Calculate adjusted R-squared
    n = len(y_cv_test)
    num_predictors = X_cv_test.shape[1]
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - num_predictors - 1))

    mse_list.append(mse)
    mae_list.append(mae)
    adjusted_r2_list.append(adjusted_r2)

# Calculate mean metrics across folds
mean_mse = np.mean(mse_list)
mean_mae = np.mean(mae_list)
mean_adjusted_r2 = np.mean(adjusted_r2_list)

# Print mean metrics
print(f"Mean Squared Error (MSE): {mean_mse}")
print(f"Mean Absolute Error (MAE): {mean_mae}")
print(f"Mean Adjusted R-squared: {mean_adjusted_r2}")


Mean Squared Error (MSE): 0.013648385980087299
Mean Absolute Error (MAE): 0.07509199045290375
Mean Adjusted R-squared: 0.10163260900323849


# Deepkriging no covariates

In [103]:
torch.manual_seed(42)
np.random.seed(42)

phi_columns = deposit_data.columns[10:].tolist()
p = len(phi_columns)

# Create an array to store metrics for each fold
test_mse_list = []
test_rmse_list = []
test_mae_list = []
test_r2_list = []
test_adjusted_r2_list = []


# Define the number of folds for cross-validation
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(deposit_data)):
    train_data, test_data = deposit_data.iloc[train_index], deposit_data.iloc[test_index]

    x_train = train_data[phi_columns].values
    y_train = train_data['Density_gcm3'].values

    x_test = test_data[phi_columns].values
    y_test = test_data['Density_gcm3'].values

    # Define your neural network
    model = nn.Sequential(
        nn.Linear(in_features=p, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5) ,
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=1))


    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    train_losses = []  # To store training losses during training
    test_losses = []   # To store test losses during training

    # Training loop
    for step in range(601):
        pre = model(torch.tensor(x_train, dtype=torch.float32))
        mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
        cost = mse

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pre_test = model(torch.tensor(x_test, dtype=torch.float32))
        mse_test = mse_loss(pre_test, torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
        test_losses.append(mse_test.item())

    # Store metrics for this fold
    test_predictions_fold = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse_list.append(mean_squared_error(y_test, test_predictions_fold))
    test_mae_list.append(mean_absolute_error(y_test, test_predictions_fold))
    test_r2_list.append(r2_score(y_test, test_predictions_fold))

     # Calculate adjusted R-squared
    n = len(y_test)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    ssr = np.sum((test_predictions_fold - y_test) ** 2)
    r2 = 1 - (ssr / sst)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
    test_adjusted_r2_list.append(adjusted_r2)

   # Print metrics for the current fold
    print(f"\nMetrics for Fold {fold + 1}:")
    print_metrics(y_test, test_predictions_fold, "Test", p)

# Print average metrics across folds
print("\nAverage Metrics Across Folds:")
print(f"  Average MSE: {np.mean(test_mse_list):.4f}")
print(f"  Average MAE: {np.mean(test_mae_list):.4f}")
print(f"  Average Adjusted R2: {np.mean(test_adjusted_r2_list):.4f}")
print(f"  STD MSE: {np.std(test_mse_list):.4f}")
print(f"  STD MAE: {np.std(test_mae_list):.4f}")
print(f"  STD Adjusted R2: {np.std(test_adjusted_r2_list):.4f}")


Metrics for Fold 1:
Metrics for Test set:
  MSE: 0.0026
  RMSE: 0.0511
  MAE: 0.0373
  R^2: 0.8365
  Adjusted R^2: 1.0069


Metrics for Fold 2:
Metrics for Test set:
  MSE: 0.0025
  RMSE: 0.0496
  MAE: 0.0347
  R^2: 0.8120
  Adjusted R^2: 1.0080


Metrics for Fold 3:
Metrics for Test set:
  MSE: 0.0028
  RMSE: 0.0534
  MAE: 0.0367
  R^2: 0.8067
  Adjusted R^2: 1.0082


Metrics for Fold 4:
Metrics for Test set:
  MSE: 0.0025
  RMSE: 0.0504
  MAE: 0.0386
  R^2: 0.8120
  Adjusted R^2: 1.0080


Metrics for Fold 5:
Metrics for Test set:
  MSE: 0.0028
  RMSE: 0.0529
  MAE: 0.0394
  R^2: 0.8445
  Adjusted R^2: 1.0066


Metrics for Fold 6:
Metrics for Test set:
  MSE: 0.0024
  RMSE: 0.0485
  MAE: 0.0375
  R^2: 0.8151
  Adjusted R^2: 1.0079


Metrics for Fold 7:
Metrics for Test set:
  MSE: 0.0025
  RMSE: 0.0502
  MAE: 0.0369
  R^2: 0.8603
  Adjusted R^2: 1.0059


Metrics for Fold 8:
Metrics for Test set:
  MSE: 0.0026
  RMSE: 0.0510
  MAE: 0.0388
  R^2: 0.8558
  Adjusted R^2: 1.0061


Metrics

# NN no covariates

In [101]:
torch.manual_seed(42)
np.random.seed(42)

phi_columns = ['X','Y','Z']
p =  len(phi_columns)

# Create an array to store metrics for each fold
test_mse_list = []
test_rmse_list = []
test_mae_list = []
test_r2_list = []
test_adjusted_r2_list = []


# Define the number of folds for cross-validation
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(deposit_data)):
    train_data, test_data = deposit_data.iloc[train_index], deposit_data.iloc[test_index]

    x_train = train_data[phi_columns].values
    y_train = train_data['Density_gcm3'].values

    x_test = test_data[phi_columns].values
    y_test = test_data['Density_gcm3'].values

    # Define your neural network
    model = nn.Sequential(
        nn.Linear(in_features=p, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5) ,
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=1))


    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    train_losses = []  # To store training losses during training
    test_losses = []   # To store test losses during training

    # Training loop
    for step in range(601):
        pre = model(torch.tensor(x_train, dtype=torch.float32))
        mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
        cost = mse

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pre_test = model(torch.tensor(x_test, dtype=torch.float32))
        mse_test = mse_loss(pre_test, torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
        test_losses.append(mse_test.item())

    # Store metrics for this fold
    test_predictions_fold = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse_list.append(mean_squared_error(y_test, test_predictions_fold))
    test_mae_list.append(mean_absolute_error(y_test, test_predictions_fold))
    test_r2_list.append(r2_score(y_test, test_predictions_fold))


     # Calculate adjusted R-squared
    n = len(y_test)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    ssr = np.sum((test_predictions_fold - y_test) ** 2)
    r2 = 1 - (ssr / sst)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
    test_adjusted_r2_list.append(adjusted_r2)

   # Print metrics for the current fold
    print(f"\nMetrics for Fold {fold + 1}:")
    print_metrics(y_test, test_predictions_fold, "Test", p)

# Print average metrics across folds
print("\nAverage Metrics Across Folds:")
print(f"  Average MSE: {np.mean(test_mse_list):.4f}")
print(f"  Average MAE: {np.mean(test_mae_list):.4f}")
print(f"  Average Adjusted R2: {np.mean(test_adjusted_r2_list):.4f}")
print(f"  STD MSE: {np.std(test_mse_list):.4f}")
print(f"  STD MAE: {np.std(test_mae_list):.4f}")
print(f"  STD Adjusted R2: {np.std(test_adjusted_r2_list):.4f}")


Metrics for Fold 1:
Metrics for Test set:
  MSE: 0.0065
  RMSE: 0.0805
  MAE: 0.0577
  R^2: 0.5943
  Adjusted R^2: 0.5896


Metrics for Fold 2:
Metrics for Test set:
  MSE: 0.0109
  RMSE: 0.1042
  MAE: 0.0636
  R^2: 0.1712
  Adjusted R^2: 0.1616


Metrics for Fold 3:
Metrics for Test set:
  MSE: 0.0047
  RMSE: 0.0686
  MAE: 0.0487
  R^2: 0.6811
  Adjusted R^2: 0.6774


Metrics for Fold 4:
Metrics for Test set:
  MSE: 0.0056
  RMSE: 0.0748
  MAE: 0.0517
  R^2: 0.5855
  Adjusted R^2: 0.5807


Metrics for Fold 5:
Metrics for Test set:
  MSE: 0.0123
  RMSE: 0.1110
  MAE: 0.0687
  R^2: 0.3154
  Adjusted R^2: 0.3074


Metrics for Fold 6:
Metrics for Test set:
  MSE: 0.0084
  RMSE: 0.0915
  MAE: 0.0579
  R^2: 0.3426
  Adjusted R^2: 0.3350


Metrics for Fold 7:
Metrics for Test set:
  MSE: 0.0050
  RMSE: 0.0705
  MAE: 0.0491
  R^2: 0.7250
  Adjusted R^2: 0.7218


Metrics for Fold 8:
Metrics for Test set:
  MSE: 0.0112
  RMSE: 0.1057
  MAE: 0.0645
  R^2: 0.3802
  Adjusted R^2: 0.3730


Metrics

# Regression kriging no covariates

In [87]:
np.random.seed(42)


y = deposit_data['Density_gcm3'].values[:, np.newaxis]  # Keep variable as the output
x = deposit_data[['X', 'Y', 'Z']].values
x = x.reshape(len(deposit_data), 3)

mse_list = []
mae_list = []
test_adjusted_r2_list = []

num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

mse_list, mae_list, r2_list = [], [], []
for train_index, test_index in kf.split(x):
    X_cv_train, X_cv_test = x[train_index], x[test_index]
    y_cv_train, y_cv_test = y[train_index], y[test_index]

    # Ordinary Kriging on residuals
    ok = OrdinaryKriging(X_cv_test[:, 0], X_cv_test[:, 1], y_cv_test, variogram_model='linear', verbose=False)
    kriging_pred, _ = ok.execute('grid', X_cv_test[:, 0], X_cv_test[:, 1])

    final_cv_predictions =  kriging_pred

    # Calculate and store metrics
    mse = np.mean((y_cv_test - final_cv_predictions) ** 2)
    mae = np.mean(np.abs(y_cv_test - final_cv_predictions))
    sst = np.mean((y_cv_test - np.mean(y_cv_test)) ** 2)*len(y_cv_test)
    ssr = np.mean((final_cv_predictions - y_cv_test) ** 2)*len(y_cv_test)
    r2 = 1 - (ssr / sst)


    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)

# Calculate mean metrics across folds
mean_mse = np.mean(mse_list)
mean_mae = np.mean(mae_list)
mean_r2 = np.mean(r2_list)

n = len(y_test)
sst = np.sum((y_test - np.mean(y_test)) ** 2)
ssr = np.sum((test_predictions_fold - y_test) ** 2)
r2 = 1 - (ssr / sst)
adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
test_adjusted_r2_list.append(adjusted_r2)

mean_a_r2 = np.mean(test_adjusted_r2_list)

# Print mean metrics
print(f"Mean Squared Error (MSE): {mean_mse}")
print(f"Mean Absolute Error (MAE): {mean_mae}")
print(f"Mean Adjusted R-squared (R2): {mean_a_r2}")

Mean Squared Error (MSE): 0.014367284060944865
Mean Absolute Error (MAE): 0.07749732186146643
Mean Adjusted R-squared (R2): 0.336608309189098
