In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np 
import pandas as pd 
import scipy as sp
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import LinearRegression
from pykrige.ok import OrdinaryKriging


In [18]:
deposit_data = pd.read_csv("../../Curated_data/df_shapley.csv", low_memory=False)
dep_data = pd.read_csv("../../Curated_data/final_dataset_1_no_0.csv", low_memory=False)


In [33]:
torch.manual_seed(42)
np.random.seed(42)


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(dep_data,
                                                    dep_data['Density_gcm3'],
                                                    test_size=0.2, random_state=42)

In [37]:
merg = X_test[['X','Y','Z','Cr_ppm']]

In [40]:
import pandas as pd

# Assuming 'Cr_ppm' is a column in both DataFrames 'merg' and 'deposit_data'

# Perform the merge
merged_df = pd.merge(merg, deposit_data, on='Cr_ppm', how='inner')

# 'how' parameter specifies the type of merge:
# - 'inner': Keeps only the rows where the 'Cr_ppm' column exists in both DataFrames

# Display the merged DataFrame
merged_df


Unnamed: 0,X,Y,Z,Cr_ppm,phi_0,phi_1,phi_2,phi_3,phi_4,phi_5,...,PO_Total-shapley value,PO_Total-hue,Cr_ppm-shapley value,Cr_ppm-hue,PY_Total-shapley value,PY_Total-hue,RQD_Pct-shapley value,RQD_Pct-hue,CP_Total-shapley value,CP_Total-hue
0,0.432015,0.485334,0.414011,0.144910,0.0,0.0,0.0,0.0,0.0,0.0,...,0.004963,0.013333,0.007424,0.392216,0.072063,0.00,0.003772,0.9930,0.034117,0.2500
1,0.453501,0.821841,0.413759,0.245509,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.046689,0.200000,0.022136,0.307186,-0.036625,0.10,-0.031479,0.9900,0.007108,0.5000
2,0.453501,0.821841,0.413759,0.245509,0.0,0.0,0.0,0.0,0.0,0.0,...,0.022395,0.022000,0.018630,0.130838,-0.030349,0.00,0.000098,0.9600,-0.019688,0.7500
3,0.645737,0.464982,0.355226,0.245509,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.046689,0.200000,0.022136,0.307186,-0.036625,0.10,-0.031479,0.9900,0.007108,0.5000
4,0.645737,0.464982,0.355226,0.245509,0.0,0.0,0.0,0.0,0.0,0.0,...,0.022395,0.022000,0.018630,0.130838,-0.030349,0.00,0.000098,0.9600,-0.019688,0.7500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
627,0.696150,0.696892,0.300867,0.476707,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.051660,0.133333,-0.017341,0.134132,-0.045145,0.00,-0.065869,0.9738,0.067015,0.2500
628,0.470934,0.691233,0.422005,0.156886,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.003270,0.046667,0.023816,0.345395,-0.086445,0.08,0.009117,0.8521,-0.020063,0.0500
629,0.292491,0.696231,0.286296,0.503593,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.075494,0.013333,0.029307,0.233174,-0.071768,0.00,-0.049386,0.9270,0.011122,0.7500
630,0.535419,0.808200,0.089997,0.443593,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.006076,0.033333,-0.005567,0.169042,-0.077015,0.00,0.048243,0.9032,0.016825,0.1275


In [15]:
total_columns = ['CP_Total','PO_Total', 'PY_Total']

#all covariates
covariates = total_columns[:3] + ['RQD_Pct', 'Cr_ppm'] 

In [16]:
deposit_data.columns

Index(['phi_0', 'phi_1', 'phi_2', 'phi_3', 'phi_4', 'phi_5', 'phi_6', 'phi_7',
       'phi_8', 'phi_9',
       ...
       'PO_Total-shapley value', 'PO_Total-hue', 'Cr_ppm-shapley value',
       'Cr_ppm-hue', 'PY_Total-shapley value', 'PY_Total-hue',
       'RQD_Pct-shapley value', 'RQD_Pct-hue', 'CP_Total-shapley value',
       'CP_Total-hue'],
      dtype='object', length=504)

In [17]:
deposit_data['PO_Total-shapley value']

0      0.004963
1     -0.046689
2     -0.022507
3     -0.046630
4      0.036938
         ...   
519   -0.051660
520   -0.003270
521   -0.075494
522   -0.006076
523   -0.000405
Name: PO_Total-shapley value, Length: 524, dtype: float64

In [11]:
import plotly.graph_objects as go

# Extract relevant rows from deposit_data
test_data_xyz = deposit_data.loc[['X', 'Y', 'Z']].values

# Determine the color range based on all sets of predictions

# Create subplot figure
fig = go.Figure()

# Define the first subplot (left side)




# Define the second subplot (middle)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=y_test.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene2'
))



# Update layout for all subplots
fig.update_layout(
    title='Test Predictions in 3D',
    scene2=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        domain=dict(x=[0.33, 0.66]),
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    margin=dict(l=0, r=0, b=0, t=50),
)

fig.update_layout(
    title={
        'text': "Train set                                                                                            Test set",
        'y':0.9,
        'x':0.1,
        'xanchor': 'left',
        'yanchor': 'top'}
),


# Show plot
fig.show()


KeyError: "None of [Index(['X', 'Y', 'Z'], dtype='object')] are in the [index]"

In [112]:
import plotly.graph_objects as go

# Extract relevant rows from deposit_data for the test set
test_data_xyz = deposit_data.loc[idx_test, ['X', 'Y', 'Z']].values

# Determine the color range based on all sets of predictions
min_value = min(test_predictions_nn.min(), test_predictions_dk.min(), test_predictions_rk.min())
max_value = max(test_predictions_nn.max(), test_predictions_dk.max(), test_predictions_rk.max())

# Create subplot figure
fig = go.Figure()

# Define the first subplot (left side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  
    y=test_data_xyz[:, 1],  
    z=test_data_xyz[:, 2],  
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_dk.ravel(),  # Color based on predictions
        colorscale='speed',    
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene1',  # Use scene1 for the first subplot
))

# Define the second subplot (middle)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  
    y=test_data_xyz[:, 1],  
    z=test_data_xyz[:, 2],  
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_nn.ravel(),  # Color based on predictions
        colorscale='speed',   
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene2'  # Use scene2 for the second subplot
))

# Define the third subplot (right side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0], 
    y=test_data_xyz[:, 1], 
    z=test_data_xyz[:, 2],  
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_rk.ravel(),  # Color based on predictions
        colorscale='speed',    
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene3'  # Use scene3 for the third subplot
))

fig.update_layout(
    scene1=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene2=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene3=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    margin=dict(l=0, r=0, b=0, t=50),
    title={
        'text': "Deepkriging                                                                            Deep neural net                                                                 Regression kriging",
        'x': 0.1  # Center the titles horizontally
    }
)

# Show plot
fig.show()


In [88]:
import plotly.graph_objects as go

# Extract relevant rows from deposit_data for the test set
test_data_xyz = deposit_data.loc[idx_test, ['X', 'Y', 'Z']].values

# Determine the color range based on all sets of predictions
min_value = min(test_predictions_nn.min(), test_predictions_dk.min(), test_predictions_rk.min())
max_value = max(test_predictions_nn.max(), test_predictions_dk.max(), test_predictions_rk.max())

# Create subplot figure
fig = go.Figure()

# Define the first subplot (left side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_dk.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene1',  # Use scene1 for the first subplot
))

# Define the second subplot (middle)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_nn.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene2'  # Use scene2 for the second subplot
))

# Define the third subplot (right side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_rk.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene3'  # Use scene3 for the third subplot
))

# Update layout for all subplots
fig.update_layout(
    scene1=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene2=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene3=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    margin=dict(l=0, r=0, b=0, t=50),
    title={
        'text': "Deepkriging                                                                            Deep neural net                                                                 Regression kriging",
        'x': 0.1  # Center the titles horizontally
    }
)

# Show plot
fig.show()


In [87]:
import plotly.graph_objects as go

# Extract relevant rows from deposit_data for the test set
test_data_xyz = deposit_data.loc[idx_test, ['X', 'Y', 'Z']].values

# Determine the color range based on all sets of predictions
min_value = min((test_predictions_dk.ravel() - test_predictions_nn.ravel()).min(), (test_predictions_nn.ravel() - test_predictions_rk.ravel()).min(), (test_predictions_dk.ravel() - test_predictions_rk.ravel()).min())
max_value = max((test_predictions_dk.ravel() - test_predictions_nn.ravel()).max(), (test_predictions_nn.ravel() - test_predictions_rk.ravel()).max(), (test_predictions_dk.ravel() - test_predictions_rk.ravel()).max())

# Create subplot figure
fig = go.Figure()

# Define the first subplot (left side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_dk.ravel() - test_predictions_nn.ravel(),  # Color based on predictions
        colorscale='RdBu_r',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene1',  # Use scene1 for the first subplot
))

# Define the second subplot (middle)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_nn.ravel() - test_predictions_rk.ravel(),  # Color based on predictions
        colorscale='RdBu_r',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene2'  # Use scene2 for the second subplot
))

# Define the third subplot (right side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color= test_predictions_dk.ravel() - test_predictions_rk.ravel(),  # Color based on predictions
        colorscale='RdBu_r',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene3'  # Use scene3 for the third subplot
))

# Update layout for all subplots
fig.update_layout(
    scene1=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene2=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene3=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    margin=dict(l=0, r=0, b=0, t=50),
    title={
        'text': "Deepkriging-DNN                                                       Deep neural net - Regression kriging                                            Deepkriging - Regression kriging",
        'x': 0.1  # Center the titles horizontally
    }
)

# Show plot
fig.show()


# Deepkriging no covariates

In [None]:
torch.manual_seed(42)
np.random.seed(42)

phi_columns = deposit_data.columns[10:].tolist()
p = len(phi_columns)

# Create an array to store metrics for each fold
test_mse_list = []
test_rmse_list = []
test_mae_list = []
test_r2_list = []
test_adjusted_r2_list = []


# Define the number of folds for cross-validation
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(deposit_data)):
    train_data, test_data = deposit_data.iloc[train_index], deposit_data.iloc[test_index]

    x_train = train_data[phi_columns].values
    y_train = train_data['Density_gcm3'].values

    x_test = test_data[phi_columns].values
    y_test = test_data['Density_gcm3'].values

    # Define your neural network
    model = nn.Sequential(
        nn.Linear(in_features=p, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5) ,
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=1))


    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    train_losses = []  # To store training losses during training
    test_losses = []   # To store test losses during training

    # Training loop
    for step in range(601):
        pre = model(torch.tensor(x_train, dtype=torch.float32))
        mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
        cost = mse

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pre_test = model(torch.tensor(x_test, dtype=torch.float32))
        mse_test = mse_loss(pre_test, torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
        test_losses.append(mse_test.item())

    # Store metrics for this fold
    test_predictions_fold = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse_list.append(mean_squared_error(y_test, test_predictions_fold))
    test_mae_list.append(mean_absolute_error(y_test, test_predictions_fold))
    test_r2_list.append(r2_score(y_test, test_predictions_fold))

     # Calculate adjusted R-squared
    n = len(y_test)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    ssr = np.sum((test_predictions_fold - y_test) ** 2)
    r2 = 1 - (ssr / sst)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
    test_adjusted_r2_list.append(adjusted_r2)

   # Print metrics for the current fold
    print(f"\nMetrics for Fold {fold + 1}:")
    print_metrics(y_test, test_predictions_fold, "Test", p)

# Print average metrics across folds
print("\nAverage Metrics Across Folds:")
print(f"  Average MSE: {np.mean(test_mse_list):.4f}")
print(f"  Average MAE: {np.mean(test_mae_list):.4f}")
print(f"  Average Adjusted R2: {np.mean(test_adjusted_r2_list):.4f}")
print(f"  STD MSE: {np.std(test_mse_list):.4f}")
print(f"  STD MAE: {np.std(test_mae_list):.4f}")
print(f"  STD Adjusted R2: {np.std(test_adjusted_r2_list):.4f}")

KeyboardInterrupt: 

# NN no covariates

In [None]:
torch.manual_seed(42)
np.random.seed(42)

phi_columns = ['X','Y','Z']
p =  len(phi_columns)

# Create an array to store metrics for each fold
test_mse_list = []
test_rmse_list = []
test_mae_list = []
test_r2_list = []
test_adjusted_r2_list = []


# Define the number of folds for cross-validation
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(deposit_data)):
    train_data, test_data = deposit_data.iloc[train_index], deposit_data.iloc[test_index]

    x_train = train_data[phi_columns].values
    y_train = train_data['Density_gcm3'].values

    x_test = test_data[phi_columns].values
    y_test = test_data['Density_gcm3'].values

    # Define your neural network
    model = nn.Sequential(
        nn.Linear(in_features=p, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5) ,
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=1))


    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    train_losses = []  # To store training losses during training
    test_losses = []   # To store test losses during training

    # Training loop
    for step in range(601):
        pre = model(torch.tensor(x_train, dtype=torch.float32))
        mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
        cost = mse

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pre_test = model(torch.tensor(x_test, dtype=torch.float32))
        mse_test = mse_loss(pre_test, torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
        test_losses.append(mse_test.item())

    # Store metrics for this fold
    test_predictions_fold = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse_list.append(mean_squared_error(y_test, test_predictions_fold))
    test_mae_list.append(mean_absolute_error(y_test, test_predictions_fold))
    test_r2_list.append(r2_score(y_test, test_predictions_fold))


     # Calculate adjusted R-squared
    n = len(y_test)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    ssr = np.sum((test_predictions_fold - y_test) ** 2)
    r2 = 1 - (ssr / sst)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
    test_adjusted_r2_list.append(adjusted_r2)

   # Print metrics for the current fold
    print(f"\nMetrics for Fold {fold + 1}:")
    print_metrics(y_test, test_predictions_fold, "Test", p)

# Print average metrics across folds
print("\nAverage Metrics Across Folds:")
print(f"  Average MSE: {np.mean(test_mse_list):.4f}")
print(f"  Average MAE: {np.mean(test_mae_list):.4f}")
print(f"  Average Adjusted R2: {np.mean(test_adjusted_r2_list):.4f}")
print(f"  STD MSE: {np.std(test_mse_list):.4f}")
print(f"  STD MAE: {np.std(test_mae_list):.4f}")
print(f"  STD Adjusted R2: {np.std(test_adjusted_r2_list):.4f}")


Metrics for Fold 1:
Metrics for Test set:
  MSE: 0.0065
  RMSE: 0.0805
  MAE: 0.0577
  R^2: 0.5943
  Adjusted R^2: 0.5896


Metrics for Fold 2:
Metrics for Test set:
  MSE: 0.0109
  RMSE: 0.1042
  MAE: 0.0636
  R^2: 0.1712
  Adjusted R^2: 0.1616


Metrics for Fold 3:
Metrics for Test set:
  MSE: 0.0047
  RMSE: 0.0686
  MAE: 0.0487
  R^2: 0.6811
  Adjusted R^2: 0.6774


Metrics for Fold 4:
Metrics for Test set:
  MSE: 0.0056
  RMSE: 0.0748
  MAE: 0.0517
  R^2: 0.5855
  Adjusted R^2: 0.5807


Metrics for Fold 5:
Metrics for Test set:
  MSE: 0.0123
  RMSE: 0.1110
  MAE: 0.0687
  R^2: 0.3154
  Adjusted R^2: 0.3074


Metrics for Fold 6:
Metrics for Test set:
  MSE: 0.0084
  RMSE: 0.0915
  MAE: 0.0579
  R^2: 0.3426
  Adjusted R^2: 0.3350


Metrics for Fold 7:
Metrics for Test set:
  MSE: 0.0050
  RMSE: 0.0705
  MAE: 0.0491
  R^2: 0.7250
  Adjusted R^2: 0.7218


Metrics for Fold 8:
Metrics for Test set:
  MSE: 0.0112
  RMSE: 0.1057
  MAE: 0.0645
  R^2: 0.3802
  Adjusted R^2: 0.3730


Metrics

# Regression kriging no covariates

In [None]:
np.random.seed(42)


y = deposit_data['Density_gcm3'].values[:, np.newaxis]  # Keep variable as the output
x = deposit_data[['X', 'Y', 'Z']].values
x = x.reshape(len(deposit_data), 3)

mse_list = []
mae_list = []
test_adjusted_r2_list = []

num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

mse_list, mae_list, r2_list = [], [], []
for train_index, test_index in kf.split(x):
    X_cv_train, X_cv_test = x[train_index], x[test_index]
    y_cv_train, y_cv_test = y[train_index], y[test_index]

    # Ordinary Kriging on residuals
    ok = OrdinaryKriging(X_cv_test[:, 0], X_cv_test[:, 1], y_cv_test, variogram_model='linear', verbose=False)
    kriging_pred, _ = ok.execute('grid', X_cv_test[:, 0], X_cv_test[:, 1])

    final_cv_predictions =  kriging_pred

    # Calculate and store metrics
    mse = np.mean((y_cv_test - final_cv_predictions) ** 2)
    mae = np.mean(np.abs(y_cv_test - final_cv_predictions))
    sst = np.mean((y_cv_test - np.mean(y_cv_test)) ** 2)*len(y_cv_test)
    ssr = np.mean((final_cv_predictions - y_cv_test) ** 2)*len(y_cv_test)
    r2 = 1 - (ssr / sst)


    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)

# Calculate mean metrics across folds
mean_mse = np.mean(mse_list)
mean_mae = np.mean(mae_list)
mean_r2 = np.mean(r2_list)

n = len(y_test)
sst = np.sum((y_test - np.mean(y_test)) ** 2)
ssr = np.sum((test_predictions_fold - y_test) ** 2)
r2 = 1 - (ssr / sst)
adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
test_adjusted_r2_list.append(adjusted_r2)

mean_a_r2 = np.mean(test_adjusted_r2_list)

# Print mean metrics
print(f"Mean Squared Error (MSE): {mean_mse}")
print(f"Mean Absolute Error (MAE): {mean_mae}")
print(f"Mean Adjusted R-squared (R2): {mean_a_r2}")

Mean Squared Error (MSE): 0.014367284060944865
Mean Absolute Error (MAE): 0.07749732186146643
Mean Adjusted R-squared (R2): 0.5786027443512991
