In [44]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np 
import pandas as pd 
import scipy as sp
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import LinearRegression
from pykrige.ok import OrdinaryKriging


In [45]:
deposit_data = pd.read_csv("../../Curated_data/final_dataset_1_no_0.csv", low_memory=False)


In [46]:
# # Assuming 'deposit_data' is your DataFrame
# # Assuming 'means_after_10th' is your Series of means
# means_after_10th = deposit_data.iloc[:, 10:].mean()
# # Create a boolean mask based on the condition
# mask = means_after_10th < 1.28827e-10

# # Combine the first 10 columns with the filtered columns
# filtered_deposit_data = deposit_data.iloc[:, :10].join(deposit_data.iloc[:, 10:][mask.index[mask]])

# # Display or use the filtered DataFrame as needed
# print(filtered_deposit_data)
# deposit_data =filtered_deposit_data

In [47]:
len(deposit_data.columns[10:])

488

In [48]:
total_columns = ['CP_Total','PO_Total', 'PY_Total']

#all covariates
covariates = total_columns[:3] + ['RQD_Pct', 'Cr_ppm'] 

In [49]:
deposit_data

Unnamed: 0,Name,X,Y,Z,Density_gcm3,RQD_Pct,Cr_ppm,CP_Total,PO_Total,PY_Total,...,phi_478,phi_479,phi_480,phi_481,phi_482,phi_483,phi_484,phi_485,phi_486,phi_487
0,KV-NME001,0.437814,0.509816,0.461455,0.400922,0.8800,0.127305,0.250,0.066667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,KV-NME001,0.438061,0.509789,0.460591,0.410138,0.8800,0.160479,0.250,0.066667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,KV-NME001,0.448174,0.508800,0.426068,0.442396,0.9900,0.128743,0.250,0.066667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,KV-NME001,0.448431,0.508777,0.425204,0.442396,0.9900,0.141317,0.375,0.133333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,KV-NME001,0.448683,0.508755,0.424340,0.442396,0.9900,0.153293,0.500,0.200000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,KV365,0.629186,0.001755,0.432766,0.543779,0.9235,0.078443,0.050,0.200000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2614,KV365,0.629096,0.001314,0.431988,0.539171,0.9235,0.074850,0.050,0.200000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2615,KV365,0.629011,0.000877,0.431205,0.543779,0.8584,0.076647,0.050,0.200000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2616,KV365,0.628921,0.000436,0.430427,0.525346,0.8584,0.077246,0.050,0.200000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
# Assuming deposit_data is your DataFrame
# Extract the names of the first 98 columns
phi_columns = deposit_data.columns[10:].tolist()
phi_columns[-1]


'phi_487'

In [51]:
# Function to print evaluation metrics
def print_metrics(actual, predicted, set_name, num_predictors):
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    n = len(actual)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - num_predictors - 1))

    print(f"Metrics for {set_name} set:")
    print(f"  MSE: {mse:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    print(f"  MAE: {mae:.4f}")
    print(f"  R^2: {r2:.4f}")
    print(f"  Adjusted R^2: {adjusted_r2:.4f}\n")


# Deepkriging covariates

In [52]:
from sklearn.model_selection import train_test_split
torch.manual_seed(42)
np.random.seed(42)
p = len(covariates) + len(phi_columns)

# Splitting data into training and testing sets
x_train, x_test, y_train, y_test, idx_train, idx_test = train_test_split(
    deposit_data[phi_columns + covariates].values,
    deposit_data['Density_gcm3'].values,
    deposit_data.index,  # Return indices for later use
    test_size=0.3, random_state=42)



# Define your neural network
model = nn.Sequential(
    nn.Linear(in_features=p, out_features=100),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.BatchNorm1d(100),
    nn.Linear(in_features=100, out_features=100),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=100, out_features=100),
    nn.ReLU(),
    nn.BatchNorm1d(100),
    nn.Linear(in_features=100, out_features=1))

# Loss function and optimizer
mse_loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)

# Training loop
for step in range(601):
    pre = model(torch.tensor(x_train, dtype=torch.float32))
    mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
    cost = mse

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

# Evaluation
with torch.no_grad():
    model.eval()
    test_predictions_dk = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse = mse_loss(torch.tensor(test_predictions.reshape(-1, 1), dtype=torch.float32), torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
    print("Test MSE:", test_mse.item())


Test MSE: 0.0038445298559963703


In [53]:
import plotly.graph_objects as go


# Extract relevant rows from deposit_data for the test set
test_data_xyz = deposit_data.loc[idx_test, ['X', 'Y', 'Z']].values

# Create 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color='blue',  # Color of markers
        opacity=0.8
    )
)])

# Add test predictions
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],
    y=test_data_xyz[:, 1],
    z=test_data_xyz[:, 2],
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_dk,  # Color based on predictions
        colorscale='speed',    # Color scale
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    name='Predictions'
))

# Update layout
fig.update_layout(scene=dict(
    xaxis_title='X',
    yaxis_title='Y',
    zaxis_title='Z'),
    title='Test Predictions in 3D',
    margin=dict(l=0, r=0, b=0, t=0)
)

# Show plot
fig.show()


# NN Covariates

In [54]:
torch.manual_seed(42)
np.random.seed(42)

phi_columns = ['X','Y','Z']
p = len(covariates) + len(phi_columns)

# Perform k-fold cross-validation
from sklearn.model_selection import train_test_split

# Splitting data into training and testing sets
x_train, x_test, y_train, y_test, idx_train, idx_test = train_test_split(
    deposit_data[phi_columns + covariates].values,
    deposit_data['Density_gcm3'].values,
    deposit_data.index,  # Return indices for later use
    test_size=0.3, random_state=42)



# Define your neural network
model = nn.Sequential(
    nn.Linear(in_features=p, out_features=100),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.BatchNorm1d(100),
    nn.Linear(in_features=100, out_features=100),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=100, out_features=100),
    nn.ReLU(),
    nn.BatchNorm1d(100),
    nn.Linear(in_features=100, out_features=1))

# Loss function and optimizer
mse_loss = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)

# Training loop
for step in range(601):
    pre = model(torch.tensor(x_train, dtype=torch.float32))
    mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
    cost = mse

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

# Evaluation
with torch.no_grad():
    model.eval()
    test_predictions_nn = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse = mse_loss(torch.tensor(test_predictions.reshape(-1, 1), dtype=torch.float32), torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
    print("Test MSE:", test_mse.item())


Test MSE: 0.0038445298559963703


In [55]:
import plotly.graph_objects as go


# Extract relevant rows from deposit_data for the test set
test_data_xyz = deposit_data.loc[idx_test, ['X', 'Y', 'Z']].values

# Create 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color='blue',  # Color of markers
        opacity=0.8
    )
)])

# Add test predictions
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],
    y=test_data_xyz[:, 1],
    z=test_data_xyz[:, 2],
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_nn,  # Color based on predictions
        colorscale='speed',    # Color scale
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    name='Predictions'
))

# Update layout
fig.update_layout(scene=dict(
    xaxis_title='X',
    yaxis_title='Y',
    zaxis_title='Z'),
    title='Test Predictions in 3D',
    margin=dict(l=0, r=0, b=0, t=0)
)

# Show plot
fig.show()


In [61]:
import plotly.graph_objects as go

# Extract relevant rows from deposit_data for the test set
test_data_xyz = deposit_data.loc[idx_test, ['X', 'Y', 'Z']].values

# Determine the color range based on both sets of predictions
min_value = min(test_predictions_nn.min(), test_predictions_dk.min())
max_value = max(test_predictions_nn.max(), test_predictions_dk.max())

# Create subplot figure
fig = go.Figure()

# Define the first subplot (left side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_nn.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False
))

# Define the second subplot (right side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_dk.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene2'
))

# Update layout for both subplots
fig.update_layout(
    title='Test Predictions in 3D',
    scene=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        domain=dict(x=[0, 0.5]),
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene2=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        domain=dict(x=[0.5, 1]),
        camera_eye=dict(x=1.25, y=1.25, z=1)
    ),
    margin=dict(l=0, r=0, b=0, t=50),
)

# Show plot
fig.show()


# Reg kriging covariates

In [56]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

np.random.seed(42)

# Separate input features (x) and target variable (y)
y = deposit_data['Density_gcm3'].values[:, np.newaxis]  # Keep variable as the output
x = deposit_data[['X', 'Y', 'Z','CP_Total', 'PO_Total', 'PY_Total', 'RQD_Pct', 'Cr_ppm']].values

# Perform 70:30 train-test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Fit a linear regression model
regression_model = LinearRegression()
regression_model.fit(x_train, y_train)

# Predictions from the regression model
test_predictions_rk = regression_model.predict(x_test)

# Calculate metrics
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
sst = np.mean((y_test - np.mean(y_test)) ** 2) * len(y_test)
ssr = np.mean((y_pred - y_test) ** 2) * len(y_test)
r2 = 1 - (ssr / sst)

# Calculate adjusted R-squared
n = len(y_test)
num_predictors = x_test.shape[1]
adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - num_predictors - 1))

# Print metrics
print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Adjusted R-squared: {adjusted_r2}")


Mean Squared Error (MSE): 0.012730452388159527
Mean Absolute Error (MAE): 0.07245573027232928
Adjusted R-squared: 0.12165101727740812


In [57]:
import plotly.graph_objects as go


# Extract relevant rows from deposit_data for the test set
test_data_xyz = deposit_data.loc[idx_test, ['X', 'Y', 'Z']].values

# Create 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color='blue',  # Color of markers
        opacity=0.8
    )
)])

# Add test predictions
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],
    y=test_data_xyz[:, 1],
    z=test_data_xyz[:, 2],
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_rk.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    name='Predictions'
))

# Update layout
fig.update_layout(scene=dict(
    xaxis_title='X',
    yaxis_title='Y',
    zaxis_title='Z'),
    title='Test Predictions in 3D',
    margin=dict(l=0, r=0, b=0, t=0)
)

# Show plot
fig.show()


In [68]:
import plotly.graph_objects as go

# Extract relevant rows from deposit_data for the test set
test_data_xyz = deposit_data.loc[idx_test, ['X', 'Y', 'Z']].values

# Determine the color range based on all sets of predictions
min_value = min(test_predictions_nn.min(), test_predictions_dk.min(), test_predictions_rk.min())
max_value = max(test_predictions_nn.max(), test_predictions_dk.max(), test_predictions_rk.max())

# Create subplot figure
fig = go.Figure()

# Define the first subplot (left side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_dk.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False
))

# Define the second subplot (middle)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_nn.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene2'
))

# Define the third subplot (right side)
fig.add_trace(go.Scatter3d(
    x=test_data_xyz[:, 0],  # X coordinates
    y=test_data_xyz[:, 1],  # Y coordinates
    z=test_data_xyz[:, 2],  # Z coordinates
    mode='markers',
    marker=dict(
        size=4,
        color=test_predictions_rk.ravel(),  # Color based on predictions
        colorscale='speed',    # Color scale
        cmin=min_value,
        cmax=max_value,
        opacity=0.8,
        colorbar=dict(title='Predicted Values')
    ),
    showlegend=False,
    scene='scene3'
))

# Update layout for all subplots
fig.update_layout(
    title='Test Predictions in 3D',
    scene=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        domain=dict(x=[0, 0.33]),
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene2=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        domain=dict(x=[0.33, 0.66]),
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    scene3=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        domain=dict(x=[0.66, 1]),
        camera_eye=dict(x=-1.25, y=1.25, z=1)
    ),
    margin=dict(l=0, r=0, b=0, t=50),
)

# Show plot
fig.show()


# Deepkriging no covariates

In [58]:
torch.manual_seed(42)
np.random.seed(42)

phi_columns = deposit_data.columns[10:].tolist()
p = len(phi_columns)

# Create an array to store metrics for each fold
test_mse_list = []
test_rmse_list = []
test_mae_list = []
test_r2_list = []
test_adjusted_r2_list = []


# Define the number of folds for cross-validation
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(deposit_data)):
    train_data, test_data = deposit_data.iloc[train_index], deposit_data.iloc[test_index]

    x_train = train_data[phi_columns].values
    y_train = train_data['Density_gcm3'].values

    x_test = test_data[phi_columns].values
    y_test = test_data['Density_gcm3'].values

    # Define your neural network
    model = nn.Sequential(
        nn.Linear(in_features=p, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5) ,
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=1))


    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    train_losses = []  # To store training losses during training
    test_losses = []   # To store test losses during training

    # Training loop
    for step in range(601):
        pre = model(torch.tensor(x_train, dtype=torch.float32))
        mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
        cost = mse

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pre_test = model(torch.tensor(x_test, dtype=torch.float32))
        mse_test = mse_loss(pre_test, torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
        test_losses.append(mse_test.item())

    # Store metrics for this fold
    test_predictions_fold = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse_list.append(mean_squared_error(y_test, test_predictions_fold))
    test_mae_list.append(mean_absolute_error(y_test, test_predictions_fold))
    test_r2_list.append(r2_score(y_test, test_predictions_fold))

     # Calculate adjusted R-squared
    n = len(y_test)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    ssr = np.sum((test_predictions_fold - y_test) ** 2)
    r2 = 1 - (ssr / sst)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
    test_adjusted_r2_list.append(adjusted_r2)

   # Print metrics for the current fold
    print(f"\nMetrics for Fold {fold + 1}:")
    print_metrics(y_test, test_predictions_fold, "Test", p)

# Print average metrics across folds
print("\nAverage Metrics Across Folds:")
print(f"  Average MSE: {np.mean(test_mse_list):.4f}")
print(f"  Average MAE: {np.mean(test_mae_list):.4f}")
print(f"  Average Adjusted R2: {np.mean(test_adjusted_r2_list):.4f}")
print(f"  STD MSE: {np.std(test_mse_list):.4f}")
print(f"  STD MAE: {np.std(test_mae_list):.4f}")
print(f"  STD Adjusted R2: {np.std(test_adjusted_r2_list):.4f}")

KeyboardInterrupt: 

# NN no covariates

In [None]:
torch.manual_seed(42)
np.random.seed(42)

phi_columns = ['X','Y','Z']
p =  len(phi_columns)

# Create an array to store metrics for each fold
test_mse_list = []
test_rmse_list = []
test_mae_list = []
test_r2_list = []
test_adjusted_r2_list = []


# Define the number of folds for cross-validation
num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(deposit_data)):
    train_data, test_data = deposit_data.iloc[train_index], deposit_data.iloc[test_index]

    x_train = train_data[phi_columns].values
    y_train = train_data['Density_gcm3'].values

    x_test = test_data[phi_columns].values
    y_test = test_data['Density_gcm3'].values

    # Define your neural network
    model = nn.Sequential(
        nn.Linear(in_features=p, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5) ,
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(in_features=100, out_features=100),
        nn.ReLU(),
        nn.BatchNorm1d(100),
        nn.Linear(in_features=100, out_features=1))


    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    train_losses = []  # To store training losses during training
    test_losses = []   # To store test losses during training

    # Training loop
    for step in range(601):
        pre = model(torch.tensor(x_train, dtype=torch.float32))
        mse = mse_loss(pre, torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32))
        cost = mse

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        pre_test = model(torch.tensor(x_test, dtype=torch.float32))
        mse_test = mse_loss(pre_test, torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32))
        test_losses.append(mse_test.item())

    # Store metrics for this fold
    test_predictions_fold = model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy().flatten()
    test_mse_list.append(mean_squared_error(y_test, test_predictions_fold))
    test_mae_list.append(mean_absolute_error(y_test, test_predictions_fold))
    test_r2_list.append(r2_score(y_test, test_predictions_fold))


     # Calculate adjusted R-squared
    n = len(y_test)
    sst = np.sum((y_test - np.mean(y_test)) ** 2)
    ssr = np.sum((test_predictions_fold - y_test) ** 2)
    r2 = 1 - (ssr / sst)
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
    test_adjusted_r2_list.append(adjusted_r2)

   # Print metrics for the current fold
    print(f"\nMetrics for Fold {fold + 1}:")
    print_metrics(y_test, test_predictions_fold, "Test", p)

# Print average metrics across folds
print("\nAverage Metrics Across Folds:")
print(f"  Average MSE: {np.mean(test_mse_list):.4f}")
print(f"  Average MAE: {np.mean(test_mae_list):.4f}")
print(f"  Average Adjusted R2: {np.mean(test_adjusted_r2_list):.4f}")
print(f"  STD MSE: {np.std(test_mse_list):.4f}")
print(f"  STD MAE: {np.std(test_mae_list):.4f}")
print(f"  STD Adjusted R2: {np.std(test_adjusted_r2_list):.4f}")


Metrics for Fold 1:
Metrics for Test set:
  MSE: 0.0065
  RMSE: 0.0805
  MAE: 0.0577
  R^2: 0.5943
  Adjusted R^2: 0.5896


Metrics for Fold 2:
Metrics for Test set:
  MSE: 0.0109
  RMSE: 0.1042
  MAE: 0.0636
  R^2: 0.1712
  Adjusted R^2: 0.1616


Metrics for Fold 3:
Metrics for Test set:
  MSE: 0.0047
  RMSE: 0.0686
  MAE: 0.0487
  R^2: 0.6811
  Adjusted R^2: 0.6774


Metrics for Fold 4:
Metrics for Test set:
  MSE: 0.0056
  RMSE: 0.0748
  MAE: 0.0517
  R^2: 0.5855
  Adjusted R^2: 0.5807


Metrics for Fold 5:
Metrics for Test set:
  MSE: 0.0123
  RMSE: 0.1110
  MAE: 0.0687
  R^2: 0.3154
  Adjusted R^2: 0.3074


Metrics for Fold 6:
Metrics for Test set:
  MSE: 0.0084
  RMSE: 0.0915
  MAE: 0.0579
  R^2: 0.3426
  Adjusted R^2: 0.3350


Metrics for Fold 7:
Metrics for Test set:
  MSE: 0.0050
  RMSE: 0.0705
  MAE: 0.0491
  R^2: 0.7250
  Adjusted R^2: 0.7218


Metrics for Fold 8:
Metrics for Test set:
  MSE: 0.0112
  RMSE: 0.1057
  MAE: 0.0645
  R^2: 0.3802
  Adjusted R^2: 0.3730


Metrics

# Regression kriging no covariates

In [None]:
np.random.seed(42)


y = deposit_data['Density_gcm3'].values[:, np.newaxis]  # Keep variable as the output
x = deposit_data[['X', 'Y', 'Z']].values
x = x.reshape(len(deposit_data), 3)

mse_list = []
mae_list = []
test_adjusted_r2_list = []

num_folds = 10
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

mse_list, mae_list, r2_list = [], [], []
for train_index, test_index in kf.split(x):
    X_cv_train, X_cv_test = x[train_index], x[test_index]
    y_cv_train, y_cv_test = y[train_index], y[test_index]

    # Ordinary Kriging on residuals
    ok = OrdinaryKriging(X_cv_test[:, 0], X_cv_test[:, 1], y_cv_test, variogram_model='linear', verbose=False)
    kriging_pred, _ = ok.execute('grid', X_cv_test[:, 0], X_cv_test[:, 1])

    final_cv_predictions =  kriging_pred

    # Calculate and store metrics
    mse = np.mean((y_cv_test - final_cv_predictions) ** 2)
    mae = np.mean(np.abs(y_cv_test - final_cv_predictions))
    sst = np.mean((y_cv_test - np.mean(y_cv_test)) ** 2)*len(y_cv_test)
    ssr = np.mean((final_cv_predictions - y_cv_test) ** 2)*len(y_cv_test)
    r2 = 1 - (ssr / sst)


    mse_list.append(mse)
    mae_list.append(mae)
    r2_list.append(r2)

# Calculate mean metrics across folds
mean_mse = np.mean(mse_list)
mean_mae = np.mean(mae_list)
mean_r2 = np.mean(r2_list)

n = len(y_test)
sst = np.sum((y_test - np.mean(y_test)) ** 2)
ssr = np.sum((test_predictions_fold - y_test) ** 2)
r2 = 1 - (ssr / sst)
adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
test_adjusted_r2_list.append(adjusted_r2)

mean_a_r2 = np.mean(test_adjusted_r2_list)

# Print mean metrics
print(f"Mean Squared Error (MSE): {mean_mse}")
print(f"Mean Absolute Error (MAE): {mean_mae}")
print(f"Mean Adjusted R-squared (R2): {mean_a_r2}")

Mean Squared Error (MSE): 0.014367284060944865
Mean Absolute Error (MAE): 0.07749732186146643
Mean Adjusted R-squared (R2): 0.5786027443512991
