# Full-scale Vecchia approximation parameters

## Packages

In [6]:
import torch
import gpytorch
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import os
import gpboost as gpb
import requests

## Function to generate data

In [16]:
# Download the data-simulation script (Simulate_Data.py) from the FSVA GitHub repository
url = 'https://raw.githubusercontent.com/TimGyger/FSVA/refs/heads/main/Simulation/Simulate_Data.py'

# Fetch the script. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() aborts on an HTTP error (e.g. 404) so that an
# error page is never saved to disk and passed to exec() below.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Save the Python file locally (raw bytes, exactly as served)
with open('your_script.py', 'wb') as f:
    f.write(response.content)

print("Python file downloaded successfully!")

# Read the script back as text. The encoding is given explicitly because the bytes
# were written unchanged and the platform default encoding is not UTF-8 everywhere.
with open('your_script.py', 'r', encoding='utf-8') as f:
    code = f.read()

# SECURITY NOTE: this executes code fetched from a mutable branch ("refs/heads/main")
# inside the notebook's namespace. Only run it if the repository is trusted; pinning
# the URL to a specific commit would make the notebook reproducible.
# Later cells call `simulate_gp_response`, which is presumably defined by this script.
exec(code)

Python file downloaded successfully!


## Gaussian

### Generate Data

In [17]:
# Simulate the Gaussian-likelihood data set; the third return value is not used.
# NOTE(review): the signature of `simulate_gp_response` is not visible in this notebook.
# The values 0.3, 1.0 and (0.25, ..., 1.25) reappear as `cov_pars` in the experiment
# cell, so they are presumably the error variance, the marginal variance and the
# ARD range parameters; 10000 is presumably the sample size and the final 1 a seed.
# TODO confirm against Simulate_Data.py.
X, y, _ = simulate_gp_response(
    "gaussian",
    10000,
    0.3,
    1.0,
    torch.tensor([(0.25, 0.50, 0.75, 1.00, 1.25)]),
    1,
)

In [None]:
# The GPBoost calls below are given NumPy arrays, so convert the simulated tensors.
X_np, y_np = X.numpy(), y.numpy()

### Experiment

In [None]:
# Settings to compare: numbers of inducing points (0 = plain Vecchia branch)
vector_ind_points = [0, 10, 50, 100, 200, 500]
# ... crossed with numbers of Vecchia neighbors
vector_neighbors = [5, 10, 15, 20, 30, 50]

# Result grids: one row per inducing-point setting, one column per neighbor setting.
# `matrix` stores the first FSVA variant and `matrix2` the second; the row for
# 0 inducing points holds the plain Vecchia value in both.
grid_shape = (len(vector_ind_points), len(vector_neighbors))
matrix = np.zeros(grid_shape)
matrix2 = np.zeros(grid_shape)

# Covariance parameters at which every negative log-likelihood is evaluated
# (the same numbers that were passed to the simulation call).
cov_par_values = [0.3, 1.0, 0.25, 0.50, 0.75, 1.00, 1.25]

# Constructor arguments shared by every model in this cell
shared_kwargs = dict(
    gp_coords=X_np,
    cov_function="gaussian_ard",
    likelihood="gaussian",
    matrix_inversion_method="cholesky",
    gp_approx="vecchia",
)

for i, n_ind in enumerate(vector_ind_points):
    for j, n_nbr in enumerate(vector_neighbors):
        if n_ind == 0:
            # Vecchia without inducing points: same value goes into both grids
            model_vecchia = gpb.GPModel(num_neighbors=n_nbr, seed=10, **shared_kwargs)
            neg_vecchia = model_vecchia.neg_log_likelihood(cov_pars=np.array(cov_par_values), y=y_np)
            matrix[i, j] = matrix2[i, j] = neg_vecchia
            continue

        # Arguments shared by the two inducing-point models
        fsva_kwargs = dict(
            shared_kwargs,
            num_neighbors=n_nbr,
            num_ind_points=n_ind,
            ind_points_selection="kmeans++",
        )
        # NOTE(review): the two models below differ ONLY in `seed` (2 vs 4) and both
        # use gp_approx="vecchia". Nothing visible here selects a Euclidean vs a
        # correlation-based neighbor search, and GPBoost documents a separate
        # gp_approx="full_scale_vecchia" option. Presumably the GPBoost build used
        # for this experiment interprets these settings differently -- TODO confirm.

        # FSVecchia with euclidean-based neighbor search (seed 2)
        model_fsva = gpb.GPModel(seed=2, **fsva_kwargs)
        matrix[i, j] = model_fsva.neg_log_likelihood(cov_pars=np.array(cov_par_values), y=y_np)

        # FSVecchia with correlation-based neighbor search (seed 4)
        model_fsva = gpb.GPModel(seed=4, **fsva_kwargs)
        matrix2[i, j] = model_fsva.neg_log_likelihood(cov_pars=np.array(cov_par_values), y=y_np)
            

### Plot

In [None]:
# Plot the negative log-likelihood against the number of Vecchia neighbors,
# with one solid and one dashed line per number of inducing points.
fig, ax = plt.subplots(figsize=(8, 6))

# Grey-scale colormap restricted to [0.1, 1] of "Greys".
# `plt.get_cmap` replaces `plt.cm.get_cmap`, which was deprecated in Matplotlib 3.7
# and removed in 3.9 (the old call raises AttributeError on current releases).
cmap = plt.get_cmap("Greys")  # You can choose different colormaps like "viridis", "plasma", etc.
new_cmap = colors.LinearSegmentedColormap.from_list("mycmap", cmap(np.linspace(0.1, 1, 256)))

# Solid lines: rows of `matrix` (labelled "euclidean-based neighbor search" in the
# experiment cell). These lines carry the legend entries.
for i, row in enumerate(matrix):
    # Color darkens with the row index, i.e. with the number of inducing points
    color_intensity = i / len(vector_ind_points)
    ax.plot(vector_neighbors, row, label=f' {vector_ind_points[i]}', color=new_cmap(color_intensity))

# Dashed lines: rows of `matrix2` (labelled "correlation-based neighbor search" in the
# experiment cell), drawn in the same colors and without legend entries.
for i, row in enumerate(matrix2):
    color_intensity = i / len(vector_ind_points)
    ax.plot(vector_neighbors, row, linestyle='--', color=new_cmap(color_intensity))

# Axis labels; the title is intentionally left empty as in the original figure
ax.set_xlabel("Number of Vecchia neighbors")
ax.set_ylabel("Negative Log-Likelihood")
ax.set_title("")

# Enable grid
ax.grid(True)

# Legend (one entry per number of inducing points) and ticks at the evaluated settings
ax.legend(title="Number of inducing points")
plt.xticks(vector_neighbors)
# Show the plot
plt.show()

## Bernoulli-Logit

### Generate Data

In [17]:
# Simulate the Bernoulli-logit data set; the third return value is not used.
# NOTE(review): the signature of `simulate_gp_response` is not visible in this notebook.
# The values 1.0 and (0.25, ..., 1.25) reappear as `cov_pars` in the experiment cell,
# so they are presumably the marginal variance and the ARD range parameters; the 0 is
# presumably the error variance (unused for this likelihood), 10000 the sample size
# and the final 1 a seed. TODO confirm against Simulate_Data.py.
X, y, _ = simulate_gp_response(
    "bernoulli-logit",
    10000,
    0,
    1.0,
    torch.tensor([(0.25, 0.50, 0.75, 1.00, 1.25)]),
    1,
)

In [None]:
# The GPBoost calls below are given NumPy arrays, so convert the simulated tensors.
X_np, y_np = X.numpy(), y.numpy()

### Experiment

In [None]:
# Settings to compare: numbers of inducing points (0 = plain Vecchia branch)
vector_ind_points = [0, 10, 50, 100, 200, 500]
# ... crossed with numbers of Vecchia neighbors
vector_neighbors = [5, 10, 15, 20, 30, 50]

# Result grids: one row per inducing-point setting, one column per neighbor setting.
# `matrix` stores the first FSVA variant and `matrix2` the second; the row for
# 0 inducing points holds the plain Vecchia value in both.
grid_shape = (len(vector_ind_points), len(vector_neighbors))
matrix = np.zeros(grid_shape)
matrix2 = np.zeros(grid_shape)

# Covariance parameters at which every negative log-likelihood is evaluated.
# There is one value fewer than in the Gaussian experiment: no 0.3 entry is passed.
cov_par_values = [1.0, 0.25, 0.50, 0.75, 1.00, 1.25]

# Constructor arguments shared by every model in this cell
shared_kwargs = dict(
    gp_coords=X_np,
    cov_function="gaussian_ard",
    likelihood="bernoulli_logit",
    matrix_inversion_method="cholesky",
    gp_approx="vecchia",
)

for i, n_ind in enumerate(vector_ind_points):
    for j, n_nbr in enumerate(vector_neighbors):
        if n_ind == 0:
            # Vecchia without inducing points: same value goes into both grids
            model_vecchia = gpb.GPModel(num_neighbors=n_nbr, seed=10, **shared_kwargs)
            neg_vecchia = model_vecchia.neg_log_likelihood(cov_pars=np.array(cov_par_values), y=y_np)
            matrix[i, j] = matrix2[i, j] = neg_vecchia
            continue

        # Arguments shared by the two inducing-point models
        fsva_kwargs = dict(
            shared_kwargs,
            num_neighbors=n_nbr,
            num_ind_points=n_ind,
            ind_points_selection="kmeans++",
        )
        # NOTE(review): the two models below differ ONLY in `seed` (2 vs 4) and both
        # use gp_approx="vecchia". Nothing visible here selects a Euclidean vs a
        # correlation-based neighbor search, and GPBoost documents a separate
        # gp_approx="full_scale_vecchia" option. Presumably the GPBoost build used
        # for this experiment interprets these settings differently -- TODO confirm.

        # FSVecchia with euclidean-based neighbor search (seed 2)
        model_fsva = gpb.GPModel(seed=2, **fsva_kwargs)
        matrix[i, j] = model_fsva.neg_log_likelihood(cov_pars=np.array(cov_par_values), y=y_np)

        # FSVecchia with correlation-based neighbor search (seed 4)
        model_fsva = gpb.GPModel(seed=4, **fsva_kwargs)
        matrix2[i, j] = model_fsva.neg_log_likelihood(cov_pars=np.array(cov_par_values), y=y_np)

### Plot

In [None]:
# Plot the negative log-likelihood against the number of Vecchia neighbors,
# with one solid and one dashed line per number of inducing points.
fig, ax = plt.subplots(figsize=(8, 6))

# Grey-scale colormap restricted to [0.1, 1] of "Greys".
# `plt.get_cmap` replaces `plt.cm.get_cmap`, which was deprecated in Matplotlib 3.7
# and removed in 3.9 (the old call raises AttributeError on current releases).
cmap = plt.get_cmap("Greys")  # You can choose different colormaps like "viridis", "plasma", etc.
new_cmap = colors.LinearSegmentedColormap.from_list("mycmap", cmap(np.linspace(0.1, 1, 256)))

# Solid lines: rows of `matrix` (labelled "euclidean-based neighbor search" in the
# experiment cell). These lines carry the legend entries.
for i, row in enumerate(matrix):
    # Color darkens with the row index, i.e. with the number of inducing points
    color_intensity = i / len(vector_ind_points)
    ax.plot(vector_neighbors, row, label=f' {vector_ind_points[i]}', color=new_cmap(color_intensity))

# Dashed lines: rows of `matrix2` (labelled "correlation-based neighbor search" in the
# experiment cell), drawn in the same colors and without legend entries.
for i, row in enumerate(matrix2):
    color_intensity = i / len(vector_ind_points)
    ax.plot(vector_neighbors, row, linestyle='--', color=new_cmap(color_intensity))

# Axis labels; the title is intentionally left empty as in the original figure
ax.set_xlabel("Number of Vecchia neighbors")
ax.set_ylabel("Negative Log-Likelihood")
ax.set_title("")

# Enable grid
ax.grid(True)

# Legend (one entry per number of inducing points) and ticks at the evaluated settings
ax.legend(title="Number of inducing points")
plt.xticks(vector_neighbors)
# Show the plot
plt.show()