# Set up

In [1]:
import torch
import gpytorch
import pandas as pd
import numpy as np
import tqdm as tqdm


import os
import pickle

import GP_functions.Loss_function as Loss_function
import GP_functions.bound as bound
import GP_functions.Estimation as Estimation
import GP_functions.Training as Training
import GP_functions.Prediction as Prediction
import GP_functions.GP_models as GP_models
import GP_functions.Tools as Tools
import GP_functions.FeatureE as FeatureE

# data

In [2]:
X_train = pd.read_csv('Data/Data20260110/X_train_Final_10.csv', header=None, delimiter=',').values
Y_train_pca = pd.read_csv('Data/Data20260110/Y_data_train_pca.csv', header=None, delimiter=',').values

X_test = pd.read_csv('Data/Data20260110/X_test.csv', header=None, delimiter=',').values
Y_test_pca = pd.read_csv('Data/Data20260110/Y_data_test_pca.csv', header=None, delimiter=',').values

X_test_MI_1 = pd.read_csv('LocalDisease/X_MI_1_5.csv', header=None, delimiter=',').values
Y_test_MI_1 = pd.read_csv('LocalDisease/Y_data_MI_1_pca.csv', header=None, delimiter=',').values

X_test_MI_2 = pd.read_csv('LocalDisease/X_MI_2.csv', header=None, delimiter=',').values
Y_test_MI_2 = pd.read_csv('LocalDisease/Y_data_MI_2_pca.csv', header=None, delimiter=',').values

X_test_MI_3 = pd.read_csv('LocalDisease/X_MI_3.csv', header=None, delimiter=',').values
Y_test_MI_3 = pd.read_csv('LocalDisease/Y_data_MI_3_pca.csv', header=None, delimiter=',').values

X_test_HCM = pd.read_csv('LocalDisease/X_HCM.csv', header=None, delimiter=',').values
Y_test_HCM = pd.read_csv('LocalDisease/Y_data_HCM_pca.csv', header=None, delimiter=',').values

train_x = torch.tensor(X_train, dtype=torch.float32)
train_y_21 = torch.tensor(Y_train_pca, dtype=torch.float32)

test_x = torch.tensor(X_test, dtype=torch.float32)
test_y_21 = torch.tensor(Y_test_pca, dtype=torch.float32)

test_x_MI_1 = torch.tensor(X_test_MI_1, dtype=torch.float32)
test_y_MI_1 = torch.tensor(Y_test_MI_1, dtype=torch.float32)

test_x_MI_2 = torch.tensor(X_test_MI_2, dtype=torch.float32)
test_y_MI_2 = torch.tensor(Y_test_MI_2, dtype=torch.float32)

test_x_MI_3 = torch.tensor(X_test_MI_3, dtype=torch.float32)
test_y_MI_3 = torch.tensor(Y_test_MI_3, dtype=torch.float32)

test_x_HCM = torch.tensor(X_test_HCM, dtype=torch.float32)
test_y_HCM = torch.tensor(Y_test_HCM, dtype=torch.float32)



torch.set_default_dtype(torch.float32)

# Emulator

In [3]:
Device = 'cuda'

In [4]:
MVGP_models, MVGP_likelihoods = Training.train_MultitaskVGP_minibatch(
    train_x=train_x.to(Device),
    train_y=train_y_21.to(Device),
    covar_type='RQ',
    num_latents=24,
    num_inducing=500,
    lr_hyper=0.01,
    lr_variational=0.1,
    num_iterations=15000,
    patience=10,
    device=Device,
    batch_size=512,
    eval_every=100,
    eval_batch_size=1024
)

Training: 100%|█████████▉| 14999/15000 [33:09<00:00,  7.54it/s, full_loss=-24.1]  


In [17]:
checkpoint = {
    'model_state_dict': MVGP_models.state_dict(),
    'likelihood_state_dict': MVGP_likelihoods.state_dict(),
    'model_params': {
        'num_latents': 24,
        'num_inducing': 500,
        'covar_type': 'RQ',
        'input_dim': train_x.size(1),
        'num_tasks': train_y_21.size(1)
    }
}

torch.save(checkpoint, 'multitask_gp_checkpoint_LocalD_tmp.pth')

In [None]:
checkpoint = torch.load('multitask_gp_checkpoint_LocalD.pth', map_location=Device)
model_params = checkpoint['model_params']

MVGP_models = GP_models.MultitaskVariationalGP(train_x, train_y_21, 
                                               num_latents=model_params['num_latents'], 
                                               num_inducing=model_params['num_inducing'], 
                                               covar_type=model_params['covar_type']).to(Device)

MVGP_models.load_state_dict(checkpoint['model_state_dict'])

MVGP_likelihoods = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=train_y_21.shape[1]).to(Device)
MVGP_likelihoods.load_state_dict(checkpoint['likelihood_state_dict'])

MVGP_models.eval()
MVGP_likelihoods.eval()


In [5]:
from sklearn.metrics import r2_score

preds_tmp = Prediction.preds_for_one_model(
    MVGP_models, MVGP_likelihoods, test_x.to(Device)
    ).cpu().detach().numpy()


print(r2_score(preds_tmp.reshape(512, 21), test_y_21.numpy()))
print(np.mean((preds_tmp.reshape(512, 21) - test_y_21.numpy()) ** 2))

0.9667959809303284
0.0009928372


In [6]:
from sklearn.metrics import r2_score

preds_tmp = Prediction.preds_for_one_model(
    MVGP_models, MVGP_likelihoods, test_x_MI_1.to(Device)
    ).cpu().detach().numpy()


print(r2_score(preds_tmp.reshape(100, 21), test_y_MI_1.numpy()))
print(np.mean((preds_tmp.reshape(100, 21) - test_y_MI_1.numpy()) ** 2))

0.4513742923736572
0.0016140322


In [7]:
preds_tmp = Prediction.preds_for_one_model(
    MVGP_models, MVGP_likelihoods, test_x_MI_2.to(Device)
    ).cpu().detach().numpy()


print(r2_score(preds_tmp.reshape(100, 21), test_y_MI_2.numpy()))
print(np.mean((preds_tmp.reshape(100, 21) - test_y_MI_2.numpy()) ** 2))

0.6259183287620544
0.001286375


In [8]:
preds_tmp = Prediction.preds_for_one_model(
    MVGP_models, MVGP_likelihoods, test_x_MI_3.to(Device)
    ).cpu().detach().numpy()


print(r2_score(preds_tmp.reshape(100, 21), test_y_MI_3.numpy()))
print(np.mean((preds_tmp.reshape(100, 21) - test_y_MI_3.numpy()) ** 2))

0.87346351146698
0.0011467861


In [10]:
preds_tmp = Prediction.preds_for_one_model(
    MVGP_models, MVGP_likelihoods, test_x_HCM.to(Device)
    ).cpu().detach().numpy()


print(r2_score(preds_tmp.reshape(100, 21), test_y_HCM.numpy()))
print(np.mean((preds_tmp.reshape(100, 21) - test_y_HCM.numpy()) ** 2))

-1.4510003328323364
0.0006578536


In [11]:
np.var(test_y_HCM.numpy(), axis=0)

array([5.5184811e-03, 2.7172187e-01, 3.2082644e-01, 1.7887577e-01,
       7.5231403e-02, 8.0839708e-02, 8.5091144e-03, 2.2382732e-03,
       1.4317095e-03, 2.6007134e-03, 2.0902082e-03, 1.0056627e-03,
       9.8909927e-04, 7.1666919e-04, 3.5236482e-04, 5.8405945e-04,
       2.7161720e-04, 9.5083342e-05, 2.3966881e-04, 3.3953736e-05,
       1.0316915e-04], dtype=float32)

# PE

In [12]:
row_idx = 0

input_point = test_y_MI_1[row_idx, :]

local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(input_point, train_x, train_y_21, k=100)

bounds = bound.get_bounds(local_train_x)

In [18]:
estimated_params_tmp, _ = Estimation.multi_start_estimation(
    MVGP_models, MVGP_likelihoods, row_idx, test_y_MI_1, bounds,
    Estimation.estimate_params_for_one_model_Adam, num_starts=16, num_iterations=2000, lr=0.05,
    patience=10, attraction_threshold=0.1, repulsion_strength=0.1, device=Device, show_progress=True
)

Multi-start:   0%|          | 0/16 [00:00<?, ?it/s]

Start 1/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 2/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 3/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 4/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 5/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 6/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 7/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 8/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 9/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 10/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 11/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 12/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 13/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 14/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 15/16:   0%|          | 0/2000 [00:00<?, ?it/s]

Start 16/16:   0%|          | 0/2000 [00:00<?, ?it/s]

In [14]:
estimated_params_tmp

array([2.2285407, 2.116224 , 1.4796703, 0.7780102, 1.1151286, 1.2205496,
       1.0805556, 1.3241574, 1.0806774, 1.2078546], dtype=float32)

In [16]:
X_test_MI_1[row_idx, :]

array([2.07  , 2.3466, 1.1437, 1.1468, 1.1847, 1.1417, 1.1672, 1.2229,
       1.1382, 1.142 ])