# Set up

In [1]:
import torch
import gpytorch
import pandas as pd
import numpy as np
import tqdm as tqdm
from linear_operator import settings

import pyro
import math
import pickle
import time
from joblib import Parallel, delayed

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

import pyro.distributions as dist
from pyro.infer import MCMC, NUTS
import arviz as az
import seaborn as sns

import os

In [2]:
import GP_functions.Loss_function as Loss_function
import GP_functions.bound as bound
import GP_functions.Estimation as Estimation
import GP_functions.Training as Training
import GP_functions.Prediction as Prediction
import GP_functions.GP_models as GP_models
import GP_functions.Tools as Tools
import GP_functions.FeatureE as FeatureE

# Data

In [3]:
# Every data file is a header-less, comma-separated CSV; `.values` yields a plain NumPy array.
X_train, X_test = (
    pd.read_csv(csv_path, header=None, delimiter=',').values
    for csv_path in ('Data/X_train.csv', 'Data/X_test.csv')
)

# Y_train_8 = pd.read_csv('Data/Y_train_8.csv', header=None, delimiter=',').values
# Y_test_8 = pd.read_csv('Data/Y_test_8.csv', header=None, delimiter=',').values

# 21-column targets (presumably the first output plus 20 PCA scores -- TODO confirm).
Y_train_21, Y_test_21 = (
    pd.read_csv(csv_path, header=None, delimiter=',').values
    for csv_path in ('Data/Y_train_std_21.csv', 'Data/Y_test_std_21.csv')
)

# Full-width targets (file names suggest they are standardised -- TODO confirm).
Y_train_std, Y_test_std = (
    pd.read_csv(csv_path, header=None, delimiter=',').values
    for csv_path in ('Data/Y_train_std.csv', 'Data/Y_test_std.csv')
)

In [4]:
# Copy each NumPy array into a float32 tensor (torch.tensor always copies).
train_x, test_x = (
    torch.tensor(array, dtype=torch.float32) for array in (X_train, X_test)
)

# train_y_8 = torch.tensor(Y_train_8, dtype=torch.float32)
# test_y_8 = torch.tensor(Y_test_8, dtype=torch.float32)

train_y_21, test_y_21 = (
    torch.tensor(array, dtype=torch.float32) for array in (Y_train_21, Y_test_21)
)

train_y, test_y = (
    torch.tensor(array, dtype=torch.float32) for array in (Y_train_std, Y_test_std)
)

## PCA of output

In [None]:
# Fit a 20-component PCA on every output column except the first one.
pca_20 = PCA(n_components = 20)

# Hand scikit-learn an explicit NumPy view instead of relying on implicit tensor conversion.
pca_20.fit(train_y[:, 1:].numpy())

torch.set_default_dtype(torch.float32)
torch.set_num_threads(8)

# The inter-op pool size can only be set once, before any inter-op work has started;
# re-running this cell in a live kernel would otherwise raise RuntimeError.
try:
    torch.set_num_interop_threads(4)
except RuntimeError as err:
    print(f'Inter-op thread count left unchanged: {err}')

# Emulators

In [None]:
# Default compute device; the next cell rebinds `Device`, so the last executed cell wins.
Device = 'cpu'

In [36]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines without CUDA.
Device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Test case used by the single-row experiments below.
row_idx = 0

# The observed outputs of that test case are the neighbour-search query
# (presumably the k closest training outputs are returned -- TODO confirm in Tools).
input_point = test_y[row_idx, :]
local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(
    input_point,
    train_x,
    train_y,
    k=100,
)

# Parameter bounds derived from the neighbouring inputs; reused by the estimation cells.
bounds = bound.get_bounds(local_train_x)

### LocalGP

In [None]:
output_file = 'Result/LocalGP_21_result.csv'

# Make sure the target directory exists; open(..., 'w') does not create missing folders.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Write the CSV header only once so that an existing result file is never truncated.
if not os.path.exists(output_file):
    with open(output_file, 'w') as f:
        f.write('Iteration,test_preds,estimated_params,posterior_means\n')



In [None]:
row_idx = 0

# Neighbours of the selected test row in the 21-column output space.
# Note: the trainer below is given the full training set plus k_num, not these local arrays.
input_point = test_y_21[row_idx, :]
local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(
    input_point, train_x, train_y_21, k=100
)

# Train the LocalGP emulator(s) for this single test row.
LocalGP_models, LocalGP_likelihoods = Training.train_one_row_LocalGP_Parallel(
    train_x,
    train_y_21,
    test_y_21,
    row_idx,
    covar_type='RBF',
    k_num=100,
    lr=0.025,
    num_iterations=5000,
    patience=10,
    device=Device,
)


In [None]:
# Emulator prediction at the true input of the selected test row, as a NumPy array on the host.
preds_tmp = (
    Prediction.full_preds(
        LocalGP_models,
        LocalGP_likelihoods,
        test_x[row_idx, :].unsqueeze(0).to(Device),
    )
    .cpu()
    .detach()
    .numpy()
)

# Multi-start point estimate of the inputs that reproduce the observed outputs;
# the second return value is discarded here.
estimated_params_tmp, _ = Estimation.multi_start_estimation(
    LocalGP_models,
    LocalGP_likelihoods,
    row_idx,
    test_y_21,
    bounds,
    Estimation.estimate_params_Adam,
    num_starts=5,
    num_iterations=2000,
    lr=0.01,
    patience=50,
    attraction_threshold=0.1,
    repulsion_strength=0.1,
    device=Device,
)




In [None]:
def train_and_predict_LocalGP(row_idx, train_x, train_y, test_x, test_y, K_num = 100, Device = 'cpu', PCA_trans = 'None'):
    """Train a LocalGP emulator for one test row and predict at that row's input.

    Args:
        row_idx: Index of the test row to emulate.
        train_x, train_y: Full training inputs/outputs handed to the trainer.
        test_x, test_y: Test inputs/outputs; only row ``row_idx`` of ``test_x`` is predicted.
        K_num: Forwarded to the trainer as ``k_num``.
        Device: Torch device string used for training and prediction.
        PCA_trans: Optional fitted transformer exposing ``inverse_transform``.
            Both ``None`` and the legacy string sentinel ``'None'`` disable it.

    Returns:
        NumPy array of predictions, passed through ``PCA_trans.inverse_transform``
        when a transformer is given.
    """
    LocalGP_models, LocalGP_likelihoods = Training.train_one_row_LocalGP(
        train_x, train_y, test_y, row_idx, covar_type = 'RQ', k_num=K_num, lr=0.05, num_iterations=5000, patience=10, device=Device
    )

    query_x = test_x[row_idx,:].unsqueeze(0).to(Device)
    preds = Prediction.full_preds(LocalGP_models, LocalGP_likelihoods, query_x).cpu().detach().numpy()

    # A real None used to fall through `!= 'None'` and crash on None.inverse_transform.
    skip_pca = PCA_trans is None or (isinstance(PCA_trans, str) and PCA_trans == 'None')
    if not skip_pca:
        # NOTE(review): preds must have as many columns as the transformer has components.
        preds = PCA_trans.inverse_transform(preds)

    return preds




In [None]:
# Train one LocalGP per test row in parallel worker processes and stack the per-row predictions.
# NOTE(review): the emulators are fitted on train_y/test_y while PCA_trans=pca_20 is passed;
# pca_20 has 20 components, so inverse_transform needs 20-column predictions -- confirm the
# intended targets before trusting this number.
results = Parallel(n_jobs=-1)(delayed(train_and_predict_LocalGP)(row_idx, train_x, train_y, test_x, test_y, PCA_trans = pca_20) for row_idx in range(test_y.shape[0]))
full_test_preds_LocalGP = np.vstack(results)

# Mean squared error over all test rows and output columns.
MSE_LocalGP = np.mean((full_test_preds_LocalGP - test_y.numpy()) ** 2)

In [None]:
# Display the LocalGP test MSE.
MSE_LocalGP

In [None]:
# Duplicate of the previous display cell (same value shown again).
MSE_LocalGP

### MultiGP

In [None]:
row_idx = 0

# Local training set: neighbours of the selected test row in the 21-column output space.
input_point = test_y_21[row_idx, :]
local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(
    input_point, train_x, train_y_21, k=100
)

# One multitask GP over all output columns of the local training set.
MultitaskGP_models, MultitaskGP_likelihoods = Training.train_one_row_MultitaskGP(
    local_train_x,
    local_train_y,
    n_tasks=local_train_y.shape[1],
    covar_type='RBF',
    lr=0.05,
    num_iterations=5000,
    patience=10,
    device=Device,
)


In [None]:
# `theta` was never assigned in this notebook, so the cell failed on a fresh kernel.
# NOTE(review): the true input of the selected test row is used as the query point -- confirm
# this is the intended value.
theta = test_x[row_idx, :].to(Device)
Prediction.preds_for_one_model(MultitaskGP_models, MultitaskGP_likelihoods, theta.unsqueeze(0)).view(-1)

In [None]:
def train_and_predict_MGP(row_idx, train_x, train_y, test_x, test_y, K_num = 300, Device = 'cpu', PCA_trans = 'None'):
    """Train a local multitask GP for one test row and predict at that row's input.

    Args:
        row_idx: Index of the test row to emulate.
        train_x, train_y: Full training inputs/outputs searched for neighbours.
        test_x, test_y: Test inputs/outputs; ``test_y[row_idx]`` is the neighbour query,
            ``test_x[row_idx]`` the prediction point.
        K_num: Number of neighbours requested from the neighbour search.
        Device: Torch device string used for training and prediction.
        PCA_trans: Optional fitted transformer exposing ``inverse_transform``.
            Both ``None`` and the legacy string sentinel ``'None'`` disable it.

    Returns:
        NumPy array of predictions, passed through ``PCA_trans.inverse_transform``
        when a transformer is given.
    """
    input_point = test_y[row_idx,:]
    local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(input_point, train_x, train_y, k = K_num)

    MultitaskGP_models, MultitaskGP_likelihoods = Training.train_one_row_MultitaskGP(
        local_train_x, local_train_y, n_tasks = train_y.shape[1], covar_type = 'RQ',
        lr=0.05, num_iterations=10000, patience=10, device=Device
    )

    query_x = test_x[row_idx,:].unsqueeze(0).to(Device)
    # .cpu() is required before .numpy() whenever Device is a GPU (matches train_and_predict_LocalGP).
    preds = Prediction.preds_for_one_model(MultitaskGP_models, MultitaskGP_likelihoods, query_x).cpu().detach().numpy()

    # A real None used to fall through `!= 'None'` and crash on None.inverse_transform.
    skip_pca = PCA_trans is None or (isinstance(PCA_trans, str) and PCA_trans == 'None')
    if not skip_pca:
        # NOTE(review): preds must have as many columns as the transformer has components.
        preds = PCA_trans.inverse_transform(preds)

    return preds



In [None]:
# One local multitask GP per test row, trained in parallel worker processes.
results = Parallel(n_jobs=-1)(
    delayed(train_and_predict_MGP)(row_idx, train_x, train_y, test_x, test_y)
    for row_idx in range(test_y.shape[0])
)
full_test_preds_MGP = np.vstack(results)

# Test MSE over all rows and output columns (displayed as the cell result).
np.mean((full_test_preds_MGP - test_y.numpy()) ** 2)

### NN + MultiGP

In [None]:
row_idx = 0

# Larger neighbourhood (k = 500) in the full-width output space.
input_point = test_y[row_idx, :]
local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(
    input_point, train_x, train_y, k=500
)

# Multitask GP on top of the FeatureExtractor_4 network.
# Note: this rebinds MultitaskGP_models / MultitaskGP_likelihoods from the MultiGP section.
MultitaskGP_models, MultitaskGP_likelihoods = Training.train_one_row_NNMultitaskGP(
    local_train_x,
    local_train_y,
    n_tasks=local_train_y.shape[1],
    feature_extractor_class=FeatureE.FeatureExtractor_4,
    covar_type='Matern5/2',
    lr=0.05,
    num_iterations=5000,
    patience=10,
    device=Device,
)
    

In [None]:
def train_and_predict_NNMGP(row_idx, train_x, train_y, test_x, test_y, K_num = 500, Device = 'cpu', PCA_trans = 'None'):
    """Train a local NN + multitask GP for one test row and predict at that row's input.

    Args:
        row_idx: Index of the test row to emulate.
        train_x, train_y: Full training inputs/outputs searched for neighbours.
        test_x, test_y: Test inputs/outputs; ``test_y[row_idx]`` is the neighbour query,
            ``test_x[row_idx]`` the prediction point.
        K_num: Number of neighbours requested from the neighbour search.
        Device: Torch device string used for training and prediction.
        PCA_trans: Optional fitted transformer exposing ``inverse_transform``.
            Both ``None`` and the legacy string sentinel ``'None'`` disable it.
            When given, the first predicted column is kept untouched and only the
            remaining columns are mapped back through the transformer.

    Returns:
        Squeezed NumPy array of predictions for the single query row.
    """
    input_point = test_y[row_idx,:]
    local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(input_point, train_x, train_y, k = K_num)

    NNMultitaskGP_models, NNMultitaskGP_likelihoods = Training.train_one_row_NNMultitaskGP(
        local_train_x, local_train_y, n_tasks = train_y.shape[1],
        feature_extractor_class = FeatureE.FeatureExtractor_4, covar_type = 'RQ',
        lr=0.05, num_iterations=5000, patience=10, device = Device
    )

    query_x = test_x[row_idx,:].unsqueeze(0).to(Device)
    # .cpu() is required before .numpy() whenever Device is a GPU.
    preds = Prediction.preds_for_one_model(NNMultitaskGP_models, NNMultitaskGP_likelihoods, query_x).squeeze().cpu().detach().numpy()

    # A real None used to fall through `!= 'None'` and crash on None.inverse_transform.
    skip_pca = PCA_trans is None or (isinstance(PCA_trans, str) and PCA_trans == 'None')
    if not skip_pca:
        # Work on an explicit (1, n_columns) row: the old code indexed the squeezed vector,
        # obtained a 0-d scalar for the first column, and np.concatenate(..., axis=1) raised.
        row = preds.reshape(1, -1)
        first_column = row[:, :1]
        remaining_columns = PCA_trans.inverse_transform(row[:, 1:])
        # Drop the helper axis again so both branches return the same rank.
        preds = np.concatenate((first_column, remaining_columns), axis=1).squeeze(0)

    return preds



In [None]:
# One local NN + multitask GP per test row, trained in parallel worker processes.
results = Parallel(n_jobs=-1)(delayed(train_and_predict_NNMGP)(row_idx, train_x, train_y, test_x, test_y) for row_idx in range(test_y.shape[0]))
# NOTE(review): this reuses the name full_test_preds_MGP and therefore overwrites the
# predictions stored by the MultiGP section.
full_test_preds_MGP = np.vstack(results)


# Mean squared error over all test rows and output columns.
MSE_NNMGP = np.mean((full_test_preds_MGP - test_y.numpy()) ** 2)

In [None]:
# Same sweep on the 21-column targets. The loop bound now comes from the tensor that is
# actually evaluated (test_y_21) rather than from test_y.
results = Parallel(n_jobs=-1)(
    delayed(train_and_predict_NNMGP)(row_idx, train_x, train_y_21, test_x, test_y_21)
    for row_idx in range(test_y_21.shape[0])
)
full_test_preds_MGP = np.vstack(results)

# Mean squared error in the 21-column space.
MSE_NNMGP_20 = np.mean((full_test_preds_MGP - test_y_21.numpy()) ** 2)
MSE_NNMGP_20

In [None]:
# Display the NN + MultiGP test MSE on the 21-column targets (also shown by the previous cell).
MSE_NNMGP_20

### VGP

In [None]:
# The first 600 training inputs serve as the inducing points.
inducing_points = train_x[:600, :].to(Device)

VGP_models, VGP_likelihoods = Training.train_full_VGP_Parallel(
    train_x,
    train_y_21,
    inducing_points,
    covar_type='RQ',
    lr=0.025,
    num_iterations=5000,
    patience=30,
    device=Device,
)


### MVGP

In [None]:
# Full-batch multitask variational GP on the 21-column targets.
MVGP_models, MVGP_likelihoods = Training.train_full_MultitaskVGP(
    train_x,
    train_y_21,
    covar_type='RQ',
    num_latents=14,
    num_inducing=100,
    lr_hyper=0.05,
    lr_variational=0.05,
    num_iterations=5000,
    patience=50,
    device=Device,
)


In [76]:
# Mini-batch variant; rebinds MVGP_models / MVGP_likelihoods from the full-batch cell above.
MVGP_models, MVGP_likelihoods = Training.train_MultitaskVGP_minibatch(
    train_x,
    train_y_21,
    covar_type='RQ',
    num_latents=20,
    num_inducing=400,
    lr_hyper=0.01,
    lr_variational=0.1,
    num_iterations=5000,
    patience=10,
    device=Device,
    batch_size=512,
    eval_every=100,
    eval_batch_size=1024,
)


Training:   0%|          | 9/5000 [00:27<4:17:07,  3.09s/it]


KeyboardInterrupt: 

In [12]:
# Predict all test inputs at once and bring the result back to the host as a NumPy array.
full_test_preds_MVGP = Prediction.preds_for_one_model(MVGP_models, MVGP_likelihoods, test_x.to(Device)).cpu().detach().numpy()

# Take the target shape from test_y_21 instead of hard-coding (120, 21), so the cell keeps
# working when the size of the test set changes.
np.mean((full_test_preds_MVGP.reshape(tuple(test_y_21.shape)) - test_y_21.numpy()) ** 2)

0.0012369358

In [8]:
# MSE against the full-width targets with a hard-coded (120, 52) shape.
# NOTE(review): the preceding cell reshapes the same array to 21 columns, so this only works
# for predictions from a model trained on train_y -- hidden kernel state, confirm or remove.
np.mean((full_test_preds_MVGP.reshape(120,52) - test_y.numpy()) ** 2)

0.0015008309

In [None]:
# Map the MVGP predictions back through the 20-component PCA and score them against test_y.
# NOTE(review): this overwrites full_test_preds_MVGP, so running the cell twice transforms the
# already-transformed array. pca_20.inverse_transform also needs 20-column input, while the
# cells above treat the predictions as 21 columns wide -- confirm how the first column
# should be handled.
full_test_preds_MVGP = pca_20.inverse_transform(full_test_preds_MVGP)
MSE_MVGP = np.mean((full_test_preds_MVGP - test_y.numpy()) ** 2)
MSE_MVGP

In [13]:
# Bundle the MVGP weights with the hyper-parameters needed to rebuild the model.
checkpoint = {
    'model_state_dict': MVGP_models.state_dict(),
    'likelihood_state_dict': MVGP_likelihoods.state_dict(),
    'model_params': {
        'num_latents': 20,
        # NOTE(review): the training cells in this notebook use num_inducing=100 / 400;
        # confirm that 500 matches the model actually being saved.
        'num_inducing': 500,
        'covar_type': 'RQ',
        'input_dim': train_x.size(1),     # input feature dimension
        # Both MVGP training cells fit train_y_21, so the task count must come from it
        # (it was previously read from train_y).
        'num_tasks': train_y_21.size(1)   # number of output tasks
    }
}
torch.save(checkpoint, 'multitask_gp_checkpoint.pth')

NameError: name 'MVGP_models' is not defined

### DGP

In [None]:
# Device = 'cuda'
# Full-batch two-layer deep GP on the full-width targets.
DGP_2 = Training.train_full_DGP_2(
    train_x,
    train_y,
    num_hidden_dgp_dims=10,
    inducing_num=100,
    num_iterations=5000,
    patiences=50,
    device=Device,
)

In [77]:
# Mini-batch deep GP on the 21-column targets; rebinds DGP_2 from the full-batch cell.
# Uses the notebook-wide Device switch instead of a hard-coded 'cuda' so the cell follows
# the same device selection as every other training cell.
DGP_2 = Training.train_DGP_2_minibatch(
    train_x,
    train_y_21,
    num_hidden_dgp_dims=10,
    inducing_num=300,
    num_iterations=2000,
    patience=50,
    device=Device,
    batch_size=512,
    eval_every=100,
    eval_batch_size=1024,
    lr=0.1,
)

Training DGP_2:   0%|          | 4/2000 [00:52<7:16:24, 13.12s/it]


KeyboardInterrupt: 

In [12]:
from torch.utils.data import TensorDataset, DataLoader

def evaluate_full_dataset_loss_dgp(model, x_data, y_data, mll, device='cuda', batch_size=1024):
    """Average a loss of ``model.predict`` over a whole dataset, batch by batch.

    Args:
        model: Object exposing ``eval()``, ``train()`` and ``predict(x)``; only the first
            element of the ``predict`` result is scored.
        x_data, y_data: Tensors with the same number of rows.
        mll: Callable ``mll(output, target)`` returning a scalar tensor (any loss works).
        device: Device each batch is moved to before prediction.
        batch_size: Number of rows per evaluation batch.

    Returns:
        Sample-weighted mean of the per-batch losses as a Python float.

    Raises:
        ValueError: If the dataset is empty (previously a ZeroDivisionError).
    """
    dataset = TensorDataset(x_data, y_data)
    if len(dataset) == 0:
        raise ValueError('Cannot evaluate the loss on an empty dataset.')
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # Remember the caller's mode; the old code always left the model in training mode,
    # silently undoing an earlier model.eval().
    was_training = getattr(model, 'training', True)
    model.eval()
    total_loss = 0.0
    try:
        with torch.no_grad():
            for x_batch, y_batch in data_loader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                output = model.predict(x_batch)[0]
                loss = mll(output, y_batch)
                # Weight by batch size so a short final batch does not skew the mean.
                total_loss += loss.item() * x_batch.size(0)
    finally:
        if was_training:
            model.train()

    return total_loss / len(dataset)


# Test-set MSE of the deep GP. The batches are moved to the same Device as the data
# (previously a hard-coded 'cuda', which disagreed with Device on CPU-only runs).
evaluate_full_dataset_loss_dgp(
    DGP_2,
    test_x.to(Device),
    test_y_21.to(Device),
    torch.nn.MSELoss(),
    device=Device,
    batch_size=20,
)

2.0461097955703735

In [17]:
# Bundle the deep-GP weights with everything needed to rebuild the architecture.
checkpoint = {
    'model_state_dict': DGP_2.state_dict(),
    'likelihood_state_dict': DGP_2.likelihood.state_dict(),
    'input_dims': DGP_2.hidden_layer_1.input_dims,
    'num_tasks': DGP_2.last_layer.output_dims,
    'num_hidden_dgp_dims': DGP_2.hidden_layer_1.output_dims,
    # Store the inducing-point count as well so the loader does not have to guess it;
    # it is read from the second-to-last axis of the first layer's inducing-point tensor.
    'inducing_num': DGP_2.hidden_layer_1.variational_strategy.inducing_points.size(-2),
}

torch.save(checkpoint, 'dgp_model.pth')

In [53]:
# Load onto the notebook-wide Device rather than a hard-coded CUDA device.
checkpoint = torch.load('dgp_model.pth', map_location=torch.device(Device))

# Only shapes matter when rebuilding the architecture, so placeholder data is enough.
dummy_train_x_shape = (1, checkpoint['input_dims'])
dummy_train_y = torch.zeros(1, checkpoint['num_tasks'])


loaded_model = GP_models.DeepGP_2(
    train_x_shape=dummy_train_x_shape,
    train_y=dummy_train_y,
    num_hidden_dgp_dims=checkpoint['num_hidden_dgp_dims'],
    # Prefer the value stored in the checkpoint; older checkpoints without the key fall
    # back to the previously hard-coded 500.
    inducing_num=checkpoint.get('inducing_num', 500)
)


loaded_model.load_state_dict(checkpoint['model_state_dict'])
loaded_model.likelihood.load_state_dict(checkpoint['likelihood_state_dict'])
loaded_model = loaded_model.to(Device)


  checkpoint = torch.load('dgp_model.pth', map_location=torch.device('cuda'))


In [57]:
# Sanity check: predict the first test input with the reloaded model (returns a 2-tuple).
loaded_model.predict(test_x[:1].to(Device))

(tensor([-0.9499,  0.0202,  0.0202,  0.0202,  0.8728,  0.0202,  0.0202,  0.0202,
          0.0202,  0.0202,  0.0202,  0.0202,  0.0202, -0.2175,  0.0202,  0.0202,
         -0.1284,  0.0202,  0.0202,  0.0106,  0.0202], device='cuda:0',
        grad_fn=<SqueezeBackward0>),
 tensor([3.5439e-02, 1.4620e+01, 1.1734e+01, 1.0023e+01, 5.0288e-02, 4.0517e+00,
         1.5869e-01, 1.2491e-01, 9.0505e-02, 8.6849e-02, 4.9222e-02, 5.7805e-02,
         4.0537e-02, 2.2661e-02, 3.5357e-02, 2.2699e-02, 3.9727e-03, 1.5552e-02,
         1.1265e-02, 2.7205e-03, 7.2542e-03], device='cuda:0',
        grad_fn=<SqueezeBackward0>))

In [50]:
# First test input as stored on the host.
test_x[:1]

tensor([[3.6700, 4.8642, 1.4330, 4.7406, 2.8699, 0.6171, 3.0394, 3.4495, 4.7810,
         1.8129]])

In [37]:
# Same row after moving it to the active device.
test_x[:1].to(Device)

tensor([[3.6700, 4.8642, 1.4330, 4.7406, 2.8699, 0.6171, 3.0394, 3.4495, 4.7810,
         1.8129]], device='cuda:0')

In [56]:
# Reference prediction from the in-memory model, for comparison with the reloaded one.
DGP_2.predict(test_x[:1].to(Device))

(tensor([-0.9336,  0.0187,  0.0187,  0.0187,  0.8132,  0.0187,  0.0187,  0.0187,
          0.0187,  0.0187,  0.0187,  0.0187,  0.0187, -0.2040,  0.0187,  0.0187,
         -0.1183,  0.0187,  0.0187,  0.0135,  0.0187], device='cuda:0',
        grad_fn=<SqueezeBackward0>),
 tensor([3.5331e-02, 1.4620e+01, 1.1734e+01, 1.0023e+01, 5.0043e-02, 4.0517e+00,
         1.5869e-01, 1.2491e-01, 9.0505e-02, 8.6849e-02, 4.9222e-02, 5.7805e-02,
         4.0537e-02, 2.2568e-02, 3.5357e-02, 2.2699e-02, 3.9456e-03, 1.5552e-02,
         1.1265e-02, 2.7104e-03, 7.2542e-03], device='cuda:0',
        grad_fn=<SqueezeBackward0>))

In [71]:
# Put the model and its likelihood into evaluation mode before predicting.
DGP_2.eval()
DGP_2.likelihood.eval()


# predict returns a 2-tuple, unpacked here as (mean, var); only the mean is displayed.
# NOTE(review): no torch.no_grad() here, so the displayed tensor still carries a grad_fn.
mean, var = DGP_2.predict(test_x[:1].to(Device))
mean

tensor([-0.9575,  0.0203,  0.0203,  0.0203,  0.8253,  0.0203,  0.0203,  0.0203,
         0.0203,  0.0203,  0.0203,  0.0203,  0.0203, -0.2080,  0.0203,  0.0203,
        -0.1242,  0.0203,  0.0203,  0.0179,  0.0203], device='cuda:0',
       grad_fn=<SqueezeBackward0>)

In [72]:
# Dump every parameter of the in-memory model (very large output; used to eyeball it
# against the reloaded model below).
DGP_2.state_dict()

OrderedDict([('hidden_layer_1.variational_strategy.inducing_points',
              tensor([[[ 5.1397,  5.7806,  4.1946,  ...,  0.0930,  1.8100,  5.9063],
                       [ 1.4916,  1.0533,  2.2086,  ...,  5.3225,  2.0638,  3.1021],
                       [ 2.6889,  0.9007,  0.3010,  ...,  3.8687,  2.6669,  0.8021],
                       ...,
                       [-3.7203, -0.7093, -2.3237,  ..., -1.2736,  1.3199,  4.2071],
                       [-2.0169,  1.5574, -5.8672,  ..., -2.7132,  4.6708,  1.3651],
                       [-1.6703,  3.9795,  1.9985,  ..., -5.9495,  7.9066,  5.4192]],
              
                      [[ 1.1644,  1.0673,  4.0660,  ...,  3.3011,  2.2568, -0.3824],
                       [ 3.0885,  4.5404,  5.7173,  ...,  4.1927,  0.2847,  2.4185],
                       [ 3.0253,  2.5651,  3.9566,  ...,  3.2017,  1.2977,  2.8559],
                       ...,
                       [-0.3813, -1.5167,  3.7336,  ..., -7.7186,  9.0799, -1.5754],
         

In [73]:
# Same dump for the reloaded model.
loaded_model.state_dict()

OrderedDict([('hidden_layer_1.variational_strategy.inducing_points',
              tensor([[[ 5.1397,  5.7806,  4.1946,  ...,  0.0930,  1.8100,  5.9063],
                       [ 1.4916,  1.0533,  2.2086,  ...,  5.3225,  2.0638,  3.1021],
                       [ 2.6889,  0.9007,  0.3010,  ...,  3.8687,  2.6669,  0.8021],
                       ...,
                       [-3.7203, -0.7093, -2.3237,  ..., -1.2736,  1.3199,  4.2071],
                       [-2.0169,  1.5574, -5.8672,  ..., -2.7132,  4.6708,  1.3651],
                       [-1.6703,  3.9795,  1.9985,  ..., -5.9495,  7.9066,  5.4192]],
              
                      [[ 1.1644,  1.0673,  4.0660,  ...,  3.3011,  2.2568, -0.3824],
                       [ 3.0885,  4.5404,  5.7173,  ...,  4.1927,  0.2847,  2.4185],
                       [ 3.0253,  2.5651,  3.9566,  ...,  3.2017,  1.2977,  2.8559],
                       ...,
                       [-0.3813, -1.5167,  3.7336,  ..., -7.7186,  9.0799, -1.5754],
         

# Estimation

### Point estimation

In [None]:
# Multi-start Adam search for the inputs whose emulated outputs match row `row_idx` of
# test_y_21, restricted to `bounds`.
estimated_params, func_loss = Estimation.multi_start_estimation(
    MultitaskGP_models,
    MultitaskGP_likelihoods,
    row_idx,
    test_y_21,
    bounds,
    Estimation.estimate_params_for_one_model_Adam,
    num_starts=5,
    num_iterations=2000,
    lr=0.01,
    patience=10,
    attraction_threshold=0.1,
    repulsion_strength=0.1,
    device=Device,
)


# full_estimated_params = estimated_params.detach().numpy()


In [None]:
# Display the point estimate returned by the multi-start search.
estimated_params

### MCMC 

In [None]:
# MCMC over the inputs using the local multitask GP as emulator (run_mcmc_Uniform presumably
# places uniform priors on `bounds` -- TODO confirm in Estimation).
mcmc_result_Uniform = Estimation.run_mcmc_Uniform(
    Prediction.preds_distribution,
    MultitaskGP_models,
    MultitaskGP_likelihoods,
    row_idx,
    test_y_21,
    bounds,
    num_sampling=1200,
    warmup_step=300,
)



In [None]:
# Same sampler with the MVGP emulator; rebinds mcmc_result_Uniform from the previous cell.
mcmc_result_Uniform = Estimation.run_mcmc_Uniform(
    Prediction.preds_distribution,
    MVGP_models,
    MVGP_likelihoods,
    row_idx,
    test_y_21,
    bounds,
    num_sampling=1200,
    warmup_step=300,
)



In [None]:
# Variant that receives the local training inputs and the fitted PCA
# (presumably for Normal priors and output back-projection -- TODO confirm in Estimation).
mcmc_result_Normal = Estimation.run_mcmc_Normal(
    Prediction.preds_for_one_model,
    MultitaskGP_models,
    MultitaskGP_likelihoods,
    row_idx,
    test_y,
    local_train_x,
    PCA_func=pca_20,
    num_sampling=400,
    warmup_step=100,
)



In [None]:
posterior_samples = mcmc_result_Uniform.get_samples()

# One sample site per bounded parameter, named param_0, param_1, ...
param_names = [f'param_{i}' for i in range(len(bounds))]

# Posterior mean per parameter; flattening first makes multi-dimensional sample tensors
# collapse to a single scalar mean, exactly as for 1-D ones.
posterior_means_array = np.array(
    [torch.mean(posterior_samples[name].reshape(-1)).item() for name in param_names],
    dtype=float,
)

In [None]:
# Display the parameter bounds used by the estimation and MCMC cells.
bounds

### Plot

In [None]:
# NOTE(review): true_values is only assigned in the plotting cell further down, so this
# display raises NameError on a fresh top-to-bottom run.
true_values

In [None]:
# Sampler diagnostics reported by the MCMC result object.
mcmc_result_Uniform.diagnostics()

In [None]:
posterior_samples_Uniform = mcmc_result_Uniform.get_samples()

# Reference values drawn on every panel: the true inputs and the posterior means.
true_values = test_x[row_idx,:]
point_estimations = posterior_means_array

# One stacked panel per sampled parameter.
n_panels = len(posterior_samples_Uniform)
fig, axes = plt.subplots(n_panels, 1, figsize=(8, n_panels * 3))

for i, (param_name, samples) in enumerate(posterior_samples_Uniform.items()):
    # plt.subplots returns a bare Axes (not an array) when there is a single panel.
    ax = axes[i] if n_panels > 1 else axes

    sns.kdeplot(samples.detach().numpy(), ax=ax, color='blue')
    ax.set_title(f'Density of {param_name}')

    # Mark the true value and the point estimate when one exists for this index.
    show_truth = true_values is not None and i < len(true_values)
    if show_truth:
        ax.axvline(true_values[i], color='red', linestyle='--', label='True Value')
    show_estimate = point_estimations is not None and i < len(point_estimations)
    if show_estimate:
        ax.axvline(point_estimations[i], color='green', linestyle='-.', label='Point Estimation')
    ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Display the raw posterior samples (mapping of site name to sample tensor).
posterior_samples_Uniform

In [None]:
# Samples of a single site, selected by its name.
posterior_samples_Uniform['param_1']

In [None]:
# Convert the Pyro MCMC result to ArviZ InferenceData and draw trace plots.
idata = az.from_pyro(mcmc_result_Uniform)
az.plot_trace(idata)
plt.show()


# Summary table with 95% highest-density intervals.
summary = az.summary(idata, hdi_prob=0.95)
print(summary)

# **End**

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Scratch illustration: histogram of draws from a standard normal distribution.
mu, sigma = 0, 1  # mean and standard deviation

# 10000 random draws (unseeded, so the figure differs between runs).
x = np.random.normal(mu, sigma, 10000)
# x = np.random.uniform(mu, sigma, 10000)

plt.figure(figsize=(8, 6))
plt.hist(
    x,
    bins=30,
    density=True,
    alpha=0.6,
    color='b',
    edgecolor='black',
)

plt.xlabel("X values")
plt.ylabel("Probability Density")

plt.show()


In [None]:
# Logistic squashing function
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise via NumPy."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

# Shift-and-scale helper
def affine_transform(x, loc=1.3, scale=2.6):
    """Return ``loc + scale * x`` (defaults: loc=1.3, scale=2.6)."""
    stretched = scale * x
    return loc + stretched

In [None]:
# Squash the Gaussian draws into (0, 1) with the sigmoid, then apply the default
# shift-and-scale of affine_transform.
# x_1 = affine_transform(x,3,1.5)
x_2 = sigmoid(x)
x_transformed = affine_transform(x_2)

# Histogram of the transformed draws.
plt.figure(figsize=(8, 6))
plt.hist(
    x_transformed,
    bins=30,
    density=True,
    alpha=0.6,
    color='b',
    edgecolor='black',
)

plt.xlabel("Transformed X values")
plt.ylabel("Probability Density")

plt.show()


In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

row_idx = 0

# Neighbours of the selected test row (k = 400) in the full-width output space.
input_point = test_y[row_idx,:]
local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(input_point, train_x, train_y, k = 400)

# Treat x as the observed data: column 3 of the neighbouring inputs
x = local_train_x[:,3] 

# 1. Draw the histogram
plt.hist(x, bins=30, density=True, alpha=0.6, color='g')

# 2. Try fitting a normal distribution
mu, std = stats.norm.fit(x)

# 3. Overlay the fitted normal density
xmin, xmax = plt.xlim()
x_plot = np.linspace(xmin, xmax, 100)
p = stats.norm.pdf(x_plot, mu, std)
plt.plot(x_plot, p, 'k', linewidth=2)
title = f"Fit results: mu = {mu:.2f}, std = {std:.2f}"
plt.title(title)
plt.show()

# 4. Generate random numbers that resemble the data (not used further in the visible cells)
random_data = np.random.normal(mu, std, len(x))


In [None]:
# True input parameters of the first test case.
test_x[0,:]