<a href="https://colab.research.google.com/github/DusunHwang/code_tunnel/blob/main/heteroskedasic_likely_hood_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install gpytorch
import torch
import gpytorch
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# -------------------------
# Synthetic high-dimensional composition data for the example
# -------------------------
N, D = 300, 100  # number of samples, number of composition features


def true_function(x):
    """Return the noise-free target for a 2-D array of samples.

    Only columns 0 and 1 of ``x`` contribute; all other columns are ignored.
    """
    return np.sin(x[:, 0] * 6.28) + 0.5 * x[:, 1]


X_raw = np.random.rand(N, D)
# Input-dependent noise level driven by column 2 of the inputs. The previous
# expression indexed an undefined name `x`, which raised NameError here.
noise_std = 0.1 + 0.4 * np.square(X_raw[:, 2])
y_raw = true_function(X_raw) + noise_std * np.random.randn(N)

# Standardize each input column, then convert inputs and targets to float32
# torch tensors.
scaler_x = StandardScaler()
scaler_x.fit(X_raw)
X = torch.tensor(scaler_x.transform(X_raw), dtype=torch.float32)
y = torch.tensor(y_raw, dtype=torch.float32)

# -------------------------
# Train / test split (20 % held out, fixed random_state)
# -------------------------
train_x, test_x, train_y, test_y = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# -------------------------
# Main GPR model
# -------------------------
class MainGPR(gpytorch.models.ExactGP):
    """Exact GP regressor with a zero mean and a scaled ARD RBF kernel."""

    def __init__(self, train_x, train_y, likelihood):
        """Build the mean and kernel modules.

        Args:
            train_x: Training inputs; ``train_x.shape[1]`` sets the number of
                ARD dimensions of the RBF kernel.
            train_y: Training targets, forwarded to ``ExactGP``.
            likelihood: Likelihood object, forwarded to ``ExactGP``.
        """
        super().__init__(train_x, train_y, likelihood)
        num_features = train_x.shape[1]
        ard_rbf = gpytorch.kernels.RBFKernel(ard_num_dims=num_features)
        self.mean_module = gpytorch.means.ZeroMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(ard_rbf)

    def forward(self, x):
        """Return a ``MultivariateNormal`` built from the mean and kernel at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

# -------------------------
# Noise GPR (predicts the residual variance)
# -------------------------
class NoiseModel(gpytorch.models.ExactGP):
    """Exact GP with a constant mean and an ARD RBF kernel.

    It owns a fresh ``GaussianLikelihood``; this script uses it as the
    input-dependent noise model of the main regressor.
    """

    def __init__(self, x, y):
        """Build the model.

        Args:
            x: Training inputs; ``x.shape[1]`` sets the number of ARD
                dimensions of the RBF kernel.
            y: Training targets, forwarded to ``ExactGP``.
        """
        own_likelihood = gpytorch.likelihoods.GaussianLikelihood()
        super().__init__(x, y, own_likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.RBFKernel(ard_num_dims=x.shape[1])

    def forward(self, x):
        """Return a ``MultivariateNormal`` built from the mean and kernel at ``x``."""
        prior_mean = self.mean_module(x)
        prior_covar = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(prior_mean, prior_covar)

# -------------------------
# Heteroskedastic likelihood + model definition
# -------------------------
# Train the noise model on an initial residual estimate, (y - mean)^2, plus a
# small offset so that no target is exactly zero.
residual_init = (train_y - train_y.mean()) ** 2 + 1e-4

# `HeteroskedasticNoise` is a noise module, not a likelihood: it takes the
# noise model as a required constructor argument and must be wrapped in a
# Gaussian likelihood before `ExactGP` accepts it.
hetero_noise = gpytorch.likelihoods.HeteroskedasticNoise(
    NoiseModel(train_x, residual_init)
)
# Because the noise depends on the inputs, this likelihood has to be given the
# inputs whenever it is evaluated, e.g. `hetero_likelihood(model(x), x)`.
hetero_likelihood = gpytorch.likelihoods.gaussian_likelihood._GaussianLikelihoodBase(
    noise_covar=hetero_noise
)

main_model = MainGPR(train_x, train_y, hetero_likelihood)

# -------------------------
# Training loop
# -------------------------
def train_model(model, likelihood, n_iter=150):
    """Fit ``model`` by minimising the negative exact marginal log-likelihood.

    The training data is taken from ``model.train_inputs`` and
    ``model.train_targets`` rather than from module-level globals, so the
    function works for any exact GP passed in.

    Args:
        model: Exact GP model to optimise; put into training mode.
        likelihood: Likelihood used to build the MLL; put into training mode.
        n_iter: Number of Adam steps (learning rate 0.1).

    Returns:
        None. Parameters are updated in place and the loss is printed every
        10 iterations.
    """
    model.train()
    likelihood.train()
    # Only `model.parameters()` is optimised; this assumes the likelihood is
    # registered on the model (as `ExactGP` does) so its parameters are
    # included as well.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    inputs = model.train_inputs
    targets = model.train_targets
    for i in range(n_iter):
        optimizer.zero_grad()
        output = model(*inputs)
        # The inputs are forwarded to the likelihood so that input-dependent
        # (heteroskedastic) noise can be evaluated; likelihoods with
        # input-independent noise ignore the extra arguments.
        loss = -mll(output, targets, *inputs)
        loss.backward()
        if i % 10 == 0:
            print(f"Iter {i} - Loss: {loss.item():.3f}")
        optimizer.step()

train_model(main_model, hetero_likelihood)

# -------------------------
# Prediction and visualisation
# -------------------------
main_model.eval()
hetero_likelihood.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    # The test inputs are also passed to the likelihood so that the
    # input-dependent noise can be evaluated at the test points.
    preds = hetero_likelihood(main_model(test_x), test_x)
    mean = preds.mean
    lower, upper = preds.confidence_region()

# Scatter plot of predicted vs. true values; the error bar is the distance
# from the predictive mean to the upper confidence bound.
y_lo = test_y.min().item()
y_hi = test_y.max().item()
plt.figure(figsize=(6, 6))
plt.errorbar(test_y.numpy(), mean.numpy(), yerr=(upper - mean).numpy(), fmt='o', alpha=0.6)
# Diagonal reference line (perfect prediction), drawn from plain floats.
plt.plot([y_lo, y_hi], [y_lo, y_hi], 'k--')
plt.xlabel("True Property")
plt.ylabel("Predicted Property")
plt.title("Heteroskedastic GPR: Prediction vs Ground Truth")
plt.grid(True)
plt.show()

NameError: name 'x' is not defined