In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np

import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

plt.rcParams['figure.figsize'] = (11, 6)
plt.rcParams['figure.dpi'] = 75
plt.rcParams['savefig.dpi'] = 75*10

file_name = "exponential2_20251029_0928"
groud_truth = lambda x1, x2: np.exp(-2*x1) + x2
# d_opt = pd.read_excel(os.path.join(os.getcwd(), "multkan_sweep_autosave", file_name + ".xlsx"), sheet_name='best_spline_avg_by_params')
d_opt = pd.read_excel(os.path.join(os.getcwd(), "multkan_sweep_autosave", file_name + ".xlsx"), sheet_name='best_avg_by_params')

save_heading = os.path.join(os.getcwd(), "custom_figures", file_name)

x1_grid = np.linspace(-1, 1, 120)
x2_grid = np.linspace(-1, 1, 120)

x1, x2= np.meshgrid(x1_grid, x2_grid)
X = np.stack((x1.flatten(), x2.flatten()), axis=1)
y_mesh = groud_truth(x1, x2)

y = y_mesh.flatten().reshape(-1, 1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"This script is running on {device}.")

X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)  # 0.2 × 0.8 = 0.16 (전체의 16%)

print(f"전체 데이터셋 크기: {len(X)}")
print(f"훈련셋 크기: {len(X_train)} ({len(X_train)/len(X)*100:.1f}%)")
print(f"검증셋 크기: {len(X_val)} ({len(X_val)/len(X)*100:.1f}%)")
print(f"테스트셋 크기: {len(X_test)} ({len(X_test)/len(X)*100:.1f}%)")

# 1. MinMaxScaler 객체 생성 --- 범위를 0.1~0.9로 재설정
scaler_X = MinMaxScaler(feature_range=(0.1, 0.9))
scaler_y = MinMaxScaler(feature_range=(0.1, 0.9))

X_train_norm = scaler_X.fit_transform(X_train)
y_train_norm = scaler_y.fit_transform(y_train)
X_val_norm = scaler_X.transform(X_val)
X_test_norm = scaler_X.transform(X_test)

y_val_norm = scaler_y.transform(y_val)
y_test_norm = scaler_y.transform(y_test)

X_norm = np.concatenate([X_train_norm, X_val_norm, X_test_norm])
y_norm = np.concatenate([y_train_norm, y_val_norm, y_test_norm])


This script is running on cpu.
전체 데이터셋 크기: 14400
훈련셋 크기: 9216 (64.0%)
검증셋 크기: 2304 (16.0%)
테스트셋 크기: 2880 (20.0%)


In [25]:
from SALib.analyze.sobol import analyze

nx = X_norm.shape[1]
problem = {
    'num_vars': nx,
    'names': [f'x{i+1}' for i in range(nx)],
    'bounds': [[0.1, 0.9]] * nx
}
y_norm_flat = y_norm.flatten()
Si = analyze(problem, y_norm_flat, print_to_console=True)

          ST   ST_conf
x1  1.029740  0.058949
x2  1.004789  0.067802
          S1   S1_conf
x1 -0.044816  0.050162
x2 -0.030148  0.052596
                S2   S2_conf
[x1, x2]  0.073611  0.069818
