In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import gpytorch
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel, MaternKernel, AdditiveKernel, LinearKernel
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood

In [2]:
device = torch.device("cpu")

In [3]:
# --- 1. Загрузка данных ---
df = pd.read_csv("ourall.csv")
df = df[df['B4TOD4'] != 0.005]
df = df[df['OMEGA5'] != 15]
df = df[df['OMEGA5'] != -15]
PARAMS = [col for col in df.columns if col != "PT_LOSS"]
X = df[PARAMS].values
y = df["PT_LOSS"].values #.reshape(-1, 1)

# --- 2. min/max по параметрам ---
param_bounds = {col: (df[col].min(), df[col].max()) for col in PARAMS}

# --- 3. Нормализация данных (очень желательно для GPs) ---
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaler = StandardScaler()
# y_scaled = y_scaler.fit_transform(y).flatten()
# --- 4. Делим на train и pool ---
np.random.seed(42)
INIT_SIZE = 10
initial_idx = np.random.choice(len(X), size=INIT_SIZE, replace=False)
X_train, y_train = X_scaled[initial_idx], y[initial_idx]
X_pool, y_pool = (
    np.delete(X_scaled, initial_idx, axis=0),
    np.delete(y, initial_idx, axis=0),
)
print(X_pool,y_pool,len(X_pool))

[[0.  0.5 0.  ... 0.  0.  0. ]
 [0.  0.5 0.  ... 0.  0.  1. ]
 [0.  0.5 0.  ... 0.  0.2 0. ]
 ...
 [1.  0.  1.  ... 0.5 0.8 1. ]
 [1.  0.  1.  ... 0.5 1.  0. ]
 [1.  0.  1.  ... 0.5 1.  1. ]] [0.108975  0.0847613 0.061027  ... 0.0992762 0.116008  0.0890589] 3878


In [4]:
class GPRegressionModel(ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = ConstantMean()
        self.covar_module = (
            ScaleKernel(MaternKernel(nu=1.5, ard_num_dims=train_x.shape[1])) +
            ScaleKernel(RBFKernel(ard_num_dims=train_x.shape[1]))
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [5]:
def make_kernel(idx, num_dims):
    if idx == 0:
        return ScaleKernel(RBFKernel(ard_num_dims=num_dims))
    elif idx == 1:
        return ScaleKernel(MaternKernel(nu=1.5, ard_num_dims=num_dims))
    elif idx == 2:
        return ScaleKernel(MaternKernel(nu=2.5, ard_num_dims=num_dims))
    else:
        raise ValueError("Committee kernel idx out of range")


# --- Обёртка для одной модели ---
class GPCommitteeModel(ExactGP):
    def __init__(self, train_x, train_y, likelihood, kernel):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = ConstantMean()
        self.covar_module = kernel

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


# --- Обучаем комитет ---
def train_committee(X_train, y_train, num_models=3, num_iter=60, device="cpu"):
    committee = []
    for i in range(num_models):
        train_x = torch.tensor(X_train, dtype=torch.float32).to(device)
        train_y = torch.tensor(y_train, dtype=torch.float32).to(device)
        likelihood = GaussianLikelihood().to(device)
        kernel = make_kernel(i, X_train.shape[1])
        model = GPCommitteeModel(train_x, train_y, likelihood, kernel).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
        mll = ExactMarginalLogLikelihood(likelihood, model)
        model.train()
        likelihood.train()
        for it in range(num_iter):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            if torch.isnan(loss):
                print(f"NaN in model {i}, iter {it}, breaking")
                break
            loss.backward()
            optimizer.step()
        committee.append((model, likelihood))
    return committee

In [6]:
def qbc(committee, X_sample):
    # 1. Преобразуем в тензор с градиентами
    tx = torch.tensor(X_sample, dtype=torch.float32, requires_grad=True)

    preds = []
    for model, likelihood in committee:
        model.eval()
        likelihood.eval()
        x_query_t = tx.reshape(1, -1)
        mean = model(x_query_t).mean
        preds.append(mean)

    # 3. Вычисляем дисперсию предсказаний
    preds_tensor = torch.stack(preds)  # [n_models, n_samples]
    f_avg = preds_tensor.mean(dim=0)
    loss = torch.var(preds_tensor - f_avg, dim=0).mean()
    print(loss,'loss')
    # 4. Вычисляем градиенты
    tx.grad = None  # Очищаем предыдущие градиенты
    loss.backward()

    gradients = (
        tx.grad.detach().numpy() if tx.grad is not None else np.zeros_like(X_sample)
    )
    print("grads", gradients)
    return loss.item(), gradients


def NA_query_strategy(comittee, X_sample, X_pool):
    _, grads = qbc(comittee, X_sample)  # (N, M) -> N
    grads = [torch.tensor(row, dtype=torch.float32).unsqueeze(0) for row in grads]
    grads = torch.tensor(grads, dtype=torch.float32).numpy()
    print(grads,'grads')
    sign = np.sign(grads)
    print(sign,'sign')
    for ind in range(len(sign)):
        step = 0.01
        x_gen = X_sample[ind] + step * sign[ind]
        x_gen = np.clip(x_gen, 0.0, 1.0)
        dists = np.linalg.norm(X_pool - x_gen, axis=1)
        idx = np.argmin(dists)
    return idx, grads

def qbc(committee, X_sample):
    # loss_fn = torch.nn.MSELoss()
    # tx = torch.from_numpy(X_sample).float()

    # preds = []
    # for model in committee.learner_list:
    #     pred = model.predict(tx)  # (1, 1) или (1,)
    #     pred = torch.tensor(pred, dtype=torch.float32)
    #     preds.append(pred)
    # print(preds)
    # # preds = torch.tensor(preds, dtype=torch.float32)
    # preds = torch.stack(preds, dim=0)  # (N_models, 1, D) или (N_models, 1)
    # f_avg = preds.mean(dim=0)         # (1, D)
    # # MSE между всеми предсказаниями и средним
    # loss = loss_fn(preds.squeeze(), f_avg.squeeze())
    # print(loss,'loss')
    # return loss
    tx = torch.tensor(X_sample, dtype=torch.float32)

    preds = []
    for model, likelihood in committee:
        model.eval()
        likelihood.eval()
        x_query_t = tx.reshape(1, -1)
        mean = model(x_query_t).mean
        preds.append(mean)

    # 3. Вычисляем дисперсию предсказаний
    preds_tensor = torch.stack(preds)  # [n_models, n_samples]
    f_avg = preds_tensor.mean(dim=0)
    loss = torch.var(preds_tensor - f_avg, dim=0).mean()
    print(loss,'loss')
    return loss

# def pool_based_qbc(commette, X_pool,n_instances=7):


#     train_idx = np.random.choice(range(X_pool.shape[0]), size=21, replace=False)
#     X_init= X_pool[train_idx]
#     uncertainties = []
#     for x in X_init:
#         loss = qbc(committee, np.array([x]))  # qbc должен возвращать скаляр
#         uncertainties.append(loss)
    
#     # 3. Сортируем по uncertainty (по убыванию)
#     sorted_indices = np.argsort(uncertainties)[::-1]  # Индексы для сортировки
    
#     # 4. Выбираем топ-N самых неопределённых
#     selected_indices = sorted_indices[:n_instances]
#     # X_selected = X_pool[selected_indices]
#     X_selected = X_init[selected_indices]
#     print("Selected samples shape:", X_selected.shape)
#     print("Selected samples:\n", X_selected)
    
#     return selected_indices, X_selected  # Возвращаем И индексы, И данные

def pool_based_qbc(committee, X_pool, n_instances=7):
    # 1. Случайно выбираем начальные точки из X_pool
    train_idx = np.random.choice(range(X_pool.shape[0]), size=21, replace=False)
    X_init = X_pool[train_idx]
    
    # 2. Вычисляем uncertainty для каждой точки в X_init
    uncertainties = []
    for x in X_init:
        loss = qbc(committee, np.array([x]))
        uncertainties.append(loss.detach().numpy())  # Добавляем .detach() для преобразования в numpy
    
    # 3. Преобразуем uncertainties в numpy array
    uncertainties_np = np.array(uncertainties)
    
    # 4. Сортируем индексы по uncertainty (по убыванию)
    sorted_indices = np.argsort(uncertainties_np)[::-1]
    
    # 5. Берем топ-N индексов из X_init и находим их исходные индексы в X_pool
    selected_init_indices = sorted_indices[:n_instances]
    selected_pool_indices = train_idx[selected_init_indices]
    
    print("Selected samples shape:", X_pool[selected_pool_indices].shape)
    print("Selected samples:\n", X_pool[selected_pool_indices])
    
    return selected_pool_indices, X_pool[selected_pool_indices]

In [79]:
# from sklearn.model_selection import train_test_split
# x1,y1,x2,y2 = train_test_split(
#     X_pool, y_pool, test_size=0.3, random_state=42)
# committee = train_committee(X_train, y_train, num_models=num_models, num_iter=500)
# X_train = np.vstack([X_train, x1])
# y_train = np.append(y_train, y1)
# print(X_train)
# # X_pool = np.delete(X_pool, idx, axis=0)
# # y_pool = np.delete(y_pool, idx, axis=0)
# for model, likelihood in committee:
#     model.eval()
#     likelihood.eval()
#     X_hold = torch.tensor(x2, dtype=torch.float32).to(device)
#     print(X_hold)
#     with torch.no_grad(), gpytorch.settings.fast_pred_var():
#         preds = model(X_hold)
#         y_pred = preds.mean.cpu().numpy().flatten()
#         y_true = y_pool
#     preds_list.append(y_pred)
# y_pred = np.mean(preds_list, axis=0)
# mae = mean_absolute_error(y2, y_pred)
# r2 = r2_score(y2, y_pred)
# list_r2.append(r2)
# list_mae.append(mae)
# list_elem.append(it)
# print(f"Final MAE: {mae:.4f}, R2: {r2:.4f}")
committee = train_committee(X_train, y_train, num_models=num_models, num_iter=500)
# 2. Выбираем точку по максимальному разногласию градиентов (NA-QBC)
# for i in range(X_train.shape[0]):
# best_grad, best_ind = None, None
# for rnd_indx in range(X_train.shape[0]):
# rnd_indx = np.random.choice(X_train.shape[0])
idx, x_dop = pool_based_qbc(committee, X_pool)
#     if best_grad is None:
#         best_grad = grad
#         best_ind = idx
#     # elif best_grad < sum(grad):
#     #     best_grad = sum(grad)
#     #     best_ind = idx
#     how_1 = comparison = [abs(g1) > abs(g2) for g1, g2 in zip(grad, best_grad)]
#     if len(how_1) > len(best_grad):
#         best_grad = grad
#         best_ind = idx
# idx = best_ind
# X_train = np.vstack([X_train, X_pool[idx]])
# y_train = np.append(y_train, y_pool[idx])
# X_pool = np.delete(X_pool, idx, axis=0)
# y_pool = np.delete(y_pool, idx, axis=0)
# print(f"Step {it + 1}: added point {idx}, pool left: {len(X_pool)}")
# preds_list = []
for model, likelihood in committee:
    model.eval()
    likelihood.eval()
    X_hold = torch.tensor(X_pool, dtype=torch.float32).to(device)
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        preds = model(X_hold)
        y_pred = preds.mean.cpu().numpy().flatten()
        y_true = y_pool
    preds_list.append(y_pred)
y_pred = np.mean(preds_list, axis=0)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)
list_r2.append(r2)
list_mae.append(mae)
list_elem.append(it)
print(f"Final MAE: {mae:.4f}, R2: {r2:.4f}")

tensor(6.6489e-06, grad_fn=<MeanBackward0>) loss
tensor(2.9731e-05, grad_fn=<MeanBackward0>) loss
tensor(7.1019e-06, grad_fn=<MeanBackward0>) loss
tensor(2.7151e-05, grad_fn=<MeanBackward0>) loss
tensor(1.1979e-06, grad_fn=<MeanBackward0>) loss
tensor(3.4749e-06, grad_fn=<MeanBackward0>) loss
tensor(5.4249e-06, grad_fn=<MeanBackward0>) loss
tensor(1.1150e-05, grad_fn=<MeanBackward0>) loss
tensor(3.5496e-06, grad_fn=<MeanBackward0>) loss
tensor(7.3792e-06, grad_fn=<MeanBackward0>) loss
tensor(7.2546e-06, grad_fn=<MeanBackward0>) loss
tensor(3.6787e-06, grad_fn=<MeanBackward0>) loss
tensor(1.6353e-05, grad_fn=<MeanBackward0>) loss
tensor(1.5350e-06, grad_fn=<MeanBackward0>) loss
tensor(1.0358e-06, grad_fn=<MeanBackward0>) loss
tensor(6.8371e-07, grad_fn=<MeanBackward0>) loss
tensor(5.3408e-06, grad_fn=<MeanBackward0>) loss
tensor(3.9429e-05, grad_fn=<MeanBackward0>) loss
tensor(9.1297e-06, grad_fn=<MeanBackward0>) loss
tensor(8.1965e-05, grad_fn=<MeanBackward0>) loss
tensor(7.4016e-06, g

In [81]:
import numpy as np
import torch


# --- Основной цикл ---
N_QUERIES = 50
num_models = 3  # число моделей в комитете
list_r2 = []
list_mae = []
list_elem = []
for it in range(N_QUERIES):
    # 1. Обучаем комитет (пример кода train_committee см. выше)
    committee = train_committee(X_train, y_train, num_models=num_models, num_iter=500)
    # 2. Выбираем точку по максимальному разногласию градиентов (NA-QBC)
    # for i in range(X_train.shape[0]):
    # best_grad, best_ind = None, None
    # for rnd_indx in range(X_train.shape[0]):
    # rnd_indx = np.random.choice(X_train.shape[0])
    idx, x_dop = pool_based_qbc(committee, X_pool)
    #     if best_grad is None:
    #         best_grad = grad
    #         best_ind = idx
    #     # elif best_grad < sum(grad):
    #     #     best_grad = sum(grad)
    #     #     best_ind = idx
    #     how_1 = comparison = [abs(g1) > abs(g2) for g1, g2 in zip(grad, best_grad)]
    #     if len(how_1) > len(best_grad):
    #         best_grad = grad
    #         best_ind = idx
    # idx = best_ind
    X_train = np.vstack([X_train, X_pool[idx]])
    y_train = np.append(y_train, y_pool[idx])
    X_pool = np.delete(X_pool, idx, axis=0)
    y_pool = np.delete(y_pool, idx, axis=0)
    print(f"Step {it + 1}: added point {idx}, pool left: {len(X_pool)}")
    preds_list = []
    # for model, likelihood in committee:
    #     model.eval()
    #     likelihood.eval()
    #     X_hold = torch.tensor(X_pool, dtype=torch.float32).to(device)
    #     with torch.no_grad(), gpytorch.settings.fast_pred_var():
    #         preds = model(X_hold)
    #         y_pred = preds.mean.cpu().numpy().flatten()
    #         y_true = y_pool
    #     preds_list.append(y_pred)
    # y_pred = np.mean(preds_list, axis=0)
    # mae = mean_absolute_error(y_true, y_pred)
    # r2 = r2_score(y_true, y_pred)
    # list_r2.append(r2)
    # list_mae.append(mae)
    # list_elem.append(it)
    # print(f"Final MAE: {mae:.4f}, R2: {r2:.4f}")
    for model, likelihood in committee:
        model.eval()
        likelihood.eval()
        X_hold = torch.tensor(X_train, dtype=torch.float32).to(device)
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            preds = model(X_hold)
            y_pred = preds.mean.cpu().numpy().flatten()
            y_true = y_train
        preds_list.append(y_pred)
    y_pred = np.mean(preds_list, axis=0)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    list_r2.append(r2)
    list_mae.append(mae)
    list_elem.append(it)
    print(f"Final MAE: {mae:.4f}, R2: {r2:.4f}")

tensor(6.4017e-05, grad_fn=<MeanBackward0>) loss
tensor(0.0014, grad_fn=<MeanBackward0>) loss
tensor(0.0003, grad_fn=<MeanBackward0>) loss
tensor(0.0002, grad_fn=<MeanBackward0>) loss
tensor(0.0032, grad_fn=<MeanBackward0>) loss
tensor(0.0012, grad_fn=<MeanBackward0>) loss
tensor(0.0014, grad_fn=<MeanBackward0>) loss
tensor(0.0003, grad_fn=<MeanBackward0>) loss
tensor(7.0728e-06, grad_fn=<MeanBackward0>) loss
tensor(0.0008, grad_fn=<MeanBackward0>) loss
tensor(0.0016, grad_fn=<MeanBackward0>) loss
tensor(5.3463e-05, grad_fn=<MeanBackward0>) loss
tensor(0.0006, grad_fn=<MeanBackward0>) loss
tensor(4.5241e-05, grad_fn=<MeanBackward0>) loss
tensor(4.5973e-05, grad_fn=<MeanBackward0>) loss
tensor(0.0005, grad_fn=<MeanBackward0>) loss
tensor(0.0010, grad_fn=<MeanBackward0>) loss
tensor(0.0009, grad_fn=<MeanBackward0>) loss
tensor(8.2605e-05, grad_fn=<MeanBackward0>) loss
tensor(0.0013, grad_fn=<MeanBackward0>) loss
tensor(1.8674e-05, grad_fn=<MeanBackward0>) loss
Selected samples shape: (7,

In [82]:
print(list_elem)
print(list_mae)
print(list_r2)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
[0.04205072626348794, 0.014953696997968354, 0.012152229876241373, 0.012860504222758192, 0.015701396655767228, 0.010677148385720982, 0.011949943267341383, 0.012900637101497069, 0.01085494719608385, 0.012002158842191097, 0.00867987643708799, 0.009363303887130353, 0.008883761797672213, 0.00799881213773224, 0.007619057488225025, 0.007408253412470662, 0.006970173713369887, 0.006757592865035464, 0.006626140397896467, 0.006107090945191384, 0.005923218536277941, 0.005591677288729534, 0.005222309102659617, 0.0048505067030350825, 0.004948092178399628, 0.0036191921514010677, 0.003993947375415079, 0.0035905690261234364, 0.003806576194191651, 0.003998850863244209, 0.0038081953975131343, 0.003850389258636675, 0.003720297006192842, 0.003913834862636849, 0.0036559216521406183, 0.004299446498894056, 0.0038851241994

In [35]:
# --- 1. Загрузка данных ---
df = pd.read_csv("ourall.csv")
df = df[df['B4TOD4'] != 0.005]
df = df[df['OMEGA5'] != 15]
df = df[df['OMEGA5'] != -15]
PARAMS = [col for col in df.columns if col != "PT_LOSS"]
X = df[PARAMS].values
y = df["PT_LOSS"].values #.reshape(-1, 1)

# --- 2. min/max по параметрам ---
param_bounds = {col: (df[col].min(), df[col].max()) for col in PARAMS}

# --- 3. Нормализация данных (очень желательно для GPs) ---
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaler = StandardScaler()
# y_scaled = y_scaler.fit_transform(y).flatten()
# --- 4. Делим на train и pool ---
np.random.seed(42)
INIT_SIZE = 10
initial_idx = np.random.choice(len(X), size=INIT_SIZE, replace=False)
X_train, y_train = X_scaled[initial_idx], y[initial_idx]
X_pool, y_pool = (
    np.delete(X_scaled, initial_idx, axis=0),
    np.delete(y, initial_idx, axis=0),
)
print(X_pool,y_pool,len(X_pool))

[[0.  0.5 0.  ... 0.  0.  0. ]
 [0.  0.5 0.  ... 0.  0.  1. ]
 [0.  0.5 0.  ... 0.  0.2 0. ]
 ...
 [1.  0.  1.  ... 0.5 0.8 1. ]
 [1.  0.  1.  ... 0.5 1.  0. ]
 [1.  0.  1.  ... 0.5 1.  1. ]] [0.108975  0.0847613 0.061027  ... 0.0992762 0.116008  0.0890589] 3878


In [13]:
import numpy as np
import torch
np.random(1)
# Рандомная стратегия !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# --- Основной цикл ---
N_QUERIES = 50
num_models = 3  # число моделей в комитете
list_r2 = []
list_mae = []
list_elem = []
r2_tren =[]
mae_tren =[]
for it in range(N_QUERIES):
    # 1. Обучаем комитет (пример кода train_committee см. выше)
    committee = train_committee(X_train, y_train, num_models=num_models, num_iter=500)
    
    idx = np.random.choice(range(X_pool.shape[0]), size=1, replace=False)
    # X_init = X_pool[train_idx]
    # y_init = X_pool[train_idx]
    
    X_train = np.vstack([X_train, X_pool[idx]])
    y_train = np.append(y_train, y_pool[idx])
    X_pool = np.delete(X_pool, idx, axis=0)
    y_pool = np.delete(y_pool, idx, axis=0)
    print(f"Step {it + 1}: added point {idx}, pool left: {len(X_pool)}")
    preds_list = []
    # for model, likelihood in committee:
    #     model.eval()
    #     likelihood.eval()
    #     X_hold = torch.tensor(X_pool, dtype=torch.float32).to(device)
    #     with torch.no_grad(), gpytorch.settings.fast_pred_var():
    #         preds = model(X_hold)
    #         y_pred = preds.mean.cpu().numpy().flatten()
    #         y_true = y_pool
    #     preds_list.append(y_pred)
    # y_pred = np.mean(preds_list, axis=0)
    # mae = mean_absolute_error(y_true, y_pred)
    # r2 = r2_score(y_true, y_pred)
    # list_r2.append(r2)
    # list_mae.append(mae)
    # list_elem.append(it)
    for model, likelihood in committee:
        model.eval()
        likelihood.eval()
        X_hold = torch.tensor(X_train, dtype=torch.float32).to(device)
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            preds = model(X_hold)
            y_pred = preds.mean.cpu().numpy().flatten()
            y_true = y_train
        preds_list.append(y_pred)
    y_pred = np.mean(preds_list, axis=0)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    list_r2.append(r2)
    list_mae.append(mae)
    list_elem.append(it)
    print(f"Final MAE: {mae:.4f}, R2: {r2:.4f}")

TypeError: 'module' object is not callable

In [15]:
print(list_elem)
print(list_mae)
print(list_r2)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
[0.089936568460938, 0.09506775019379378, 0.03592081643541542, 0.059349286190124806, 0.057759637754462786, 0.03838391585973966, 0.039137820894971025, 0.03813603988762719, 0.03777061380435944, 0.03815348609501149, 0.04021692438809761, 0.04930788788463797, 0.047198673558829146, 0.04666863221944952, 0.04732998566204282, 0.04270127845700537, 0.042312004651544693, 0.041425255603376614, 0.04014405896682611, 0.03921990523668634, 0.03917928050631768, 0.03932988163575429, 0.03836358892863984, 0.03858764626674106, 0.03389657816663363, 0.033900921203401886, 0.035089776911655574, 0.03487019877847037, 0.03449126044016333, 0.03437516761929149, 0.032045784761976705, 0.03435597038935927, 0.03291224581991319, 0.03476138759554644, 0.036058757331753, 0.036109410511906796, 0.03635632866836821, 0.03509590182193327, 0.03

In [13]:
import numpy as np
import torch


# --- Основной цикл ---
N_QUERIES = 30
num_models = 3  # число моделей в комитете
list_r2 = []
list_mae = []
list_elem = []
for it in range(N_QUERIES):
    # 1. Обучаем комитет (пример кода train_committee см. выше)
    committee = train_committee(X_train, y_train, num_models=num_models, num_iter=500)
    # 2. Выбираем точку по максимальному разногласию градиентов (NA-QBC)
    # for i in range(X_train.shape[0]):
    best_grad, best_ind = None, None
    for rnd_indx in range(X_train.shape[0]):
    # rnd_indx = np.random.choice(X_train.shape[0])
        idx, grad = NA_query_strategy(committee, X_train[rnd_indx], X_pool)
        if best_grad is None:
            best_grad = grad
            best_ind = idx
        # elif best_grad < sum(grad):
        #     best_grad = sum(grad)
        #     best_ind = idx
        how_1 = comparison = [abs(g1) > abs(g2) for g1, g2 in zip(grad, best_grad)]
        if len(how_1) > len(best_grad):
            best_grad = grad
            best_ind = idx
    idx = best_ind
    X_train = np.vstack([X_train, X_pool[idx]])
    y_train = np.append(y_train, y_pool[idx])
    X_pool = np.delete(X_pool, idx, axis=0)
    y_pool = np.delete(y_pool, idx, axis=0)
    print(f"Step {it + 1}: added point {idx}, pool left: {len(X_pool)}")
    preds_list = []
    for model, likelihood in committee:
        model.eval()
        likelihood.eval()
        X_hold = torch.tensor(X_pool, dtype=torch.float32).to(device)
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            preds = model(X_hold)
            y_pred = preds.mean.cpu().numpy().flatten()
            y_true = y_pool
        preds_list.append(y_pred)
    y_pred = np.mean(preds_list, axis=0)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    list_r2.append(r2)
    list_mae.append(mae)
    list_elem.append(it)
    print(f"Final MAE: {mae:.4f}, R2: {r2:.4f}")


tensor(1.7004e-07, grad_fn=<MeanBackward0>) loss
grads [-1.2247035e-06 -1.1446273e-05 -2.2081349e-05  2.9464923e-06
 -5.0549438e-06  1.5494745e-05  4.5846547e-05]
[-1.2247035e-06 -1.1446273e-05 -2.2081349e-05  2.9464923e-06
 -5.0549438e-06  1.5494745e-05  4.5846547e-05] grads
[-1. -1. -1.  1. -1.  1.  1.] sign
tensor(3.2476e-06, grad_fn=<MeanBackward0>) loss
grads [ 3.3342192e-06 -8.9873625e-05  6.0001394e-04 -1.7901872e-05
  6.7758934e-05 -1.2131255e-04 -2.3981062e-05]
[ 3.3342192e-06 -8.9873625e-05  6.0001394e-04 -1.7901872e-05
  6.7758934e-05 -1.2131255e-04 -2.3981062e-05] grads
[ 1. -1.  1. -1.  1. -1. -1.] sign
tensor(6.1287e-06, grad_fn=<MeanBackward0>) loss
grads [-2.5645788e-05  2.7449342e-04 -3.5056350e-05  1.1529815e-05
 -1.2369631e-04  1.8526912e-04  2.4901610e-04]
[-2.5645788e-05  2.7449342e-04 -3.5056350e-05  1.1529815e-05
 -1.2369631e-04  1.8526912e-04  2.4901610e-04] grads
[-1.  1. -1.  1. -1.  1.  1.] sign
tensor(1.8983e-06, grad_fn=<MeanBackward0>) loss
grads [-9.22170

In [15]:
print(list_elem)
print(list_mae)
print(list_r2)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
[0.08993083868385854, 0.0876854281490194, 0.05731188349968657, 0.07404431981070547, 0.07500355139022577, 0.07875482941388906, 0.04476109118214158, 0.04764847958399539, 0.044921811259819124, 0.04439842789256572, 0.0395787171704587, 0.03970555618423374, 0.039665013367192255, 0.03931160218369782, 0.03938249252458535, 0.038421764386621045, 0.03879848194393555, 0.028113647333100518, 0.029338264794302727, 0.027791719689908928, 0.027144649041973064, 0.027330308940072042, 0.028246419256938892, 0.028617094367180147, 0.028501016772035676, 0.02856995457105316, 0.028636451353078123, 0.028670583326873705, 0.028611844149050894, 0.028285156795062764]
[-0.5360053552324047, -0.4625639821771159, 0.35894134345364526, -0.0002563849641277205, -0.04252844009739465, -0.018861356635506477, 0.6617741946170872, 0.625590311098734, 0.6614098057403188, 0.6674341271623088, 0.7212662594537091, 0.71985499619