In [4]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Генерация данных
def generate_data(pe, n_samples):
    x_inv = np.random.randn(n_samples, 2)
    Y = x_inv.sum(axis=1, keepdims=True) + 0.1 * np.random.randn(n_samples, 1)
    x_env = np.column_stack([Y.squeeze(), Y.squeeze()]) + np.sqrt(pe) * np.random.randn(n_samples, 2)
    X = np.hstack([x_inv, x_env])
    return X, Y

# Создание DataLoader
def create_dataloader(pe, count, batch_size):
    res_X, res_Y = [], []
    for p in pe:
        X, Y = generate_data(p, count)
        res_X.append(X)
        res_Y.append(Y)
    # Используем vstack вместо устаревшего row_stack
    res_X = np.vstack(res_X)
    res_Y = np.vstack(res_Y)
    dataset = TensorDataset(torch.tensor(res_X, dtype=torch.float32),
                          torch.tensor(res_Y, dtype=torch.float32))
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [24]:
# Параметры данных
pe_train = [0.1, 0.3, 0.5, 0.7, 0.9]
pe_val = [0.4, 0.8]
pe_test = [10.0, 100.0]
count = 2000
batch_size = 16

np.random.seed(50)
torch.manual_seed(50)

# Создаем загрузчики данных
train_loader = create_dataloader(pe_train, count, batch_size)
val_loader = create_dataloader(pe_val, count, batch_size)
test_loader = create_dataloader(pe_test, count, batch_size)

In [20]:
# Модель для регрессии
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(4, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        return self.net(x)

# Функции для оценки
def evaluate_model(model, dataloader):
    model.eval()
    mse_loss = 0
    mae_loss = 0
    criterion_mse = nn.MSELoss()
    criterion_mae = nn.L1Loss()

    with torch.no_grad():
        for X, Y in dataloader:
            outputs = model(X)
            mse_loss += criterion_mse(outputs, Y).item()
            mae_loss += criterion_mae(outputs, Y).item()

    return mse_loss/len(dataloader), mae_loss/len(dataloader)

In [14]:
# Обучение ансамбля
def train_ensemble(train_loader, val_loader, pe_list, n_epochs=100):
    ensemble = []

    for i, excluded_pe in enumerate(pe_list):
        print(f"\nTraining model {i+1}, excluding pe={excluded_pe}")

        # Создаем подмножество данных без excluded_pe
        all_X, all_Y = [], []
        for X, Y in train_loader:
            all_X.append(X)
            all_Y.append(Y)
        all_X = torch.cat(all_X)
        all_Y = torch.cat(all_Y)

        # Разделяем данные по pe (предполагаем упорядоченность)
        samples_per_pe = len(all_X) // len(pe_list)
        mask = torch.ones(len(all_X), dtype=bool)
        start_idx = i * samples_per_pe
        end_idx = start_idx + samples_per_pe
        mask[start_idx:end_idx] = False

        partial_dataset = TensorDataset(all_X[mask], all_Y[mask])
        partial_loader = DataLoader(partial_dataset, batch_size=batch_size, shuffle=True)

        # Инициализация и обучение модели
        model = RegressionModel()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.MSELoss()

        for epoch in range(n_epochs):
            model.train()
            epoch_loss = 0
            for X_batch, Y_batch in partial_loader:
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, Y_batch)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

            # Валидация
            val_mse, val_mae = evaluate_model(model, val_loader)
            if (epoch % 7 == 0 or epoch == n_epochs - 1):
              print(f"Epoch {epoch+1}/{n_epochs} | Train Loss: {epoch_loss/len(partial_loader):.4f} | Val MSE: {val_mse:.4f} | Val MAE: {val_mae:.4f}")

        ensemble.append(model)

    return ensemble

In [25]:
# Обучение ансамбля из 5 моделей
ensemble = train_ensemble(train_loader, val_loader, pe_train, n_epochs=20)


Training model 1, excluding pe=0.1
Epoch 1/20 | Train Loss: 0.4676 | Val MSE: 0.0663 | Val MAE: 0.1993
Epoch 8/20 | Train Loss: 0.0103 | Val MSE: 0.0123 | Val MAE: 0.0885
Epoch 15/20 | Train Loss: 0.0104 | Val MSE: 0.0114 | Val MAE: 0.0853
Epoch 20/20 | Train Loss: 0.0100 | Val MSE: 0.0104 | Val MAE: 0.0816

Training model 2, excluding pe=0.3
Epoch 1/20 | Train Loss: 0.9358 | Val MSE: 0.6200 | Val MAE: 0.5072
Epoch 8/20 | Train Loss: 0.0614 | Val MSE: 0.0508 | Val MAE: 0.1185
Epoch 15/20 | Train Loss: 0.0192 | Val MSE: 0.0178 | Val MAE: 0.0884
Epoch 20/20 | Train Loss: 0.0141 | Val MSE: 0.0137 | Val MAE: 0.0848

Training model 3, excluding pe=0.5
Epoch 1/20 | Train Loss: 0.3603 | Val MSE: 0.0296 | Val MAE: 0.1338
Epoch 8/20 | Train Loss: 0.0101 | Val MSE: 0.0109 | Val MAE: 0.0833
Epoch 15/20 | Train Loss: 0.0100 | Val MSE: 0.0116 | Val MAE: 0.0856
Epoch 20/20 | Train Loss: 0.0099 | Val MSE: 0.0105 | Val MAE: 0.0814

Training model 4, excluding pe=0.7
Epoch 1/20 | Train Loss: 0.4862 | 

In [26]:
# Оценка ансамбля на тестовых данных
def evaluate_ensemble(ensemble, test_loader):
    ensemble_mse = 0
    ensemble_mae = 0
    criterion_mse = nn.MSELoss()
    criterion_mae = nn.L1Loss()

    with torch.no_grad():
        for X, Y in test_loader:
            # Усредняем предсказания всех моделей
            predictions = torch.stack([model(X) for model in ensemble]).mean(dim=0)
            ensemble_mse += criterion_mse(predictions, Y).item()
            ensemble_mae += criterion_mae(predictions, Y).item()

    return ensemble_mse/len(test_loader), ensemble_mae/len(test_loader)

test_mse, test_mae = evaluate_ensemble(ensemble, test_loader)
print(f"\nFinal Ensemble Test Metrics: MSE = {test_mse:.4f}, MAE = {test_mae:.4f}")


Final Ensemble Test Metrics: MSE = 0.7947, MAE = 0.5285
