<a href="https://colab.research.google.com/github/SilkenMocha/Hackathon_Ac_BO/blob/main/BOtorch_subset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch
!pip install torch_geometric
!pip install ax_platform
!pip install botorch

In [None]:
import copy
import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import GRU, Linear, ReLU, Sequential

import torch_geometric.transforms as T
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch_geometric.nn import NNConv, Set2Set
from torch_geometric.utils import remove_self_loops
import pandas as pd

# Column of the label matrix that MyTransform keeps as the regression target.
target = 0
# Module-level width; objective_function iterates over its own `dim` loop
# variable, so this value is not the one its networks are built with.
dim = 64


class MyTransform:
    """Transform that keeps a single label column, chosen by the module-level ``target``."""

    def __call__(self, data):
        # Operate on a shallow copy so the object handed in keeps its full
        # label matrix.
        selected = copy.copy(data)
        selected.y = selected.y[:, target]
        return selected


class Complete:
    """Transform that rewires a graph into a complete directed graph without self-loops.

    Attributes of edges that already existed are kept at their
    (source, target) position; every newly created edge gets an all-zero
    attribute row.
    """

    def __call__(self, data):
        graph = copy.copy(data)
        n = graph.num_nodes
        nodes = torch.arange(n, dtype=torch.long,
                             device=graph.edge_index.device)

        # All ordered pairs (i, j) in row-major order: sources are
        # 0,0,...,1,1,... and targets are 0,1,...,0,1,...
        full_index = torch.stack(
            [nodes.repeat_interleave(n), nodes.repeat(n)], dim=0)

        full_attr = None
        if graph.edge_attr is not None:
            # Flat position of each existing edge inside the n*n pair list.
            flat_pos = graph.edge_index[0] * n + graph.edge_index[1]
            shape = list(graph.edge_attr.size())
            shape[0] = n * n
            full_attr = graph.edge_attr.new_zeros(shape)
            full_attr[flat_pos] = graph.edge_attr

        full_index, full_attr = remove_self_loops(full_index, full_attr)
        graph.edge_attr = full_attr
        graph.edge_index = full_index
        return graph


# "__file__" is a quoted string here, not the module variable, so realpath()
# resolves it against the current working directory; the dataset root is
# therefore ../data/QM9 relative to that directory.
path = osp.join(osp.dirname(osp.realpath("__file__")), '..', 'data', 'QM9')
# Per-sample pipeline: keep the target column, complete the graph, then apply
# T.Distance(norm=False) (presumably appends pairwise distances to the edge
# attributes — confirm against the torch_geometric docs).
transform = T.Compose([MyTransform(), Complete(), T.Distance(norm=False)])
# Load the dataset and shuffle it.
dataset = QM9(path, transform=transform).shuffle()

# Work on a smaller subset: take the first `subset_size` entries of a
# fixed-seed random permutation of the dataset indices.

subset_size = 1000  # Number of samples kept
subset_indices = torch.randperm(len(dataset), generator=torch.Generator().manual_seed(15))[:subset_size]  # Seeded random index selection
dataset = dataset[subset_indices]

# Normalise the labels to zero mean / unit variance and keep the scalar
# statistics of the selected target column.
# NOTE(review): `.data` may refer to the storage of the whole dataset rather
# than only the selected subset — confirm for the torch_geometric version used.
mean = dataset.data.y.mean(dim=0, keepdim=True)
std = dataset.data.y.std(dim=0, keepdim=True)
dataset.data.y = (dataset.data.y - mean) / std
mean, std = mean[:, target].item(), std[:, target].item()

# Split according to the subset size: first 20% test, next 20% validation,
# remaining 60% training.
test_dataset = dataset[:int(subset_size*0.2)]
val_dataset = dataset[int(subset_size*0.2):int(subset_size*0.4)]
train_dataset = dataset[int(subset_size*0.4):]
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)



def objective_function(dim_values):
    """Train one network per hidden width and tabulate the best validation MAE.

    For every width in ``dim_values`` a fresh NNConv/GRU/Set2Set regressor is
    trained for 50 epochs on the module-level ``train_loader`` and evaluated
    on ``val_loader`` and ``test_loader``.  Per-epoch progress is printed, and
    the final table is printed as well.

    Args:
        dim_values: Iterable of hidden feature widths to evaluate.

    Returns:
        pandas.DataFrame: One row per width with columns ``'Dim'`` and
        ``'Función de pérdida'`` (the lowest validation MAE reached).
    """

    class Net(torch.nn.Module):
        """Edge-conditioned message-passing network with a Set2Set readout."""

        def __init__(self, dim):
            super().__init__()
            self.lin0 = torch.nn.Linear(dataset.num_features, dim)

            # Edge network: maps the 5 edge features to dim * dim values
            # consumed by NNConv.
            nn = Sequential(Linear(5, 128), ReLU(), Linear(128, dim * dim))
            self.conv = NNConv(dim, dim, nn, aggr='mean')
            self.gru = GRU(dim, dim)

            self.set2set = Set2Set(dim, processing_steps=3)
            self.lin1 = torch.nn.Linear(2 * dim, dim)
            self.lin2 = torch.nn.Linear(dim, 1)

        def forward(self, data):
            out = F.relu(self.lin0(data.x))
            h = out.unsqueeze(0)

            # Three rounds of message passing, each followed by a GRU update.
            for _ in range(3):
                m = F.relu(self.conv(out, data.edge_index, data.edge_attr))
                out, h = self.gru(m.unsqueeze(0), h)
                out = out.squeeze(0)

            out = self.set2set(out, data.batch)
            out = F.relu(self.lin1(out))
            out = self.lin2(out)
            return out.view(-1)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def train(model, optimizer):
        """Run one optimisation pass over train_loader; return the mean MSE."""
        model.train()
        loss_all = 0

        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            loss = F.mse_loss(model(data), data.y)
            loss.backward()
            loss_all += loss.item() * data.num_graphs
            optimizer.step()
        return loss_all / len(train_loader.dataset)

    @torch.no_grad()  # evaluation needs no autograd graph
    def test(model, loader):
        """Return the MAE over ``loader``, rescaled by the module-level ``std``."""
        model.eval()
        error = 0

        for data in loader:
            data = data.to(device)
            error += (model(data) * std - data.y * std).abs().sum().item()  # MAE
        return error / len(loader.dataset)

    # Rows are collected in a list: DataFrame.append no longer exists in
    # pandas >= 2.0.
    rows = []

    for dim in dim_values:
        # Seed every generator (CPU and CUDA).  The weights are created on the
        # CPU before .to(device) and the DataLoader shuffles with the CPU
        # generator, so CUDA-only seeding would not make runs repeatable.
        torch.manual_seed(15)

        model = Net(dim).to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        # Shrinks the learning rate when the validation error stops improving.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                               factor=0.7, patience=5,
                                                               min_lr=0.00001)

        best_val_error = None
        for epoch in range(1, 51):
            lr = scheduler.optimizer.param_groups[0]['lr']
            loss = train(model, optimizer)
            val_error = test(model, val_loader)
            scheduler.step(val_error)

            # The test error is refreshed only when validation improves, so
            # the printed value belongs to the best model seen so far.
            if best_val_error is None or val_error <= best_val_error:
                test_error = test(model, test_loader)
                best_val_error = val_error

            print(f'Epoch: {epoch:03d}, LR: {lr:7f}, Loss: {loss:.7f}, '
                  f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

        rows.append({'Dim': dim, 'Función de pérdida': best_val_error})

    results = pd.DataFrame(rows, columns=['Dim', 'Función de pérdida'])
    print(results)
    return results

# Definir los valores de neuronas para probar
dim_values = [64, 128, 256, 512]  # Hidden widths to compare

# Run the sweep; objective_function prints the per-epoch log and the final table.
objective_function(dim_values)

In [None]:
import copy
import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import GRU, Linear, ReLU, Sequential

import torch_geometric.transforms as T
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch_geometric.nn import NNConv, Set2Set
from torch_geometric.utils import remove_self_loops
import pandas as pd

# Column of the label matrix that MyTransform keeps as the regression target.
target = 0
# Hidden feature width read by the Net class inside subset_size_objective.
dim = 64


class MyTransform:
    """Transform that keeps a single label column, chosen by the module-level ``target``."""

    def __call__(self, data):
        # Operate on a shallow copy so the object handed in keeps its full
        # label matrix.
        selected = copy.copy(data)
        selected.y = selected.y[:, target]
        return selected


class Complete:
    """Transform that rewires a graph into a complete directed graph without self-loops.

    Attributes of edges that already existed are kept at their
    (source, target) position; every newly created edge gets an all-zero
    attribute row.
    """

    def __call__(self, data):
        graph = copy.copy(data)
        n = graph.num_nodes
        nodes = torch.arange(n, dtype=torch.long,
                             device=graph.edge_index.device)

        # All ordered pairs (i, j) in row-major order: sources are
        # 0,0,...,1,1,... and targets are 0,1,...,0,1,...
        full_index = torch.stack(
            [nodes.repeat_interleave(n), nodes.repeat(n)], dim=0)

        full_attr = None
        if graph.edge_attr is not None:
            # Flat position of each existing edge inside the n*n pair list.
            flat_pos = graph.edge_index[0] * n + graph.edge_index[1]
            shape = list(graph.edge_attr.size())
            shape[0] = n * n
            full_attr = graph.edge_attr.new_zeros(shape)
            full_attr[flat_pos] = graph.edge_attr

        full_index, full_attr = remove_self_loops(full_index, full_attr)
        graph.edge_attr = full_attr
        graph.edge_index = full_index
        return graph


# "__file__" is a quoted string here, not the module variable, so realpath()
# resolves it against the current working directory; the dataset root is
# therefore ../data/QM9 relative to that directory.
path = osp.join(osp.dirname(osp.realpath("__file__")), '..', 'data', 'QM9')
# Per-sample pipeline: keep the target column, complete the graph, then apply
# T.Distance(norm=False).
transform = T.Compose([MyTransform(), Complete(), T.Distance(norm=False)])
# Load the dataset and shuffle it.
dataset = QM9(path, transform=transform).shuffle()

# Smaller subsets are drawn inside subset_size_objective from a fixed-seed
# random permutation of this dataset (not simply its first N samples).


def subset_size_objective(subset_size):
    """Train the graph regressor on a random subset and return its best validation MAE.

    The first ``subset_size`` entries of a fixed-seed permutation of the
    module-level ``dataset`` are split 20% test / 20% validation / 60%
    training, and the network is trained for 5 epochs with per-epoch logging.

    Args:
        subset_size: Number of samples to draw from ``dataset``.

    Returns:
        float: Lowest validation mean absolute error over the epochs, in the
        units of the un-normalised target.

    Raises:
        ValueError: If ``subset_size`` is too small to yield non-empty test
            and validation splits.
    """
    subset_size = int(subset_size)
    n_test = int(subset_size * 0.2)
    n_holdout = int(subset_size * 0.4)
    if n_test < 1 or n_holdout <= n_test:
        raise ValueError(
            f'subset_size={subset_size} is too small to build non-empty '
            f'test and validation splits')

    # Same seed on every call, so every call draws from the same permutation.
    generator = torch.Generator().manual_seed(15)
    subset_indices = torch.randperm(len(dataset), generator=generator)[:subset_size]
    subset_dataset = dataset[subset_indices]

    # Normalisation statistics of the target column.
    # NOTE(review): `.data` appears to be storage shared with the parent
    # `dataset` (index selection is a shallow copy in torch_geometric).
    # Writing normalised labels back into it would re-normalise already
    # normalised labels on every later call, leaving `std` close to 1 and the
    # reported MAE in different units from call to call.  The labels are
    # therefore left untouched and normalised on the fly below.
    labels = subset_dataset.data.y
    mean = labels.mean(dim=0, keepdim=True)[:, target].item()
    std = labels.std(dim=0, keepdim=True)[:, target].item()

    # First 20% test, next 20% validation, remainder training.
    test_dataset = subset_dataset[:n_test]
    val_dataset = subset_dataset[n_test:n_holdout]
    train_dataset = subset_dataset[n_holdout:]

    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

    class Net(torch.nn.Module):
        """Edge-conditioned message-passing network with a Set2Set readout."""

        def __init__(self):
            super().__init__()
            self.lin0 = torch.nn.Linear(dataset.num_features, dim)

            # Edge network: maps the 5 edge features to dim * dim values
            # consumed by NNConv.
            nn = Sequential(Linear(5, 128), ReLU(), Linear(128, dim * dim))
            self.conv = NNConv(dim, dim, nn, aggr='mean')
            self.gru = GRU(dim, dim)

            self.set2set = Set2Set(dim, processing_steps=3)
            self.lin1 = torch.nn.Linear(2 * dim, dim)
            self.lin2 = torch.nn.Linear(dim, 1)

        def forward(self, data):
            out = F.relu(self.lin0(data.x))
            h = out.unsqueeze(0)

            # Three rounds of message passing, each followed by a GRU update.
            for _ in range(3):
                m = F.relu(self.conv(out, data.edge_index, data.edge_attr))
                out, h = self.gru(m.unsqueeze(0), h)
                out = out.squeeze(0)

            out = self.set2set(out, data.batch)
            out = F.relu(self.lin1(out))
            out = self.lin2(out)
            return out.view(-1)

    # Seed every generator (CPU and CUDA).  The weights are created on the CPU
    # before .to(device) and the DataLoader shuffles with the CPU generator,
    # so CUDA-only seeding would not make runs repeatable.
    torch.manual_seed(15)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net().to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Shrinks the learning rate when the validation error stops improving.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                           factor=0.7, patience=5,
                                                           min_lr=0.00001)

    def train(epoch):
        """Run one optimisation pass; return the mean MSE on normalised labels."""
        model.train()
        loss_all = 0

        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            loss = F.mse_loss(model(data), (data.y - mean) / std)
            loss.backward()
            loss_all += loss.item() * data.num_graphs
            optimizer.step()
        return loss_all / len(train_loader.dataset)

    @torch.no_grad()  # evaluation needs no autograd graph
    def test(loader):
        """Return the MAE over ``loader`` in un-normalised target units."""
        model.eval()
        error = 0

        for data in loader:
            data = data.to(device)
            # model(data) * std is the de-normalised prediction minus `mean`.
            error += (model(data) * std - (data.y - mean)).abs().sum().item()  # MAE
        return error / len(loader.dataset)

    best_val_error = None
    for epoch in range(1, 6):
        lr = scheduler.optimizer.param_groups[0]['lr']
        loss = train(epoch)
        val_error = test(val_loader)
        scheduler.step(val_error)

        # The test error is refreshed only when validation improves.
        if best_val_error is None or val_error <= best_val_error:
            test_error = test(test_loader)
            best_val_error = val_error

        # Report the size this call was given rather than reading a global.
        print(f'Epoch: {epoch:03d}, Batch: {subset_size}, LR: {lr:7f}, Loss: {loss:.7f}, '
              f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

    return best_val_error

# Tamaño del subconjunto a optimizar
# Subset sizes to compare.
subset_size = [500, 750, 1000, 1250]

# Evaluate every size and report the best validation error it reached.
# Iterating over a tensor makes `size` a 0-d tensor, so `.item()` is valid
# (a plain int has no `.item()`); the objective receives an ordinary int.
for size in torch.tensor(subset_size):
    subset_loss = subset_size_objective(size.item())
    print(f'Subset Size: {size.item()}, Final Loss: {subset_loss}')


# Bayesian optimization over subset sizes

In [None]:
import copy
import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import GRU, Linear, ReLU, Sequential

import torch_geometric.transforms as T
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch_geometric.nn import NNConv, Set2Set
from torch_geometric.utils import remove_self_loops
import pandas as pd


from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.fit import fit_gpytorch_model
from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf


# Column of the label matrix that MyTransform keeps as the regression target.
target = 0
# Hidden feature width read by the Net class inside subset_size_objective.
dim = 64


class MyTransform:
    """Transform that keeps a single label column, chosen by the module-level ``target``."""

    def __call__(self, data):
        # Operate on a shallow copy so the object handed in keeps its full
        # label matrix.
        selected = copy.copy(data)
        selected.y = selected.y[:, target]
        return selected


class Complete:
    """Transform that rewires a graph into a complete directed graph without self-loops.

    Attributes of edges that already existed are kept at their
    (source, target) position; every newly created edge gets an all-zero
    attribute row.
    """

    def __call__(self, data):
        graph = copy.copy(data)
        n = graph.num_nodes
        nodes = torch.arange(n, dtype=torch.long,
                             device=graph.edge_index.device)

        # All ordered pairs (i, j) in row-major order: sources are
        # 0,0,...,1,1,... and targets are 0,1,...,0,1,...
        full_index = torch.stack(
            [nodes.repeat_interleave(n), nodes.repeat(n)], dim=0)

        full_attr = None
        if graph.edge_attr is not None:
            # Flat position of each existing edge inside the n*n pair list.
            flat_pos = graph.edge_index[0] * n + graph.edge_index[1]
            shape = list(graph.edge_attr.size())
            shape[0] = n * n
            full_attr = graph.edge_attr.new_zeros(shape)
            full_attr[flat_pos] = graph.edge_attr

        full_index, full_attr = remove_self_loops(full_index, full_attr)
        graph.edge_attr = full_attr
        graph.edge_index = full_index
        return graph


# "__file__" is a quoted string here, not the module variable, so realpath()
# resolves it against the current working directory; the dataset root is
# therefore ../data/QM9 relative to that directory.
path = osp.join(osp.dirname(osp.realpath("__file__")), '..', 'data', 'QM9')
# Per-sample pipeline: keep the target column, complete the graph, then apply
# T.Distance(norm=False).
transform = T.Compose([MyTransform(), Complete(), T.Distance(norm=False)])
# Load the dataset and shuffle it.
dataset = QM9(path, transform=transform).shuffle()

# Smaller subsets are drawn inside subset_size_objective from a fixed-seed
# random permutation of this dataset (not simply its first N samples).


def subset_size_objective(subset_size):
    """Train the graph regressor on a random subset and return its best validation MAE.

    The first ``subset_size`` entries of a fixed-seed permutation of the
    module-level ``dataset`` are split 20% test / 20% validation / 60%
    training, and the network is trained for 5 epochs with per-epoch logging.

    Args:
        subset_size: Number of samples to draw from ``dataset``.

    Returns:
        float: Lowest validation mean absolute error over the epochs, in the
        units of the un-normalised target.

    Raises:
        ValueError: If ``subset_size`` is too small to yield non-empty test
            and validation splits.
    """
    subset_size = int(subset_size)
    n_test = int(subset_size * 0.2)
    n_holdout = int(subset_size * 0.4)
    if n_test < 1 or n_holdout <= n_test:
        raise ValueError(
            f'subset_size={subset_size} is too small to build non-empty '
            f'test and validation splits')

    # Same seed on every call, so every call draws from the same permutation.
    generator = torch.Generator().manual_seed(15)
    subset_indices = torch.randperm(len(dataset), generator=generator)[:subset_size]
    subset_dataset = dataset[subset_indices]

    # Normalisation statistics of the target column.
    # NOTE(review): `.data` appears to be storage shared with the parent
    # `dataset` (index selection is a shallow copy in torch_geometric).
    # Writing normalised labels back into it would re-normalise already
    # normalised labels on every later call, leaving `std` close to 1 and the
    # objective values fed to the surrogate model in inconsistent units.  The
    # labels are therefore left untouched and normalised on the fly below.
    labels = subset_dataset.data.y
    mean = labels.mean(dim=0, keepdim=True)[:, target].item()
    std = labels.std(dim=0, keepdim=True)[:, target].item()

    # First 20% test, next 20% validation, remainder training.
    test_dataset = subset_dataset[:n_test]
    val_dataset = subset_dataset[n_test:n_holdout]
    train_dataset = subset_dataset[n_holdout:]

    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

    class Net(torch.nn.Module):
        """Edge-conditioned message-passing network with a Set2Set readout."""

        def __init__(self):
            super().__init__()
            self.lin0 = torch.nn.Linear(dataset.num_features, dim)

            # Edge network: maps the 5 edge features to dim * dim values
            # consumed by NNConv.
            nn = Sequential(Linear(5, 128), ReLU(), Linear(128, dim * dim))
            self.conv = NNConv(dim, dim, nn, aggr='mean')
            self.gru = GRU(dim, dim)

            self.set2set = Set2Set(dim, processing_steps=3)
            self.lin1 = torch.nn.Linear(2 * dim, dim)
            self.lin2 = torch.nn.Linear(dim, 1)

        def forward(self, data):
            out = F.relu(self.lin0(data.x))
            h = out.unsqueeze(0)

            # Three rounds of message passing, each followed by a GRU update.
            for _ in range(3):
                m = F.relu(self.conv(out, data.edge_index, data.edge_attr))
                out, h = self.gru(m.unsqueeze(0), h)
                out = out.squeeze(0)

            out = self.set2set(out, data.batch)
            out = F.relu(self.lin1(out))
            out = self.lin2(out)
            return out.view(-1)

    # Seed every generator (CPU and CUDA).  The weights are created on the CPU
    # before .to(device) and the DataLoader shuffles with the CPU generator,
    # so CUDA-only seeding would not make runs repeatable.
    torch.manual_seed(15)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net().to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Shrinks the learning rate when the validation error stops improving.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                           factor=0.7, patience=5,
                                                           min_lr=0.00001)

    def train(epoch):
        """Run one optimisation pass; return the mean MSE on normalised labels."""
        model.train()
        loss_all = 0

        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            loss = F.mse_loss(model(data), (data.y - mean) / std)
            loss.backward()
            loss_all += loss.item() * data.num_graphs
            optimizer.step()
        return loss_all / len(train_loader.dataset)

    @torch.no_grad()  # evaluation needs no autograd graph
    def test(loader):
        """Return the MAE over ``loader`` in un-normalised target units."""
        model.eval()
        error = 0

        for data in loader:
            data = data.to(device)
            # model(data) * std is the de-normalised prediction minus `mean`.
            error += (model(data) * std - (data.y - mean)).abs().sum().item()  # MAE
        return error / len(loader.dataset)

    best_val_error = None
    for epoch in range(1, 6):
        lr = scheduler.optimizer.param_groups[0]['lr']
        loss = train(epoch)
        val_error = test(val_loader)
        scheduler.step(val_error)

        # The test error is refreshed only when validation improves.
        if best_val_error is None or val_error <= best_val_error:
            test_error = test(test_loader)
            best_val_error = val_error

        print(f'Epoch: {epoch:03d}, Batch:{subset_size} , LR: {lr:7f}, Loss: {loss:.7f}, '
              f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

    return best_val_error




# Definir el espacio de búsqueda (los tamaños de subconjunto)
# Search space: the subset sizes evaluated to seed the surrogate model.
subset_size = torch.tensor([[500], [750], [1000], [1250]])
# Double precision, as BoTorch recommends for Gaussian-process fitting.
subset_sizef = subset_size.double()

# Standardise the inputs (zero mean, unit standard deviation).
size_mean = subset_sizef.mean()
size_std = subset_sizef.std()
subset_size_normalized = (subset_sizef - size_mean) / size_std

# Fit a Gaussian-process surrogate on (normalised size, best validation MAE).
train_X = subset_size_normalized
train_Y = torch.tensor(
    [subset_size_objective(size.item()) for size in subset_size],
    dtype=torch.double,
).unsqueeze(-1)
gp = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
# NOTE(review): newer BoTorch releases replace fit_gpytorch_model with
# fit_gpytorch_mll — confirm against the installed version (the import at the
# top of this cell would need the same change).
fit_gpytorch_model(mll)

# Expected Improvement.  The objective is an error to be minimised and best_f
# is the smallest value observed, so improvement must be measured downwards:
# maximize=False (the default assumes a maximisation problem).
EI = ExpectedImprovement(gp, best_f=train_Y.min(), maximize=False)

# Search only inside the range of sizes that was evaluated.  A fixed [-3, 3]
# box in normalised units would de-normalise to sizes from about -93 to 1843.
bounds = torch.stack([train_X.min(dim=0).values, train_X.max(dim=0).values])

candidate, acq_value = optimize_acqf(
    EI,
    bounds=bounds,
    q=1,  # Number of candidates to return
    num_restarts=20,  # Restarts of the multi-start optimiser
    raw_samples=512,  # Raw samples used to pick the starting points
)

# Map the candidate back to a sample count; a subset size must be an integer.
best_size = torch.round(candidate * size_std + size_mean)

print(f"El mejor tamaño de subconjunto encontrado es: {best_size.item()}")


# Bayesian optimization over subset sizes sampled from the range 500–1250

In [None]:
import copy
import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import GRU, Linear, ReLU, Sequential

import torch_geometric.transforms as T
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch_geometric.nn import NNConv, Set2Set
from torch_geometric.utils import remove_self_loops
import pandas as pd


from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.fit import fit_gpytorch_model
from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf
from botorch.utils import draw_sobol_samples


# Column of the label matrix that MyTransform keeps as the regression target.
target = 0
# Hidden feature width read by the Net class inside subset_size_objective.
dim = 64


class MyTransform:
    """Transform that keeps a single label column, chosen by the module-level ``target``."""

    def __call__(self, data):
        # Operate on a shallow copy so the object handed in keeps its full
        # label matrix.
        selected = copy.copy(data)
        selected.y = selected.y[:, target]
        return selected


class Complete:
    """Transform that rewires a graph into a complete directed graph without self-loops.

    Attributes of edges that already existed are kept at their
    (source, target) position; every newly created edge gets an all-zero
    attribute row.
    """

    def __call__(self, data):
        graph = copy.copy(data)
        n = graph.num_nodes
        nodes = torch.arange(n, dtype=torch.long,
                             device=graph.edge_index.device)

        # All ordered pairs (i, j) in row-major order: sources are
        # 0,0,...,1,1,... and targets are 0,1,...,0,1,...
        full_index = torch.stack(
            [nodes.repeat_interleave(n), nodes.repeat(n)], dim=0)

        full_attr = None
        if graph.edge_attr is not None:
            # Flat position of each existing edge inside the n*n pair list.
            flat_pos = graph.edge_index[0] * n + graph.edge_index[1]
            shape = list(graph.edge_attr.size())
            shape[0] = n * n
            full_attr = graph.edge_attr.new_zeros(shape)
            full_attr[flat_pos] = graph.edge_attr

        full_index, full_attr = remove_self_loops(full_index, full_attr)
        graph.edge_attr = full_attr
        graph.edge_index = full_index
        return graph


# "__file__" is a quoted string here, not the module variable, so realpath()
# resolves it against the current working directory; the dataset root is
# therefore ../data/QM9 relative to that directory.
path = osp.join(osp.dirname(osp.realpath("__file__")), '..', 'data', 'QM9')
# Per-sample pipeline: keep the target column, complete the graph, then apply
# T.Distance(norm=False).
transform = T.Compose([MyTransform(), Complete(), T.Distance(norm=False)])
# Load the dataset and shuffle it.
dataset = QM9(path, transform=transform).shuffle()

# Smaller subsets are drawn inside subset_size_objective from a fixed-seed
# random permutation of this dataset (not simply its first N samples).


def subset_size_objective(subset_sizes_samples):
    """Train the graph regressor on a random subset and return its best validation MAE.

    The first ``subset_sizes_samples`` entries of a fixed-seed permutation of
    the module-level ``dataset`` are split 20% test / 20% validation / 60%
    training, and the network is trained for 5 epochs with per-epoch logging.

    Args:
        subset_sizes_samples: Number of samples to draw from ``dataset``
            (a single size, despite the plural name).

    Returns:
        float: Lowest validation mean absolute error over the epochs, in the
        units of the un-normalised target.

    Raises:
        ValueError: If the requested size is too small to yield non-empty
            test and validation splits.
    """
    n_samples = int(subset_sizes_samples)
    n_test = int(n_samples * 0.2)
    n_holdout = int(n_samples * 0.4)
    if n_test < 1 or n_holdout <= n_test:
        raise ValueError(
            f'subset size {n_samples} is too small to build non-empty '
            f'test and validation splits')

    # Same seed on every call, so every call draws from the same permutation.
    generator = torch.Generator().manual_seed(15)
    subset_indices = torch.randperm(len(dataset), generator=generator)[:n_samples]
    subset_dataset = dataset[subset_indices]

    # Normalisation statistics of the target column.
    # NOTE(review): `.data` appears to be storage shared with the parent
    # `dataset` (index selection is a shallow copy in torch_geometric).
    # Writing normalised labels back into it would re-normalise already
    # normalised labels on every later call, leaving `std` close to 1 and the
    # objective values fed to the surrogate model in inconsistent units.  The
    # labels are therefore left untouched and normalised on the fly below.
    labels = subset_dataset.data.y
    mean = labels.mean(dim=0, keepdim=True)[:, target].item()
    std = labels.std(dim=0, keepdim=True)[:, target].item()

    # First 20% test, next 20% validation, remainder training.
    test_dataset = subset_dataset[:n_test]
    val_dataset = subset_dataset[n_test:n_holdout]
    train_dataset = subset_dataset[n_holdout:]

    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

    class Net(torch.nn.Module):
        """Edge-conditioned message-passing network with a Set2Set readout."""

        def __init__(self):
            super().__init__()
            self.lin0 = torch.nn.Linear(dataset.num_features, dim)

            # Edge network: maps the 5 edge features to dim * dim values
            # consumed by NNConv.
            nn = Sequential(Linear(5, 128), ReLU(), Linear(128, dim * dim))
            self.conv = NNConv(dim, dim, nn, aggr='mean')
            self.gru = GRU(dim, dim)

            self.set2set = Set2Set(dim, processing_steps=3)
            self.lin1 = torch.nn.Linear(2 * dim, dim)
            self.lin2 = torch.nn.Linear(dim, 1)

        def forward(self, data):
            out = F.relu(self.lin0(data.x))
            h = out.unsqueeze(0)

            # Three rounds of message passing, each followed by a GRU update.
            for _ in range(3):
                m = F.relu(self.conv(out, data.edge_index, data.edge_attr))
                out, h = self.gru(m.unsqueeze(0), h)
                out = out.squeeze(0)

            out = self.set2set(out, data.batch)
            out = F.relu(self.lin1(out))
            out = self.lin2(out)
            return out.view(-1)

    # Seed every generator (CPU and CUDA).  The weights are created on the CPU
    # before .to(device) and the DataLoader shuffles with the CPU generator,
    # so CUDA-only seeding would not make runs repeatable.
    torch.manual_seed(15)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net().to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Shrinks the learning rate when the validation error stops improving.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                           factor=0.7, patience=5,
                                                           min_lr=0.00001)

    def train(epoch):
        """Run one optimisation pass; return the mean MSE on normalised labels."""
        model.train()
        loss_all = 0

        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            loss = F.mse_loss(model(data), (data.y - mean) / std)
            loss.backward()
            loss_all += loss.item() * data.num_graphs
            optimizer.step()
        return loss_all / len(train_loader.dataset)

    @torch.no_grad()  # evaluation needs no autograd graph
    def test(loader):
        """Return the MAE over ``loader`` in un-normalised target units."""
        model.eval()
        error = 0

        for data in loader:
            data = data.to(device)
            # model(data) * std is the de-normalised prediction minus `mean`.
            error += (model(data) * std - (data.y - mean)).abs().sum().item()  # MAE
        return error / len(loader.dataset)

    best_val_error = None
    for epoch in range(1, 6):
        lr = scheduler.optimizer.param_groups[0]['lr']
        loss = train(epoch)
        val_error = test(val_loader)
        scheduler.step(val_error)

        # The test error is refreshed only when validation improves.
        if best_val_error is None or val_error <= best_val_error:
            test_error = test(test_loader)
            best_val_error = val_error

        print(f'Epoch: {epoch:03d}, Batch:{n_samples} , LR: {lr:7f}, Loss: {loss:.7f}, '
              f'Val MAE: {val_error:.7f}, Test MAE: {test_error:.7f}')

    return best_val_error


# Definir los límites inferior y superior para el tamaño del subconjunto
# Inclusive lower and upper limits of the subset size.
subset_sizes_min = 500
subset_sizes_max = 1250

# Number of initial objective evaluations used to fit the surrogate model.
num_samples = 4

# Draw the initial sizes as uniform random integers in [min, max].
# NOTE: this is plain random sampling, not a Sobol sequence, and it is not
# seeded, so the initial design differs from run to run.
subset_sizes_samples = torch.randint(subset_sizes_min, subset_sizes_max + 1, (num_samples, 1))

# Standardise the inputs in double precision, as BoTorch recommends for GPs.
sizes = subset_sizes_samples.double()
size_mean = sizes.mean()
size_std = sizes.std()
subset_sizes_normalized = (sizes - size_mean) / size_std

# Fit a Gaussian-process surrogate on (normalised size, best validation MAE).
train_X = subset_sizes_normalized
train_Y = torch.tensor(
    [subset_size_objective(size.item()) for size in subset_sizes_samples],
    dtype=torch.double,
).unsqueeze(-1)

gp = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
# NOTE(review): newer BoTorch releases replace fit_gpytorch_model with
# fit_gpytorch_mll — confirm against the installed version (the import at the
# top of this cell would need the same change).
fit_gpytorch_model(mll)

# Expected Improvement.  The objective is an error to be minimised and best_f
# is the smallest value observed, so improvement must be measured downwards:
# maximize=False (the default assumes a maximisation problem).
EI = ExpectedImprovement(gp, best_f=train_Y.min(), maximize=False)

# Express the [min, max] size limits in the normalised coordinates of the GP
# so that every candidate de-normalises to a size inside the allowed range.
bounds = torch.tensor([[subset_sizes_min], [subset_sizes_max]], dtype=torch.double)
bounds = (bounds - size_mean) / size_std

candidate, acq_value = optimize_acqf(
    EI,
    bounds=bounds,
    q=1,  # Number of candidates to return
    num_restarts=20,  # Restarts of the multi-start optimiser
    raw_samples=512,  # Raw samples used to pick the starting points
)

# Map the candidate back to a sample count; a subset size must be an integer.
best_size = torch.round(candidate * size_std + size_mean)

print(f"El mejor tamaño de subconjunto encontrado es: {best_size.item()}")

