In [2]:
import optuna
import torch
import torch.nn as nn

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, Timer
from lion_pytorch import Lion

# Fix PyTorch's RNG seed so runs are repeatable.
torch.manual_seed(42)

if torch.cuda.is_available():
    # Seed every visible CUDA device with the same value, report which GPU
    # is in use, and release cached allocator blocks left over from earlier
    # cell runs in this kernel.
    torch.cuda.manual_seed_all(42)
    print("cuda", torch.cuda.is_available())
    print(torch.cuda.get_device_name(0))
    torch.cuda.empty_cache()
else:
    print("CUDA is not available.")

import warnings
# Silence UserWarnings raised from these two Lightning modules only; all
# other warnings remain visible.
warnings.filterwarnings("ignore", category=UserWarning, module="pytorch_lightning.trainer.connectors.data_connector")
warnings.filterwarnings("ignore", category=UserWarning, module="lightning_fabric.plugins.environments.slurm")

# Ask cuDNN for deterministic algorithms and turn off its autotuner, trading
# some speed for reproducibility.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Allow reduced-precision internals for float32 matmuls (faster, less exact).
torch.set_float32_matmul_precision('medium')

from utils.utils import save_trial_to_csv, create_hyperopt_dir, MoleculeDataModule
from utils.train import MoleculeModel


cuda True
NVIDIA GeForce RTX 3090


In [3]:
# Load the pre-built dataset from disk. Later cells index it (`dataset[0]`),
# iterate over it, and read `.x` / `.edge_attr` on each item.
# NOTE(review): torch.load unpickles arbitrary Python objects — only load
# files from a trusted source.
dataset = torch.load("../data/QM_137k.pt")

In [5]:
import torch
import torch.nn as nn
from torch_geometric.nn import ChebConv
from torch_scatter import scatter_mean

import torch.nn.functional as F
import pytorch_lightning as pl

from utils.train import MoleculeModel
from utils.efficient_kan import KANLinear


class AtomEdgeInteraction(nn.Module):
    """Edge-aware node update with a residual connection.

    For every edge, the features of the node at ``edge_index[0]`` are
    concatenated with the (scaled) edge attributes and passed through
    ``KANLinear -> ReLU -> BatchNorm1d/Identity -> Dropout``. The per-edge
    results are then averaged onto the nodes given by ``edge_index[1]`` and
    added to a (possibly projected) copy of the input node features.

    Args:
        in_features: Width of the input node features.
        edge_features: Width of the edge attributes.
        out_features: Width of the output node features.
        edge_importance: Scalar multiplier applied to ``edge_attr``.
        dropout_rate: Dropout probability applied to the per-edge features.
        use_batch_norm: If true, normalise per-edge features with
            ``BatchNorm1d``; otherwise use ``Identity``.
    """

    def __init__(self, in_features, edge_features, out_features, edge_importance=1.0, dropout_rate=0.1, use_batch_norm=True):
        super().__init__()
        self.edge_importance = edge_importance
        self.interaction = KANLinear(in_features + edge_features, out_features)
        self.activation = nn.ReLU()
        if use_batch_norm:
            self.batch_norm = nn.BatchNorm1d(out_features)
        else:
            self.batch_norm = nn.Identity()
        self.dropout = nn.Dropout(dropout_rate)
        # A linear projection is only required when the residual branch has
        # to change width.
        if in_features == out_features:
            self.residual = nn.Identity()
        else:
            self.residual = nn.Linear(in_features, out_features)

    def forward(self, x, edge_index, edge_attr):
        """Return updated node features with ``x.size(0)`` rows."""
        gather_idx, scatter_idx = edge_index
        per_edge = torch.cat([x[gather_idx], edge_attr * self.edge_importance], dim=-1)
        per_edge = self.dropout(self.batch_norm(self.activation(self.interaction(per_edge))))
        skip = self.residual(x)
        # dim_size keeps one output row per input node, including nodes that
        # never appear in scatter_idx.
        aggregated = scatter_mean(per_edge, scatter_idx, dim=0, dim_size=x.size(0))
        return aggregated + skip

class Model(nn.Module):
    """Node-level network: edge-aware preprocessing, ChebConv stack, KAN head.

    Pipeline (see ``forward``):
      1. ``AtomEdgeInteraction`` followed by ``KANLinear`` blocks
         (``preprocess_hidden_features``).
      2. A stack of ``ChebConv`` layers, each followed by ReLU.
      3. ``KANLinear`` post-processing blocks and a final ``KANLinear``
         output layer.

    Args:
        atom_in_features: Width of the input node features.
        edge_attr_dim: Width of the edge attributes.
        preprocess_hidden_features: Output widths of the preprocessing
            layers; must contain at least one entry.
        cheb_hidden_features: Output widths of the ChebConv layers (may be
            empty).
        K: Chebyshev filter size for each ChebConv layer.
        cheb_normalizations: ``normalization`` argument for each ChebConv.
        dropout_rates, activation_fns, use_batch_norm: Per-layer settings,
            indexed first over the preprocessing layers and then over the
            post-processing layers. ``activation_fns[0]`` is not used: the
            first preprocessing layer is an ``AtomEdgeInteraction``, which
            has its own fixed ReLU.
        postprocess_hidden_features: Output widths of the post-processing
            layers (may be empty).
        out_features: Width of the final output.
    """

    def __init__(self, atom_in_features, edge_attr_dim, preprocess_hidden_features, cheb_hidden_features, K, cheb_normalizations, dropout_rates, activation_fns, use_batch_norm, postprocess_hidden_features, out_features):
        super().__init__()

        n_preprocess = len(preprocess_hidden_features)

        self.atom_preprocess = nn.ModuleList([AtomEdgeInteraction(atom_in_features, edge_attr_dim, preprocess_hidden_features[0], dropout_rate=dropout_rates[0], use_batch_norm=use_batch_norm[0])])
        for i in range(1, n_preprocess):
            layer = nn.Sequential(
                KANLinear(preprocess_hidden_features[i-1], preprocess_hidden_features[i]),
                nn.BatchNorm1d(preprocess_hidden_features[i]) if use_batch_norm[i] else nn.Identity(),
                activation_fns[i](),
                nn.Dropout(dropout_rates[i])
            )
            self.atom_preprocess.append(layer)

        # `in_channels` always holds the width produced by the most recently
        # built layer, so every following layer is sized from its true input.
        self.cheb_convolutions = nn.ModuleList()
        in_channels = preprocess_hidden_features[-1]
        for i in range(len(cheb_hidden_features)):
            self.cheb_convolutions.append(ChebConv(in_channels, cheb_hidden_features[i], K[i], normalization=cheb_normalizations[i]))
            in_channels = cheb_hidden_features[i]

        self.postprocess = nn.ModuleList()
        for i, width in enumerate(postprocess_hidden_features):
            # Per-layer settings for post-processing start right after the
            # preprocessing entries in the shared config lists.
            cfg = n_preprocess + i
            layer = nn.Sequential(
                # Previously layers with i > 0 took their input width from
                # cheb_hidden_features[i-1], which is wrong whenever the
                # ChebConv and post-processing widths differ.
                KANLinear(in_channels, width),
                nn.BatchNorm1d(width) if use_batch_norm[cfg] else nn.Identity(),
                activation_fns[cfg](),
                nn.Dropout(dropout_rates[cfg])
            )
            self.postprocess.append(layer)
            in_channels = width

        self.output_layer = KANLinear(in_channels, out_features)

    def forward(self, x, edge_index, edge_attr):
        """Run the full pipeline and drop a trailing size-1 output dimension."""
        # Only the first preprocessing layer consumes the graph structure.
        x = self.atom_preprocess[0](x, edge_index, edge_attr)
        for layer in self.atom_preprocess[1:]:
            x = layer(x)

        for conv in self.cheb_convolutions:
            x = F.relu(conv(x, edge_index))

        for layer in self.postprocess:
            x = layer(x)

        return self.output_layer(x).squeeze(-1)

### Гиперпараметры

In [6]:
def remove_atom_features(dataset, features_to_remove):
    """Return copies of the dataset items with selected ``x`` columns dropped.

    The input items are left untouched: each one is shallow-copied and only
    the copy's ``x`` attribute is replaced. Overwriting ``data.x`` on the
    caller's objects made repeated calls on the same dataset (e.g. once per
    Optuna trial) strip more and more columns.

    Args:
        dataset: Iterable of items exposing a 2-D tensor attribute ``x``.
            NOTE(review): assumed to be shallow-copyable via ``copy.copy``
            (e.g. PyG ``Data`` objects) — confirm for the real dataset type.
        features_to_remove: Column indices of ``x`` to drop.

    Returns:
        A new list with one item per input item. Items whose ``x`` does not
        have more columns than ``len(features_to_remove)`` are copied
        without modification.
    """
    import copy  # local import keeps this cell self-contained

    pruned_items = []
    for data in dataset:
        item = copy.copy(data)
        n_columns = item.x.size(1)
        if n_columns > len(features_to_remove):
            keep_mask = torch.ones(n_columns, dtype=torch.bool)
            keep_mask[features_to_remove] = False
            item.x = item.x[:, keep_mask]
        pruned_items.append(item)
    return pruned_items

def objective(trial):
    """Optuna objective: drop a sampled subset of atom features and train.

    The trial samples how many feature slots to use and one feature index
    per slot; duplicate indices collapse, so the number of columns actually
    removed may be smaller than ``num_features_to_remove``. The resulting
    index list is stored on the trial as the user attribute
    ``features_to_remove``.

    The previous implementation passed ``suggest_categorical`` a list of
    identical ``list(range(n_features))`` choices, so the sampled value was
    always "every feature", which ``remove_atom_features`` then skipped —
    no feature was ever removed.

    Relies on notebook globals: ``dataset``, ``hyperopt_dir``, ``Model``,
    ``MoleculeModel``, ``MoleculeDataModule`` and ``save_trial_to_csv``.

    Args:
        trial: The ``optuna.trial.Trial`` being evaluated.

    Returns:
        The final ``val_loss`` reported by the trainer, or ``float('inf')``
        if the trial ran out of CUDA memory.

    Raises:
        ValueError: If no atom features are left after removal.
        RuntimeError: Any runtime error other than CUDA out-of-memory.
    """
    import copy  # local import keeps this cell self-contained

    try:
        # Sample how many feature slots to fill and which column each slot
        # points at. Each parameter has a fixed integer range, so the search
        # space is well-defined for Optuna.
        n_atom_features = dataset[0].x.size(1)
        num_features_to_remove = trial.suggest_int('num_features_to_remove', 1, min(20, n_atom_features - 1))
        features_to_remove = sorted({
            trial.suggest_int(f'feature_to_remove_{slot}', 0, n_atom_features - 1)
            for slot in range(num_features_to_remove)
        })
        trial.set_user_attr('features_to_remove', features_to_remove)

        # Hand shallow copies to the helper so the notebook-global `dataset`
        # keeps all of its columns from one trial to the next.
        # NOTE(review): assumes items are shallow-copyable via copy.copy
        # (e.g. PyG Data objects) — confirm for the real dataset type.
        dataset_ex = remove_atom_features([copy.copy(data) for data in dataset], features_to_remove)

        in_features = dataset_ex[0].x.shape[1]
        if in_features == 0:
            raise ValueError("No features left after removing atom features and adding SkipAtom features.")

        batch_size = 1024
        data_module = MoleculeDataModule(dataset_ex, batch_size=batch_size, num_workers=8)

        out_features = 1
        edge_attr_dim = dataset_ex[0].edge_attr.shape[1]
        optimizer_class = torch.optim.Adam
        metric = 'rmse'

        # Fixed optimiser / scheduler settings (not tuned by this study).
        learning_rate = 2.2e-5
        weight_decay = 3e-5
        step_size = 80
        gamma = 0.2

        # Fixed architecture (not tuned by this study).
        preprocess_hidden_features = [128] * 9
        postprocess_hidden_features = [128, 128]
        cheb_hidden_features = [128, 128]
        K = [10, 16]
        cheb_normalization = ['sym', 'sym']

        # One entry per preprocessing layer followed by one per
        # post-processing layer.
        n_configured_layers = len(preprocess_hidden_features) + len(postprocess_hidden_features)
        dropout_rates = [0.0] * n_configured_layers
        activation_fns = [nn.ReLU] * n_configured_layers
        use_batch_norm = [True] * n_configured_layers

        backbone = Model(
            atom_in_features=in_features,
            edge_attr_dim=edge_attr_dim,
            preprocess_hidden_features=preprocess_hidden_features,
            cheb_hidden_features=cheb_hidden_features,
            K=K,
            cheb_normalizations=cheb_normalization,
            dropout_rates=dropout_rates,
            activation_fns=activation_fns,
            use_batch_norm=use_batch_norm,
            postprocess_hidden_features=postprocess_hidden_features,
            out_features=out_features
        )

        model = MoleculeModel(
            model_backbone=backbone,
            optimizer_class=optimizer_class,
            learning_rate=learning_rate,
            weight_decay=weight_decay,
            step_size=step_size,
            gamma=gamma,
            batch_size=batch_size,
            metric=metric
        )

        early_stop_callback = EarlyStopping(monitor="val_loss", patience=5, mode="min")
        timer = Timer()
        logger = pl.loggers.TensorBoardLogger('tb_logs', name='hyperopt/skipatom_null')

        trainer = pl.Trainer(
            max_epochs=100,
            devices=1,
            accelerator='gpu',
            logger=logger,
            enable_progress_bar=False,
            enable_checkpointing=False,
            enable_model_summary=False,
            callbacks=[early_stop_callback, timer]
        )
        trainer.fit(model, data_module)

        val_loss = trainer.callback_metrics["val_loss"].item()
        save_trial_to_csv(trial, hyperopt_dir, val_loss)

    except RuntimeError as e:
        # Treat an out-of-memory trial as the worst possible score rather
        # than aborting the whole study; re-raise anything else.
        if 'CUDA out of memory' in str(e):
            print("CUDA out of memory. Skipping this trial.")
            return float('inf')
        raise

    return val_loss

# Allow reduced-precision internals for float32 matmuls (faster, less exact).
torch.set_float32_matmul_precision('medium')

# Directory that `objective` writes its per-trial CSV rows into.
hyperopt_dir = create_hyperopt_dir()
print(f"Results will be saved in: {hyperopt_dir}")

# NOTE(review): `objective` as written in this notebook never calls
# trial.report()/trial.should_prune(), so this pruner appears to have no
# effect on the study — confirm whether pruning is intended.
halving_pruner = optuna.pruners.SuccessiveHalvingPruner()
study = optuna.create_study(direction='minimize', pruner=halving_pruner)
study.optimize(objective, n_trials=1000)

# Summarise the best trial found.
best_trial = study.best_trial
print(f'Best trial: {best_trial.number}')
print(f'Best value (RMSE): {best_trial.value}')
for param_name, param_value in best_trial.params.items():
    print(f'{param_name}: {param_value}')

[I 2024-05-27 11:33:18,157] A new study created in memory with name: no-name-cae6c70a-bf27-4160-913c-06f314a75d4a


Results will be saved in: hyperopt_7


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/nikolenko/.local/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
[I 2024-05-27 11:33:25,749] Trial 0 finished with value: 0.22599643468856812 and parameters: {'model_file': '../skipatom/data/mp_2020_10_09.dim30.keras.model', 'scaler': 'MinMaxScaler', 'min_count': 3, 'top_n': 97767823.60867481}. Best is trial 0 with value: 0.22599643468856812.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/nikolenko/.local/lib/python3.10/site-packa