In [1]:
import optuna
from optuna.pruners import SuccessiveHalvingPruner

import torch
import torch.nn as nn

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping

from lion_pytorch import Lion

print("cuda", torch.cuda.is_available())  
print(torch.cuda.get_device_name(0)) 

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="pytorch_lightning.trainer.connectors.data_connector")
warnings.filterwarnings("ignore", category=UserWarning, module="lightning_fabric.plugins.environments.slurm")

torch.cuda.empty_cache()

from utils.train import MoleculeModel, MoleculeDataModule, GATv2Model, get_metric, save_trial_to_csv, create_hyperopt_dir
from utils.prepare import FeaturizationParameters, MoleculeDataset, MoleculeData


cuda True
NVIDIA GeForce RTX 3080


In [2]:
# Load the serialized dataset from disk.
# presumably a MoleculeDataset produced by utils.prepare - TODO confirm.
# weights_only=False is passed explicitly: the file holds a pickled Python
# object rather than a plain tensor/state_dict, and newer PyTorch releases
# default to weights_only=True, which refuses to unpickle such objects.
# NOTE(security): this performs full unpickling and can execute arbitrary
# code - only load files from a trusted source.
molecule_dataset = torch.load("../data/QM_10k.pt", weights_only=False)

In [3]:
# --- Training budget -------------------------------------------------------
# Upper bound on epochs per trial, and the number of epochs without
# improvement that the early-stopping callback tolerates.
max_epochs = 100
patience = 10

# --- Data loading ----------------------------------------------------------
# Worker count handed to the data module.
num_workers = 8

# --- Input dimensions ------------------------------------------------------
# Feature widths are read from the first sample of the dataset: the second
# dimension of its node-feature matrix `x` and of its edge-attribute matrix.
edge_attr_dim = molecule_dataset[0].edge_attr.shape[1]
in_features = molecule_dataset[0].x.shape[1]

### Гиперпараметры

In [4]:
import optuna

def objective(trial):
    """Optuna objective: build, train and score one GATv2 configuration.

    Samples the architecture (number and width of pre-/post-processing
    layers, attention heads, per-layer dropout and batch-norm flags) and the
    optimizer/scheduler settings from ``trial``, trains the resulting model
    with early stopping, records the trial via ``save_trial_to_csv`` and
    returns the validation loss.

    Relies on module-level names defined elsewhere in the notebook:
    ``in_features``, ``edge_attr_dim``, ``molecule_dataset``, ``num_workers``,
    ``max_epochs``, ``patience`` and ``hyperopt_dir``.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        float: The ``"val_loss"`` entry of ``trainer.callback_metrics`` after
        fitting. NOTE(review): this is the most recently logged value, which
        with early stopping is presumably not the best epoch - confirm that
        this is the intended score.

    Note:
        No intermediate values are reported to ``trial`` (no ``trial.report``
        / ``trial.should_prune``), so a pruner configured on the study cannot
        stop a trial early.
    """
    # Pre-processing stack: depth and the width of every layer.
    num_preprocess_layers = trial.suggest_int('num_preprocess_layers', 2, 9)
    preprocess_hidden_features = [
        trial.suggest_categorical(f'preprocess_layer_{i}_size', [64, 128, 256])
        for i in range(num_preprocess_layers)
    ]

    # Post-processing stack: depth and the width of every layer.
    num_postprocess_layers = trial.suggest_int('num_postprocess_layers', 2, 9)
    postprocess_hidden_features = [
        trial.suggest_categorical(f'postprocess_layer_{i}_size', [64, 128, 256])
        for i in range(num_postprocess_layers)
    ]

    # Per-layer settings cover the pre-processing layers, two further layers
    # (presumably the two attention layers matching the two `num_heads`
    # entries - TODO confirm against GATv2Model) and the post-processing layers.
    total_layers = num_preprocess_layers + 2 + num_postprocess_layers

    num_heads = [trial.suggest_int(f'num_heads_{i}', 8, 20, step=2) for i in range(2)]
    dropout_rates = [
        trial.suggest_float(f'dropout_rate_{i}', 0.0, 0.2, step=0.1)
        for i in range(total_layers)
    ]
    use_batch_norm = [
        trial.suggest_categorical(f'use_batch_norm_{i}', [True, False])
        for i in range(total_layers)
    ]

    # Optimizer / LR-scheduler settings. The fixed values that used to be
    # assigned here were dead code: each was immediately overwritten by the
    # sampled value below.
    learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-3, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    step_size = trial.suggest_int('step_size', 10, 200, step=10)
    gamma = trial.suggest_float('gamma', 0.1, 0.9)

    # Single source of truth for the batch size. Previously the model was
    # told 64 while the data module hard-coded 128; 128 is kept because it is
    # the size the loader actually used, so training itself is unchanged.
    batch_size = 128
    #batch_size = trial.suggest_int('batch_size', 64, 128, step=64)

    # Build the network from the sampled hyperparameters.
    base_model = GATv2Model(
        atom_in_features=in_features,
        edge_in_features=edge_attr_dim,
        num_preprocess_layers=num_preprocess_layers,
        preprocess_hidden_features=preprocess_hidden_features,
        num_heads=num_heads,
        dropout_rates=dropout_rates,
        activation_fns=[nn.ReLU] * total_layers,  # ReLU for every layer
        use_batch_norm=use_batch_norm,
        num_postprocess_layers=num_postprocess_layers,
        postprocess_hidden_features=postprocess_hidden_features,
        out_features=1
    )

    model = MoleculeModel(
        base_model=base_model,
        optimizer_class=Lion,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        step_size=step_size,
        gamma=gamma,
        batch_size=batch_size,
        metric='rmse'
    )

    # Train with early stopping on the validation loss.
    data_module = MoleculeDataModule(molecule_dataset, batch_size=batch_size, num_workers=num_workers)
    early_stop_callback = EarlyStopping(monitor="val_loss", patience=patience, mode="min")

    # Logging, progress bar, checkpoints and model summary are all disabled
    # to keep each trial lightweight.
    trainer = pl.Trainer(
        max_epochs=max_epochs,
        devices=1,
        accelerator='gpu',
        logger=False,
        enable_progress_bar=False,
        enable_checkpointing=False,
        enable_model_summary=False,
        callbacks=[early_stop_callback]
    )
    trainer.fit(model, data_module)

    val_loss = trainer.callback_metrics["val_loss"].item()

    # Persist this trial's result; `hyperopt_dir` is a module-level name that
    # must be defined before the study is run.
    save_trial_to_csv(trial, hyperopt_dir, val_loss)

    return val_loss

# Trade some float32 matmul precision for speed.
torch.set_float32_matmul_precision('medium')

# Directory where every trial's result is written by the objective.
hyperopt_dir = create_hyperopt_dir()
print(f"Results will be saved in: {hyperopt_dir}")

# The pruner was previously created but never handed to the study, so the
# study silently fell back to Optuna's default pruner. It is now passed
# explicitly. A pruner only acts on trials that report intermediate values
# (trial.report / trial.should_prune) during training.
pruner = SuccessiveHalvingPruner()
study = optuna.create_study(direction='minimize', pruner=pruner)

study.optimize(objective, n_trials=10000)

# Summarize the best trial found.
print(f'Best trial: {study.best_trial.number}')
print(f'Best value (RMSE): {study.best_trial.value}')
for key, value in study.best_trial.params.items():
    print(f'{key}: {value}')


[I 2024-03-22 10:18:36,638] A new study created in memory with name: no-name-b25d6a72-1b61-4eb0-92b5-db41d9269162


Results will be saved in: hyperopt_2
MoleculeModel(
  (base_model): GATv2Model(
    (atom_preprocess_layers): ModuleList(
      (0): Sequential(
        (0): Linear(in_features=133, out_features=64, bias=True)
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Dropout(p=0.0, inplace=False)
      )
      (1): Sequential(
        (0): Linear(in_features=64, out_features=64, bias=True)
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Dropout(p=0.0, inplace=False)
      )
    )
    (edge_preprocess_layers): ModuleList(
      (0): Sequential(
        (0): Linear(in_features=14, out_features=64, bias=True)
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Dropout(p=0.0, inplace=False)
      )
      (1): Sequential(
        (0): Linear(in_features=64, out_features=64, 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /home/sergei/Documents/gat/lightning_logs
2024-03-22 10:18:37.147951: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2024-03-22 10:20:23,709] Trial 0 finished with value: 0.042822908610105515 and parameters: {'num_preprocess_layers': 2, 'preprocess_layer_0_size': 64, 'preprocess_layer_1_size': 64, 'num_postprocess_layers': 2, 'postprocess_layer_0_size': 64, 'postprocess_layer_1_size': 64, 'num_heads_0': 8, 'num_heads_1': 8, 'dropout_rate_0': 0.0, 'dropout_rate_1': 0.0, 'dropout_rate_2': 0.0, 'dropout_rate_3': 0.0, 'dropout_rate_4': 0.0, 

Best trial: 0
Best value (RMSE): 0.042822908610105515
num_preprocess_layers: 2
preprocess_layer_0_size: 64
preprocess_layer_1_size: 64
num_postprocess_layers: 2
postprocess_layer_0_size: 64
postprocess_layer_1_size: 64
num_heads_0: 8
num_heads_1: 8
dropout_rate_0: 0.0
dropout_rate_1: 0.0
dropout_rate_2: 0.0
dropout_rate_3: 0.0
dropout_rate_4: 0.0
dropout_rate_5: 0.0
use_batch_norm_0: True
use_batch_norm_1: True
use_batch_norm_2: True
use_batch_norm_3: True
use_batch_norm_4: True
use_batch_norm_5: True
