In [1]:
import torch
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, Timer
from sklearn.model_selection import KFold
from lion_pytorch import Lion
import warnings
from pathlib import Path
from utils.utils import MoleculeDataModule, evaluate_model
from utils.train import MoleculeModel
from torch_geometric.nn import ChebConv
from torch_scatter import scatter_mean
import torch.nn.functional as F
from utils.efficient_kan import KAN, KANLinear



import numpy as np
import random

# Seed PyTorch, NumPy and Python's built-in RNG with the same fixed value (42).
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

def initialize_cuda():
    """Seed the CUDA RNGs and report the GPU in use, or note that CUDA is missing.

    With CUDA available: seeds every CUDA device with 42, prints the
    availability flag and the name of device 0, then empties the CUDA cache.
    Without CUDA: prints a one-line notice and does nothing else.
    """
    cuda_available = torch.cuda.is_available()
    if not cuda_available:
        print("CUDA is not available.")
        return

    torch.cuda.manual_seed_all(42)
    print("cuda", cuda_available)
    print(torch.cuda.get_device_name(0))
    torch.cuda.empty_cache()

# Seed the CUDA RNGs and print which GPU (if any) is visible.
initialize_cuda()

# Silence UserWarnings whose originating module matches these two Lightning module paths.
warnings.filterwarnings("ignore", category=UserWarning, module="pytorch_lightning.trainer.connectors.data_connector")
warnings.filterwarnings("ignore", category=UserWarning, module="lightning_fabric.plugins.environments.slurm")

# Request deterministic cuDNN algorithms and switch off the cuDNN autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# NOTE(review): 'medium' permits reduced internal precision for float32 matmuls —
# confirm this is acceptable together with the determinism settings above.
torch.set_float32_matmul_precision('medium')


cuda True
NVIDIA GeForce RTX 3090


In [2]:
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from torch_geometric.data import DataLoader as GeoDataLoader
from tqdm import tqdm

def evaluate_model_full(model, dataset, batch_size, num_workers=32):
    """Run the model over the whole dataset, print RMSE and R², and return the RMSE.

    Args:
        model: Callable as ``model(x, edge_index, edge_attr)``; must expose
            ``eval()`` and a ``device`` attribute.
        dataset: Dataset whose batches expose ``x``, ``edge_index``,
            ``edge_attr`` and ``y``.
        batch_size: Number of samples per evaluation batch.
        num_workers: Worker processes for the data loader. Defaults to 32,
            the value that used to be hard-coded, so existing three-argument
            calls behave as before.

    Returns:
        float: RMSE between all targets and all predictions. Callers in this
        notebook format the result with ``:.4f``, so a plain float is returned.

    Note:
        The model is left in eval mode after the call.
    """
    dataloader = GeoDataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    model.eval()
    all_pred, all_true = [], []

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            batch = batch.to(model.device)
            y_hat = model(batch.x, batch.edge_index, batch.edge_attr)
            all_pred.extend(y_hat.cpu().numpy())
            all_true.extend(batch.y.cpu().numpy())

    all_pred, all_true = np.array(all_pred), np.array(all_true)
    rmse = float(np.sqrt(mean_squared_error(all_true, all_pred)))
    r2 = float(r2_score(all_true, all_pred))

    print(f'Total RMSE: {rmse:.4f}')
    print(f'Total R²: {r2:.4f}')

    return rmse

In [3]:
class AtomEdgeInteraction(nn.Module):
    """Combine source-atom features with edge attributes and average them per target atom.

    For every edge, the features of the atom at ``edge_index[0]`` are
    concatenated with the (scaled) edge attributes, passed through a
    ``KANLinear`` layer, ReLU, optional batch norm and dropout, and then
    mean-aggregated onto the atom at ``edge_index[1]``. A residual projection
    of the original atom features is added to the aggregate.

    Args:
        in_features: Width of the incoming atom features.
        edge_features: Width of the edge attributes.
        out_features: Width of the produced atom features.
        edge_importance: Scalar multiplier applied to the edge attributes.
        dropout_rate: Dropout probability applied to the per-edge messages.
        use_batch_norm: Whether to batch-normalise the per-edge messages.
    """

    def __init__(self, in_features, edge_features, out_features, edge_importance=1.0, dropout_rate=0.1, use_batch_norm=True):
        super().__init__()
        self.edge_importance = edge_importance

        # Sub-modules keep their names and registration order so that
        # state_dict keys and weight-initialisation order are unchanged.
        self.interaction = KANLinear(in_features + edge_features, out_features)
        self.activation = nn.ReLU()
        if use_batch_norm:
            self.batch_norm = nn.BatchNorm1d(out_features)
        else:
            self.batch_norm = nn.Identity()
        self.dropout = nn.Dropout(dropout_rate)
        # A projection is only needed when the residual changes width.
        if in_features == out_features:
            self.residual = nn.Identity()
        else:
            self.residual = nn.Linear(in_features, out_features)

    def forward(self, x, edge_index, edge_attr):
        """Return updated atom features with one row per row of ``x``."""
        source_idx, target_idx = edge_index
        skip = self.residual(x)

        # One message per edge: [source atom features | scaled edge attributes].
        messages = torch.cat([x[source_idx], edge_attr * self.edge_importance], dim=-1)
        for stage in (self.interaction, self.activation, self.batch_norm, self.dropout):
            messages = stage(messages)

        # Average the incoming messages of every target atom.
        aggregated = scatter_mean(messages, target_idx, dim=0, dim_size=x.size(0))
        return aggregated + skip

class Model(nn.Module):
    """Graph network: atom/edge preprocessing, Chebyshev convolutions, KAN post-processing.

    Args:
        atom_in_features: Width of the input atom features.
        edge_attr_dim: Width of the edge attributes.
        preprocess_hidden_features: Output widths of the preprocessing layers.
            The first entry sizes the ``AtomEdgeInteraction`` layer, the rest
            size plain ``KANLinear`` stacks.
        cheb_hidden_features: Output widths of the ``ChebConv`` layers.
        K: Chebyshev filter size for each ``ChebConv`` layer.
        cheb_normalizations: ``normalization`` argument for each ``ChebConv``.
        dropout_rates, activation_fns, use_batch_norm: Per-layer settings,
            indexed first by preprocessing layer and then by post-processing
            layer (offset by ``len(preprocess_hidden_features)``).
            ``activation_fns[0]`` is not read by this constructor.
        postprocess_hidden_features: Output widths of the post-processing layers.
        out_features: Width of the final output.
    """

    def __init__(self, atom_in_features, edge_attr_dim, preprocess_hidden_features, cheb_hidden_features, K, cheb_normalizations, dropout_rates, activation_fns, use_batch_norm, postprocess_hidden_features, out_features):
        super(Model, self).__init__()

        # First preprocessing layer mixes atom and edge information.
        self.atom_preprocess = nn.ModuleList([AtomEdgeInteraction(atom_in_features, edge_attr_dim, preprocess_hidden_features[0], dropout_rate=dropout_rates[0], use_batch_norm=use_batch_norm[0])])
        for i in range(1, len(preprocess_hidden_features)):
            layer = nn.Sequential(
                KANLinear(preprocess_hidden_features[i-1], preprocess_hidden_features[i]),
                nn.BatchNorm1d(preprocess_hidden_features[i]) if use_batch_norm[i] else nn.Identity(),
                activation_fns[i](),
                nn.Dropout(dropout_rates[i])
            )
            self.atom_preprocess.append(layer)

        # `in_channels` always holds the width produced by the most recent layer.
        self.cheb_convolutions = nn.ModuleList()
        in_channels = preprocess_hidden_features[-1]
        for i in range(len(cheb_hidden_features)):
            self.cheb_convolutions.append(ChebConv(in_channels, cheb_hidden_features[i], K[i], normalization=cheb_normalizations[i]))
            in_channels = cheb_hidden_features[i]

        self.postprocess = nn.ModuleList()
        offset = len(preprocess_hidden_features)
        for i, width in enumerate(postprocess_hidden_features):
            layer = nn.Sequential(
                # Feed each layer from the previous layer's actual output width.
                # The earlier code read cheb_hidden_features[i-1] for i > 0,
                # which is only right when those widths happen to coincide.
                KANLinear(in_channels, width),
                nn.BatchNorm1d(width) if use_batch_norm[offset + i] else nn.Identity(),
                activation_fns[offset + i](),
                nn.Dropout(dropout_rates[offset + i])
            )
            self.postprocess.append(layer)
            in_channels = width

        self.output_layer = KANLinear(in_channels, out_features)

    def forward(self, x, edge_index, edge_attr):
        """Return the output-layer result with its last dimension squeezed."""
        x = self.atom_preprocess[0](x, edge_index, edge_attr)
        for layer in self.atom_preprocess[1:]:
            x = layer(x)

        for conv in self.cheb_convolutions:
            x = F.relu(conv(x, edge_index))

        for layer in self.postprocess:
            x = layer(x)

        return self.output_layer(x).squeeze(-1)


In [12]:
# %%
import torch
import torch.nn as nn
import pytorch_lightning as pl
from lion_pytorch import Lion
from utils.utils import MoleculeDataModule, evaluate_model
from utils.train import MoleculeModel
from torch_geometric.nn import ChebConv
from torch_scatter import scatter_mean
import torch.nn.functional as F

# Feature widths handed to the backbone: input atom features, output width, edge attributes.
in_features = 133
out_features = 1
edge_attr_dim = 14

# Data-loading settings.
batch_size = 1024
num_workers = 8

# Layer widths of the three backbone stages, plus the ChebConv filter sizes and normalisation.
preprocess_hidden_features = [128] * 9
postprocess_hidden_features = [128, 128]
cheb_hidden_features = [128, 128]
K = [10, 16]
cheb_normalization = ['sym', 'sym']

# One entry per preprocess layer followed by one entry per postprocess layer.
dropout_rates = [0.0] * (len(preprocess_hidden_features) + len(postprocess_hidden_features))
activation_fns = [nn.PReLU] * (len(preprocess_hidden_features) + len(postprocess_hidden_features))
use_batch_norm = [True] * (len(preprocess_hidden_features) + len(postprocess_hidden_features))

# Optimisation settings forwarded to MoleculeModel.
# step_size / gamma are presumably LR-scheduler settings — TODO confirm in utils.train.
learning_rate = 2.2e-5
weight_decay = 3e-5
step_size = 30
gamma = 0.2
metric = 'rmse'

# Define the model architecture
backbone = Model(
    atom_in_features=in_features,
    edge_attr_dim=edge_attr_dim,
    preprocess_hidden_features=preprocess_hidden_features,
    cheb_hidden_features=cheb_hidden_features,
    K=K,
    cheb_normalizations=cheb_normalization,
    dropout_rates=dropout_rates,
    activation_fns=activation_fns,
    use_batch_norm=use_batch_norm,
    postprocess_hidden_features=postprocess_hidden_features,
    out_features=out_features
)

# Initialise the model
model = MoleculeModel(
    model_backbone=backbone,
    optimizer_class=Lion,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    step_size=step_size,
    gamma=gamma,
    batch_size=batch_size,
    metric=metric
)

# Load the checkpoint
# NOTE(review): this rebinds `model`, so the instance constructed just above is discarded.
checkpoint_path = 'final_model.ckpt'
model = MoleculeModel.load_from_checkpoint(checkpoint_path, model_backbone=backbone)

print("Model loaded for finetuning.")



Model loaded for finetuning.


In [13]:
# Loader settings used by the data module below.
batch_size, num_workers = 1024, 8

# Load the dataset stored at ../data/QM_137k.pt and wrap it in a data module.
dataset = torch.load('../data/QM_137k.pt')
data_module = MoleculeDataModule(dataset, batch_size=batch_size, num_workers=num_workers)

In [14]:
# Baseline: score the checkpoint-loaded model on the full dataset before any fine-tuning.
evaluate_model_full(model, dataset, batch_size)

Evaluating: 100%|██████████| 134/134 [00:26<00:00,  5.15it/s]


Total RMSE: 0.0188
Total R²: 0.9919


In [15]:
import torch
from torch.utils.data import WeightedRandomSampler
from pytorch_lightning import Trainer, callbacks

# Additional samples stored at ../data/QM_cool.pt.
high_quality_dataset = torch.load('../data/QM_cool.pt')

# Relative sampling weights: each new sample counts ten times an old one.
old_data_weight = 1
new_data_weight = 10

combined_dataset = dataset + high_quality_dataset
weights = [old_data_weight] * len(dataset) + [new_data_weight] * len(high_quality_dataset)
sampler = WeightedRandomSampler(weights, num_samples=len(combined_dataset), replacement=True)

# NOTE(review): `sampler` is built but not handed to the data module here —
# confirm whether the weighting is meant to take effect in this run.
data_module = MoleculeDataModule(dataset=combined_dataset, batch_size=1024, num_workers=8)

# Keep only parameters whose name mentions the post-processing stack or the
# output layer trainable; freeze everything else.
for name, param in model.named_parameters():
    param.requires_grad = "postprocess" in name or "output_layer" in name

# The optimiser only receives the parameters that are still trainable.
optimizer = Lion(
    [p for p in model.parameters() if p.requires_grad],
    lr=learning_rate,
    weight_decay=weight_decay,
)

# Make the model hand this pre-built optimiser to the trainer.
model.configure_optimizers = lambda: optimizer

early_stop_callback = callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=True, mode='min')
timer = callbacks.Timer()
logger = pl.loggers.TensorBoardLogger('tb_logs', name='KAN_fine_loss')

trainer = Trainer(
    accelerator='gpu',
    devices=1,
    max_epochs=100,
    callbacks=[early_stop_callback, timer],
    logger=logger,
    enable_checkpointing=False,
    enable_progress_bar=False,
)

trainer.fit(model, data_module)

# Split the elapsed wall-clock time into whole hours, minutes and seconds.
seconds = timer.time_elapsed()
m, s = divmod(int(seconds), 60)
h, m = divmod(m, 60)

print(f"Общее время обучения: {h}:{m:02d}:{s:02d}")



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type  | Params
-----------------------------------------
0 | model_backbone | Model | 2.3 M 
-----------------------------------------
329 K     Trainable params
1.9 M     Non-trainable params
2.3 M     Total params
9.096     Total estimated model params size (MB)
Metric val_loss improved. New best score: 0.065
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.065
Metric val_loss

Общее время обучения: 0:19:18


In [16]:
# Re-score the model on the original dataset after the fine-tuning run above.
evaluate_model_full(model, dataset, batch_size)

Evaluating:  32%|███▏      | 43/134 [08:26<19:11, 12.65s/it]

In [None]:
import torch
from torch.utils.data import WeightedRandomSampler, DataLoader
from pytorch_lightning import Trainer, callbacks
from pytorch_lightning.callbacks import EarlyStopping, Timer
from lion_pytorch import Lion
from utils.utils import MoleculeDataModule
from utils.train import MoleculeModel
from torch import nn

# Load both datasets.
dataset = torch.load('../data/QM_137k.pt')
high_quality_dataset = torch.load('../data/QM_cool.pt')

# Per-sample weights: 1 for every original sample, 10 for every high-quality one.
old_data_weight = 1
new_data_weight = 10
weights = [old_data_weight] * len(dataset) + [new_data_weight] * len(high_quality_dataset)

# Combine the datasets (original samples first, matching the order of `weights`).
combined_dataset = dataset + high_quality_dataset

# Weighted sampler drawing, with replacement, as many samples as the combined set holds.
sampler = WeightedRandomSampler(weights, num_samples=len(combined_dataset), replacement=True)

# Data module over the combined dataset, using the weighted sampler.
data_module = MoleculeDataModule(dataset=combined_dataset, batch_size=1024, num_workers=8, sampler=sampler)

# Function for gradually unfreezing layers
def unfreeze_layers(model, num_layers_to_unfreeze):
    """Leave only the last ``num_layers_to_unfreeze`` parameter tensors trainable.

    Counts entries of ``model.parameters()`` (individual tensors, not modules):
    the trailing ``num_layers_to_unfreeze`` of them get ``requires_grad=True``
    and every earlier one gets ``requires_grad=False``.
    """
    tensors = list(model.parameters())
    first_trainable = len(tensors) - num_layers_to_unfreeze
    for position, tensor in enumerate(tensors):
        tensor.requires_grad = position >= first_trainable

# Function for fine-tuning the model
def finetune_model(model, method_name, num_layers_to_unfreeze=None, additional_layers=None):
    """Fine-tune ``model`` with the chosen strategy and return its evaluation score.

    Args:
        model: Model to fine-tune; modified in place.
        method_name: ``"gradual_unfreeze"`` freezes all but the trailing
            parameter tensors; ``"additional_layers"`` wraps the backbone's
            output layer with a ReLU and the extra layers. Any other value
            trains the model as it is.
        num_layers_to_unfreeze: Number of trailing parameter tensors left
            trainable (``"gradual_unfreeze"`` only).
        additional_layers: Modules appended after the output layer
            (``"additional_layers"`` only; ignored when ``None``).

    Returns:
        Whatever ``evaluate_model_full`` returns for the fine-tuned model.

    Reads the notebook globals ``data_module``, ``dataset`` and ``num_workers``.
    """
    if method_name == "gradual_unfreeze":
        unfreeze_layers(model, num_layers_to_unfreeze)
    elif method_name == "additional_layers" and additional_layers is not None:
        net = model.model_backbone
        net.output_layer = nn.Sequential(net.output_layer, nn.ReLU(), *additional_layers)

    # Only parameters that are still trainable are handed to the optimiser.
    optimizer = Lion(
        [p for p in model.parameters() if p.requires_grad],
        lr=2.2e-5,
        weight_decay=3e-5,
    )
    model.configure_optimizers = lambda: optimizer

    early_stop_callback = EarlyStopping(monitor='val_loss', patience=5, verbose=True, mode='min')
    timer = Timer()
    logger = pl.loggers.TensorBoardLogger('tb_logs', name=f'{method_name}_finetuning')

    trainer = Trainer(
        accelerator='gpu',
        devices=1,
        max_epochs=100,
        callbacks=[early_stop_callback, timer],
        logger=logger,
        enable_checkpointing=False,
        enable_progress_bar=True,
    )
    trainer.fit(model, data_module)

    # Split the elapsed wall-clock time into whole hours, minutes and seconds.
    seconds = timer.time_elapsed()
    m, s = divmod(int(seconds), 60)
    h, m = divmod(m, 60)
    print(f"Общее время дообучения ({method_name}): {h}:{m:02d}:{s:02d}")

    # NOTE(review): passes num_workers as a fourth positional argument and expects a
    # numeric score back — confirm evaluate_model_full accepts it and returns the RMSE.
    return evaluate_model_full(model, dataset, 1024, num_workers)

import copy

# Load the model from the checkpoint.
checkpoint_path = 'final_model.ckpt'
model = MoleculeModel.load_from_checkpoint(checkpoint_path, model_backbone=backbone)

# Strategy 1: gradual unfreezing.
# nn.Module provides no clone() method; copy.deepcopy gives each experiment its
# own independent copy of the loaded model, so the runs do not affect each other.
model_copy = copy.deepcopy(model)
val_loss_gradual_unfreeze = finetune_model(model_copy, "gradual_unfreeze", num_layers_to_unfreeze=2)

# Strategy 2: append new layers after the output layer.
model_copy = copy.deepcopy(model)
additional_layers = [nn.Linear(1, 1)]
val_loss_additional_layers = finetune_model(model_copy, "additional_layers", additional_layers=additional_layers)

# Strategy 3: transfer learning on the high-quality dataset only.
# NOTE(review): only model_backbone is passed here, whereas the earlier construction also
# passed optimizer_class, learning_rate, etc. — confirm MoleculeModel has defaults for them.
model_copy = MoleculeModel(model_backbone=backbone)
optimizer = Lion(model_copy.parameters(), lr=2.2e-5, weight_decay=3e-5)
model_copy.configure_optimizers = lambda: optimizer
early_stop_callback = EarlyStopping(monitor='val_loss', patience=5, verbose=True, mode='min')
timer = Timer()
logger = pl.loggers.TensorBoardLogger('tb_logs', name='transfer_learning')

trainer = Trainer(
    max_epochs=100,
    enable_checkpointing=False,
    callbacks=[early_stop_callback, timer],
    enable_progress_bar=True,
    logger=logger,
    accelerator='gpu',
    devices=1
)

# This rebinds the global data module to the high-quality data only (no weighted sampler).
data_module = MoleculeDataModule(dataset=high_quality_dataset, batch_size=1024, num_workers=8)
trainer.fit(model_copy, data_module)

seconds = timer.time_elapsed()
h, m, s = int(seconds // 3600), int((seconds % 3600) // 60), int(seconds % 60)
print(f"Общее время дообучения (transfer_learning): {h}:{m:02d}:{s:02d}")

val_loss_transfer_learning = evaluate_model_full(model_copy, dataset, 1024)

# Print the results.
print(f"Результаты дообучения:")
print(f"Постепенная разморозка слоев: Валид. ошибка (RMSE) = {val_loss_gradual_unfreeze:.4f}")
print(f"Добавление новых слоев: Валид. ошибка (RMSE) = {val_loss_additional_layers:.4f}")
print(f"Transfer Learning: Валид. ошибка (RMSE) = {val_loss_transfer_learning:.4f}")


ImportError: cannot import name 'evaluate_model_full' from 'utils.utils' (/home/nikolenko/work/gat/utils/utils.py)