# Install Packages

In [None]:
!pip install deepchem torch-geometric transformers lightning

In [3]:
!pip install  dgl -f https://data.dgl.ai/wheels/torch-2.3/cu121/repo.html

Looking in links: https://data.dgl.ai/wheels/torch-2.3/cu121/repo.html
Collecting dgl
  Downloading https://data.dgl.ai/wheels/torch-2.3/cu121/dgl-2.4.0%2Bcu121-cp311-cp311-manylinux1_x86_64.whl (355.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m355.1/355.1 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting pydantic>=2.0 (from dgl)
  Downloading pydantic-2.11.3-py3-none-any.whl.metadata (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.2/65.2 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting annotated-types>=0.6.0 (from pydantic>=2.0->dgl)
  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.33.1 (from pydantic>=2.0->dgl)
  Downloading pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting typing-extensions>=4.12.2 (from pydantic>=2.0->dgl)
  Downloading typing_extensions-4.13.2-py3-none-any.whl.metadata (3.0 k

In [42]:
import deepchem as dc
import torch

# Names of the twelve QM9 regression targets.
QM9_TASKS = [
    "mu",
    "alpha",
    "homo",
    "lumo",
    "gap",
    "r2",
    "zpve",
    "cv",
    "u0",
    "u298",
    "h298",
    "g298",
]

# MolGraphConvFeaturizer Models

In [112]:
# Graph featurizer with edge (bond) features enabled.
featurizer = dc.feat.MolGraphConvFeaturizer(
    use_edges=True,
)

# load_qm9 hands back the task names, the dataset splits and the
# transformers associated with them.
tasks, datasets, transformers = dc.molnet.load_qm9(
    featurizer=featurizer,
)

# The splits come as a 3-tuple: training, validation, test.
train, val, test = datasets

In [104]:
# Shape of the training split's weight array (samples x tasks).
train.w.shape

(105576, 12)

In [32]:
train.X[6]  # one featurized molecule: node feature width 30, edge feature width 11

GraphData(node_features=[9, 30], edge_index=[2, 18], edge_features=[18, 11])

In [106]:
# Show the training split's summary repr (array shapes and task names).
train

<DiskDataset X.shape: (105576,), y.shape: (105576, 12), w.shape: (105576, 12), task_names: ['mu' 'alpha' 'homo' ... 'u298' 'h298' 'g298']>

In [109]:
def filter_dataset(dataset, selected_indicies):
    """Build an in-memory dataset restricted to a subset of task columns.

    Args:
        dataset: Source dataset exposing ``X``, ``y``, ``w`` and ``ids``.
        selected_indicies: Column indices to keep in ``y`` and ``w``.

    Returns:
        A ``dc.data.NumpyDataset`` holding the original ``X`` and ``ids``
        together with the column-sliced ``y`` and ``w``.
    """
    # Same indexer for labels and weights: every row, chosen columns only.
    task_columns = (slice(None), selected_indicies)
    return dc.data.NumpyDataset(
        dataset.X,
        dataset.y[task_columns],
        dataset.w[task_columns],
        dataset.ids,
    )

In [138]:
# Keep only the three selected targets ("homo", "lumo", "gap") in every split.
from deepchem.trans import NormalizationTransformer

selected_tasks = ["homo", "lumo", "gap"]
# Column positions of the selected targets within the full task list.
selected_indicies = list(map(tasks.index, selected_tasks))

train_filtered, val_filtered, test_filtered = (
    filter_dataset(split, selected_indicies) for split in (train, val, test)
)

# NOTE(review): this rebinds `transformers`, replacing the list returned by
# load_qm9. The new transformer is fitted on `train_filtered`, but no
# `.transform(...)` call is visible in this notebook -- confirm the labels are
# scaled the way the evaluation code expects.
transformers = [
    NormalizationTransformer(
        dataset=train_filtered,
        transform_y=True,
        move_mean=True,
    )
]

In [122]:
# Sanity check: filter_dataset passes X through untouched, so the graph
# features keep their original widths; only the y/w columns were subset.
train_filtered.X[0]

GraphData(node_features=[16, 30], edge_index=[2, 32], edge_features=[32, 11])

In [123]:
# Number of selected regression targets; the model cell passes this as n_tasks.
len(selected_tasks)

3

## PagtnModel

In [20]:
!pip install dgllife

Collecting dgllife
  Downloading dgllife-0.3.2-py3-none-any.whl.metadata (667 bytes)
Collecting hyperopt (from dgllife)
  Downloading hyperopt-0.2.7-py2.py3-none-any.whl.metadata (1.7 kB)
Collecting future (from hyperopt->dgllife)
  Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
Collecting py4j (from hyperopt->dgllife)
  Downloading py4j-0.10.9.9-py2.py3-none-any.whl.metadata (1.3 kB)
Downloading dgllife-0.3.2-py3-none-any.whl (226 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.1/226.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hDownloading hyperopt-0.2.7-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading future-1.0.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.3/491.3 kB[0m [31m41.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading py4j-0.10.9.9-py2.py3-none-any.w

In [137]:
from deepchem.models.optimizers import Adam

# Train on the first GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

optimizer = Adam(learning_rate=0.001)

# `max_path_length` and `path_hidden_dim` were removed from this call: they are
# not PagtnModel constructor parameters, so they were silently absorbed by
# **kwargs and had no effect on the model.
#
# NOTE(review): DeepChem's PAGTN examples pair this model with
# PagtnMolGraphFeaturizer, whereas this notebook feeds it graphs produced by
# MolGraphConvFeaturizer -- confirm that combination is intended.
# NOTE(review): with 105,576 training samples, batch_size=16384 yields only
# 7 optimizer steps per epoch -- confirm this is intended.
model = dc.models.PagtnModel(
    n_tasks=len(selected_tasks),
    number_atom_features=30,   # must match the node feature width of the featurized dataset
    number_bond_features=11,   # must match the edge feature width of the featurized dataset
    mode='regression',
    batch_size=16384,
    optimizer=optimizer,
    hidden_features=64,        # hidden dimension size
    output_node_features=64,   # node feature size before the final layer
    num_layers=3,              # number of PAGTN layers
    num_heads=4,               # number of attention heads
    dropout=0.2,
    device=device,
    model_dir="./models/PagtnModel/"
)

Using device: cuda:0


In [95]:
# Evaluation metrics: mean absolute error, Pearson R^2 and mean squared error.
metrics = [
    dc.metrics.Metric(score_fn)
    for score_fn in (
        dc.metrics.mae_score,
        dc.metrics.pearson_r2_score,
        dc.metrics.mean_squared_error,
    )
]

### Lightning (doesn't work)

In [67]:
from deepchem.models.lightning import DCLightningModule, DCLightningDatasetModule
import pytorch_lightning as L

lightning_model = DCLightningModule(model)

# The model is built with n_tasks=len(selected_tasks), so it must be fed the
# task-filtered splits; the unfiltered `train`/`val` splits carry all 12 label
# columns and do not match the model's output width.
train_data_module = DCLightningDatasetModule(train_filtered, model.batch_size)
valid_data_module = DCLightningDatasetModule(val_filtered, model.batch_size)

# NOTE(review): EarlyStopping and ModelCheckpoint both monitor 'val_loss';
# confirm that DCLightningModule actually logs a metric under that name.
trainer = L.Trainer(
    accelerator='gpu',  # Use GPU
    devices=1,  # Use 1 GPU (change to more if you have multiple)
    max_epochs=50,
    # precision=16,  # Use mixed precision for faster training
    callbacks=[
        L.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            mode='min',
            verbose=True
        ),
        L.callbacks.ModelCheckpoint(
            monitor='val_loss',
            dirpath='lightning_checkpoints/',
            filename='pagtn-{epoch:02d}-{val_loss:.2f}',
            save_top_k=3,
            mode='min'
        ),
        L.callbacks.LearningRateMonitor(logging_interval='epoch')
    ],
    # logger=L.loggers.TensorBoardLogger('lightning_logs/', name='pagtn_qm9'),
    logger=L.loggers.CSVLogger('lightning_logs/', name='pagtn_qm9'),
    log_every_n_steps=10,
    gradient_clip_val=0.5,  # Gradient clipping for stability
    enable_progress_bar=True,
    deterministic=True  # For reproducibility
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
# Launch Lightning training.
# NOTE(review): the training and validation data modules are passed as two
# separate positional arguments; confirm that Trainer.fit accepts a data module
# in the validation slot -- this section's heading flags it as not working.
print("Starting Lightning training on GPU...")
trainer.fit(lightning_model, train_data_module, valid_data_module)

### Regular

In [72]:
# Turn off PyTorch's deterministic-algorithm enforcement.
# NOTE(review): presumably undoes the deterministic=True setting of the
# Lightning Trainer created earlier -- confirm.
torch.use_deterministic_algorithms(False)

In [None]:
# Training-loop configuration.
MAX_EPOCHS = 200                 # upper bound on training epochs
TRAIN_EVAL_EVERY = 10            # evaluate on the training split every N epochs
BEST_CHECKPOINT_PATH = "best_model_checkpoint.pth"

# Early-stopping state.
best_valid_mae = float('inf')
patience = 20                    # epochs without improvement tolerated before stopping
current_patience = 0
training_history = []            # per-epoch loss returned by model.fit
best_checkpoint_saved = False    # True once a checkpoint has been written in this run

torch.cuda.empty_cache()

# Validation MAE of the untrained model, as a baseline.
print("Base MAE")
valid_scores = model.evaluate(val_filtered, metrics, transformers)
valid_mae = valid_scores['mae_score']
print(f"Valid_mae: {valid_mae}")

print("Training PagtnModel...")
for epoch in range(MAX_EPOCHS):
    # Report epochs 1-based everywhere so all log lines agree with each other.
    epoch_number = epoch + 1
    print(f"Epoch #{epoch_number}", end='\t')
    if torch.cuda.is_available():
        print(f"\nGPU Memory allocated: {torch.cuda.memory_allocated(0)/1e9:.2f} GB")

    # One pass over the training data per loop iteration, so that validation
    # and early stopping can run between epochs.
    loss = model.fit(train_filtered, nb_epoch=1, deterministic=False)
    training_history.append(loss)

    # Evaluate on validation set
    valid_scores = model.evaluate(val_filtered, metrics, transformers)
    valid_mae = valid_scores['mae_score']

    print(f"Valid_mae: {valid_mae}")

    # Evaluating on the full training split is expensive, so do it only periodically.
    if epoch % TRAIN_EVAL_EVERY == 0:
        train_scores = model.evaluate(train_filtered, metrics, transformers)
        print(f"Epoch {epoch_number}:")
        print(f"  Train MAE: {train_scores['mae_score']:.4f}, "
              f"R2: {train_scores['pearson_r2_score']:.4f}")
        print(f"  Valid MAE: {valid_mae:.4f}, "
              f"R2: {valid_scores['pearson_r2_score']:.4f}")

    # Early stopping
    if valid_mae < best_valid_mae:
        best_valid_mae = valid_mae
        current_patience = 0
        # Persist the weights of the best model seen so far.
        torch.save(model.model.state_dict(), BEST_CHECKPOINT_PATH)
        best_checkpoint_saved = True
    else:
        current_patience += 1
        print(f"MAE did not decrease, current patience: {current_patience}")
        if current_patience >= patience:
            print(f"Early stopping at epoch {epoch_number}")
            break

# Restore the best weights for evaluation: without this the model keeps the
# weights of the last epoch, which by construction is not the best one
# whenever early stopping triggered. Guarded so a stale checkpoint from an
# earlier run is never loaded when this run saved nothing (e.g. NaN metrics).
if best_checkpoint_saved:
    model.model.load_state_dict(torch.load(BEST_CHECKPOINT_PATH, weights_only=True))
    print(f"Restored best model (valid MAE: {best_valid_mae:.4f})")

Base MAE
Valid_mae: 3.482545019443993
Training PagtnModel...
Epoch #1	
GPU Memory allocated: 0.02 GB
Valid_mae: 1.3255443662811253
Epoch 0:
  Train MAE: 1.3236, R2: 0.0001
  Valid MAE: 1.3255, R2: 0.0001
Epoch #2	
GPU Memory allocated: 0.02 GB


In [None]:
# Inspect the parameters of the underlying torch module.
# NOTE(review): this displays every tensor in the state dict; consider showing
# only names and shapes to keep the notebook output small.
model.model.state_dict()

In [93]:
# Release unused cached GPU memory held by PyTorch's caching allocator.
torch.cuda.empty_cache()