# Generation

In [1]:
from pard.parallel.utils import find_checkpoint_with_lowest_val_loss
from pard.parallel.task import AutoregressiveDiffusion, PredictBlockProperties

# GPU index; in this cell it is only used to build the 'cuda:<index>' map_location strings.
device = 0

# Checkpoint directories for the block-prediction model and the local-denoising (diffusion) model.
blocksize_model_dir = 'checkpoints/block_prediction/qm9.3hops.ppgnTrans-Parallel.BlockID11.bn.PreNorm=1.H256.E64.L8-lr0.0004.cosine'
diffusion_model_dir = 'checkpoints/local_denoising/qm9.3hops.ppgnTrans-Parallel.BlockID11.bn.PreNorm=1.H256.E64.L8-lr0.0004.cosine-ires1.blocktime0.uni_noise1.T20.cosine.vlb1.ce0.1.combine=False'
# Pick one checkpoint per directory using the helper imported from pard.parallel.utils.
# NOTE(review): presumably returns the path of the checkpoint with the lowest val_loss -- confirm in pard.
blocksize_path = find_checkpoint_with_lowest_val_loss(blocksize_model_dir)
diffusion_path = find_checkpoint_with_lowest_val_loss(diffusion_model_dir)

# Load both models from their checkpoints, mapping them to the CUDA device chosen above.
blocksize_model = PredictBlockProperties.load_from_checkpoint(blocksize_path, map_location=f'cuda:{device}')
# combine_training=False is passed as an extra keyword argument to load_from_checkpoint.
diffusion_model = AutoregressiveDiffusion.load_from_checkpoint(diffusion_path, combine_training=False, map_location=f'cuda:{device}')
# Attach the block-property model to the diffusion model.
# NOTE(review): presumably consulted by diffusion_model.generate() -- confirm in pard.parallel.task.
diffusion_model.blocksize_model = blocksize_model

node_noise: tensor([0.1667, 0.1667, 0.1667, 0.1667, 0.1667, 0.1667]), torch.Size([6])
edge_noise: tensor([0.2000, 0.2000, 0.2000, 0.2000, 0.2000]), torch.Size([5])


In [2]:
from pard.utils import check_block_id_train_vs_generation
# Generate with the diffusion model and move the result to the CPU.
# NOTE(review): the argument 100 is presumably the number of graphs to sample -- confirm.
generated_batch = diffusion_model.generate(100).to('cpu') 
# Run the block-id check on the generated nodes, edges and node block ids, with train_max_hops=3.
# NOTE(review): the result is later summed and averaged, so it is presumably one 0/1 (or bool) flag per graph -- confirm.
generated_list = check_block_id_train_vs_generation(generated_batch.nodes, generated_batch.edges, generated_batch.nodes_blockid, train_max_hops=3)

In [7]:
# Copy the results of the block-id check into a fresh list.
l = list(generated_list)
# Mean of the entries scaled by 100 (a percentage when the entries are 0/1 flags).
m = 100 * sum(l) / len(l)
print('test', m)

test 95.0


In [10]:
import logging
# `l` is not defined in this cell; it must already exist in the kernel
# (it is built from generated_list in the cell that prints 'test').
# Prints the mean of `l` scaled by 100.
print('Percentage of graphs that have the same generation block path as training block path:',
                100*sum(l) / len(l))

Percentage of graphs that have the same generation block path as training block path: 95.0


In [6]:
import os, re
def find_checkpoint_with_lowest_val_loss(directory):
    """Return the path of the checkpoint in ``directory`` with the lowest validation loss.

    Checkpoint files are recognised by an ``epoch=<E>-val_loss=<L>.ckpt``
    pattern in their filename; all other entries are ignored. For every
    recognised checkpoint one line is printed: the matched name followed by
    its validation loss.

    Args:
        directory: Directory to scan (non-recursively) for checkpoint files.

    Returns:
        ``os.path.join(directory, filename)`` for the checkpoint with the
        smallest ``val_loss``, or ``None`` when no filename matches. If several
        checkpoints share the smallest loss, the first one yielded by
        ``os.listdir`` is kept.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``directory`` cannot be read.
    """
    lowest_val_loss = float('inf')
    best_checkpoint = None
    # Group 1 is the epoch, group 2 is the validation loss.
    pattern = re.compile(r'epoch=([\d.]+)-val_loss=([\d.]+)\.ckpt')

    for filename in os.listdir(directory):
        match = pattern.search(filename)
        if not match:
            continue
        print(match.group(0), match.group(2))
        try:
            val_loss = float(match.group(2))
        except ValueError:
            # Text such as "1.2.3" satisfies the character class but is not a number.
            continue
        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            best_checkpoint = filename

    if best_checkpoint is None:
        return None
    return os.path.join(directory, best_checkpoint)
# Try the helper on the diffusion checkpoint directory; it prints one line per matching checkpoint file.
find_checkpoint_with_lowest_val_loss(diffusion_model_dir)

epoch=85-val_loss=2.042.ckpt 2.042
epoch=99-val_loss=2.049.ckpt 2.049
epoch=94-val_loss=2.048.ckpt 2.048
epoch=83-val_loss=2.047.ckpt 2.047
epoch=91-val_loss=2.046.ckpt 2.046


# Stats

In [None]:
from LocalDiffusion.dataset import DATA_INFO
# Choose which entry of DATA_INFO to inspect.
dataset = 'caveman'
data_info_dict = DATA_INFO[dataset]

# Optional entries; each is None when its key is absent.
atom_decoder = data_info_dict.get('atom_decoder', None)
metric_class = data_info_dict.get('metric_class', None)

# Build one dataset object per split: the registered class is called with its
# default arguments, with 'split' overridden for each split name.
original_datasets = {
    split_name: data_info_dict['class'](**(data_info_dict['default_args'] | {'split': split_name}))
    for split_name in ['train', 'val', 'test']
}

# Total number of items across the three splits.
print( sum(len(split_data) for split_data in original_datasets.values()))
# Per-item averages of num_nodes and num_edges over the training split.
print('num_nodes avg:', sum(item.num_nodes for item in original_datasets['train']) / len(original_datasets['train']))
print('num_edges avg:', sum(item.num_edges for item in original_datasets['train']) / len(original_datasets['train']))