In [1]:
import os
os.chdir('../')

In [None]:
import pandas as pd
import torch
import os
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from dataset import SmileDataset, SmileCollator
from tokenizer import SmilesTokenizer
from model import GPTConfig, GPT
import time
# from test_connet import reconstruct
from fragment_utils import reconstruct
from tqdm import tqdm
from utils.train_utils import seed_all
from tdc import Oracle
import numpy as np


def calculate_tanimoto_distance(fingerprint1, fingerprint2):
    """
    计算两个指纹之间的 Tanimoto 距离。
    """
    return 1 - DataStructs.TanimotoSimilarity(fingerprint1, fingerprint2)

def calculate_morgan_fingerprint(mol, radius=2, nBits=2048):
    """
    计算分子的 Morgan 指纹。
    Args:
        mol: RDKit 分子对象。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        分子指纹，或者如果分子无效则返回 None。
    """
    try:
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
        return fp
    except:
        return None



def calculate_diversity(molecules, radius=2, nBits=2048):
    """
    计算生成分子的多样性（平均成对 Tanimoto 距离）。
    Args:
        molecules: RDKit 分子对象的列表。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        多样性值。
    """
    fingerprints = []
    valid_molecules = []
    for mol in molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            fingerprints.append(fp)
            valid_molecules.append(mol)
    if not fingerprints:
        return 0.0  # 如果没有有效分子，返回 0.0
    n = len(fingerprints)
    total_distance = 0.0
    count = 0
    for i in range(n):
        for j in range(i + 1, n):
            distance = calculate_tanimoto_distance(fingerprints[i], fingerprints[j])
            total_distance += distance
            count += 1
    if count == 0:
        return 0.0
    return total_distance / count

def calculate_distance(generated_molecules, original_molecules, radius=2, nBits=2048):
    """
    计算生成分子与原始分子之间的平均 Tanimoto 距离。
    Args:
        generated_molecules: 生成的 RDKit 分子对象的列表。
        original_molecules: 原始 RDKit 分子对象的列表。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        平均距离值。
    """
    generated_fingerprints = []
    original_fingerprints = []
    # 计算生成分子的指纹
    for mol in generated_molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            generated_fingerprints.append(fp)
    # 计算原始分子的指纹
    for mol in original_molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            original_fingerprints.append(fp)
    if not generated_fingerprints or not original_fingerprints:
        return 0.0
    total_distance = 0.0
    count = 0
    for gen_fp in generated_fingerprints:
        for orig_fp in original_fingerprints:
            distance = calculate_tanimoto_distance(gen_fp, orig_fp)
            total_distance += distance
            count += 1
    if count == 0:
        return 0.0
    return total_distance / count


def cal_QED(smiles):
    oracle = Oracle(name = 'QED')
    return oracle(smiles)

def cal_SA(smiles):
    oracle = Oracle(name = 'SA')
    return oracle(smiles)

def cal_all(smiles):
    results = {}
    results['QED'] = cal_QED(smiles)
    results['SA'] = cal_SA(smiles)
    return results


def Test(model, smiles, tokenizer, max_seq_len, temperature, top_k, stream, rp, num_samples, kv_cache, is_simulation,
         device, scaffold=False, linker=False):
    complete_answer_list = []
    valid_answer_list = []
    model.eval()
    # place data on the correct device
    src_smiles = tokenizer.bos_token + smiles
    x = torch.tensor(tokenizer.encode(src_smiles, add_special_tokens=False), dtype=torch.long).unsqueeze(0)
    x = x.to(device)
    with torch.no_grad():
        res_y = model.generate(x, tokenizer, max_new_tokens=max_seq_len,
                               temperature=temperature, top_k=top_k, stream=stream, rp=rp, kv_cache=kv_cache,
                               is_simulation=is_simulation)
        try:
            y = next(res_y)
        except StopIteration:
            print("No answer")

        history_idx = 0
        complete_answer = f"{tokenizer.decode(x[0])}"  # 用于保存整个生成的句子

        while y != None:
            answer = tokenizer.decode(y[0].tolist())
            if answer and answer[-1] == '�':
                try:
                    y = next(res_y)
                except:
                    break
                continue
            if not len(answer):
                try:
                    y = next(res_y)
                except:
                    break
                continue

            # 保存生成的片段到完整回答中
            complete_answer += answer[history_idx:]

            try:
                y = next(res_y)
            except:
                break
            history_idx = len(answer)
            if not stream:
                break

        complete_answer = complete_answer.replace(" ", "").replace("[BOS]", "").replace("[EOS]", "")
        frag_list = complete_answer.replace(" ", "").split('[SEP]')
        try:
            if linker:
                last_frag = frag_list[0].split('.')[1]
                first_frag = frag_list[0].split('.')[0]
                frag_list[0] = first_frag
                frag_list[len(frag_list) - 1] = last_frag
            frag_mol = [Chem.MolFromSmiles(s) for s in frag_list]
            mol = reconstruct(frag_mol)[0]
            if type(mol) == list:
                mol = mol[0]
            if mol:
                generate_smiles = Chem.MolToSmiles(mol)
                valid_answer_list.append(generate_smiles)
                answer = frag_list
            else:
                answer = frag_list
        except:
            answer = frag_list
        complete_answer_list.append(answer)

    return complete_answer_list, valid_answer_list



def find_attachpoint(smile, rand_seed):
    mol = Chem.MolFromSmiles(smile)
    mol = Chem.AddHs(mol)
    attachment_points = []
    for atom in mol.GetAtoms():
        if atom.GetSymbol() == 'H':
            attachment_points.append(atom.GetIdx())  # 邻居原子的index
    np.random.seed(rand_seed)
    pos = np.random.choice(attachment_points)
    mol.GetAtomWithIdx(pos.item()).SetAtomicNum(0)
    mol = Chem.RemoveHs(mol)
    smile = Chem.MolToSmiles(mol)
    return smile


def main_superstructure():
    superstructure_lst = ['O=C(CNCCCc1ccccc1)N1CCC2(C1)SCCS2', 'O=S1(=O)NC(C2CC3C=CC2C3)Nc2ccccc21',
                          'c1ccc2c(c1)ncn2[C@@H]1CCCO1', 'C(#Cc1nn([C@H]2CCNC2)c2ncncc12)c1ccccc1',
                          'c1nc(-c2cnn(C3CNC3)c2)c2cc[nH]c2n1', 'c1cc2c(cc1CCCN1CCCC1)OCCO2', 'c1ccc(Oc2ccccc2)cc1',
                          'O=C1CCC[C@@H](CC[C@H]2CC=CC3=CCCC[C@@H]32)O1', 'c1ccc2c(-n3cnnc3)ccc(C3CC3)c2c1',
                          'c1ccc(Nc2ncnc3ccccc23)cc1']
    original_smiles = ['CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CC2(C[C@H]1C(=O)O)SCCS2',
                       'NS(=O)(=O)c1cc2c(cc1Cl)NC(C1CC3C=CC1C3)NS2(=O)=O',
                       'CC(C)Nc1nc2cc(Cl)c(Cl)cc2n1[C@H]1O[C@@H](CO)[C@H](O)[C@@H]1O',
                       'C=CC(=O)N1CC[C@H](n2nc(C#Cc3cc(OC)cc(OC)c3)c3c(N)ncnc32)C1',
                       'CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1',
                       'CCCCCCCC(=O)N[C@H](CN1CCCC1)[C@H](O)c1ccc2c(c1)OCCO2',
                       'N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O',
                       'CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21',
                       'O=C(O)CSc1nnc(Br)n1-c1ccc(C2CC2)c2ccccc12', 'C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1']

    # 设置随机种子的值
    seed_value = 42
    seed_all(seed_value)
    # device = torch.device(f'cuda:{0}')  # 逻辑编号 cuda:0 对应 os.environ["CUDA_VISIBLE_DEVICES"]中的第一个gpu
    device = 'cuda:7'
    batch_size = 1

    test_names = "test"

    tokenizer = SmilesTokenizer('./vocabs/vocab.txt')
    tokenizer.bos_token = "[BOS]"
    tokenizer.bos_token_id = tokenizer.convert_tokens_to_ids("[BOS]")
    tokenizer.eos_token = "[EOS]"
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("[EOS]")

    mconf = GPTConfig(vocab_size=tokenizer.vocab_size, n_layer=12, n_head=12, n_embd=768)
    model = GPT(mconf).to(device)
    checkpoint = torch.load(f'./weights/fragpt.pt', weights_only=True)
    model.load_state_dict(checkpoint)

    start_time = time.time()
    valid_ratio_sum = 0
    uniqueness_sum = 0
    quality_sum = 0
    sa_sum = 0
    qed_sum = 0
    div_sum = 0
    dist_sum = 0
    for i in superstructure_lst:
        complete_answer_list, valid_answer_list = [], []
        for j in tqdm(range(100)):
            smile = find_attachpoint(i, seed_value + j)
            smile = smile + '[SEP]'
            l1, l2 = Test(model, smile, tokenizer, max_seq_len=512, temperature=1.2, top_k=8, stream=False, rp=1., num_samples=1,
                 kv_cache=True, is_simulation=True, device=device)
            if (len(l2) != 0):
                valid_answer_list.append(l2[0])
            if (len(l1) != 0):
                complete_answer_list.append(l1[0])
        unique_smiles = set(smile for smile in valid_answer_list if smile is not None)
        unique_smiles_lst = list(unique_smiles)
        num_unique_molecules = len(unique_smiles)
        uniqueness = num_unique_molecules / len(valid_answer_list)
        valid_ratio = len(valid_answer_list) / 100
        results = cal_all(unique_smiles_lst)
        SA_score = 0
        QED_score = 0
        sum = 0
        for k in range(len(unique_smiles_lst)):
            SA_score += results['SA'][k]
            QED_score += results['QED'][k]
            if (results['QED'][k] >= 0.6 and results['SA'][k] <= 4):
                sum += 1

        generated_molecules = [Chem.MolFromSmiles(s) for s in valid_answer_list]
        original_molecules = [Chem.MolFromSmiles(s) for s in original_smiles]
        # 计算多样性
        diversity = calculate_diversity(generated_molecules)
        # 计算距离
        distance = calculate_distance(generated_molecules, original_molecules)

        print('valid_ratio:', valid_ratio, 'uniqueness:', uniqueness, 'Quality:', sum / 100, 'SA:',
              SA_score / len(unique_smiles_lst), 'QED:', QED_score / len(unique_smiles_lst), 'diversity:', diversity,
              'distance:', distance)
        valid_ratio_sum += valid_ratio
        uniqueness_sum += uniqueness
        quality_sum += sum / len(unique_smiles_lst)
        sa_sum += SA_score / len(unique_smiles_lst)
        qed_sum += QED_score / len(unique_smiles_lst)
        div_sum += diversity
        dist_sum += distance
    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"运行时间: {elapsed_time:.4f} 秒")
    print(f"valid_ratio_avg: {valid_ratio_sum / len(superstructure_lst)}, uniqueness_avg: {uniqueness_sum / len(superstructure_lst)}, "
          f"quality_avg: {quality_sum / len(superstructure_lst)}, sa_avg: {sa_sum / len(superstructure_lst)}, "
          f"qed_avg: {qed_sum / len(superstructure_lst)}, div_avg: {div_sum / len(superstructure_lst)}, dist_avg: {dist_sum / len(superstructure_lst)}")



if __name__ == '__main__':
    main_superstructure()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.18it/s]
Found local copy...


valid_ratio: 1.0 uniqueness: 0.69 Quality: 0.22 SA: 3.8379197287102413 QED: 0.4997894933063715 diversity: 0.5480706960687286 distance: 0.8735139539028606


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.14it/s]


valid_ratio: 1.0 uniqueness: 0.95 Quality: 0.0 SA: 5.14677684337114 QED: 0.6666436633835432 diversity: 0.6580285107609918 distance: 0.8930752580909622


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:26<00:00,  3.78it/s]


valid_ratio: 0.99 uniqueness: 1.0 Quality: 0.55 SA: 3.595481895778446 QED: 0.647908081944031 diversity: 0.7114193022408686 distance: 0.9053882986878461


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.50it/s]


valid_ratio: 0.99 uniqueness: 0.9393939393939394 Quality: 0.25 SA: 3.7426842270249954 QED: 0.5356789042516771 diversity: 0.5438682835005504 distance: 0.8966918899491104


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:21<00:00,  4.72it/s]


valid_ratio: 1.0 uniqueness: 0.97 Quality: 0.27 SA: 3.683541233537134 QED: 0.5723446130476384 diversity: 0.6152910941623722 distance: 0.8885749772526038


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:22<00:00,  4.52it/s]


valid_ratio: 0.98 uniqueness: 0.9183673469387755 Quality: 0.71 SA: 3.050596636013746 QED: 0.6907760138616539 diversity: 0.6465614472346036 distance: 0.894426877558846


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:31<00:00,  3.16it/s]


valid_ratio: 0.98 uniqueness: 0.9897959183673469 Quality: 0.46 SA: 2.539938723989491 QED: 0.5384369666282567 diversity: 0.7579483250084863 distance: 0.9072289200305217


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:22<00:00,  4.51it/s]


valid_ratio: 0.97 uniqueness: 0.979381443298969 Quality: 0.01 SA: 4.388626896790038 QED: 0.5506581191177119 diversity: 0.6158547824894722 distance: 0.9071059109073716


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:22<00:00,  4.50it/s]


valid_ratio: 0.98 uniqueness: 0.9693877551020408 Quality: 0.42 SA: 3.2092201604104402 QED: 0.5563615863782212 diversity: 0.6748259847601723 distance: 0.8981347812775996


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.82it/s]


valid_ratio: 1.0 uniqueness: 0.95 Quality: 0.4 SA: 2.523109255922116 QED: 0.5653138219684992 diversity: 0.6868625118826751 distance: 0.89368066266626
运行时间: 227.4454 秒
valid_ratio_avg: 0.9890000000000001, uniqueness_avg: 0.935632640310107, quality_avg: 0.35583637585696093, sa_avg: 3.571789560154779, qed_avg: 0.5823911263887603, div_avg: 0.6458730938108921, dist_avg: 0.8957821530323982


In [None]:
import pandas as pd
import torch
import os
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from dataset import SmileDataset, SmileCollator
from tokenizer import SmilesTokenizer
from model import GPTConfig, GPT
import time
# from test_connet import reconstruct
from fragment_utils import reconstruct
from tqdm import tqdm
from utils.train_utils import seed_all
from tdc import Oracle
import numpy as np


def calculate_tanimoto_distance(fingerprint1, fingerprint2):
    """
    计算两个指纹之间的 Tanimoto 距离。
    """
    return 1 - DataStructs.TanimotoSimilarity(fingerprint1, fingerprint2)

def calculate_morgan_fingerprint(mol, radius=2, nBits=2048):
    """
    计算分子的 Morgan 指纹。
    Args:
        mol: RDKit 分子对象。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        分子指纹，或者如果分子无效则返回 None。
    """
    try:
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
        return fp
    except:
        return None



def calculate_diversity(molecules, radius=2, nBits=2048):
    """
    计算生成分子的多样性（平均成对 Tanimoto 距离）。
    Args:
        molecules: RDKit 分子对象的列表。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        多样性值。
    """
    fingerprints = []
    valid_molecules = []
    for mol in molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            fingerprints.append(fp)
            valid_molecules.append(mol)
    if not fingerprints:
        return 0.0  # 如果没有有效分子，返回 0.0
    n = len(fingerprints)
    total_distance = 0.0
    count = 0
    for i in range(n):
        for j in range(i + 1, n):
            distance = calculate_tanimoto_distance(fingerprints[i], fingerprints[j])
            total_distance += distance
            count += 1
    if count == 0:
        return 0.0
    return total_distance / count

def calculate_distance(generated_molecules, original_molecules, radius=2, nBits=2048):
    """
    计算生成分子与原始分子之间的平均 Tanimoto 距离。
    Args:
        generated_molecules: 生成的 RDKit 分子对象的列表。
        original_molecules: 原始 RDKit 分子对象的列表。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        平均距离值。
    """
    generated_fingerprints = []
    original_fingerprints = []
    # 计算生成分子的指纹
    for mol in generated_molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            generated_fingerprints.append(fp)
    # 计算原始分子的指纹
    for mol in original_molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            original_fingerprints.append(fp)
    if not generated_fingerprints or not original_fingerprints:
        return 0.0
    total_distance = 0.0
    count = 0
    for gen_fp in generated_fingerprints:
        for orig_fp in original_fingerprints:
            distance = calculate_tanimoto_distance(gen_fp, orig_fp)
            total_distance += distance
            count += 1
    if count == 0:
        return 0.0
    return total_distance / count


def cal_QED(smiles):
    oracle = Oracle(name = 'QED')
    return oracle(smiles)

def cal_SA(smiles):
    oracle = Oracle(name = 'SA')
    return oracle(smiles)

def cal_all(smiles):
    results = {}
    results['QED'] = cal_QED(smiles)
    results['SA'] = cal_SA(smiles)
    return results


def Test(model, smiles, tokenizer, max_seq_len, temperature, top_k, stream, rp, num_samples, kv_cache, is_simulation,
         device, scaffold=False, linker=False):
    complete_answer_list = []
    valid_answer_list = []
    model.eval()
    # place data on the correct device
    src_smiles = tokenizer.bos_token + smiles
    x = torch.tensor(tokenizer.encode(src_smiles, add_special_tokens=False), dtype=torch.long).unsqueeze(0)
    x = x.to(device)
    with torch.no_grad():
        res_y = model.generate(x, tokenizer, max_new_tokens=max_seq_len,
                               temperature=temperature, top_k=top_k, stream=stream, rp=rp, kv_cache=kv_cache,
                               is_simulation=is_simulation)
        try:
            y = next(res_y)
        except StopIteration:
            print("No answer")

        history_idx = 0
        complete_answer = f"{tokenizer.decode(x[0])}"  # 用于保存整个生成的句子

        while y != None:
            answer = tokenizer.decode(y[0].tolist())
            if answer and answer[-1] == '�':
                try:
                    y = next(res_y)
                except:
                    break
                continue
            if not len(answer):
                try:
                    y = next(res_y)
                except:
                    break
                continue

            # 保存生成的片段到完整回答中
            complete_answer += answer[history_idx:]

            try:
                y = next(res_y)
            except:
                break
            history_idx = len(answer)
            if not stream:
                break

        complete_answer = complete_answer.replace(" ", "").replace("[BOS]", "").replace("[EOS]", "")
        frag_list = complete_answer.replace(" ", "").split('[SEP]')
        try:
            if linker:
                last_frag = frag_list[0].split('.')[1]
                first_frag = frag_list[0].split('.')[0]
                frag_list[0] = first_frag
                frag_list[len(frag_list) - 1] = last_frag
            frag_mol = [Chem.MolFromSmiles(s) for s in frag_list]
            mol = reconstruct(frag_mol)[0]
            if type(mol) == list:
                mol = mol[0]
            if mol:
                generate_smiles = Chem.MolToSmiles(mol)
                valid_answer_list.append(generate_smiles)
                answer = frag_list
            else:
                answer = frag_list
        except:
            answer = frag_list
        complete_answer_list.append(answer)

    return complete_answer_list, valid_answer_list



def find_attachpoint(smile, rand_seed):
    mol = Chem.MolFromSmiles(smile)
    mol = Chem.AddHs(mol)
    attachment_points = []
    for atom in mol.GetAtoms():
        if atom.GetSymbol() == 'H':
            attachment_points.append(atom.GetIdx())  # 邻居原子的index
    np.random.seed(rand_seed)
    pos = np.random.choice(attachment_points)
    mol.GetAtomWithIdx(pos.item()).SetAtomicNum(0)
    mol = Chem.RemoveHs(mol)
    smile = Chem.MolToSmiles(mol)
    return smile


def main_superstructure():
    superstructure_lst = ['O=C(CNCCCc1ccccc1)N1CCC2(C1)SCCS2', 'O=S1(=O)NC(C2CC3C=CC2C3)Nc2ccccc21',
                          'c1ccc2c(c1)ncn2[C@@H]1CCCO1', 'C(#Cc1nn([C@H]2CCNC2)c2ncncc12)c1ccccc1',
                          'c1nc(-c2cnn(C3CNC3)c2)c2cc[nH]c2n1', 'c1cc2c(cc1CCCN1CCCC1)OCCO2', 'c1ccc(Oc2ccccc2)cc1',
                          'O=C1CCC[C@@H](CC[C@H]2CC=CC3=CCCC[C@@H]32)O1', 'c1ccc2c(-n3cnnc3)ccc(C3CC3)c2c1',
                          'c1ccc(Nc2ncnc3ccccc23)cc1']
    original_smiles = ['CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CC2(C[C@H]1C(=O)O)SCCS2',
                       'NS(=O)(=O)c1cc2c(cc1Cl)NC(C1CC3C=CC1C3)NS2(=O)=O',
                       'CC(C)Nc1nc2cc(Cl)c(Cl)cc2n1[C@H]1O[C@@H](CO)[C@H](O)[C@@H]1O',
                       'C=CC(=O)N1CC[C@H](n2nc(C#Cc3cc(OC)cc(OC)c3)c3c(N)ncnc32)C1',
                       'CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1',
                       'CCCCCCCC(=O)N[C@H](CN1CCCC1)[C@H](O)c1ccc2c(c1)OCCO2',
                       'N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O',
                       'CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21',
                       'O=C(O)CSc1nnc(Br)n1-c1ccc(C2CC2)c2ccccc12', 'C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1']

    # 设置随机种子的值
    seed_value = 43
    seed_all(seed_value)
    # device = torch.device(f'cuda:{0}')  # 逻辑编号 cuda:0 对应 os.environ["CUDA_VISIBLE_DEVICES"]中的第一个gpu
    device = 'cuda:7'
    batch_size = 1

    test_names = "test"

    tokenizer = SmilesTokenizer('./vocabs/vocab.txt')
    tokenizer.bos_token = "[BOS]"
    tokenizer.bos_token_id = tokenizer.convert_tokens_to_ids("[BOS]")
    tokenizer.eos_token = "[EOS]"
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("[EOS]")

    mconf = GPTConfig(vocab_size=tokenizer.vocab_size, n_layer=12, n_head=12, n_embd=768)
    model = GPT(mconf).to(device)
    checkpoint = torch.load(f'./weights/fragpt.pt', weights_only=True)
    model.load_state_dict(checkpoint)

    start_time = time.time()
    valid_ratio_sum = 0
    uniqueness_sum = 0
    quality_sum = 0
    sa_sum = 0
    qed_sum = 0
    div_sum = 0
    dist_sum = 0
    for i in superstructure_lst:
        complete_answer_list, valid_answer_list = [], []
        for j in tqdm(range(100)):
            smile = find_attachpoint(i, seed_value + j)
            smile = smile + '[SEP]'
            l1, l2 = Test(model, smile, tokenizer, max_seq_len=512, temperature=1.2, top_k=8, stream=False, rp=1., num_samples=1,
                 kv_cache=True, is_simulation=True, device=device)
            if (len(l2) != 0):
                valid_answer_list.append(l2[0])
            if (len(l1) != 0):
                complete_answer_list.append(l1[0])
        unique_smiles = set(smile for smile in valid_answer_list if smile is not None)
        unique_smiles_lst = list(unique_smiles)
        num_unique_molecules = len(unique_smiles)
        uniqueness = num_unique_molecules / len(valid_answer_list)
        valid_ratio = len(valid_answer_list) / 100
        results = cal_all(unique_smiles_lst)
        SA_score = 0
        QED_score = 0
        sum = 0
        for k in range(len(unique_smiles_lst)):
            SA_score += results['SA'][k]
            QED_score += results['QED'][k]
            if (results['QED'][k] >= 0.6 and results['SA'][k] <= 4):
                sum += 1

        generated_molecules = [Chem.MolFromSmiles(s) for s in valid_answer_list]
        original_molecules = [Chem.MolFromSmiles(s) for s in original_smiles]
        # 计算多样性
        diversity = calculate_diversity(generated_molecules)
        # 计算距离
        distance = calculate_distance(generated_molecules, original_molecules)

        print('valid_ratio:', valid_ratio, 'uniqueness:', uniqueness, 'Quality:', sum / 100, 'SA:',
              SA_score / len(unique_smiles_lst), 'QED:', QED_score / len(unique_smiles_lst), 'diversity:', diversity,
              'distance:', distance)
        valid_ratio_sum += valid_ratio
        uniqueness_sum += uniqueness
        quality_sum += sum / len(unique_smiles_lst)
        sa_sum += SA_score / len(unique_smiles_lst)
        qed_sum += QED_score / len(unique_smiles_lst)
        div_sum += diversity
        dist_sum += distance
    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"运行时间: {elapsed_time:.4f} 秒")
    print(f"valid_ratio_avg: {valid_ratio_sum / len(superstructure_lst)}, uniqueness_avg: {uniqueness_sum / len(superstructure_lst)}, "
          f"quality_avg: {quality_sum / len(superstructure_lst)}, sa_avg: {sa_sum / len(superstructure_lst)}, "
          f"qed_avg: {qed_sum / len(superstructure_lst)}, div_avg: {div_sum / len(superstructure_lst)}, dist_avg: {dist_sum / len(superstructure_lst)}")



if __name__ == '__main__':
    main_superstructure()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:21<00:00,  4.71it/s]


valid_ratio: 1.0 uniqueness: 0.84 Quality: 0.28 SA: 3.926061107805622 QED: 0.5041290886319079 diversity: 0.5706620244720236 distance: 0.8747452352783808


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.86it/s]


valid_ratio: 1.0 uniqueness: 0.94 Quality: 0.0 SA: 5.130093755052577 QED: 0.642136899902584 diversity: 0.6590606278055257 distance: 0.8924219428595098


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:26<00:00,  3.72it/s]


valid_ratio: 0.96 uniqueness: 0.9583333333333334 Quality: 0.63 SA: 3.5155973335524573 QED: 0.7207808835703587 diversity: 0.6868546297155598 distance: 0.9034045927813665


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.76it/s]


valid_ratio: 0.96 uniqueness: 0.875 Quality: 0.21 SA: 3.7834545028705486 QED: 0.533092843849733 diversity: 0.5455790831664212 distance: 0.8979386007255908


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.33it/s]


valid_ratio: 0.99 uniqueness: 0.9292929292929293 Quality: 0.41 SA: 3.6101915450749797 QED: 0.599221480579021 diversity: 0.5952168846801097 distance: 0.8865976831798097


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.92it/s]


valid_ratio: 0.97 uniqueness: 0.9278350515463918 Quality: 0.73 SA: 2.984574345033139 QED: 0.7265629075912278 diversity: 0.6544885755991243 distance: 0.8944534344005896


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:31<00:00,  3.19it/s]


valid_ratio: 0.97 uniqueness: 1.0 Quality: 0.45 SA: 2.5963583070734813 QED: 0.5531386844580409 diversity: 0.7630604674425614 distance: 0.9070473636108072


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:21<00:00,  4.69it/s]


valid_ratio: 0.98 uniqueness: 0.8775510204081632 Quality: 0.04 SA: 4.390710488871326 QED: 0.5943433869597158 diversity: 0.5970339289634856 distance: 0.9051452919805828


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:26<00:00,  3.83it/s]


valid_ratio: 0.98 uniqueness: 0.9693877551020408 Quality: 0.31 SA: 3.188406192980935 QED: 0.5233119731525546 diversity: 0.6802708742705734 distance: 0.899693409340918


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:21<00:00,  4.58it/s]


valid_ratio: 1.0 uniqueness: 0.98 Quality: 0.49 SA: 2.4292046103088687 QED: 0.5770063592333263 diversity: 0.6817486115053875 distance: 0.8909467935058241
运行时间: 232.7570 秒
valid_ratio_avg: 0.9810000000000001, uniqueness_avg: 0.929740008968286, quality_avg: 0.38616241702069976, sa_avg: 3.5554652188623934, qed_avg: 0.5973724507928471, div_avg: 0.6433975707620772, dist_avg: 0.8952394347663379


In [None]:
import pandas as pd
import torch
import os
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from dataset import SmileDataset, SmileCollator
from tokenizer import SmilesTokenizer
from model import GPTConfig, GPT
import time
# from test_connet import reconstruct
from fragment_utils import reconstruct
from tqdm import tqdm
from utils.train_utils import seed_all
from tdc import Oracle
import numpy as np


def calculate_tanimoto_distance(fingerprint1, fingerprint2):
    """
    计算两个指纹之间的 Tanimoto 距离。
    """
    return 1 - DataStructs.TanimotoSimilarity(fingerprint1, fingerprint2)

def calculate_morgan_fingerprint(mol, radius=2, nBits=2048):
    """
    计算分子的 Morgan 指纹。
    Args:
        mol: RDKit 分子对象。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        分子指纹，或者如果分子无效则返回 None。
    """
    try:
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
        return fp
    except:
        return None



def calculate_diversity(molecules, radius=2, nBits=2048):
    """
    计算生成分子的多样性（平均成对 Tanimoto 距离）。
    Args:
        molecules: RDKit 分子对象的列表。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        多样性值。
    """
    fingerprints = []
    valid_molecules = []
    for mol in molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            fingerprints.append(fp)
            valid_molecules.append(mol)
    if not fingerprints:
        return 0.0  # 如果没有有效分子，返回 0.0
    n = len(fingerprints)
    total_distance = 0.0
    count = 0
    for i in range(n):
        for j in range(i + 1, n):
            distance = calculate_tanimoto_distance(fingerprints[i], fingerprints[j])
            total_distance += distance
            count += 1
    if count == 0:
        return 0.0
    return total_distance / count

def calculate_distance(generated_molecules, original_molecules, radius=2, nBits=2048):
    """
    计算生成分子与原始分子之间的平均 Tanimoto 距离。
    Args:
        generated_molecules: 生成的 RDKit 分子对象的列表。
        original_molecules: 原始 RDKit 分子对象的列表。
        radius: Morgan 指纹的半径。
        nBits: 指纹的位数。
    Returns:
        平均距离值。
    """
    generated_fingerprints = []
    original_fingerprints = []
    # 计算生成分子的指纹
    for mol in generated_molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            generated_fingerprints.append(fp)
    # 计算原始分子的指纹
    for mol in original_molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            original_fingerprints.append(fp)
    if not generated_fingerprints or not original_fingerprints:
        return 0.0
    total_distance = 0.0
    count = 0
    for gen_fp in generated_fingerprints:
        for orig_fp in original_fingerprints:
            distance = calculate_tanimoto_distance(gen_fp, orig_fp)
            total_distance += distance
            count += 1
    if count == 0:
        return 0.0
    return total_distance / count


def cal_QED(smiles):
    oracle = Oracle(name = 'QED')
    return oracle(smiles)

def cal_SA(smiles):
    oracle = Oracle(name = 'SA')
    return oracle(smiles)

def cal_all(smiles):
    results = {}
    results['QED'] = cal_QED(smiles)
    results['SA'] = cal_SA(smiles)
    return results


def Test(model, smiles, tokenizer, max_seq_len, temperature, top_k, stream, rp, num_samples, kv_cache, is_simulation,
         device, scaffold=False, linker=False):
    complete_answer_list = []
    valid_answer_list = []
    model.eval()
    # place data on the correct device
    src_smiles = tokenizer.bos_token + smiles
    x = torch.tensor(tokenizer.encode(src_smiles, add_special_tokens=False), dtype=torch.long).unsqueeze(0)
    x = x.to(device)
    with torch.no_grad():
        res_y = model.generate(x, tokenizer, max_new_tokens=max_seq_len,
                               temperature=temperature, top_k=top_k, stream=stream, rp=rp, kv_cache=kv_cache,
                               is_simulation=is_simulation)
        try:
            y = next(res_y)
        except StopIteration:
            print("No answer")

        history_idx = 0
        complete_answer = f"{tokenizer.decode(x[0])}"  # 用于保存整个生成的句子

        while y != None:
            answer = tokenizer.decode(y[0].tolist())
            if answer and answer[-1] == '�':
                try:
                    y = next(res_y)
                except:
                    break
                continue
            if not len(answer):
                try:
                    y = next(res_y)
                except:
                    break
                continue

            # 保存生成的片段到完整回答中
            complete_answer += answer[history_idx:]

            try:
                y = next(res_y)
            except:
                break
            history_idx = len(answer)
            if not stream:
                break

        complete_answer = complete_answer.replace(" ", "").replace("[BOS]", "").replace("[EOS]", "")
        frag_list = complete_answer.replace(" ", "").split('[SEP]')
        try:
            if linker:
                last_frag = frag_list[0].split('.')[1]
                first_frag = frag_list[0].split('.')[0]
                frag_list[0] = first_frag
                frag_list[len(frag_list) - 1] = last_frag
            frag_mol = [Chem.MolFromSmiles(s) for s in frag_list]
            mol = reconstruct(frag_mol)[0]
            if type(mol) == list:
                mol = mol[0]
            if mol:
                generate_smiles = Chem.MolToSmiles(mol)
                valid_answer_list.append(generate_smiles)
                answer = frag_list
            else:
                answer = frag_list
        except:
            answer = frag_list
        complete_answer_list.append(answer)

    return complete_answer_list, valid_answer_list



def find_attachpoint(smile, rand_seed):
    mol = Chem.MolFromSmiles(smile)
    mol = Chem.AddHs(mol)
    attachment_points = []
    for atom in mol.GetAtoms():
        if atom.GetSymbol() == 'H':
            attachment_points.append(atom.GetIdx())  # 邻居原子的index
    np.random.seed(rand_seed)
    pos = np.random.choice(attachment_points)
    mol.GetAtomWithIdx(pos.item()).SetAtomicNum(0)
    mol = Chem.RemoveHs(mol)
    smile = Chem.MolToSmiles(mol)
    return smile


def main_superstructure():
    superstructure_lst = ['O=C(CNCCCc1ccccc1)N1CCC2(C1)SCCS2', 'O=S1(=O)NC(C2CC3C=CC2C3)Nc2ccccc21',
                          'c1ccc2c(c1)ncn2[C@@H]1CCCO1', 'C(#Cc1nn([C@H]2CCNC2)c2ncncc12)c1ccccc1',
                          'c1nc(-c2cnn(C3CNC3)c2)c2cc[nH]c2n1', 'c1cc2c(cc1CCCN1CCCC1)OCCO2', 'c1ccc(Oc2ccccc2)cc1',
                          'O=C1CCC[C@@H](CC[C@H]2CC=CC3=CCCC[C@@H]32)O1', 'c1ccc2c(-n3cnnc3)ccc(C3CC3)c2c1',
                          'c1ccc(Nc2ncnc3ccccc23)cc1']
    original_smiles = ['CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CC2(C[C@H]1C(=O)O)SCCS2',
                       'NS(=O)(=O)c1cc2c(cc1Cl)NC(C1CC3C=CC1C3)NS2(=O)=O',
                       'CC(C)Nc1nc2cc(Cl)c(Cl)cc2n1[C@H]1O[C@@H](CO)[C@H](O)[C@@H]1O',
                       'C=CC(=O)N1CC[C@H](n2nc(C#Cc3cc(OC)cc(OC)c3)c3c(N)ncnc32)C1',
                       'CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1',
                       'CCCCCCCC(=O)N[C@H](CN1CCCC1)[C@H](O)c1ccc2c(c1)OCCO2',
                       'N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O',
                       'CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21',
                       'O=C(O)CSc1nnc(Br)n1-c1ccc(C2CC2)c2ccccc12', 'C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1']

    # 设置随机种子的值
    seed_value = 44
    seed_all(seed_value)
    # device = torch.device(f'cuda:{0}')  # 逻辑编号 cuda:0 对应 os.environ["CUDA_VISIBLE_DEVICES"]中的第一个gpu
    device = 'cuda:7'
    batch_size = 1

    test_names = "test"

    tokenizer = SmilesTokenizer('./vocabs/vocab.txt')
    tokenizer.bos_token = "[BOS]"
    tokenizer.bos_token_id = tokenizer.convert_tokens_to_ids("[BOS]")
    tokenizer.eos_token = "[EOS]"
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("[EOS]")

    mconf = GPTConfig(vocab_size=tokenizer.vocab_size, n_layer=12, n_head=12, n_embd=768)
    model = GPT(mconf).to(device)
    checkpoint = torch.load(f'./weights/fragpt.pt', weights_only=True)
    model.load_state_dict(checkpoint)

    start_time = time.time()
    valid_ratio_sum = 0
    uniqueness_sum = 0
    quality_sum = 0
    sa_sum = 0
    qed_sum = 0
    div_sum = 0
    dist_sum = 0
    for i in superstructure_lst:
        complete_answer_list, valid_answer_list = [], []
        for j in tqdm(range(100)):
            smile = find_attachpoint(i, seed_value + j)
            smile = smile + '[SEP]'
            l1, l2 = Test(model, smile, tokenizer, max_seq_len=512, temperature=1.2, top_k=8, stream=False, rp=1., num_samples=1,
                 kv_cache=True, is_simulation=True, device=device)
            if (len(l2) != 0):
                valid_answer_list.append(l2[0])
            if (len(l1) != 0):
                complete_answer_list.append(l1[0])
        unique_smiles = set(smile for smile in valid_answer_list if smile is not None)
        unique_smiles_lst = list(unique_smiles)
        num_unique_molecules = len(unique_smiles)
        uniqueness = num_unique_molecules / len(valid_answer_list)
        valid_ratio = len(valid_answer_list) / 100
        results = cal_all(unique_smiles_lst)
        SA_score = 0
        QED_score = 0
        sum = 0
        for k in range(len(unique_smiles_lst)):
            SA_score += results['SA'][k]
            QED_score += results['QED'][k]
            if (results['QED'][k] >= 0.6 and results['SA'][k] <= 4):
                sum += 1

        generated_molecules = [Chem.MolFromSmiles(s) for s in valid_answer_list]
        original_molecules = [Chem.MolFromSmiles(s) for s in original_smiles]
        # 计算多样性
        diversity = calculate_diversity(generated_molecules)
        # 计算距离
        distance = calculate_distance(generated_molecules, original_molecules)

        print('valid_ratio:', valid_ratio, 'uniqueness:', uniqueness, 'Quality:', sum / 100, 'SA:',
              SA_score / len(unique_smiles_lst), 'QED:', QED_score / len(unique_smiles_lst), 'diversity:', diversity,
              'distance:', distance)
        valid_ratio_sum += valid_ratio
        uniqueness_sum += uniqueness
        quality_sum += sum / len(unique_smiles_lst)
        sa_sum += SA_score / len(unique_smiles_lst)
        qed_sum += QED_score / len(unique_smiles_lst)
        div_sum += diversity
        dist_sum += distance
    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"运行时间: {elapsed_time:.4f} 秒")
    print(f"valid_ratio_avg: {valid_ratio_sum / len(superstructure_lst)}, uniqueness_avg: {uniqueness_sum / len(superstructure_lst)}, "
          f"quality_avg: {quality_sum / len(superstructure_lst)}, sa_avg: {sa_sum / len(superstructure_lst)}, "
          f"qed_avg: {qed_sum / len(superstructure_lst)}, div_avg: {div_sum / len(superstructure_lst)}, dist_avg: {dist_sum / len(superstructure_lst)}")



if __name__ == '__main__':
    main_superstructure()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.78it/s]


valid_ratio: 0.99 uniqueness: 0.8080808080808081 Quality: 0.3 SA: 3.9261760756963384 QED: 0.4966560550056919 diversity: 0.5630825420750278 distance: 0.8747568350043385


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:18<00:00,  5.37it/s]


valid_ratio: 1.0 uniqueness: 0.91 Quality: 0.0 SA: 5.167447800590084 QED: 0.6593969908773455 diversity: 0.6534706948512542 distance: 0.8939879265430623


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:26<00:00,  3.79it/s]


valid_ratio: 0.99 uniqueness: 0.9494949494949495 Quality: 0.61 SA: 3.6104285128787565 QED: 0.672691360668639 diversity: 0.6992488498662985 distance: 0.9063046669642976


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.82it/s]


valid_ratio: 0.99 uniqueness: 0.898989898989899 Quality: 0.22 SA: 3.760372902514527 QED: 0.5163998115186623 diversity: 0.5430827878743105 distance: 0.8979439743020744


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.17it/s]


valid_ratio: 0.99 uniqueness: 0.9393939393939394 Quality: 0.42 SA: 3.6353607590146275 QED: 0.5990907403078102 diversity: 0.6035053713263727 distance: 0.8881539260142232


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.94it/s]


valid_ratio: 0.98 uniqueness: 0.8877551020408163 Quality: 0.71 SA: 3.0111937929694643 QED: 0.7260428295607597 diversity: 0.6407096526733579 distance: 0.8942128364320591


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:39<00:00,  2.51it/s]


valid_ratio: 0.95 uniqueness: 0.9894736842105263 Quality: 0.5 SA: 2.539030429669378 QED: 0.5788666270196028 diversity: 0.7584040127426418 distance: 0.905663012043347


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:24<00:00,  4.10it/s]


valid_ratio: 0.99 uniqueness: 0.9191919191919192 Quality: 0.01 SA: 4.486721691504176 QED: 0.5491572511096158 diversity: 0.6208503726717612 distance: 0.9060038548509949


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:23<00:00,  4.22it/s]


valid_ratio: 0.99 uniqueness: 0.98989898989899 Quality: 0.35 SA: 3.2619002983690284 QED: 0.5354046720307907 diversity: 0.6837237325363382 distance: 0.8992278920379111


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:24<00:00,  4.08it/s]


valid_ratio: 1.0 uniqueness: 0.98 Quality: 0.41 SA: 2.5606828314740637 QED: 0.5419114315011313 diversity: 0.6955751458409175 distance: 0.8937956653751039
运行时间: 241.9880 秒
valid_ratio_avg: 0.9869999999999999, uniqueness_avg: 0.9272279291301849, quality_avg: 0.3857246147385181, sa_avg: 3.595931509468044, qed_avg: 0.587561776960005, div_avg: 0.646165316245828, dist_avg: 0.8960050589567412
