In [3]:
# Move the working directory one level up; the relative paths used later in
# this notebook ('./vocabs/vocab.txt', './weights/fragpt.pt') are resolved
# against it (presumably the repository root -- verify for your checkout).
import os
os.chdir('../')

In [None]:
import torch
from utils.train_utils import seed_all
import argparse
from tokenizer import SmilesTokenizer
from model import GPTConfig, GPT
import time
from fragment_utils import reconstruct, reconstruct_d, reconstruct_scaffold1
from torch.nn import functional as F
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from tdc import Oracle
import math
import multiprocessing
import random
from tqdm import tqdm


def Test1(model, smiles, tokenizer, max_seq_len, temperature, top_k, stream, rp, num_samples, kv_cache, is_simulation,
         device, scaffold=False, linker=False, dummy_lst=None):
    """
    Generate fragments for a prompt and try to assemble them into a molecule.

    The prompt is ``tokenizer.bos_token + smiles``. The text produced by
    ``model.generate`` is appended to the decoded prompt, spaces and the
    ``[BOS]``/``[EOS]`` markers are stripped, and the result is split on
    ``[SEP]`` into a fragment list that is handed to ``reconstruct_scaffold1``.

    Args:
        model: Model exposing ``eval()`` and a ``generate(...)`` generator.
        smiles: Prompt text appended after the BOS token.
        tokenizer: Tokenizer exposing ``bos_token``, ``encode`` and ``decode``.
        max_seq_len: Forwarded to ``generate`` as ``max_new_tokens``.
        temperature, top_k, stream, rp, kv_cache, is_simulation: Forwarded to
            ``generate`` unchanged. ``stream`` also controls whether more than
            one usable chunk is consumed from the generator.
        num_samples: Accepted for interface compatibility; not used here.
        device: Device the encoded prompt tensor is moved to.
        scaffold: Forwarded to ``reconstruct_scaffold1``.
        linker: If true, the first fragment is split on ``'.'``; its first part
            becomes the first fragment and its second part the last fragment.
        dummy_lst: Accepted for interface compatibility; not used here.

    Returns:
        ``(complete_answer_list, valid_answer_list)``. The first list holds one
        entry, the fragment list. The second holds the SMILES of the
        reconstructed molecule, or is empty when reconstruction failed.
    """
    complete_answer_list = []
    valid_answer_list = []
    model.eval()
    # place data on the correct device
    src_smiles = tokenizer.bos_token + smiles
    x = torch.tensor(tokenizer.encode(src_smiles, add_special_tokens=False), dtype=torch.long).unsqueeze(0)
    x = x.to(device)
    with torch.no_grad():
        res_y = model.generate(x, tokenizer, max_new_tokens=max_seq_len,
                               temperature=temperature, top_k=top_k, stream=stream, rp=rp, kv_cache=kv_cache,
                               is_simulation=is_simulation)
        try:
            y = next(res_y)
        except StopIteration:
            print("No answer")
            # Without this the loop below would hit an unbound local.
            y = None

        history_idx = 0
        complete_answer = f"{tokenizer.decode(x[0])}"  # accumulates the whole generated sentence

        while y is not None:
            answer = tokenizer.decode(y[0].tolist())
            # An empty chunk, or one ending in U+FFFD (an incompletely decoded
            # character), is skipped and the next chunk is fetched instead.
            usable = bool(answer) and answer[-1] != '\ufffd'
            if usable:
                # Append only the part that has not been stored yet.
                complete_answer += answer[history_idx:]
            try:
                y = next(res_y)
            except Exception:
                # Best effort: exhaustion or a generation error ends the loop,
                # while KeyboardInterrupt/SystemExit still propagate.
                break
            if usable:
                history_idx = len(answer)
                if not stream:
                    break

    complete_answer = complete_answer.replace(" ", "").replace("[BOS]", "").replace("[EOS]", "")
    frag_list = complete_answer.split('[SEP]')
    try:
        if linker:
            parts = frag_list[0].split('.')
            # The right-hand side is evaluated first, so a missing '.' raises
            # before frag_list is modified.
            frag_list[0], frag_list[-1] = parts[0], parts[1]
        frag_mol = [Chem.MolFromSmiles(s) for s in frag_list]
        mol = reconstruct_scaffold1(frag_mol, scaffold=scaffold)[0]
        if isinstance(mol, list):
            mol = mol[0]
        if mol:
            valid_answer_list.append(Chem.MolToSmiles(mol))
    except Exception:
        # Reconstruction is best effort: on failure the fragments are still
        # reported, just without a valid molecule.
        pass
    complete_answer_list.append(frag_list)

    return complete_answer_list, valid_answer_list

def cal_QED(smiles):
    """Score `smiles` with a freshly created TDC 'QED' oracle and return its result."""
    return Oracle(name='QED')(smiles)

def cal_SA(smiles):
    """Score `smiles` with a freshly created TDC 'SA' oracle and return its result."""
    return Oracle(name='SA')(smiles)

def cal_all(smiles):
    """Return a dict with the 'QED' and 'SA' oracle results for `smiles`."""
    # QED is evaluated before SA, matching the key order of the result.
    return {'QED': cal_QED(smiles), 'SA': cal_SA(smiles)}

def calculate_tanimoto_distance(fingerprint1, fingerprint2):
    """
    Return the Tanimoto distance (1 - Tanimoto similarity) between two fingerprints.
    """
    similarity = DataStructs.TanimotoSimilarity(fingerprint1, fingerprint2)
    return 1 - similarity

def calculate_morgan_fingerprint(mol, radius=2, nBits=2048):
    """
    Compute the Morgan fingerprint bit vector of a molecule.

    Args:
        mol: RDKit molecule object.
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The fingerprint, or None if it could not be computed (for example
        because the molecule is invalid).
    """
    try:
        return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
    except Exception:
        # Any fingerprinting error means "no fingerprint"; unlike a bare
        # `except:`, KeyboardInterrupt and SystemExit are not swallowed.
        return None

def calculate_diversity(molecules, radius=2, nBits=2048):
    """
    Diversity of a set of molecules: the mean pairwise Tanimoto distance.

    Args:
        molecules: List of RDKit molecule objects.
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The mean distance over all unordered pairs of molecules whose
        fingerprint could be computed, or 0.0 when there is no such pair.
    """
    candidates = (calculate_morgan_fingerprint(m, radius, nBits) for m in molecules)
    fps = [fp for fp in candidates if fp is not None]
    if len(fps) == 0:
        return 0.0  # no usable molecule at all

    running_total = 0.0
    pair_count = 0
    # Visit each unordered pair (idx < other) exactly once.
    for idx, left in enumerate(fps):
        for right in fps[idx + 1:]:
            running_total += calculate_tanimoto_distance(left, right)
            pair_count += 1
    return running_total / pair_count if pair_count else 0.0

def calculate_distance(generated_molecules, original_molecules, radius=2, nBits=2048):
    """
    Mean Tanimoto distance between generated and original molecules.

    Args:
        generated_molecules: List of generated RDKit molecule objects.
        original_molecules: List of original RDKit molecule objects.
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The mean distance over every (generated, original) pair whose
        fingerprints could be computed, or 0.0 if either side has none.
    """
    # Fingerprints of the generated molecules first, then of the originals.
    gen_fps = [
        fp
        for fp in (calculate_morgan_fingerprint(m, radius, nBits) for m in generated_molecules)
        if fp is not None
    ]
    ref_fps = [
        fp
        for fp in (calculate_morgan_fingerprint(m, radius, nBits) for m in original_molecules)
        if fp is not None
    ]
    if not (gen_fps and ref_fps):
        return 0.0

    accumulated = 0.0
    for gen_fp in gen_fps:
        for ref_fp in ref_fps:
            accumulated += calculate_tanimoto_distance(gen_fp, ref_fp)
    # Both lists are non-empty here, so the pair count is at least one.
    return accumulated / (len(gen_fps) * len(ref_fps))

def main_scaffold1(seed_value=42, device='cuda:1', num_trials=100):
    """
    Benchmark scaffold decoration on ten fixed scaffolds and print the metrics.

    For each scaffold, ``num_trials`` molecules are sampled. In every trial
    ``Test1`` is called once per ``*`` in the scaffold, feeding the molecule
    reconstructed so far (plus ``[SEP]``) back in as the next prompt; a trial
    is dropped as soon as one step yields no valid molecule. Per scaffold the
    function prints validity, uniqueness, quality (unique molecules with
    QED >= 0.6 and SA <= 4, divided by ``num_trials``), mean SA, mean QED,
    diversity and distance, followed by the averages over all scaffolds.

    Args:
        seed_value: Seed passed to ``seed_all`` before sampling.
        device: Device the model and the prompts are placed on.
        num_trials: Number of sampling trials per scaffold.

    Raises:
        ValueError: If ``num_trials`` is smaller than 1.
    """
    if num_trials < 1:
        raise ValueError("num_trials must be at least 1")

    scaffold_lst = [
        '*[C@H](CCc1ccccc1)N[C@@H](*)C(=O)N1CC2(C[C@H]1*)SCCS2[SEP]',
        '*c1cc2c(cc1*)NC(C1CC3C=CC1C3)NS2(=O)=O[SEP]',
        '*c1nc2cc(*)c(*)cc2n1[C@H]1O[C@@H](*)[C@H](*)[C@@H]1*[SEP]',
        '*N1CC[C@H](n2nc(C#Cc3cc(*)cc(*)c3)c3c(*)ncnc32)C1[SEP]',
        '*N1CC(*)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1[SEP]',
        '*[C@H](CN1CCCC1)[C@H](*)c1ccc2c(c1)OCCO2[SEP]',
        '*c1cc(*)c(Oc2ccc(*)c(*)c2)c(*)c1[SEP]',
        '*[C@H]1C[C@@H](*)C=C2C=C[C@H](*)[C@H](CC[C@@H]3C[C@@H](*)CC(=O)O3)[C@H]21[SEP]',
        '*c1nnc(*)n1-c1ccc(C2CC2)c2ccccc12[SEP]',
        '*c1cccc(Nc2ncnc3cc(*)c(*)cc23)c1[SEP]',
    ]
    original_smiles = [
        'CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CC2(C[C@H]1C(=O)O)SCCS2',
        'NS(=O)(=O)c1cc2c(cc1Cl)NC(C1CC3C=CC1C3)NS2(=O)=O',
        'CC(C)Nc1nc2cc(Cl)c(Cl)cc2n1[C@H]1O[C@@H](CO)[C@H](O)[C@@H]1O',
        'C=CC(=O)N1CC[C@H](n2nc(C#Cc3cc(OC)cc(OC)c3)c3c(N)ncnc32)C1',
        'CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1',
        'CCCCCCCC(=O)N[C@H](CN1CCCC1)[C@H](O)c1ccc2c(c1)OCCO2',
        'N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O',
        'CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21',
        'O=C(O)CSc1nnc(Br)n1-c1ccc(C2CC2)c2ccccc12',
        'C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1',
    ]

    # Seed every random number generator handled by seed_all.
    seed_all(seed_value)

    tokenizer = SmilesTokenizer('./vocabs/vocab.txt')
    tokenizer.bos_token = "[BOS]"
    tokenizer.bos_token_id = tokenizer.convert_tokens_to_ids("[BOS]")
    tokenizer.eos_token = "[EOS]"
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("[EOS]")

    mconf = GPTConfig(vocab_size=tokenizer.vocab_size, n_layer=12, n_head=12, n_embd=768)
    model = GPT(mconf).to(device)
    # map_location puts the tensors straight on the target device instead of
    # on whatever device the checkpoint was saved from.
    checkpoint = torch.load('./weights/fragpt.pt', map_location=device, weights_only=True)
    model.load_state_dict(checkpoint)

    # The reference molecules do not depend on the scaffold; parse them once.
    # NOTE(review): distance is measured against all ten originals, not only
    # the one matching the current scaffold -- confirm this is intended.
    original_molecules = [Chem.MolFromSmiles(s) for s in original_smiles]

    start_time = time.time()
    valid_ratio_sum = 0
    uniqueness_sum = 0
    quality_sum = 0
    sa_sum = 0
    qed_sum = 0
    div_sum = 0
    dist_sum = 0
    for scaffold in scaffold_lst:
        num_stars = scaffold.count('*')
        valid_answer_list = []
        for _trial in tqdm(range(num_trials)):
            scaf = scaffold
            decorated = []
            # Fill one attachment point per step, feeding the result back in.
            for _step in range(num_stars):
                _, decorated = Test1(model, scaf, tokenizer, max_seq_len=512, temperature=1.2, top_k=8,
                                     stream=False, rp=1., num_samples=1, kv_cache=True, is_simulation=True,
                                     device=device, scaffold=True, dummy_lst=None)
                if not decorated:
                    break
                scaf = decorated[0] + '[SEP]'

            if decorated:
                valid_answer_list.append(decorated[0])

        # Order-preserving de-duplication keeps the float sums below
        # reproducible from run to run (set order depends on string hashing).
        unique_smiles_lst = list(dict.fromkeys(s for s in valid_answer_list if s is not None))
        num_unique = len(unique_smiles_lst)
        uniqueness = num_unique / len(valid_answer_list) if valid_answer_list else 0
        valid_ratio = len(valid_answer_list) / num_trials

        # Skip the oracles entirely when there is nothing to score.
        results = cal_all(unique_smiles_lst) if unique_smiles_lst else {'QED': [], 'SA': []}
        SA_score = 0
        QED_score = 0
        num_good = 0
        for sa, qed in zip(results['SA'], results['QED']):
            SA_score += sa
            QED_score += qed
            if qed >= 0.6 and sa <= 4:
                num_good += 1
        quality = num_good / num_trials
        mean_sa = SA_score / num_unique if num_unique else 0
        mean_qed = QED_score / num_unique if num_unique else 0

        generated_molecules = [Chem.MolFromSmiles(s) for s in valid_answer_list]
        # Diversity among the generated molecules.
        diversity = calculate_diversity(generated_molecules)
        # Distance between generated and reference molecules.
        distance = calculate_distance(generated_molecules, original_molecules)

        print('valid_ratio:', valid_ratio, 'uniqueness:', uniqueness, 'Quality:', quality,
              'SA:', mean_sa,
              'QED:', mean_qed, 'diversity:', diversity,
              'distance:', distance)
        valid_ratio_sum += valid_ratio
        uniqueness_sum += uniqueness
        quality_sum += quality
        sa_sum += mean_sa
        qed_sum += mean_qed
        div_sum += diversity
        dist_sum += distance
    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"运行时间: {elapsed_time:.4f} 秒")
    print(f"valid_ratio_avg: {valid_ratio_sum / len(scaffold_lst)}, uniqueness_avg: {uniqueness_sum / len(scaffold_lst)}, "
          f"quality_avg: {quality_sum / len(scaffold_lst)}, sa_avg: {sa_sum / len(scaffold_lst)}, "
          f"qed_avg: {qed_sum / len(scaffold_lst)}, div_avg: {div_sum / len(scaffold_lst)}, dist_avg: {dist_sum / len(scaffold_lst)}")






# Run the scaffold benchmark when this code is executed as the main module
# (which is also the case when the cell is run in a notebook).
if __name__ == '__main__':

    main_scaffold1()



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:55<00:00,  1.80it/s]


valid_ratio: 0.98 uniqueness: 0.9591836734693877 Quality: 0.0 SA: 4.648746773776923 QED: 0.29940560423427065 diversity: 0.514774788171485 distance: 0.8602808224300293


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:41<00:00,  2.39it/s]


valid_ratio: 0.99 uniqueness: 0.8181818181818182 Quality: 0.0 SA: 5.2379626814704725 QED: 0.5509248262224115 diversity: 0.43249001510840585 distance: 0.8769670605079892


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:52<00:00,  1.13s/it]


valid_ratio: 0.96 uniqueness: 1.0 Quality: 0.0 SA: 4.767620231407032 QED: 0.21389832864980288 diversity: 0.7214512037822068 distance: 0.8880674676881031


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:18<00:00,  1.27it/s]


valid_ratio: 0.99 uniqueness: 1.0 Quality: 0.0 SA: 4.087600356829307 QED: 0.3195979216412912 diversity: 0.5898171599034514 distance: 0.8811540487817547


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:43<00:00,  2.31it/s]


valid_ratio: 0.98 uniqueness: 0.9285714285714286 Quality: 0.33 SA: 3.639270977129107 QED: 0.5236657096212078 diversity: 0.5041087956293959 distance: 0.8610817182846368


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:36<00:00,  2.76it/s]


valid_ratio: 0.97 uniqueness: 0.7319587628865979 Quality: 0.52 SA: 3.398029319579984 QED: 0.7278736244151535 diversity: 0.4594137986476902 distance: 0.877056829471974


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [03:01<00:00,  1.82s/it]


valid_ratio: 0.96 uniqueness: 0.9895833333333334 Quality: 0.12 SA: 3.434399139194173 QED: 0.3359712800467264 diversity: 0.7972910885720901 distance: 0.8999955380950962


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:25<00:00,  1.17it/s]


valid_ratio: 1.0 uniqueness: 1.0 Quality: 0.0 SA: 5.04158227720461 QED: 0.3531797712074467 diversity: 0.6309185915343847 distance: 0.883350253599778


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:45<00:00,  2.19it/s]


valid_ratio: 0.98 uniqueness: 0.8469387755102041 Quality: 0.29 SA: 2.9472681524333195 QED: 0.5426778383009112 diversity: 0.5300506057935626 distance: 0.8889104311701685


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:02<00:00,  1.60it/s]


valid_ratio: 1.0 uniqueness: 1.0 Quality: 0.14 SA: 2.861856037053513 QED: 0.4156675579635617 diversity: 0.6408124649395137 distance: 0.8784294952156271
运行时间: 747.0612 秒
valid_ratio_avg: 0.9810000000000001, uniqueness_avg: 0.9274417791952769, quality_avg: 0.13999999999999999, sa_avg: 4.006433594607843, qed_avg: 0.42828624623027833, div_avg: 0.5821128512082188, dist_avg: 0.8795293665245157


In [None]:
import torch
from utils.train_utils import seed_all
import argparse
from tokenizer import SmilesTokenizer
from model import GPTConfig, GPT
import time
from fragment_utils import reconstruct, reconstruct_d, reconstruct_scaffold1
from torch.nn import functional as F
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from tdc import Oracle
import math
import multiprocessing
import random
from tqdm import tqdm


def Test1(model, smiles, tokenizer, max_seq_len, temperature, top_k, stream, rp, num_samples, kv_cache, is_simulation,
         device, scaffold=False, linker=False, dummy_lst=None):
    """
    Generate fragments for a prompt and try to assemble them into a molecule.

    The prompt is ``tokenizer.bos_token + smiles``. The text produced by
    ``model.generate`` is appended to the decoded prompt, spaces and the
    ``[BOS]``/``[EOS]`` markers are stripped, and the result is split on
    ``[SEP]`` into a fragment list that is handed to ``reconstruct_scaffold1``.

    Args:
        model: Model exposing ``eval()`` and a ``generate(...)`` generator.
        smiles: Prompt text appended after the BOS token.
        tokenizer: Tokenizer exposing ``bos_token``, ``encode`` and ``decode``.
        max_seq_len: Forwarded to ``generate`` as ``max_new_tokens``.
        temperature, top_k, stream, rp, kv_cache, is_simulation: Forwarded to
            ``generate`` unchanged. ``stream`` also controls whether more than
            one usable chunk is consumed from the generator.
        num_samples: Accepted for interface compatibility; not used here.
        device: Device the encoded prompt tensor is moved to.
        scaffold: Forwarded to ``reconstruct_scaffold1``.
        linker: If true, the first fragment is split on ``'.'``; its first part
            becomes the first fragment and its second part the last fragment.
        dummy_lst: Accepted for interface compatibility; not used here.

    Returns:
        ``(complete_answer_list, valid_answer_list)``. The first list holds one
        entry, the fragment list. The second holds the SMILES of the
        reconstructed molecule, or is empty when reconstruction failed.
    """
    complete_answer_list = []
    valid_answer_list = []
    model.eval()
    # place data on the correct device
    src_smiles = tokenizer.bos_token + smiles
    x = torch.tensor(tokenizer.encode(src_smiles, add_special_tokens=False), dtype=torch.long).unsqueeze(0)
    x = x.to(device)
    with torch.no_grad():
        res_y = model.generate(x, tokenizer, max_new_tokens=max_seq_len,
                               temperature=temperature, top_k=top_k, stream=stream, rp=rp, kv_cache=kv_cache,
                               is_simulation=is_simulation)
        try:
            y = next(res_y)
        except StopIteration:
            print("No answer")
            # Without this the loop below would hit an unbound local.
            y = None

        history_idx = 0
        complete_answer = f"{tokenizer.decode(x[0])}"  # accumulates the whole generated sentence

        while y is not None:
            answer = tokenizer.decode(y[0].tolist())
            # An empty chunk, or one ending in U+FFFD (an incompletely decoded
            # character), is skipped and the next chunk is fetched instead.
            usable = bool(answer) and answer[-1] != '\ufffd'
            if usable:
                # Append only the part that has not been stored yet.
                complete_answer += answer[history_idx:]
            try:
                y = next(res_y)
            except Exception:
                # Best effort: exhaustion or a generation error ends the loop,
                # while KeyboardInterrupt/SystemExit still propagate.
                break
            if usable:
                history_idx = len(answer)
                if not stream:
                    break

    complete_answer = complete_answer.replace(" ", "").replace("[BOS]", "").replace("[EOS]", "")
    frag_list = complete_answer.split('[SEP]')
    try:
        if linker:
            parts = frag_list[0].split('.')
            # The right-hand side is evaluated first, so a missing '.' raises
            # before frag_list is modified.
            frag_list[0], frag_list[-1] = parts[0], parts[1]
        frag_mol = [Chem.MolFromSmiles(s) for s in frag_list]
        mol = reconstruct_scaffold1(frag_mol, scaffold=scaffold)[0]
        if isinstance(mol, list):
            mol = mol[0]
        if mol:
            valid_answer_list.append(Chem.MolToSmiles(mol))
    except Exception:
        # Reconstruction is best effort: on failure the fragments are still
        # reported, just without a valid molecule.
        pass
    complete_answer_list.append(frag_list)

    return complete_answer_list, valid_answer_list

def cal_QED(smiles):
    """Score `smiles` with a freshly created TDC 'QED' oracle and return its result."""
    return Oracle(name='QED')(smiles)

def cal_SA(smiles):
    """Score `smiles` with a freshly created TDC 'SA' oracle and return its result."""
    return Oracle(name='SA')(smiles)

def cal_all(smiles):
    """Return a dict with the 'QED' and 'SA' oracle results for `smiles`."""
    # QED is evaluated before SA, matching the key order of the result.
    return {'QED': cal_QED(smiles), 'SA': cal_SA(smiles)}

def calculate_tanimoto_distance(fingerprint1, fingerprint2):
    """
    Return the Tanimoto distance (1 - Tanimoto similarity) between two fingerprints.
    """
    similarity = DataStructs.TanimotoSimilarity(fingerprint1, fingerprint2)
    return 1 - similarity

def calculate_morgan_fingerprint(mol, radius=2, nBits=2048):
    """
    Compute the Morgan fingerprint bit vector of a molecule.

    Args:
        mol: RDKit molecule object.
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The fingerprint, or None if it could not be computed (for example
        because the molecule is invalid).
    """
    try:
        return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
    except Exception:
        # Any fingerprinting error means "no fingerprint"; unlike a bare
        # `except:`, KeyboardInterrupt and SystemExit are not swallowed.
        return None

def calculate_diversity(molecules, radius=2, nBits=2048):
    """
    Diversity of a set of molecules: the mean pairwise Tanimoto distance.

    Args:
        molecules: List of RDKit molecule objects.
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The mean distance over all unordered pairs of molecules whose
        fingerprint could be computed, or 0.0 when there is no such pair.
    """
    candidates = (calculate_morgan_fingerprint(m, radius, nBits) for m in molecules)
    fps = [fp for fp in candidates if fp is not None]
    if len(fps) == 0:
        return 0.0  # no usable molecule at all

    running_total = 0.0
    pair_count = 0
    # Visit each unordered pair (idx < other) exactly once.
    for idx, left in enumerate(fps):
        for right in fps[idx + 1:]:
            running_total += calculate_tanimoto_distance(left, right)
            pair_count += 1
    return running_total / pair_count if pair_count else 0.0

def calculate_distance(generated_molecules, original_molecules, radius=2, nBits=2048):
    """
    Mean Tanimoto distance between generated and original molecules.

    Args:
        generated_molecules: List of generated RDKit molecule objects.
        original_molecules: List of original RDKit molecule objects.
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The mean distance over every (generated, original) pair whose
        fingerprints could be computed, or 0.0 if either side has none.
    """
    # Fingerprints of the generated molecules first, then of the originals.
    gen_fps = [
        fp
        for fp in (calculate_morgan_fingerprint(m, radius, nBits) for m in generated_molecules)
        if fp is not None
    ]
    ref_fps = [
        fp
        for fp in (calculate_morgan_fingerprint(m, radius, nBits) for m in original_molecules)
        if fp is not None
    ]
    if not (gen_fps and ref_fps):
        return 0.0

    accumulated = 0.0
    for gen_fp in gen_fps:
        for ref_fp in ref_fps:
            accumulated += calculate_tanimoto_distance(gen_fp, ref_fp)
    # Both lists are non-empty here, so the pair count is at least one.
    return accumulated / (len(gen_fps) * len(ref_fps))

def main_scaffold1(seed_value=43, device='cuda:1', num_trials=100):
    """
    Benchmark scaffold decoration on ten fixed scaffolds and print the metrics.

    For each scaffold, ``num_trials`` molecules are sampled. In every trial
    ``Test1`` is called once per ``*`` in the scaffold, feeding the molecule
    reconstructed so far (plus ``[SEP]``) back in as the next prompt; a trial
    is dropped as soon as one step yields no valid molecule. Per scaffold the
    function prints validity, uniqueness, quality (unique molecules with
    QED >= 0.6 and SA <= 4, divided by ``num_trials``), mean SA, mean QED,
    diversity and distance, followed by the averages over all scaffolds.

    Args:
        seed_value: Seed passed to ``seed_all`` before sampling.
        device: Device the model and the prompts are placed on.
        num_trials: Number of sampling trials per scaffold.

    Raises:
        ValueError: If ``num_trials`` is smaller than 1.
    """
    if num_trials < 1:
        raise ValueError("num_trials must be at least 1")

    scaffold_lst = [
        '*[C@H](CCc1ccccc1)N[C@@H](*)C(=O)N1CC2(C[C@H]1*)SCCS2[SEP]',
        '*c1cc2c(cc1*)NC(C1CC3C=CC1C3)NS2(=O)=O[SEP]',
        '*c1nc2cc(*)c(*)cc2n1[C@H]1O[C@@H](*)[C@H](*)[C@@H]1*[SEP]',
        '*N1CC[C@H](n2nc(C#Cc3cc(*)cc(*)c3)c3c(*)ncnc32)C1[SEP]',
        '*N1CC(*)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1[SEP]',
        '*[C@H](CN1CCCC1)[C@H](*)c1ccc2c(c1)OCCO2[SEP]',
        '*c1cc(*)c(Oc2ccc(*)c(*)c2)c(*)c1[SEP]',
        '*[C@H]1C[C@@H](*)C=C2C=C[C@H](*)[C@H](CC[C@@H]3C[C@@H](*)CC(=O)O3)[C@H]21[SEP]',
        '*c1nnc(*)n1-c1ccc(C2CC2)c2ccccc12[SEP]',
        '*c1cccc(Nc2ncnc3cc(*)c(*)cc23)c1[SEP]',
    ]
    original_smiles = [
        'CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CC2(C[C@H]1C(=O)O)SCCS2',
        'NS(=O)(=O)c1cc2c(cc1Cl)NC(C1CC3C=CC1C3)NS2(=O)=O',
        'CC(C)Nc1nc2cc(Cl)c(Cl)cc2n1[C@H]1O[C@@H](CO)[C@H](O)[C@@H]1O',
        'C=CC(=O)N1CC[C@H](n2nc(C#Cc3cc(OC)cc(OC)c3)c3c(N)ncnc32)C1',
        'CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1',
        'CCCCCCCC(=O)N[C@H](CN1CCCC1)[C@H](O)c1ccc2c(c1)OCCO2',
        'N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O',
        'CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21',
        'O=C(O)CSc1nnc(Br)n1-c1ccc(C2CC2)c2ccccc12',
        'C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1',
    ]

    # Seed every random number generator handled by seed_all.
    seed_all(seed_value)

    tokenizer = SmilesTokenizer('./vocabs/vocab.txt')
    tokenizer.bos_token = "[BOS]"
    tokenizer.bos_token_id = tokenizer.convert_tokens_to_ids("[BOS]")
    tokenizer.eos_token = "[EOS]"
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("[EOS]")

    mconf = GPTConfig(vocab_size=tokenizer.vocab_size, n_layer=12, n_head=12, n_embd=768)
    model = GPT(mconf).to(device)
    # map_location puts the tensors straight on the target device instead of
    # on whatever device the checkpoint was saved from.
    checkpoint = torch.load('./weights/fragpt.pt', map_location=device, weights_only=True)
    model.load_state_dict(checkpoint)

    # The reference molecules do not depend on the scaffold; parse them once.
    # NOTE(review): distance is measured against all ten originals, not only
    # the one matching the current scaffold -- confirm this is intended.
    original_molecules = [Chem.MolFromSmiles(s) for s in original_smiles]

    start_time = time.time()
    valid_ratio_sum = 0
    uniqueness_sum = 0
    quality_sum = 0
    sa_sum = 0
    qed_sum = 0
    div_sum = 0
    dist_sum = 0
    for scaffold in scaffold_lst:
        num_stars = scaffold.count('*')
        valid_answer_list = []
        for _trial in tqdm(range(num_trials)):
            scaf = scaffold
            decorated = []
            # Fill one attachment point per step, feeding the result back in.
            for _step in range(num_stars):
                _, decorated = Test1(model, scaf, tokenizer, max_seq_len=512, temperature=1.2, top_k=8,
                                     stream=False, rp=1., num_samples=1, kv_cache=True, is_simulation=True,
                                     device=device, scaffold=True, dummy_lst=None)
                if not decorated:
                    break
                scaf = decorated[0] + '[SEP]'

            if decorated:
                valid_answer_list.append(decorated[0])

        # Order-preserving de-duplication keeps the float sums below
        # reproducible from run to run (set order depends on string hashing).
        unique_smiles_lst = list(dict.fromkeys(s for s in valid_answer_list if s is not None))
        num_unique = len(unique_smiles_lst)
        uniqueness = num_unique / len(valid_answer_list) if valid_answer_list else 0
        valid_ratio = len(valid_answer_list) / num_trials

        # Skip the oracles entirely when there is nothing to score.
        results = cal_all(unique_smiles_lst) if unique_smiles_lst else {'QED': [], 'SA': []}
        SA_score = 0
        QED_score = 0
        num_good = 0
        for sa, qed in zip(results['SA'], results['QED']):
            SA_score += sa
            QED_score += qed
            if qed >= 0.6 and sa <= 4:
                num_good += 1
        quality = num_good / num_trials
        mean_sa = SA_score / num_unique if num_unique else 0
        mean_qed = QED_score / num_unique if num_unique else 0

        generated_molecules = [Chem.MolFromSmiles(s) for s in valid_answer_list]
        # Diversity among the generated molecules.
        diversity = calculate_diversity(generated_molecules)
        # Distance between generated and reference molecules.
        distance = calculate_distance(generated_molecules, original_molecules)

        print('valid_ratio:', valid_ratio, 'uniqueness:', uniqueness, 'Quality:', quality,
              'SA:', mean_sa,
              'QED:', mean_qed, 'diversity:', diversity,
              'distance:', distance)
        valid_ratio_sum += valid_ratio
        uniqueness_sum += uniqueness
        quality_sum += quality
        sa_sum += mean_sa
        qed_sum += mean_qed
        div_sum += diversity
        dist_sum += distance
    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"运行时间: {elapsed_time:.4f} 秒")
    print(f"valid_ratio_avg: {valid_ratio_sum / len(scaffold_lst)}, uniqueness_avg: {uniqueness_sum / len(scaffold_lst)}, "
          f"quality_avg: {quality_sum / len(scaffold_lst)}, sa_avg: {sa_sum / len(scaffold_lst)}, "
          f"qed_avg: {qed_sum / len(scaffold_lst)}, div_avg: {div_sum / len(scaffold_lst)}, dist_avg: {dist_sum / len(scaffold_lst)}")






# Run the scaffold benchmark when this code is executed as the main module
# (which is also the case when the cell is run in a notebook).
if __name__ == '__main__':

    main_scaffold1()



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:57<00:00,  1.73it/s]


valid_ratio: 0.99 uniqueness: 0.9191919191919192 Quality: 0.0 SA: 4.697212258575822 QED: 0.30147140309002535 diversity: 0.5027161298723802 distance: 0.861385941711051


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:41<00:00,  2.40it/s]


valid_ratio: 0.99 uniqueness: 0.8181818181818182 Quality: 0.0 SA: 5.276289055956925 QED: 0.49709941948210257 diversity: 0.4584105458611867 distance: 0.8755641327813003


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:45<00:00,  1.06s/it]


valid_ratio: 0.96 uniqueness: 1.0 Quality: 0.0 SA: 4.68127316094573 QED: 0.22205383593845462 diversity: 0.7137937328164814 distance: 0.8881307980943028


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:24<00:00,  1.18it/s]


valid_ratio: 0.99 uniqueness: 0.98989898989899 Quality: 0.0 SA: 4.100626608168272 QED: 0.3066893636285204 diversity: 0.5981227553823617 distance: 0.8814898183463524


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:45<00:00,  2.22it/s]


valid_ratio: 0.98 uniqueness: 0.9387755102040817 Quality: 0.38 SA: 3.512168295250528 QED: 0.5701544171921638 diversity: 0.5028691971315462 distance: 0.8615018650938778


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:39<00:00,  2.50it/s]


valid_ratio: 1.0 uniqueness: 0.66 Quality: 0.43 SA: 3.4269124835747427 QED: 0.703307622975149 diversity: 0.4544083566326828 distance: 0.8778992326206647


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:45<00:00,  1.66s/it]


valid_ratio: 0.96 uniqueness: 1.0 Quality: 0.08 SA: 3.5124921377734624 QED: 0.30838078361518945 diversity: 0.7996214648139455 distance: 0.8993325423006705


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:20<00:00,  1.24it/s]


valid_ratio: 0.98 uniqueness: 1.0 Quality: 0.0 SA: 5.043124845188668 QED: 0.36141096257281163 diversity: 0.6247990655175163 distance: 0.8826296102352351


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:44<00:00,  2.25it/s]


valid_ratio: 0.99 uniqueness: 0.8282828282828283 Quality: 0.3 SA: 2.8616321139229757 QED: 0.5385594254146534 diversity: 0.5360562266119304 distance: 0.88835710681864


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:08<00:00,  1.46it/s]


valid_ratio: 0.99 uniqueness: 0.98989898989899 Quality: 0.12 SA: 2.8862842142744167 QED: 0.434221124523414 diversity: 0.6323198571269547 distance: 0.8776725835055529
运行时间: 737.8209 秒
valid_ratio_avg: 0.983, uniqueness_avg: 0.9144230055658626, quality_avg: 0.131, sa_avg: 3.9998015173631543, qed_avg: 0.4243348358432484, div_avg: 0.5823117331766986, dist_avg: 0.879396363150765


In [None]:
import torch
from utils.train_utils import seed_all
import argparse
from tokenizer import SmilesTokenizer
from model import GPTConfig, GPT
import time
from fragment_utils import reconstruct, reconstruct_d, reconstruct_scaffold1
from torch.nn import functional as F
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from tdc import Oracle
import math
import multiprocessing
import random
from tqdm import tqdm


def Test1(model, smiles, tokenizer, max_seq_len, temperature, top_k, stream, rp, num_samples, kv_cache, is_simulation,
         device, scaffold=False, linker=False, dummy_lst=None):
    """Generate fragments for one prompt and try to assemble them into a molecule.

    The prompt is ``tokenizer.bos_token + smiles``. Text decoded from the chunks
    yielded by ``model.generate`` is appended to the decoded prompt, spaces and
    the ``[BOS]``/``[EOS]`` markers are removed, and the result is split on
    ``[SEP]`` into a fragment list that is passed to ``reconstruct_scaffold1``.

    Args:
        model: Object exposing ``eval()`` and ``generate(...)``; the value
            returned by ``generate`` is consumed as an iterator.
        smiles: Prompt text placed after the BOS token.
        tokenizer: Object exposing ``bos_token``, ``encode`` and ``decode``.
        max_seq_len: Forwarded to ``generate`` as ``max_new_tokens``.
        temperature: Forwarded to ``generate``.
        top_k: Forwarded to ``generate``.
        stream: Forwarded to ``generate``; when false, consumption stops after
            the first usable chunk.
        rp: Forwarded to ``generate``.
        num_samples: Unused; kept so existing callers keep working.
        kv_cache: Forwarded to ``generate``.
        is_simulation: Forwarded to ``generate``.
        device: Device the prompt tensor is moved to.
        scaffold: Forwarded to ``reconstruct_scaffold1``.
        linker: When true, the first fragment is split on ``'.'``; its first
            part replaces the first fragment and its second part replaces the
            last fragment before reconstruction.
        dummy_lst: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(complete_answer_list, valid_answer_list)``. The first list
        holds exactly one entry, the fragment list. The second holds the SMILES
        of the reconstructed molecule, or is empty when reconstruction failed
        or produced a falsy result.
    """
    complete_answer_list = []
    valid_answer_list = []
    model.eval()
    # Build the prompt tensor and place it on the requested device.
    src_smiles = tokenizer.bos_token + smiles
    x = torch.tensor(tokenizer.encode(src_smiles, add_special_tokens=False), dtype=torch.long).unsqueeze(0)
    x = x.to(device)
    with torch.no_grad():
        res_y = model.generate(x, tokenizer, max_new_tokens=max_seq_len,
                               temperature=temperature, top_k=top_k, stream=stream, rp=rp, kv_cache=kv_cache,
                               is_simulation=is_simulation)
        # An exhausted generator used to leave ``y`` unbound and crash the loop
        # below; ``None`` now simply means "nothing was generated".
        y = next(res_y, None)
        if y is None:
            print("No answer")

        history_idx = 0
        complete_answer = f"{tokenizer.decode(x[0])}"  # accumulates the whole generated sentence

        while y is not None:
            answer = tokenizer.decode(y[0].tolist())
            # Skip empty chunks and chunks ending in U+FFFD (the replacement
            # character); wait for the next chunk instead.
            if not answer or answer[-1] == '\ufffd':
                y = next(res_y, None)
                continue

            # Append only the part of the decoded text not seen before.
            complete_answer += answer[history_idx:]

            # The generator is advanced before the non-stream exit, exactly as
            # before, so it is driven through the same number of steps.
            y = next(res_y, None)
            history_idx = len(answer)
            if not stream:
                break

        complete_answer = complete_answer.replace(" ", "").replace("[BOS]", "").replace("[EOS]", "")
        frag_list = complete_answer.split('[SEP]')
        try:
            if linker:
                head_parts = frag_list[0].split('.')
                last_frag = head_parts[1]
                first_frag = head_parts[0]
                frag_list[0] = first_frag
                frag_list[len(frag_list) - 1] = last_frag
            frag_mol = [Chem.MolFromSmiles(s) for s in frag_list]
            mol = reconstruct_scaffold1(frag_mol, scaffold=scaffold)[0]
            if isinstance(mol, list):
                mol = mol[0]
            if mol:
                valid_answer_list.append(Chem.MolToSmiles(mol))
        except Exception:
            # Best effort: any parsing/reconstruction failure just means this
            # sample contributes no valid molecule.
            pass
        complete_answer_list.append(frag_list)

    return complete_answer_list, valid_answer_list

def cal_QED(smiles):
    """Score ``smiles`` with a newly created ``Oracle`` named ``'QED'``."""
    return Oracle(name='QED')(smiles)

def cal_SA(smiles):
    """Score ``smiles`` with a newly created ``Oracle`` named ``'SA'``."""
    return Oracle(name='SA')(smiles)

def cal_all(smiles):
    """Return a dict with the ``'QED'`` and ``'SA'`` scores of ``smiles``.

    QED is evaluated first, then SA; each value is whatever the corresponding
    ``cal_QED`` / ``cal_SA`` call returns.
    """
    return {
        'QED': cal_QED(smiles),
        'SA': cal_SA(smiles),
    }

def calculate_tanimoto_distance(fingerprint1, fingerprint2):
    """
    Return the Tanimoto distance between two fingerprints, i.e. one minus
    their Tanimoto similarity.
    """
    similarity = DataStructs.TanimotoSimilarity(fingerprint1, fingerprint2)
    return 1 - similarity

def calculate_morgan_fingerprint(mol, radius=2, nBits=2048):
    """
    Compute the Morgan fingerprint of a molecule as a bit vector.

    Args:
        mol: RDKit molecule object, or None (e.g. a SMILES string that failed
            to parse).
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The fingerprint, or None when ``mol`` is None or the fingerprint
        computation raises.
    """
    # A missing molecule is an expected input; handle it explicitly instead of
    # provoking an exception and catching it.
    if mol is None:
        return None
    try:
        return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
    except Exception:
        # Best effort: a molecule that cannot be fingerprinted is skipped by
        # callers. KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

def calculate_diversity(molecules, radius=2, nBits=2048):
    """
    Compute the diversity of a set of molecules as the mean pairwise Tanimoto
    distance between their Morgan fingerprints.

    Args:
        molecules: List of RDKit molecule objects.
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The mean pairwise distance, or 0.0 when fewer than two molecules yield
        a fingerprint.
    """
    # Molecules whose fingerprint could not be computed are ignored.
    fingerprints = []
    for mol in molecules:
        fp = calculate_morgan_fingerprint(mol, radius, nBits)
        if fp is not None:
            fingerprints.append(fp)

    total_distance = 0.0
    pair_count = 0
    # Visit every unordered pair once. The distances are accumulated manually
    # (rather than with sum()) so the floating-point result stays unchanged.
    for idx, first_fp in enumerate(fingerprints):
        for second_fp in fingerprints[idx + 1:]:
            total_distance += calculate_tanimoto_distance(first_fp, second_fp)
            pair_count += 1

    if pair_count == 0:
        return 0.0
    return total_distance / pair_count

def calculate_distance(generated_molecules, original_molecules, radius=2, nBits=2048):
    """
    Compute the mean Tanimoto distance over every (generated, original)
    molecule pair.

    Args:
        generated_molecules: List of generated RDKit molecule objects.
        original_molecules: List of original RDKit molecule objects.
        radius: Radius of the Morgan fingerprint.
        nBits: Number of bits in the fingerprint.
    Returns:
        The mean distance, or 0.0 when either side has no usable fingerprint.
    """
    def _usable_fingerprints(mols):
        # Fingerprint each molecule in order, dropping those that yield None.
        computed = [calculate_morgan_fingerprint(m, radius, nBits) for m in mols]
        return [fp for fp in computed if fp is not None]

    # Generated molecules are fingerprinted first, then the originals.
    gen_fps = _usable_fingerprints(generated_molecules)
    ref_fps = _usable_fingerprints(original_molecules)
    if not (gen_fps and ref_fps):
        return 0.0

    running_total = 0.0
    for gen_fp in gen_fps:
        for ref_fp in ref_fps:
            running_total += calculate_tanimoto_distance(gen_fp, ref_fp)
    # Both lists are non-empty here, so the pair count is strictly positive.
    return running_total / (len(gen_fps) * len(ref_fps))

def main_scaffold1(samples_per_scaffold=100, device='cuda:1', seed_value=44):
    """Run the scaffold-decoration benchmark and print per-scaffold and mean metrics.

    For every scaffold prompt, ``samples_per_scaffold`` molecules are attempted.
    Each attempt calls ``Test1`` once per ``*`` in the scaffold, feeding the
    previous valid result (plus ``[SEP]``) back in as the next prompt, and is
    abandoned as soon as one call yields no valid molecule. The printed
    metrics per scaffold are:

    * valid_ratio: valid molecules / ``samples_per_scaffold``
    * uniqueness: distinct valid SMILES / valid SMILES (0 when none are valid)
    * Quality: distinct molecules with QED >= 0.6 and SA <= 4, divided by
      ``samples_per_scaffold``
    * SA / QED: mean score over the distinct molecules (0 when there are none)
    * diversity: ``calculate_diversity`` over the valid molecules
    * distance: ``calculate_distance`` between the valid molecules and all
      reference molecules

    Args:
        samples_per_scaffold: Number of generation attempts per scaffold.
        device: Device string used for the model and the checkpoint.
        seed_value: Seed passed to ``seed_all``.

    Raises:
        ValueError: If ``samples_per_scaffold`` is not positive.
    """
    if samples_per_scaffold <= 0:
        raise ValueError("samples_per_scaffold must be a positive integer")

    scaffold_lst = ['*[C@H](CCc1ccccc1)N[C@@H](*)C(=O)N1CC2(C[C@H]1*)SCCS2[SEP]', '*c1cc2c(cc1*)NC(C1CC3C=CC1C3)NS2(=O)=O[SEP]',
                    '*c1nc2cc(*)c(*)cc2n1[C@H]1O[C@@H](*)[C@H](*)[C@@H]1*[SEP]', '*N1CC[C@H](n2nc(C#Cc3cc(*)cc(*)c3)c3c(*)ncnc32)C1[SEP]',
                    '*N1CC(*)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1[SEP]', '*[C@H](CN1CCCC1)[C@H](*)c1ccc2c(c1)OCCO2[SEP]',
                    '*c1cc(*)c(Oc2ccc(*)c(*)c2)c(*)c1[SEP]', '*[C@H]1C[C@@H](*)C=C2C=C[C@H](*)[C@H](CC[C@@H]3C[C@@H](*)CC(=O)O3)[C@H]21[SEP]',
                    '*c1nnc(*)n1-c1ccc(C2CC2)c2ccccc12[SEP]', '*c1cccc(Nc2ncnc3cc(*)c(*)cc23)c1[SEP]']
    original_smiles = ['CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CC2(C[C@H]1C(=O)O)SCCS2',
                       'NS(=O)(=O)c1cc2c(cc1Cl)NC(C1CC3C=CC1C3)NS2(=O)=O',
                       'CC(C)Nc1nc2cc(Cl)c(Cl)cc2n1[C@H]1O[C@@H](CO)[C@H](O)[C@@H]1O',
                       'C=CC(=O)N1CC[C@H](n2nc(C#Cc3cc(OC)cc(OC)c3)c3c(N)ncnc32)C1',
                       'CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1',
                       'CCCCCCCC(=O)N[C@H](CN1CCCC1)[C@H](O)c1ccc2c(c1)OCCO2',
                       'N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O',
                       'CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21',
                       'O=C(O)CSc1nnc(Br)n1-c1ccc(C2CC2)c2ccccc12', 'C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1']

    # Seed before the model is built so the run is reproducible.
    seed_all(seed_value)

    tokenizer = SmilesTokenizer('./vocabs/vocab.txt')
    tokenizer.bos_token = "[BOS]"
    tokenizer.bos_token_id = tokenizer.convert_tokens_to_ids("[BOS]")
    tokenizer.eos_token = "[EOS]"
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("[EOS]")

    mconf = GPTConfig(vocab_size=tokenizer.vocab_size, n_layer=12, n_head=12, n_embd=768)
    model = GPT(mconf).to(device)
    # map_location keeps loading independent of the device the checkpoint was
    # saved from.
    checkpoint = torch.load('./weights/fragpt.pt', map_location=device, weights_only=True)
    model.load_state_dict(checkpoint)

    start_time = time.time()
    # The reference molecules do not change between scaffolds; parse them once.
    original_molecules = [Chem.MolFromSmiles(s) for s in original_smiles]

    valid_ratio_sum = 0
    uniqueness_sum = 0
    quality_sum = 0
    sa_sum = 0
    qed_sum = 0
    div_sum = 0
    dist_sum = 0
    for scaffold_prompt in scaffold_lst:
        num_stars = scaffold_prompt.count('*')
        valid_answer_list = []
        for _ in tqdm(range(samples_per_scaffold)):
            valid = []
            prompt = scaffold_prompt
            # One generation step per '*' in the scaffold; each step's result
            # becomes the prompt of the next one.
            for _step in range(num_stars):
                _, valid = Test1(model, prompt, tokenizer, max_seq_len=512, temperature=1.2, top_k=8, stream=False,
                                 rp=1., num_samples=1, kv_cache=True, is_simulation=True, device=device,
                                 scaffold=True, dummy_lst=None)
                if not valid:
                    break
                prompt = valid[0] + '[SEP]'

            if valid:
                valid_answer_list.append(valid[0])

        unique_smiles_lst = list({smile for smile in valid_answer_list if smile is not None})
        num_unique = len(unique_smiles_lst)
        uniqueness = num_unique / len(valid_answer_list) if valid_answer_list else 0
        valid_ratio = len(valid_answer_list) / samples_per_scaffold

        results = cal_all(unique_smiles_lst)
        sa_total = 0
        qed_total = 0
        high_quality_count = 0
        for sa_value, qed_value in zip(results['SA'], results['QED']):
            sa_total += sa_value
            qed_total += qed_value
            if qed_value >= 0.6 and sa_value <= 4:
                high_quality_count += 1
        quality = high_quality_count / samples_per_scaffold
        sa_mean = sa_total / num_unique if num_unique else 0
        qed_mean = qed_total / num_unique if num_unique else 0

        generated_molecules = [Chem.MolFromSmiles(s) for s in valid_answer_list]
        diversity = calculate_diversity(generated_molecules)
        distance = calculate_distance(generated_molecules, original_molecules)

        print('valid_ratio:', valid_ratio, 'uniqueness:', uniqueness, 'Quality:', quality,
              'SA:', sa_mean,
              'QED:', qed_mean, 'diversity:', diversity,
              'distance:', distance)
        valid_ratio_sum += valid_ratio
        uniqueness_sum += uniqueness
        quality_sum += quality
        sa_sum += sa_mean
        qed_sum += qed_mean
        div_sum += diversity
        dist_sum += distance
    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"运行时间: {elapsed_time:.4f} 秒")
    print(f"valid_ratio_avg: {valid_ratio_sum / len(scaffold_lst)}, uniqueness_avg: {uniqueness_sum / len(scaffold_lst)}, "
          f"quality_avg: {quality_sum / len(scaffold_lst)}, sa_avg: {sa_sum / len(scaffold_lst)}, "
          f"qed_avg: {qed_sum / len(scaffold_lst)}, div_avg: {div_sum / len(scaffold_lst)}, dist_avg: {dist_sum / len(scaffold_lst)}")






# Run the scaffold benchmark only when executed as a script / notebook cell.
if "__main__" == __name__:
    main_scaffold1()



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:54<00:00,  1.85it/s]


valid_ratio: 0.99 uniqueness: 0.9292929292929293 Quality: 0.0 SA: 4.62511107780839 QED: 0.28365229612443277 diversity: 0.5030579755639942 distance: 0.860279389033996


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:40<00:00,  2.45it/s]


valid_ratio: 1.0 uniqueness: 0.71 Quality: 0.0 SA: 5.249130694091 QED: 0.5183964208995867 diversity: 0.4191896967450992 distance: 0.874526174823528


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:53<00:00,  1.14s/it]


valid_ratio: 0.94 uniqueness: 1.0 Quality: 0.0 SA: 4.7014704158581155 QED: 0.250556277450295 diversity: 0.7111368123683309 distance: 0.8869435833624065


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:17<00:00,  1.30it/s]


valid_ratio: 0.99 uniqueness: 1.0 Quality: 0.0 SA: 4.1032781024516405 QED: 0.3186953505670136 diversity: 0.5908785555298866 distance: 0.8788152637198187


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:40<00:00,  2.48it/s]


valid_ratio: 0.99 uniqueness: 0.9090909090909091 Quality: 0.38 SA: 3.54228642711326 QED: 0.5782284351774114 diversity: 0.48354154651367887 distance: 0.8592795848663494


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:43<00:00,  2.28it/s]


valid_ratio: 1.0 uniqueness: 0.65 Quality: 0.42 SA: 3.558973110928189 QED: 0.6973995975483059 diversity: 0.4697017357247409 distance: 0.8776460473169019


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:59<00:00,  1.80s/it]


valid_ratio: 0.94 uniqueness: 1.0 Quality: 0.1 SA: 3.290568007773773 QED: 0.3519062625371475 diversity: 0.7822124141699802 distance: 0.8997856211672299


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:29<00:00,  1.12it/s]


valid_ratio: 0.98 uniqueness: 0.9897959183673469 Quality: 0.0 SA: 5.072694033264056 QED: 0.35038868617602437 diversity: 0.6283324285301455 distance: 0.8837861291561301


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:49<00:00,  2.01it/s]


valid_ratio: 0.99 uniqueness: 0.8282828282828283 Quality: 0.21 SA: 3.1003452567778504 QED: 0.46866353035092967 diversity: 0.5603871323243632 distance: 0.8894257579077501


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:04<00:00,  1.56it/s]


valid_ratio: 0.99 uniqueness: 0.9595959595959596 Quality: 0.05 SA: 2.973643491900483 QED: 0.38974669489326585 diversity: 0.6413424937428334 distance: 0.8810009024699741
运行时间: 756.5582 秒
valid_ratio_avg: 0.9810000000000001, uniqueness_avg: 0.8976058544629973, quality_avg: 0.11600000000000002, sa_avg: 4.021750061796675, qed_avg: 0.42076335517244134, div_avg: 0.5789780791213053, dist_avg: 0.8791488453824086
