# Save SDF for Compounds in the benchmark datasets

In [None]:
import pandas as pd
from tqdm import tqdm
from rdkit import Chem
from rdkit.Chem import AllChem
import os

activity_root = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/'
task_dict = {0: ('CHEMBL202', 'pIC50'), 1: ('CHEMBL235', 'pEC50'), 2: ('CHEMBL279', 'pIC50'), 3: ('CHEMBL2971', 'pIC50'), 
             4: ('CHEMBL333', 'pIC50'), 5: ('CHEMBL3820', 'pEC50'), 6: ('CHEMBL3976', 'pIC50'), 7: ('CHEMBL4422', 'pEC50')}
set_list = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']

def save_sdf(activity_path, save_path):
    # 读取CSV文件
    df = pd.read_csv(activity_path)

    # 获取分子SMILES列的数据
    smiles_list = df['SMILES'].tolist()
    id_list = df['ChEMBL_Compound_ID'].tolist()

    # 计算并保存SDF文件
    for index, smiles in zip(id_list, smiles_list):
        output_file = save_path + f'{index}.sdf'
        if os.path.exists(output_file):
            continue
        try:
            mol = Chem.MolFromSmiles(smiles)
            hmol = Chem.AddHs(mol)
            AllChem.EmbedMolecule(hmol,AllChem.ETKDG())
            print(AllChem.UFFOptimizeMolecule(hmol,1000))
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            writer = Chem.SDWriter(output_file)
            hmol.SetProp("_SMILES","%s"%smiles)
            writer.write(hmol)
            writer.close()
    #         print(f'Successfully save SDF for the compound: {index}')
        except:
            print(f'Fail to save SDF for the compound: {index} in the {set_name} set')

for key, value in task_dict.items():
    target, assay = value
    for set_name in tqdm(set_list):
        activity_path =f'{activity_root}/{target}/{assay}/{target}_{assay}_{set_name}.csv'
        save_path = f'{activity_root}/{target}/ligand/sdf/{set_name}/'
        save_sdf(activity_path, save_path)

# Prepare ligands in pdbqt format for molecular docking

In [None]:
import os
from tqdm import tqdm
from rdkit import Chem

data_root = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/'
task_dict = {0: ('CHEMBL202', 'pIC50'), 1: ('CHEMBL235', 'pEC50'), 2: ('CHEMBL279', 'pIC50'), 3: ('CHEMBL2971', 'pIC50'), 
             4: ('CHEMBL333', 'pIC50'), 5: ('CHEMBL3820', 'pEC50'), 6: ('CHEMBL3976', 'pIC50'), 7: ('CHEMBL4422', 'pEC50')}
set_list = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']

for key, value in task_dict.items():
    target, assay = value
    # List of input SDF files
    input_dir = f'{data_root}/{target}/ligand/sdf/'
    output_dir_addH = input_dir
    output_dir_pdbqt = f'{data_root}/{target}/ligand/pdbqt/'

    # Loop through input files
    for set_name in tqdm(set_list):
        input_files = os.listdir(input_dir + set_name)
        for input_file in tqdm(input_files):
            input_file_path = input_dir + set_name + '/' + input_file
            # Load the molecule from the SDF file
    #         mol = Chem.SDMolSupplier(input_file_path)[0]

    #         # Add explicit hydrogens
    #         mol = Chem.AddHs(mol)

    #         # Save the modified molecule with explicit hydrogens
    #         sdf_addH_save_path = output_dir_addH + set_name + '/'
    #         if not os.path.exists(sdf_addH_save_path):
    #                 os.makedirs(sdf_addH_save_path)
    #         Chem.SDWriter(sdf_addH_save_path + input_file).write(mol)

            # Construct the command
            pdbqt_save_path = output_dir_pdbqt + set_name + '/'
            if not os.path.exists(pdbqt_save_path):
                    os.makedirs(pdbqt_save_path)
            output_path = pdbqt_save_path + input_file[:-4] + '.pdbqt'
            if not os.path.exists(output_path):
                command = f"mk_prepare_ligand.py -i {input_file_path} -o {output_path}"
                # Execute the command
                !{command}

# Do AutoDock Vina on a batch of ligands and one protein

In [2]:
import subprocess
import pandas as pd
from tqdm import tqdm
import os

protein_name, pdb_name, pocket_num = 'CHEMBL3976', '4ebb', 2
# 定义输入输出文件路径列表
input_protein = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}/protein/{pdb_name}.pdbqt'
input_dir_ligand = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}/ligand/pdbqt/'
input_config_dir = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}/protein/'
output_dir = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}/{pdb_name}/vina/'
input_sets = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']
command_template = "vina --receptor {} --ligand {} --config {} --exhaustiveness 32 --out {}"
# 循环处理每个文件路径
for i in range(pocket_num):
    input_config = input_config_dir + f'pocket_{i+1}.txt'
    for set_name in tqdm(input_sets):
        if not os.path.exists(output_dir + set_name):
            os.makedirs(output_dir + set_name)
        input_ligands = os.listdir(input_dir_ligand + set_name)
        for input_ligand in tqdm(input_ligands):
            # 提取文件名（不包含扩展名）
            filename = input_ligand.split('.')[0]
            output_path = output_dir + set_name + '/' + filename + f'_Pocket_{i+1}.pdbqt'
            if os.path.exists(output_path + '.txt'):
                os.rename(output_path + '.txt', output_path.replace('pdbqt', 'txt'))
            if os.path.exists(output_path):
                continue
            # 构建命令
            command = command_template.format(input_protein, input_dir_ligand + set_name + '/' + input_ligand, input_config, 
                                              output_path)
            # 执行命令并捕获输出结果
            output = subprocess.run(command, shell=True, capture_output=True, text=True)

            # 将输出结果保存到文件
            with open(output_path.replace('pdbqt', 'txt'), 'w') as f:
                f.write(output.stdout)


  0%|                                                     | 0/6 [00:00<?, ?it/s]
  0%|                                                   | 0/276 [00:00<?, ?it/s][A
 19%|███████▋                                 | 52/276 [00:00<00:00, 460.05it/s][A
 36%|██████████████▋                          | 99/276 [00:00<00:00, 243.75it/s][A
 62%|████████████████████████▉               | 172/276 [00:00<00:00, 315.81it/s][A
100%|████████████████████████████████████████| 276/276 [00:00<00:00, 377.00it/s][A
 17%|███████▌                                     | 1/6 [00:00<00:03,  1.36it/s]
  0%|                                                   | 0/273 [00:00<?, ?it/s][A
 52%|████████████████████▉                   | 143/273 [00:00<00:00, 806.75it/s][A
100%|████████████████████████████████████████| 273/273 [00:00<00:00, 510.09it/s][A
 33%|███████████████                              | 2/6 [00:01<00:02,  1.61it/s]
  0%|                                                   | 0/273 [00:00<?, ?it/s][A
 

 67%|██████████████████████████▊             | 183/273 [00:01<00:00, 131.44it/s][A
 72%|████████████████████████████▊           | 197/273 [00:01<00:00, 114.54it/s][A
 81%|████████████████████████████████▌       | 222/273 [00:01<00:00, 122.37it/s][A
 86%|██████████████████████████████████▍     | 235/273 [00:02<00:00, 106.40it/s][A
 92%|████████████████████████████████████▉   | 252/273 [00:02<00:00, 102.75it/s][A
100%|████████████████████████████████████████| 273/273 [00:02<00:00, 111.62it/s][A
100%|█████████████████████████████████████████████| 6/6 [00:07<00:00,  1.31s/it]


# Save Vina scores into the datasets

In [3]:
import os, re
from tqdm import tqdm
from rdkit import Chem
import pandas as pd

task_dict = {'I1': ('CHEMBL202', 'pIC50', '1boz', 7, 1), 'E3': ('CHEMBL235', 'pEC50', '1zgy', 4, 2), 
             'I5': ('CHEMBL279', 'pIC50', '1ywn',3, 3), 'I4': ('CHEMBL2971', 'pIC50', '3ugc', 3, 3), 
             'I3': ('CHEMBL333', 'pIC50', '1ck7', 6, 2), 'E1': ('CHEMBL3820', 'pEC50', '3f9m', 6 ,1), 
             'I2': ('CHEMBL3976', 'pIC50', '4ebb', 2, 4), 'E2': ('CHEMBL4422', 'pEC50', '5tzr', 3, 3)}
protein_name, assay_type, pdb_name, pocket_num, pose_num = task_dict['I5']
# List of input SDF files
result_root = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}'
result_dir = f'{result_root}/{pdb_name}/vina/'
set_list = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']
csv_dir = f'{result_root}/{assay_type}/{protein_name}_{assay_type}_'
output_path = f'{result_dir}/{protein_name}_{assay_type}_'
    
# 定义正则表达式模式
pattern = r'^\s+(\d)\s+(-?\d+\.\d+)'
pattern_ = r'^\s+(\d)\s+(-?\d+)'
# new_column_names = {'value': 'pIC50', 'smiles': 'SMILES', 'ChEMBL_Compound_ID': 'ChEMBL_Compound_ID'}

for dataset in tqdm(set_list):
    dataset_path = result_dir + dataset + '/'
    dataset_df = pd.read_csv(csv_dir + dataset + '.csv')
#     dataset_df = dataset_df.rename(columns=new_column_names)
    # Loop through input files
    for input_file in os.listdir(dataset_path):
        if input_file[-5:] == 'pdbqt':
            # 读取文本文件
            file_name = input_file[:-6]
            cpd = file_name.split('_')[0]
            pocket = file_name.split('_')[-1]
            txt_path = dataset_path + file_name + '.txt'
            with open(txt_path, 'r') as file:
                lines = file.readlines()

            # 提取符合模式的行，并保存到列表中
            for line in lines:
                match_ = re.match(pattern, line)
                match__ = re.match(pattern_, line)
                match = match_ if match_ else match__
                if match:
                    number = int(match.group(1))
                    affinity = float(match.group().rsplit(maxsplit=1)[-1])
                    # 设置条件
                    condition = (dataset_df['ChEMBL_Compound_ID'] == cpd)  # 示例条件，可根据实际情况修改
                    # 根据条件筛选满足条件的行索引
                    row_indices = dataset_df[condition].index
                    column_name = f'Pocket_{pocket}-{number}_Vina_Score'  # 列名，根据文件名索引生成
                    dataset_df.loc[row_indices, column_name] = affinity

    # 保存更新后的CSV表格
    dataset_df.to_csv(output_path + dataset + '.csv', index=False)


100%|███████████████████████████████████████████████████████████████████████████| 6/6 [07:21<00:00, 73.52s/it]


# Do QVina-W on a batch of ligands and pockets in one protein

In [None]:
import os
from tqdm import tqdm

pocket_num = 7
receptor_file = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/protein/pdb1boz.pdbqt'
ligand_pdbqt_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/ligand/pdbqt/'
config_file_template = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/protein/qvina_pocket_{}.txt'
output_root = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/qvina_pocket/'
qvina_w_path = '/home/yueming/Drug_Discovery/Baselines/qvina'
input_sets = os.listdir(ligand_pdbqt_dir)
# 循环执行命令
for set_name in tqdm(input_sets):
    output_dir = output_root + set_name + '/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    input_files = os.listdir(ligand_pdbqt_dir + set_name)
    for input_file in tqdm(input_files):
        if not os.path.exists(f'{output_dir}/{input_file}'):
            input_file_path = ligand_pdbqt_dir + set_name + '/' + input_file
            # 更改配置文件中的 receptor 路径为当前的 input_file
            for i in range(pocket_num):
                pocket = i + 1
                config_file = config_file_template.format(pocket)
                with open(config_file, 'r') as f:
                    config_lines = f.readlines()
                config_lines[0] = f'receptor = {receptor_file}\n'
                config_lines[1] = f'ligand = {ligand_pdbqt_dir + set_name}/{input_file}\n'
                config_lines[2] = f'out  = {output_dir}/{input_file[:-6]}_Pocket_{pocket}.pdbqt\n'
                config_lines[3] = f'log  = {output_dir}/{input_file[:-6]}_Pocket_{pocket}.txt\n'
                with open(config_file, 'w') as f:
                    f.writelines(config_lines)

                # 执行命令
                command = f'{qvina_w_path}/qvina-w_serial --config {config_file}'
                os.system(command)


# Read and save docking results for QVina-W

In [None]:
import os, re
from tqdm import tqdm
from rdkit import Chem
import pandas as pd

# List of input SDF files
result_root = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/qvina_pocket/'
output_csv_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/qvina_pocket/'
input_sets = os.listdir(result_root)

# 定义正则表达式模式，只取top-1结果
pattern = r'^\s+1\s+(-?\d+\.\d+)'
pattern_ = r'^\s+1\s+(-?\d+)'

def extract_smiles_from_pdbqt(file_path):
    smiles = None
    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('REMARK SMILES'):
                smiles = line.split('REMARK SMILES')[1].strip()
                break
    return smiles

for set_name in tqdm(input_sets):
    result_dir = result_root + set_name + '/'
    # 创建一个空的 DataFrame
    df = pd.DataFrame(columns=['ChEMBL_Compound_ID', 'SMILES', 'Vina_Score_1', 'Vina_Score_2', 'Vina_Score_3', 'Vina_Score_4', 
                               'Vina_Score_5', 'Vina_Score_6', 'Vina_Score_7'])
    for input_file in os.listdir(result_dir):
        if input_file[-5:] == 'pdbqt':
            file_name = input_file[:-6]
            cpd = file_name.split('_')[0]
            pocket = file_name.split('_')[-1]
            # 读取文本文件
            txt_path = result_dir + file_name + '.txt' # need to be coverted to "input_file.replace('pdbqt', 'txt')" after being corrected
            with open(txt_path, 'r') as txt:
                lines = txt.readlines()
            smiles = extract_smiles_from_pdbqt(result_dir + input_file)
            # 提取符合模式的行，并保存到列表中
            for line in lines:
                match = re.findall(pattern, line, flags=re.MULTILINE)
                if match:
                    affinity = float(match[0])
                    break
            if cpd in df['ChEMBL_Compound_ID'].values:
                condition = (df['ChEMBL_Compound_ID']==cpd)
                df.loc[condition, f'Vina_Score_{pocket}'] = affinity
            else:
                init_content_list = [cpd, smiles] + [None] * 7
                df.loc[len(df)] = init_content_list
                df.loc[len(df)-1, f'Vina_Score_{pocket}'] = affinity

    # 保存更新后的CSV表格
    df.to_csv(output_csv_dir + f'{set_name}/vina_results.csv', index=False)

# Do QVina-W on a batch of ligands and one protein

In [None]:
import os
from tqdm import tqdm

receptor_file = '/home/yueming/Drug_Discovery/Baselines/qvina/dataset/6yg9.pdbqt'
ligand_pdbqt_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/ligand/pdbqt/'
config_file = '/home/yueming/Drug_Discovery/Baselines/qvina/dataset/6yg9_config.txt'
output_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/6yg9/'
qvina_w_path = '/home/yueming/Drug_Discovery/Baselines/qvina'
input_sets = os.listdir(ligand_pdbqt_dir)
# 循环执行命令
for set_name in tqdm(input_sets):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    input_files = os.listdir(ligand_pdbqt_dir + set_name)
    for input_file in tqdm(input_files):
        if not os.path.exists(f'{output_dir}/{input_file}'):
            input_file_path = ligand_pdbqt_dir + set_name + '/' + input_file
            # 更改配置文件中的 receptor 路径为当前的 input_file
            with open(config_file, 'r') as f:
                config_lines = f.readlines()
            config_lines[0] = f'receptor = {receptor_file}\n'
            config_lines[1] = f'ligand = {ligand_pdbqt_dir + set_name}/{input_file}\n'
            config_lines[2] = f'out  = {output_dir}/{input_file}\n'
            config_lines[3] = f'log  = {output_dir}/{input_file[:-6]}.txt\n'
            with open(config_file, 'w') as f:
                f.writelines(config_lines)

            # 执行命令
            command = f'{qvina_w_path}/qvina-w_serial --config {config_file}'
            os.system(command)


# Read and save docking results for QVina-W

In [None]:
import os, re
from tqdm import tqdm
from rdkit import Chem
import pandas as pd

# List of input SDF files
result_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/6yg9/'
output_csv_path = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/6yg9/qvina_w_results.csv'
    
# 定义正则表达式模式
pattern = r'^\s+(\d+)\s+(-?\d+\.\d+)'
pattern_ = r'^\s+(\d+)\s+(-?\d+)'
# 创建一个空的 DataFrame
df = pd.DataFrame(columns=['ChEMBL_Compound_ID', 'SMILES', 'QVina-W_Score_1', 'QVina-W_Score_2', 'QVina-W_Score_3', 'QVina-W_Score_4', 
                           'QVina-W_Score_5', 'QVina-W_Score_6', 'QVina-W_Score_7', 'QVina-W_Score_8', 'QVina-W_Score_9'])

def extract_smiles_from_pdbqt(file_path):
    smiles = None
    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('REMARK SMILES'):
                smiles = line.split('REMARK SMILES')[1].strip()
                break
    return smiles

def add_row_to_dataframe(df, content_list):
    num_columns = len(df.columns)
    if len(content_list) < num_columns:
        content_list.extend([None] * (num_columns - len(content_list)))
    df.loc[len(df)] = content_list

for input_file in tqdm(os.listdir(result_dir)):
    if input_file[-5:] == 'pdbqt':
        # 读取文本文件
        txt_path = result_dir + input_file.replace('pdbqt', 'txt')
        with open(txt_path, 'r') as txt:
            lines = txt.readlines()
        smiles = extract_smiles_from_pdbqt(result_dir + input_file)
        cid = input_file[:-6]  # 文件名索引
        content_list = [cid, smiles]
        # 提取符合模式的行，并保存到列表中
        for line in lines:
            match_ = re.match(pattern, line)
            match__ = re.match(pattern_, line)
            match = match_ if match_ else match__
            if match:
                number = int(match.group(1))
                affinity = float(match.group().rsplit(maxsplit=1)[-1])
                content_list.append(affinity)
        add_row_to_dataframe(df, content_list)

# 保存更新后的CSV表格
df.to_csv(output_csv_path, index=False)


# Preprocessing docking results to mols

In [1]:
# conda activate rdkit
import os
import pickle
from rdkit import Chem
import pandas as pd
from tqdm import tqdm
import pymol
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')

def generate_pocket(data_dir, protein_path, distance=5, save_states=1):
    complex_id = os.listdir(data_dir)
    for file in complex_id:
        if file[-6:] == '.pdbqt':
            file_name = file[:-6]
            cid = file_name.split('_')[0]
            pocket = file_name.split('_')[-1]
            pymol.cmd.delete('all')
            lig_path = os.path.join(data_dir, file)
            protein_name = protein_path.split('/')[-1].split('.')[0]

            pymol.cmd.load(lig_path)
            pymol.cmd.remove('hydrogens')
            pymol.cmd.load(protein_path)
            pymol.cmd.remove('resn HOH')
            object_list = pymol.cmd.get_object_list()  # 获取所有对象列表
            obj_ligand, obj_protein = object_list[0], object_list[1]
            total_states = pymol.cmd.count_states(obj_ligand)
            num_states = min(save_states, total_states)
            
            old_path = os.path.join(data_dir, f'{cid}_Pocket_{pocket}_{distance}A.pdb')
            if os.path.exists(old_path):
                os.rename(old_path, os.path.join(data_dir, f'{cid}_Pocket_{pocket}_Pose_1_{distance}A.pdb'))
            for state in range(1, num_states + 1):
                pocket_path = os.path.join(data_dir, f'{cid}_Pocket_{pocket}_Pose_{state}_{distance}A.pdb')
                if not os.path.exists(pocket_path) or recreate:
                    pymol.cmd.create(f"state_{state}", obj_ligand, state, 1)
                    pymol.cmd.select('Pocket', f'byres {protein_name} within {distance} of state_{state}')
                    pymol.cmd.save(pocket_path, 'Pocket')


def split_ligand_to_pdb(pdbqt_file, save_states=1):
    pymol.cmd.delete('all')
    pdb_name = pdbqt_file.split('/')[-1].split('.')[0]
    pymol.cmd.load(pdbqt_file)
    pymol.cmd.remove('hydrogens')
    object_list = pymol.cmd.get_object_list()  # 获取所有对象列表
    obj_ligand = object_list[0]
    total_states = pymol.cmd.count_states(obj_ligand)
    old_path = pdbqt_file.split('.')[0] + '_Pose_0.pdb'
    if os.path.exists(old_path):
        os.remove(old_path)
    num_states = min(save_states, total_states)
    for state in range(1, num_states + 1):
        save_path = pdbqt_file.split('.')[0] + f'_Pose_{state}.pdb'
        if not os.path.exists(save_path) or recreate:
            pymol.cmd.create(f"state_{state}", obj_ligand, state, 1)
            pymol.cmd.save(save_path, f"state_{state}")
                    
                
def generate_complex(data_dir, distance=5, input_ligand_format='pdbqt', save_states=1):
    file_list = os.listdir(data_dir)
    pdbqt_list = [x for x in file_list if x.split('.')[-1] == 'pdbqt']
    pbar = tqdm(total=len(pdbqt_list)*save_states)
    for i, pdbqt_file in enumerate(pdbqt_list):
        file_name = pdbqt_file[:-6]
        cid = file_name.split('_')[0]
        pocket = file_name.split('_')[-1]
        ligand_input_path = os.path.join(data_dir, f'{cid}_Pocket_{pocket}.{input_ligand_format}')
        # 调用函数，传入要处理的 PDBQT 文件名
        split_ligand_to_pdb(ligand_input_path, save_states=save_states)
        old_path = os.path.join(data_dir, f"{cid}_Complex_{pocket}_{distance}A.rdkit")
        if os.path.exists(old_path):
            os.rename(old_path, os.path.join(data_dir, f"{cid}_Complex_{pocket}_1_{distance}A.rdkit"))
        for pose in range(1, save_states + 1):
            old_path = os.path.join(data_dir, f"{cid}_Complex_{pocket}_{pose}_{distance}A.rdkit")
            if os.path.exists(old_path):
                os.rename(old_path, os.path.join(data_dir, f"{cid}_Complex_{pocket}-{pose}_{distance}A.rdkit"))
            save_path = os.path.join(data_dir, f"{cid}_Complex_{pocket}-{pose}_{distance}A.rdkit")
            if not os.path.exists(save_path) or recreate:
                pocket_path = os.path.join(data_dir, f'{cid}_Pocket_{pocket}_Pose_{pose}_{distance}A.pdb')
                ligand_path = ligand_input_path.split('.')[0] + f'_Pose_{pose}.pdb'
                if not os.path.exists(ligand_input_path):
                    if os.path.exists(ligand_path):
                        os.remove(ligand_path)
                    continue
                if not os.path.exists(ligand_path):
                    print(f'Not found pose: {ligand_file}')
                    continue
                
                ligand_file = Chem.MolFromPDBFile(ligand_path, removeHs=True)
                if ligand_file == None:
                    print(f"Unable to process ligand of {cid}")
                    continue

                pocket_file = Chem.MolFromPDBFile(pocket_path, removeHs=True)
                if pocket_file == None:
                    print(f"Unable to process protein of {cid}")
                    continue

                complex = (ligand_file, pocket_file)
                with open(save_path, 'wb') as f:
                    pickle.dump(complex, f)

                pbar.update(1)
                
recreate = True
task_list = ['E3', 'I4', 'I5']
task_dict = {'I1': ('CHEMBL202', 'pIC50', '1boz', 7, 1), 'E3': ('CHEMBL235', 'pEC50', '1zgy', 4, 2), 
             'I5': ('CHEMBL279', 'pIC50', '1ywn',3, 3), 'I4': ('CHEMBL2971', 'pIC50', '3ugc', 3, 3), 
             'I3': ('CHEMBL333', 'pIC50', '1ck7', 6, 2), 'E1': ('CHEMBL3820', 'pEC50', '3f9m', 6 ,1), 
             'I2': ('CHEMBL3976', 'pIC50', '4ebb', 2, 4), 'E2': ('CHEMBL4422', 'pEC50', '5tzr', 3, 3)}
for task_id in task_list:
    protein_name, assay_type, pdb_name, pocket_num, pose_num = task_dict[task_id]
    distance, input_ligand_format = 5, 'pdbqt'
    protein_path = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}/protein/{pdb_name}.pdbqt'
    data_root = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}/{pdb_name}/vina/'
    set_list = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']
    for dataset in tqdm(set_list):
        data_df = pd.read_csv(os.path.join(data_root, f'{protein_name}_{assay_type}_{dataset}.csv'))
        dataset_path = data_root + dataset + '/'

        ## generate pocket within 5 Ångström around ligand 
        generate_pocket(data_dir=dataset_path, protein_path=protein_path, distance=distance, save_states=pose_num)
        generate_complex(dataset_path, distance=distance, input_ligand_format=input_ligand_format, save_states=pose_num)

  0%|          | 0/6 [00:00<?, ?it/s]

 PyMOL not running, entering library mode (experimental)



  0%|          | 0/4640 [00:00<?, ?it/s][A
  0%|          | 17/4640 [00:00<00:27, 169.11it/s][A
  1%|          | 41/4640 [00:00<00:21, 210.23it/s][A
  1%|▏         | 67/4640 [00:00<00:19, 232.41it/s][A
  2%|▏         | 93/4640 [00:00<00:18, 242.05it/s][A
  3%|▎         | 119/4640 [00:00<00:18, 244.69it/s][A
  3%|▎         | 145/4640 [00:00<00:18, 244.90it/s][A
  4%|▎         | 171/4640 [00:00<00:18, 248.27it/s][A
  4%|▍         | 198/4640 [00:00<00:17, 254.30it/s][A
  5%|▍         | 224/4640 [00:00<00:17, 246.33it/s][A
  5%|▌         | 249/4640 [00:01<00:17, 246.97it/s][A
  6%|▌         | 276/4640 [00:01<00:17, 252.66it/s][A
  7%|▋         | 303/4640 [00:01<00:16, 255.77it/s][A
  7%|▋         | 329/4640 [00:01<00:16, 255.40it/s][A
  8%|▊         | 355/4640 [00:01<00:16, 256.03it/s][A
  8%|▊         | 383/4640 [00:01<00:16, 260.03it/s][A
  9%|▉         | 410/4640 [00:01<00:16, 255.20it/s][A
  9%|▉         | 438/4640 [00:01<00:16, 261.71it/s][A
 10%|█         | 465/464

 58%|█████▊    | 2763/4760 [00:12<00:09, 214.13it/s][A
 59%|█████▊    | 2785/4760 [00:12<00:09, 214.96it/s][A
 59%|█████▉    | 2807/4760 [00:12<00:09, 207.45it/s][A
 59%|█████▉    | 2829/4760 [00:12<00:09, 210.21it/s][A
 60%|█████▉    | 2851/4760 [00:12<00:09, 210.59it/s][A
 60%|██████    | 2873/4760 [00:12<00:09, 208.87it/s][A
 61%|██████    | 2894/4760 [00:13<00:09, 198.09it/s][A
 61%|██████▏   | 2916/4760 [00:13<00:09, 203.79it/s][A
 62%|██████▏   | 2940/4760 [00:13<00:08, 213.62it/s][A
 62%|██████▏   | 2963/4760 [00:13<00:08, 216.04it/s][A
 63%|██████▎   | 2986/4760 [00:13<00:08, 219.26it/s][A
 63%|██████▎   | 3009/4760 [00:13<00:07, 219.51it/s][A
 64%|██████▎   | 3032/4760 [00:13<00:07, 221.89it/s][A
 64%|██████▍   | 3055/4760 [00:13<00:07, 219.32it/s][A
 65%|██████▍   | 3079/4760 [00:13<00:07, 221.31it/s][A
 65%|██████▌   | 3103/4760 [00:14<00:07, 223.13it/s][A
 66%|██████▌   | 3126/4760 [00:14<00:07, 221.94it/s][A
 66%|██████▌   | 3149/4760 [00:14<00:07, 216.71i

 82%|████████▏ | 3850/4688 [00:22<00:04, 178.63it/s][A
 83%|████████▎ | 3869/4688 [00:22<00:04, 181.22it/s][A
 83%|████████▎ | 3889/4688 [00:22<00:04, 182.21it/s][A
 83%|████████▎ | 3908/4688 [00:22<00:04, 182.94it/s][A
 84%|████████▍ | 3927/4688 [00:22<00:04, 181.32it/s][A
 84%|████████▍ | 3946/4688 [00:22<00:04, 172.92it/s][A
 85%|████████▍ | 3964/4688 [00:22<00:04, 174.48it/s][A
 85%|████████▍ | 3982/4688 [00:22<00:04, 175.14it/s][A
 85%|████████▌ | 4000/4688 [00:23<00:03, 174.47it/s][A
 86%|████████▌ | 4019/4688 [00:23<00:03, 174.69it/s][A
 86%|████████▌ | 4037/4688 [00:23<00:03, 174.87it/s][A
 86%|████████▋ | 4055/4688 [00:23<00:03, 174.19it/s][A
 87%|████████▋ | 4073/4688 [00:23<00:03, 174.71it/s][A
 87%|████████▋ | 4091/4688 [00:23<00:03, 175.59it/s][A
 88%|████████▊ | 4109/4688 [00:23<00:03, 176.31it/s][A
 88%|████████▊ | 4127/4688 [00:23<00:03, 176.07it/s][A
 88%|████████▊ | 4147/4688 [00:23<00:03, 179.18it/s][A
 89%|████████▉ | 4165/4688 [00:23<00:02, 179.02i

 91%|█████████ | 4292/4736 [00:25<00:02, 163.69it/s][A
 91%|█████████ | 4309/4736 [00:26<00:02, 161.59it/s][A
 91%|█████████▏| 4327/4736 [00:26<00:02, 163.61it/s][A
 92%|█████████▏| 4344/4736 [00:26<00:02, 163.62it/s][A
 92%|█████████▏| 4361/4736 [00:26<00:02, 160.71it/s][A
 92%|█████████▏| 4378/4736 [00:26<00:02, 162.91it/s][A
 93%|█████████▎| 4395/4736 [00:26<00:02, 161.09it/s][A
 93%|█████████▎| 4413/4736 [00:26<00:01, 161.70it/s][A
 94%|█████████▎| 4430/4736 [00:26<00:01, 160.97it/s][A
 94%|█████████▍| 4447/4736 [00:26<00:01, 160.27it/s][A
 94%|█████████▍| 4464/4736 [00:27<00:01, 162.23it/s][A
 95%|█████████▍| 4481/4736 [00:27<00:01, 160.77it/s][A
 95%|█████████▍| 4498/4736 [00:27<00:01, 163.16it/s][A
 95%|█████████▌| 4515/4736 [00:27<00:01, 161.77it/s][A
 96%|█████████▌| 4532/4736 [00:27<00:01, 163.10it/s][A
 96%|█████████▌| 4549/4736 [00:27<00:01, 161.54it/s][A
 96%|█████████▋| 4567/4736 [00:27<00:01, 161.96it/s][A
 97%|█████████▋| 4584/4736 [00:27<00:00, 160.62i

 87%|████████▋ | 4139/4784 [00:28<00:04, 145.92it/s][A
 87%|████████▋ | 4154/4784 [00:28<00:04, 145.97it/s][A
 87%|████████▋ | 4169/4784 [00:28<00:04, 144.50it/s][A
 87%|████████▋ | 4185/4784 [00:28<00:04, 144.81it/s][A
 88%|████████▊ | 4201/4784 [00:28<00:04, 145.51it/s][A
 88%|████████▊ | 4217/4784 [00:29<00:03, 145.76it/s][A
 88%|████████▊ | 4233/4784 [00:29<00:03, 147.10it/s][A
 89%|████████▉ | 4248/4784 [00:29<00:03, 146.91it/s][A
 89%|████████▉ | 4263/4784 [00:29<00:03, 145.04it/s][A
 89%|████████▉ | 4279/4784 [00:29<00:03, 145.73it/s][A
 90%|████████▉ | 4294/4784 [00:29<00:03, 145.89it/s][A
 90%|█████████ | 4309/4784 [00:29<00:03, 143.95it/s][A
 90%|█████████ | 4324/4784 [00:29<00:03, 144.47it/s][A
 91%|█████████ | 4339/4784 [00:29<00:03, 141.95it/s][A
 91%|█████████ | 4355/4784 [00:30<00:02, 143.13it/s][A
 91%|█████████▏| 4371/4784 [00:30<00:02, 143.85it/s][A
 92%|█████████▏| 4386/4784 [00:30<00:02, 144.42it/s][A
 92%|█████████▏| 4401/4784 [00:30<00:02, 142.84i

 73%|███████▎  | 3424/4704 [00:27<00:10, 124.09it/s][A
 73%|███████▎  | 3437/4704 [00:27<00:10, 122.86it/s][A
 73%|███████▎  | 3451/4704 [00:27<00:10, 123.51it/s][A
 74%|███████▎  | 3465/4704 [00:27<00:09, 123.98it/s][A
 74%|███████▍  | 3478/4704 [00:27<00:09, 125.67it/s][A
 74%|███████▍  | 3491/4704 [00:27<00:09, 124.32it/s][A
 75%|███████▍  | 3505/4704 [00:27<00:09, 124.15it/s][A
 75%|███████▍  | 3519/4704 [00:28<00:09, 124.61it/s][A
 75%|███████▌  | 3533/4704 [00:28<00:09, 124.61it/s][A
 75%|███████▌  | 3547/4704 [00:28<00:09, 125.82it/s][A
 76%|███████▌  | 3560/4704 [00:28<00:09, 126.97it/s][A
 76%|███████▌  | 3573/4704 [00:28<00:09, 125.42it/s][A
 76%|███████▋  | 3587/4704 [00:28<00:08, 125.88it/s][A
 77%|███████▋  | 3601/4704 [00:28<00:08, 126.51it/s][A
 77%|███████▋  | 3615/4704 [00:28<00:08, 127.03it/s][A
 77%|███████▋  | 3629/4704 [00:28<00:08, 127.47it/s][A
 77%|███████▋  | 3643/4704 [00:29<00:08, 128.51it/s][A
 78%|███████▊  | 3657/4704 [00:29<00:08, 127.37i

Not found pose: <rdkit.Chem.rdchem.Mol object at 0x7f579795df30>
Not found pose: <rdkit.Chem.rdchem.Mol object at 0x7f579795df30>



 19%|█▊        | 1562/8343 [00:11<00:51, 130.83it/s][A
 19%|█▉        | 1577/8343 [00:11<00:51, 131.26it/s][A
 19%|█▉        | 1592/8343 [00:12<00:51, 131.69it/s][A
 19%|█▉        | 1607/8343 [00:12<00:51, 130.63it/s][A
 19%|█▉        | 1622/8343 [00:12<00:50, 132.43it/s][A
 20%|█▉        | 1637/8343 [00:12<00:50, 132.90it/s][A
 20%|█▉        | 1652/8343 [00:12<00:50, 132.70it/s][A
 20%|█▉        | 1667/8343 [00:12<00:50, 132.06it/s][A
 20%|██        | 1682/8343 [00:12<00:50, 132.28it/s][A
 20%|██        | 1697/8343 [00:12<00:50, 131.34it/s][A
 21%|██        | 1712/8343 [00:13<00:50, 130.65it/s][A
 21%|██        | 1727/8343 [00:13<00:50, 130.31it/s][A
 21%|██        | 1742/8343 [00:13<00:50, 130.39it/s][A
 21%|██        | 1757/8343 [00:13<00:50, 130.23it/s][A
 21%|██        | 1772/8343 [00:13<00:50, 130.23it/s][A
 21%|██▏       | 1787/8343 [00:13<00:50, 131.05it/s][A
 22%|██▏       | 1802/8343 [00:13<00:49, 132.29it/s][A
 22%|██▏       | 1817/8343 [00:13<00:49, 131.90

Not found pose: <rdkit.Chem.rdchem.Mol object at 0x7f5797960030>



 34%|███▍      | 2866/8343 [00:21<00:42, 129.17it/s][A
 35%|███▍      | 2880/8343 [00:21<00:41, 131.44it/s][A
 35%|███▍      | 2894/8343 [00:22<00:42, 128.77it/s][A
 35%|███▍      | 2908/8343 [00:22<00:43, 125.77it/s][A
 35%|███▌      | 2923/8343 [00:22<00:42, 126.79it/s][A
 35%|███▌      | 2938/8343 [00:22<00:42, 126.42it/s][A
 35%|███▌      | 2953/8343 [00:22<00:41, 128.41it/s][A
 36%|███▌      | 2967/8343 [00:22<00:41, 130.65it/s][A
 36%|███▌      | 2981/8343 [00:22<00:41, 129.34it/s][A
 36%|███▌      | 2995/8343 [00:22<00:42, 126.62it/s][A
 36%|███▌      | 3010/8343 [00:22<00:41, 127.27it/s][A
 36%|███▋      | 3025/8343 [00:23<00:41, 128.07it/s][A
 36%|███▋      | 3040/8343 [00:23<00:41, 128.92it/s][A
 37%|███▋      | 3053/8343 [00:23<00:41, 127.90it/s][A
 37%|███▋      | 3067/8343 [00:23<00:42, 125.16it/s][A
 37%|███▋      | 3080/8343 [00:23<00:43, 120.77it/s][A
 37%|███▋      | 3094/8343 [00:23<00:43, 120.29it/s][A
 37%|███▋      | 3109/8343 [00:23<00:42, 122.82

 84%|████████▎ | 6987/8343 [00:54<00:10, 128.48it/s][A
 84%|████████▍ | 7000/8343 [00:54<00:10, 123.38it/s][A
 84%|████████▍ | 7013/8343 [00:54<00:10, 124.64it/s][A
 84%|████████▍ | 7027/8343 [00:54<00:10, 123.53it/s][A
 84%|████████▍ | 7040/8343 [00:54<00:10, 125.15it/s][A
 85%|████████▍ | 7054/8343 [00:54<00:10, 123.57it/s][A
 85%|████████▍ | 7069/8343 [00:55<00:10, 125.85it/s][A
 85%|████████▍ | 7084/8343 [00:55<00:09, 126.95it/s][A
 85%|████████▌ | 7097/8343 [00:55<00:09, 127.25it/s][A
 85%|████████▌ | 7111/8343 [00:55<00:09, 124.66it/s][A
 85%|████████▌ | 7125/8343 [00:55<00:09, 128.26it/s][A
 86%|████████▌ | 7138/8343 [00:55<00:09, 125.81it/s][A
 86%|████████▌ | 7153/8343 [00:55<00:09, 125.85it/s][A
 86%|████████▌ | 7168/8343 [00:55<00:09, 125.78it/s][A
 86%|████████▌ | 7182/8343 [00:55<00:08, 129.01it/s][A
 86%|████████▌ | 7195/8343 [00:56<00:09, 122.25it/s][A
 86%|████████▋ | 7210/8343 [00:56<00:09, 123.70it/s][A
 87%|████████▋ | 7224/8343 [00:56<00:08, 127.89i

Not found pose: <rdkit.Chem.rdchem.Mol object at 0x7f579795dee0>



 94%|█████████▍| 7826/8343 [01:01<00:04, 126.21it/s][A
 94%|█████████▍| 7839/8343 [01:01<00:04, 120.97it/s][A
 94%|█████████▍| 7854/8343 [01:01<00:03, 122.73it/s][A
 94%|█████████▍| 7867/8343 [01:01<00:03, 123.80it/s][A
 94%|█████████▍| 7881/8343 [01:01<00:03, 121.63it/s][A
 95%|█████████▍| 7895/8343 [01:01<00:03, 126.25it/s][A
 95%|█████████▍| 7908/8343 [01:01<00:03, 121.51it/s][A
 95%|█████████▍| 7922/8343 [01:01<00:03, 126.59it/s][A
 95%|█████████▌| 7935/8343 [01:01<00:03, 122.58it/s][A
 95%|█████████▌| 7949/8343 [01:02<00:03, 127.21it/s][A
 95%|█████████▌| 7962/8343 [01:02<00:03, 124.57it/s][A
 96%|█████████▌| 7977/8343 [01:02<00:02, 124.99it/s][A
 96%|█████████▌| 7990/8343 [01:02<00:02, 125.74it/s][A
 96%|█████████▌| 8004/8343 [01:02<00:02, 123.24it/s][A
 96%|█████████▌| 8018/8343 [01:02<00:02, 127.59it/s][A
 96%|█████████▋| 8031/8343 [01:02<00:02, 123.61it/s][A
 96%|█████████▋| 8044/8343 [01:02<00:02, 125.20it/s][A
 97%|█████████▋| 8057/8343 [01:02<00:02, 126.38

 34%|███▍      | 3028/8901 [00:28<00:55, 105.87it/s][A
 34%|███▍      | 3039/8901 [00:28<00:54, 106.92it/s][A
 34%|███▍      | 3050/8901 [00:28<00:55, 104.54it/s][A
 34%|███▍      | 3061/8901 [00:28<01:00, 96.18it/s] [A
 35%|███▍      | 3073/8901 [00:29<00:59, 98.54it/s][A
 35%|███▍      | 3085/8901 [00:29<00:57, 101.99it/s][A
 35%|███▍      | 3097/8901 [00:29<00:56, 103.06it/s][A
 35%|███▍      | 3109/8901 [00:29<00:55, 103.53it/s][A
 35%|███▌      | 3121/8901 [00:29<00:55, 104.74it/s][A
 35%|███▌      | 3133/8901 [00:29<00:54, 105.32it/s][A
 35%|███▌      | 3145/8901 [00:29<00:54, 105.71it/s][A
 35%|███▌      | 3157/8901 [00:29<00:54, 104.78it/s][A
 36%|███▌      | 3169/8901 [00:29<00:54, 104.42it/s][A
 36%|███▌      | 3181/8901 [00:30<00:54, 104.95it/s][A
 36%|███▌      | 3193/8901 [00:30<00:54, 104.77it/s][A
 36%|███▌      | 3205/8901 [00:30<00:54, 105.04it/s][A
 36%|███▌      | 3217/8901 [00:30<00:53, 106.08it/s][A
 36%|███▋      | 3228/8901 [00:30<00:53, 107.03it

 73%|███████▎  | 6502/8901 [01:00<00:22, 109.00it/s][A
 73%|███████▎  | 6514/8901 [01:01<00:21, 109.57it/s][A
 73%|███████▎  | 6526/8901 [01:01<00:21, 109.06it/s][A
 73%|███████▎  | 6538/8901 [01:01<00:21, 108.37it/s][A
 74%|███████▎  | 6550/8901 [01:01<00:21, 108.40it/s][A
 74%|███████▎  | 6562/8901 [01:01<00:21, 107.09it/s][A
 74%|███████▍  | 6574/8901 [01:01<00:21, 107.31it/s][A
 74%|███████▍  | 6586/8901 [01:01<00:21, 105.28it/s][A
 74%|███████▍  | 6598/8901 [01:01<00:21, 107.43it/s][A
 74%|███████▍  | 6610/8901 [01:01<00:21, 108.11it/s][A
 74%|███████▍  | 6622/8901 [01:02<00:20, 108.93it/s][A
 75%|███████▍  | 6634/8901 [01:02<00:20, 109.52it/s][A
 75%|███████▍  | 6646/8901 [01:02<00:20, 109.21it/s][A
 75%|███████▍  | 6658/8901 [01:02<00:20, 110.21it/s][A
 75%|███████▍  | 6670/8901 [01:02<00:20, 109.54it/s][A
 75%|███████▌  | 6682/8901 [01:02<00:20, 109.91it/s][A
 75%|███████▌  | 6694/8901 [01:02<00:20, 109.38it/s][A
 75%|███████▌  | 6706/8901 [01:02<00:20, 108.86i

 10%|▉         | 926/9297 [00:10<01:30, 92.34it/s][A
 10%|█         | 936/9297 [00:10<01:28, 94.01it/s][A
 10%|█         | 946/9297 [00:10<01:33, 88.94it/s][A
 10%|█         | 955/9297 [00:10<01:35, 87.28it/s][A
 10%|█         | 964/9297 [00:10<01:36, 86.32it/s][A
 10%|█         | 973/9297 [00:11<01:35, 87.08it/s][A
 11%|█         | 984/9297 [00:11<01:29, 93.39it/s][A
 11%|█         | 994/9297 [00:11<01:33, 89.15it/s][A
 11%|█         | 1003/9297 [00:11<01:32, 89.35it/s][A
 11%|█         | 1012/9297 [00:11<01:34, 87.59it/s][A
 11%|█         | 1021/9297 [00:11<01:34, 87.19it/s][A
 11%|█         | 1031/9297 [00:11<01:31, 90.78it/s][A
 11%|█         | 1041/9297 [00:11<01:29, 92.49it/s][A
 11%|█▏        | 1051/9297 [00:11<01:34, 87.18it/s][A
 11%|█▏        | 1060/9297 [00:12<01:35, 86.21it/s][A
 11%|█▏        | 1069/9297 [00:12<01:34, 87.11it/s][A
 12%|█▏        | 1079/9297 [00:12<01:30, 90.51it/s][A
 12%|█▏        | 1089/9297 [00:12<01:28, 92.24it/s][A
 12%|█▏        | 1

 39%|███▉      | 3668/9297 [00:42<01:04, 87.41it/s][A
 40%|███▉      | 3677/9297 [00:42<01:05, 85.60it/s][A
 40%|███▉      | 3686/9297 [00:42<01:05, 85.36it/s][A
 40%|███▉      | 3695/9297 [00:42<01:05, 85.55it/s][A
 40%|███▉      | 3704/9297 [00:42<01:05, 85.89it/s][A
 40%|███▉      | 3713/9297 [00:42<01:05, 85.79it/s][A
 40%|████      | 3722/9297 [00:42<01:05, 85.70it/s][A
 40%|████      | 3731/9297 [00:42<01:05, 85.62it/s][A
 40%|████      | 3740/9297 [00:43<01:05, 85.45it/s][A
 40%|████      | 3749/9297 [00:43<01:04, 85.95it/s][A
 40%|████      | 3758/9297 [00:43<01:03, 86.72it/s][A
 41%|████      | 3767/9297 [00:43<01:03, 87.25it/s][A
 41%|████      | 3776/9297 [00:43<01:02, 87.93it/s][A
 41%|████      | 3785/9297 [00:43<01:02, 87.95it/s][A
 41%|████      | 3794/9297 [00:43<01:03, 87.03it/s][A
 41%|████      | 3803/9297 [00:43<01:03, 86.46it/s][A
 41%|████      | 3812/9297 [00:43<01:04, 85.03it/s][A
 41%|████      | 3821/9297 [00:43<01:04, 85.51it/s][A
 41%|████ 

 69%|██████▉   | 6394/9297 [01:13<00:33, 86.15it/s][A
 69%|██████▉   | 6403/9297 [01:13<00:33, 85.86it/s][A
 69%|██████▉   | 6412/9297 [01:13<00:33, 86.27it/s][A
 69%|██████▉   | 6422/9297 [01:13<00:32, 89.82it/s][A
 69%|██████▉   | 6431/9297 [01:13<00:31, 89.83it/s][A
 69%|██████▉   | 6440/9297 [01:14<00:31, 89.83it/s][A
 69%|██████▉   | 6449/9297 [01:14<00:32, 88.53it/s][A
 69%|██████▉   | 6458/9297 [01:14<00:32, 88.00it/s][A
 70%|██████▉   | 6467/9297 [01:14<00:32, 87.51it/s][A
 70%|██████▉   | 6476/9297 [01:14<00:32, 86.81it/s][A
 70%|██████▉   | 6485/9297 [01:14<00:32, 87.34it/s][A
 70%|██████▉   | 6494/9297 [01:14<00:31, 87.66it/s][A
 70%|██████▉   | 6503/9297 [01:14<00:31, 87.96it/s][A
 70%|███████   | 6512/9297 [01:14<00:31, 87.45it/s][A
 70%|███████   | 6521/9297 [01:15<00:32, 86.64it/s][A
 70%|███████   | 6530/9297 [01:15<00:31, 87.41it/s][A
 70%|███████   | 6539/9297 [01:15<00:31, 86.96it/s][A
 70%|███████   | 6548/9297 [01:15<00:31, 87.37it/s][A
 71%|█████

 98%|█████████▊| 9079/9297 [01:45<00:02, 82.39it/s][A
 98%|█████████▊| 9088/9297 [01:45<00:02, 82.53it/s][A
 98%|█████████▊| 9097/9297 [01:45<00:02, 82.48it/s][A
 98%|█████████▊| 9106/9297 [01:45<00:02, 81.43it/s][A
 98%|█████████▊| 9115/9297 [01:45<00:02, 81.36it/s][A
 98%|█████████▊| 9124/9297 [01:45<00:02, 81.72it/s][A
 98%|█████████▊| 9133/9297 [01:46<00:02, 81.76it/s][A
 98%|█████████▊| 9142/9297 [01:46<00:01, 81.99it/s][A
 98%|█████████▊| 9151/9297 [01:46<00:01, 73.50it/s][A
 99%|█████████▊| 9160/9297 [01:46<00:01, 76.41it/s][A
 99%|█████████▊| 9169/9297 [01:46<00:01, 77.79it/s][A
 99%|█████████▊| 9178/9297 [01:46<00:01, 78.73it/s][A
 99%|█████████▉| 9187/9297 [01:46<00:01, 79.23it/s][A
 99%|█████████▉| 9196/9297 [01:46<00:01, 79.73it/s][A
 99%|█████████▉| 9205/9297 [01:47<00:01, 79.95it/s][A
 99%|█████████▉| 9214/9297 [01:47<00:01, 79.90it/s][A
 99%|█████████▉| 9223/9297 [01:47<00:00, 81.03it/s][A
 99%|█████████▉| 9232/9297 [01:47<00:00, 80.39it/s][A
 99%|█████

 27%|██▋       | 2455/9090 [00:30<01:25, 77.32it/s][A
 27%|██▋       | 2464/9090 [00:31<01:24, 78.52it/s][A
 27%|██▋       | 2472/9090 [00:31<01:26, 76.77it/s][A
 27%|██▋       | 2480/9090 [00:31<01:35, 69.56it/s][A
 27%|██▋       | 2488/9090 [00:31<01:34, 69.95it/s][A
 27%|██▋       | 2497/9090 [00:31<01:30, 72.57it/s][A
 28%|██▊       | 2506/9090 [00:31<01:27, 75.15it/s][A
 28%|██▊       | 2515/9090 [00:31<01:26, 75.92it/s][A
 28%|██▊       | 2524/9090 [00:31<01:26, 76.06it/s][A
 28%|██▊       | 2533/9090 [00:31<01:26, 75.63it/s][A
 28%|██▊       | 2542/9090 [00:32<01:24, 77.04it/s][A
 28%|██▊       | 2551/9090 [00:32<01:25, 76.47it/s][A
 28%|██▊       | 2560/9090 [00:32<01:24, 77.04it/s][A
 28%|██▊       | 2569/9090 [00:32<01:24, 77.40it/s][A
 28%|██▊       | 2578/9090 [00:32<01:24, 76.70it/s][A
 28%|██▊       | 2587/9090 [00:32<01:23, 77.66it/s][A
 29%|██▊       | 2596/9090 [00:32<01:24, 76.49it/s][A
 29%|██▊       | 2605/9090 [00:32<01:24, 76.58it/s][A
 29%|██▉  

Unable to process ligand of CHEMBL3976587
Unable to process ligand of CHEMBL3976587
Unable to process ligand of CHEMBL3976587



 44%|████▍     | 4024/9090 [00:52<01:13, 68.60it/s][A
 44%|████▍     | 4033/9090 [00:52<01:11, 71.16it/s][A
 44%|████▍     | 4041/9090 [00:52<01:12, 69.94it/s][A
 45%|████▍     | 4049/9090 [00:52<01:12, 69.25it/s][A
 45%|████▍     | 4057/9090 [00:52<01:13, 68.45it/s][A
 45%|████▍     | 4066/9090 [00:52<01:10, 71.29it/s][A
 45%|████▍     | 4075/9090 [00:52<01:09, 72.14it/s][A
 45%|████▍     | 4084/9090 [00:53<01:07, 73.97it/s][A
 45%|████▌     | 4093/9090 [00:53<01:06, 74.66it/s][A
 45%|████▌     | 4102/9090 [00:53<01:06, 75.42it/s][A
 45%|████▌     | 4111/9090 [00:53<01:05, 75.76it/s][A
 45%|████▌     | 4120/9090 [00:53<01:05, 75.97it/s][A
 45%|████▌     | 4129/9090 [00:53<01:05, 75.44it/s][A
 46%|████▌     | 4138/9090 [00:53<01:06, 74.74it/s][A
 46%|████▌     | 4147/9090 [00:53<01:05, 75.08it/s][A
 46%|████▌     | 4156/9090 [00:53<01:05, 75.15it/s][A
 46%|████▌     | 4165/9090 [00:54<01:05, 75.16it/s][A
 46%|████▌     | 4174/9090 [00:54<01:04, 76.68it/s][A
 46%|████

Unable to process ligand of CHEMBL3976587
Unable to process ligand of CHEMBL3976587
Unable to process ligand of CHEMBL3976587



 52%|█████▏    | 4711/9090 [01:01<01:06, 65.51it/s][A
 52%|█████▏    | 4720/9090 [01:01<01:02, 69.52it/s][A
 52%|█████▏    | 4729/9090 [01:01<01:00, 71.96it/s][A
 52%|█████▏    | 4738/9090 [01:01<00:59, 73.71it/s][A
 52%|█████▏    | 4747/9090 [01:01<00:59, 72.72it/s][A
 52%|█████▏    | 4756/9090 [01:02<00:59, 73.29it/s][A
 52%|█████▏    | 4765/9090 [01:02<00:59, 72.54it/s][A
 53%|█████▎    | 4774/9090 [01:02<00:58, 73.84it/s][A
 53%|█████▎    | 4783/9090 [01:02<00:58, 73.65it/s][A
 53%|█████▎    | 4792/9090 [01:02<00:58, 74.09it/s][A
 53%|█████▎    | 4801/9090 [01:02<00:57, 74.63it/s][A
 53%|█████▎    | 4810/9090 [01:02<00:59, 71.60it/s][A
 53%|█████▎    | 4818/9090 [01:02<00:59, 72.40it/s][A
 53%|█████▎    | 4826/9090 [01:03<01:00, 70.31it/s][A
 53%|█████▎    | 4834/9090 [01:03<01:02, 68.64it/s][A
 53%|█████▎    | 4843/9090 [01:03<00:59, 71.01it/s][A
 53%|█████▎    | 4852/9090 [01:03<00:59, 71.56it/s][A
 53%|█████▎    | 4860/9090 [01:03<01:00, 69.39it/s][A
 54%|████

 81%|████████  | 7360/9090 [01:36<00:22, 77.67it/s][A
 81%|████████  | 7369/9090 [01:36<00:22, 77.32it/s][A
 81%|████████  | 7378/9090 [01:36<00:22, 76.39it/s][A
 81%|████████▏ | 7387/9090 [01:36<00:22, 76.12it/s][A
 81%|████████▏ | 7396/9090 [01:36<00:22, 76.15it/s][A
 81%|████████▏ | 7405/9090 [01:36<00:21, 76.68it/s][A
 82%|████████▏ | 7414/9090 [01:36<00:22, 76.17it/s][A
 82%|████████▏ | 7423/9090 [01:36<00:21, 77.03it/s][A
 82%|████████▏ | 7432/9090 [01:37<00:21, 78.63it/s][A
 82%|████████▏ | 7441/9090 [01:37<00:21, 78.44it/s][A
 82%|████████▏ | 7450/9090 [01:37<00:20, 78.73it/s][A
 82%|████████▏ | 7459/9090 [01:37<00:20, 80.09it/s][A
 82%|████████▏ | 7468/9090 [01:37<00:20, 78.87it/s][A
 82%|████████▏ | 7477/9090 [01:37<00:20, 77.94it/s][A
 82%|████████▏ | 7486/9090 [01:37<00:20, 78.01it/s][A
 82%|████████▏ | 7495/9090 [01:37<00:20, 77.52it/s][A
 83%|████████▎ | 7504/9090 [01:37<00:20, 77.96it/s][A
 83%|████████▎ | 7513/9090 [01:38<00:20, 77.48it/s][A
 83%|█████

Unable to process ligand of CHEMBL3976587
Unable to process ligand of CHEMBL3976587
Unable to process ligand of CHEMBL3976587



 95%|█████████▍| 8635/9090 [01:52<00:06, 71.62it/s][A
 95%|█████████▌| 8644/9090 [01:52<00:06, 73.91it/s][A
 95%|█████████▌| 8653/9090 [01:52<00:06, 70.17it/s][A
 95%|█████████▌| 8662/9090 [01:53<00:05, 71.76it/s][A
 95%|█████████▌| 8671/9090 [01:53<00:05, 74.30it/s][A
 95%|█████████▌| 8680/9090 [01:53<00:05, 75.75it/s][A
 96%|█████████▌| 8689/9090 [01:53<00:05, 76.14it/s][A
 96%|█████████▌| 8697/9090 [01:53<00:05, 74.93it/s][A
 96%|█████████▌| 8705/9090 [01:53<00:05, 72.66it/s][A
 96%|█████████▌| 8713/9090 [01:53<00:05, 72.17it/s][A
 96%|█████████▌| 8722/9090 [01:53<00:05, 73.40it/s][A
 96%|█████████▌| 8731/9090 [01:53<00:04, 74.38it/s][A
 96%|█████████▌| 8740/9090 [01:54<00:04, 74.15it/s][A
 96%|█████████▌| 8749/9090 [01:54<00:04, 74.31it/s][A
 96%|█████████▋| 8758/9090 [01:54<00:04, 74.82it/s][A
 96%|█████████▋| 8767/9090 [01:54<00:04, 76.10it/s][A
 97%|█████████▋| 8776/9090 [01:54<00:04, 75.58it/s][A
 97%|█████████▋| 8785/9090 [01:54<00:04, 75.96it/s][A
 97%|████

 25%|██▌       | 2209/8685 [00:30<01:30, 71.82it/s][A
 26%|██▌       | 2218/8685 [00:30<01:29, 72.22it/s][A
 26%|██▌       | 2227/8685 [00:30<01:28, 73.00it/s][A
 26%|██▌       | 2236/8685 [00:30<01:28, 73.16it/s][A
 26%|██▌       | 2245/8685 [00:30<01:27, 73.51it/s][A
 26%|██▌       | 2254/8685 [00:30<01:29, 72.12it/s][A
 26%|██▌       | 2263/8685 [00:30<01:28, 72.65it/s][A
 26%|██▌       | 2272/8685 [00:30<01:28, 72.82it/s][A
 26%|██▋       | 2281/8685 [00:31<01:28, 72.76it/s][A
 26%|██▋       | 2290/8685 [00:31<01:27, 72.82it/s][A
 26%|██▋       | 2299/8685 [00:31<01:27, 72.86it/s][A
 27%|██▋       | 2308/8685 [00:31<01:27, 72.58it/s][A
 27%|██▋       | 2317/8685 [00:31<01:28, 72.33it/s][A
 27%|██▋       | 2326/8685 [00:31<01:27, 72.33it/s][A
 27%|██▋       | 2334/8685 [00:31<01:31, 69.69it/s][A
 27%|██▋       | 2341/8685 [00:31<01:35, 66.70it/s][A
 27%|██▋       | 2350/8685 [00:32<01:32, 68.14it/s][A
 27%|██▋       | 2359/8685 [00:32<01:30, 69.76it/s][A
 27%|██▋  

 56%|█████▌    | 4837/8685 [01:06<00:53, 72.31it/s][A
 56%|█████▌    | 4846/8685 [01:06<00:53, 72.01it/s][A
 56%|█████▌    | 4855/8685 [01:06<00:53, 71.72it/s][A
 56%|█████▌    | 4864/8685 [01:07<00:53, 71.07it/s][A
 56%|█████▌    | 4873/8685 [01:07<00:52, 72.13it/s][A
 56%|█████▌    | 4882/8685 [01:07<00:52, 72.98it/s][A
 56%|█████▋    | 4891/8685 [01:07<00:52, 72.93it/s][A
 56%|█████▋    | 4900/8685 [01:07<00:51, 73.15it/s][A
 57%|█████▋    | 4909/8685 [01:07<00:51, 73.48it/s][A
 57%|█████▋    | 4918/8685 [01:07<00:51, 73.63it/s][A
 57%|█████▋    | 4927/8685 [01:07<00:51, 73.60it/s][A
 57%|█████▋    | 4936/8685 [01:08<00:51, 73.49it/s][A
 57%|█████▋    | 4945/8685 [01:08<00:51, 73.32it/s][A
 57%|█████▋    | 4954/8685 [01:08<00:51, 73.12it/s][A
 57%|█████▋    | 4963/8685 [01:08<00:50, 73.30it/s][A
 57%|█████▋    | 4971/8685 [01:08<00:50, 73.58it/s][A
 57%|█████▋    | 4979/8685 [01:08<00:52, 70.24it/s][A
 57%|█████▋    | 4987/8685 [01:08<00:53, 69.16it/s][A
 58%|█████

 86%|████████▌ | 7480/8685 [01:42<00:16, 74.64it/s][A
 86%|████████▌ | 7489/8685 [01:43<00:16, 74.46it/s][A
 86%|████████▋ | 7498/8685 [01:43<00:16, 74.03it/s][A
 86%|████████▋ | 7507/8685 [01:43<00:15, 73.92it/s][A
 87%|████████▋ | 7516/8685 [01:43<00:15, 74.51it/s][A
 87%|████████▋ | 7525/8685 [01:43<00:15, 74.53it/s][A
 87%|████████▋ | 7534/8685 [01:43<00:15, 74.67it/s][A
 87%|████████▋ | 7543/8685 [01:43<00:15, 74.43it/s][A
 87%|████████▋ | 7552/8685 [01:43<00:15, 74.06it/s][A
 87%|████████▋ | 7561/8685 [01:44<00:15, 74.27it/s][A
 87%|████████▋ | 7570/8685 [01:44<00:14, 74.44it/s][A
 87%|████████▋ | 7579/8685 [01:44<00:14, 74.74it/s][A
 87%|████████▋ | 7588/8685 [01:44<00:14, 74.71it/s][A
 87%|████████▋ | 7597/8685 [01:44<00:14, 75.01it/s][A
 88%|████████▊ | 7606/8685 [01:44<00:14, 75.26it/s][A
 88%|████████▊ | 7615/8685 [01:44<00:14, 75.09it/s][A
 88%|████████▊ | 7624/8685 [01:44<00:14, 75.12it/s][A
 88%|████████▊ | 7633/8685 [01:44<00:14, 74.73it/s][A
 88%|█████

 16%|█▌        | 1465/9027 [00:20<01:45, 71.70it/s][A
 16%|█▋        | 1474/9027 [00:20<01:45, 71.41it/s][A
 16%|█▋        | 1483/9027 [00:21<01:47, 70.42it/s][A
 17%|█▋        | 1492/9027 [00:21<01:46, 70.45it/s][A
 17%|█▋        | 1501/9027 [00:21<01:46, 70.63it/s][A
 17%|█▋        | 1510/9027 [00:21<01:46, 70.75it/s][A
 17%|█▋        | 1519/9027 [00:21<01:45, 70.91it/s][A
 17%|█▋        | 1528/9027 [00:21<01:46, 70.72it/s][A
 17%|█▋        | 1537/9027 [00:21<01:45, 70.81it/s][A
 17%|█▋        | 1546/9027 [00:21<01:45, 70.80it/s][A
 17%|█▋        | 1555/9027 [00:22<01:45, 70.63it/s][A
 17%|█▋        | 1564/9027 [00:22<01:47, 69.69it/s][A
 17%|█▋        | 1573/9027 [00:22<01:46, 70.26it/s][A
 18%|█▊        | 1582/9027 [00:22<01:45, 70.72it/s][A
 18%|█▊        | 1591/9027 [00:22<01:45, 70.53it/s][A
 18%|█▊        | 1600/9027 [00:22<01:44, 70.79it/s][A
 18%|█▊        | 1609/9027 [00:22<01:44, 70.71it/s][A
 18%|█▊        | 1618/9027 [00:22<01:44, 70.63it/s][A
 18%|█▊   

 46%|████▌     | 4126/9027 [00:58<01:10, 69.50it/s][A
 46%|████▌     | 4135/9027 [00:58<01:10, 69.39it/s][A
 46%|████▌     | 4144/9027 [00:58<01:10, 69.07it/s][A
 46%|████▌     | 4153/9027 [00:58<01:10, 69.44it/s][A
 46%|████▌     | 4162/9027 [00:58<01:09, 70.00it/s][A
 46%|████▌     | 4171/9027 [00:59<01:08, 70.70it/s][A
 46%|████▋     | 4180/9027 [00:59<01:08, 71.14it/s][A
 46%|████▋     | 4189/9027 [00:59<01:08, 71.07it/s][A
 47%|████▋     | 4198/9027 [00:59<01:07, 71.04it/s][A
 47%|████▋     | 4207/9027 [00:59<01:07, 70.95it/s][A
 47%|████▋     | 4216/9027 [00:59<01:08, 70.41it/s][A
 47%|████▋     | 4225/9027 [00:59<01:08, 70.18it/s][A
 47%|████▋     | 4234/9027 [00:59<01:08, 70.22it/s][A
 47%|████▋     | 4243/9027 [01:00<01:07, 70.70it/s][A
 47%|████▋     | 4252/9027 [01:00<01:07, 70.99it/s][A
 47%|████▋     | 4261/9027 [01:00<01:06, 71.26it/s][A
 47%|████▋     | 4270/9027 [01:00<01:06, 71.04it/s][A
 47%|████▋     | 4279/9027 [01:00<01:06, 71.60it/s][A
 48%|████▊

 75%|███████▍  | 6760/9027 [01:36<00:33, 67.40it/s][A
 75%|███████▍  | 6769/9027 [01:36<00:33, 67.74it/s][A
 75%|███████▌  | 6778/9027 [01:36<00:33, 68.02it/s][A
 75%|███████▌  | 6787/9027 [01:37<00:32, 68.48it/s][A
 75%|███████▌  | 6796/9027 [01:37<00:32, 68.78it/s][A
 75%|███████▌  | 6805/9027 [01:37<00:32, 69.28it/s][A
 75%|███████▌  | 6814/9027 [01:37<00:31, 69.23it/s][A
 76%|███████▌  | 6823/9027 [01:37<00:31, 69.16it/s][A
 76%|███████▌  | 6832/9027 [01:37<00:31, 69.58it/s][A
 76%|███████▌  | 6841/9027 [01:37<00:31, 69.32it/s][A
 76%|███████▌  | 6850/9027 [01:37<00:33, 64.05it/s][A
 76%|███████▌  | 6859/9027 [01:38<00:33, 65.05it/s][A
 76%|███████▌  | 6868/9027 [01:38<00:32, 66.65it/s][A
 76%|███████▌  | 6877/9027 [01:38<00:32, 67.14it/s][A
 76%|███████▋  | 6886/9027 [01:38<00:31, 67.46it/s][A
 76%|███████▋  | 6895/9027 [01:38<00:31, 68.43it/s][A
 76%|███████▋  | 6904/9027 [01:38<00:30, 68.91it/s][A
 77%|███████▋  | 6913/9027 [01:38<00:30, 68.84it/s][A
 77%|█████

  2%|▏         | 331/14292 [00:05<03:31, 65.95it/s][A
  2%|▏         | 340/14292 [00:05<03:32, 65.60it/s][A
  2%|▏         | 349/14292 [00:05<03:32, 65.50it/s][A
  3%|▎         | 358/14292 [00:05<03:34, 65.11it/s][A
  3%|▎         | 367/14292 [00:05<03:33, 65.24it/s][A
  3%|▎         | 376/14292 [00:05<03:31, 65.68it/s][A
  3%|▎         | 385/14292 [00:05<03:31, 65.68it/s][A
  3%|▎         | 394/14292 [00:05<03:33, 65.16it/s][A
  3%|▎         | 403/14292 [00:06<03:33, 64.90it/s][A
  3%|▎         | 412/14292 [00:06<03:34, 64.70it/s][A
  3%|▎         | 421/14292 [00:06<03:34, 64.76it/s][A
  3%|▎         | 430/14292 [00:06<03:35, 64.45it/s][A
  3%|▎         | 437/14292 [00:06<03:31, 65.63it/s][A
  3%|▎         | 445/14292 [00:06<03:37, 63.66it/s][A
  3%|▎         | 452/14292 [00:06<03:54, 58.93it/s][A
  3%|▎         | 460/14292 [00:07<03:56, 58.51it/s][A
  3%|▎         | 469/14292 [00:07<03:50, 60.01it/s][A
  3%|▎         | 478/14292 [00:07<03:46, 61.04it/s][A
  3%|▎    

 18%|█▊        | 2516/14292 [00:41<03:10, 61.76it/s][A
 18%|█▊        | 2523/14292 [00:41<03:05, 63.56it/s][A
 18%|█▊        | 2530/14292 [00:41<03:20, 58.80it/s][A
 18%|█▊        | 2538/14292 [00:41<03:02, 64.38it/s][A
 18%|█▊        | 2545/14292 [00:41<03:20, 58.62it/s][A
 18%|█▊        | 2553/14292 [00:41<03:03, 63.86it/s][A
 18%|█▊        | 2560/14292 [00:41<03:19, 58.79it/s][A
 18%|█▊        | 2567/14292 [00:42<03:29, 55.94it/s][A
 18%|█▊        | 2575/14292 [00:42<03:27, 56.40it/s][A
 18%|█▊        | 2583/14292 [00:42<03:08, 62.08it/s][A
 18%|█▊        | 2590/14292 [00:42<03:20, 58.33it/s][A
 18%|█▊        | 2598/14292 [00:42<03:03, 63.73it/s][A
 18%|█▊        | 2605/14292 [00:42<03:18, 59.02it/s][A
 18%|█▊        | 2612/14292 [00:42<03:10, 61.42it/s][A
 18%|█▊        | 2619/14292 [00:42<03:04, 63.18it/s][A
 18%|█▊        | 2626/14292 [00:43<03:22, 57.54it/s][A
 18%|█▊        | 2633/14292 [00:43<03:12, 60.69it/s][A
 18%|█▊        | 2641/14292 [00:43<03:16, 59.18i

 33%|███▎      | 4649/14292 [01:17<02:36, 61.55it/s][A
 33%|███▎      | 4657/14292 [01:17<02:41, 59.58it/s][A
 33%|███▎      | 4664/14292 [01:17<02:35, 61.94it/s][A
 33%|███▎      | 4672/14292 [01:17<02:40, 59.99it/s][A
 33%|███▎      | 4679/14292 [01:17<02:33, 62.46it/s][A
 33%|███▎      | 4686/14292 [01:17<02:47, 57.43it/s][A
 33%|███▎      | 4693/14292 [01:17<02:55, 54.66it/s][A
 33%|███▎      | 4699/14292 [01:17<02:59, 53.49it/s][A
 33%|███▎      | 4707/14292 [01:18<02:39, 60.08it/s][A
 33%|███▎      | 4714/14292 [01:18<02:49, 56.54it/s][A
 33%|███▎      | 4720/14292 [01:18<02:46, 57.39it/s][A
 33%|███▎      | 4726/14292 [01:18<02:45, 57.97it/s][A
 33%|███▎      | 4733/14292 [01:18<02:36, 61.01it/s][A
 33%|███▎      | 4740/14292 [01:18<02:30, 63.50it/s][A
 33%|███▎      | 4747/14292 [01:18<02:45, 57.72it/s][A
 33%|███▎      | 4755/14292 [01:18<02:30, 63.45it/s][A
 33%|███▎      | 4762/14292 [01:19<02:43, 58.23it/s][A
 33%|███▎      | 4769/14292 [01:19<02:35, 61.15i

 49%|████▉     | 7063/14292 [01:55<01:53, 63.89it/s][A
 49%|████▉     | 7072/14292 [01:55<01:53, 63.80it/s][A
 50%|████▉     | 7081/14292 [01:55<01:52, 64.19it/s][A
 50%|████▉     | 7090/14292 [01:56<01:52, 64.25it/s][A
 50%|████▉     | 7099/14292 [01:56<01:51, 64.23it/s][A
 50%|████▉     | 7108/14292 [01:56<01:52, 64.00it/s][A
 50%|████▉     | 7117/14292 [01:56<01:51, 64.15it/s][A
 50%|████▉     | 7126/14292 [01:56<01:51, 64.39it/s][A
 50%|████▉     | 7135/14292 [01:56<01:51, 64.10it/s][A
 50%|████▉     | 7144/14292 [01:56<01:52, 63.82it/s][A
 50%|█████     | 7153/14292 [01:57<01:51, 64.05it/s][A
 50%|█████     | 7162/14292 [01:57<01:51, 64.09it/s][A
 50%|█████     | 7171/14292 [01:57<01:50, 64.18it/s][A
 50%|█████     | 7180/14292 [01:57<01:50, 64.12it/s][A
 50%|█████     | 7189/14292 [01:57<01:50, 64.05it/s][A
 50%|█████     | 7198/14292 [01:57<01:49, 64.71it/s][A
 50%|█████     | 7207/14292 [01:57<01:50, 64.29it/s][A
 50%|█████     | 7216/14292 [01:58<01:49, 64.35i

 67%|██████▋   | 9562/14292 [02:35<01:14, 63.55it/s][A
 67%|██████▋   | 9571/14292 [02:35<01:14, 63.52it/s][A
 67%|██████▋   | 9580/14292 [02:35<01:14, 63.51it/s][A
 67%|██████▋   | 9589/14292 [02:35<01:22, 56.77it/s][A
 67%|██████▋   | 9598/14292 [02:35<01:25, 54.92it/s][A
 67%|██████▋   | 9606/14292 [02:35<01:17, 60.12it/s][A
 67%|██████▋   | 9613/14292 [02:35<01:22, 56.85it/s][A
 67%|██████▋   | 9622/14292 [02:36<01:19, 58.92it/s][A
 67%|██████▋   | 9631/14292 [02:36<01:15, 61.36it/s][A
 67%|██████▋   | 9639/14292 [02:36<01:10, 65.69it/s][A
 67%|██████▋   | 9646/14292 [02:36<01:16, 60.87it/s][A
 68%|██████▊   | 9655/14292 [02:36<01:15, 61.58it/s][A
 68%|██████▊   | 9664/14292 [02:36<01:14, 62.06it/s][A
 68%|██████▊   | 9671/14292 [02:36<01:12, 63.78it/s][A
 68%|██████▊   | 9679/14292 [02:36<01:14, 62.21it/s][A
 68%|██████▊   | 9688/14292 [02:37<01:13, 62.45it/s][A
 68%|██████▊   | 9696/14292 [02:37<01:08, 66.70it/s][A
 68%|██████▊   | 9703/14292 [02:37<01:14, 61.41i

 83%|████████▎ | 11833/14292 [03:11<00:39, 62.10it/s][A
 83%|████████▎ | 11841/14292 [03:11<00:36, 66.38it/s][A
 83%|████████▎ | 11848/14292 [03:11<00:40, 61.00it/s][A
 83%|████████▎ | 11855/14292 [03:11<00:39, 61.35it/s][A
 83%|████████▎ | 11863/14292 [03:12<00:40, 59.49it/s][A
 83%|████████▎ | 11871/14292 [03:12<00:37, 64.29it/s][A
 83%|████████▎ | 11878/14292 [03:12<00:40, 59.22it/s][A
 83%|████████▎ | 11886/14292 [03:12<00:37, 64.28it/s][A
 83%|████████▎ | 11893/14292 [03:12<00:40, 59.06it/s][A
 83%|████████▎ | 11901/14292 [03:12<00:37, 64.22it/s][A
 83%|████████▎ | 11908/14292 [03:12<00:39, 59.72it/s][A
 83%|████████▎ | 11917/14292 [03:12<00:39, 60.63it/s][A
 83%|████████▎ | 11925/14292 [03:12<00:36, 65.34it/s][A
 83%|████████▎ | 11932/14292 [03:13<00:38, 61.10it/s][A
 84%|████████▎ | 11941/14292 [03:13<00:38, 61.64it/s][A
 84%|████████▎ | 11949/14292 [03:13<00:35, 66.12it/s][A
 84%|████████▎ | 11956/14292 [03:13<00:38, 60.81it/s][A
 84%|████████▎ | 11965/14292 [0

 98%|█████████▊| 14029/14292 [03:46<00:04, 59.78it/s][A
 98%|█████████▊| 14036/14292 [03:47<00:04, 62.27it/s][A
 98%|█████████▊| 14043/14292 [03:47<00:03, 64.32it/s][A
 98%|█████████▊| 14050/14292 [03:47<00:04, 58.80it/s][A
 98%|█████████▊| 14057/14292 [03:47<00:03, 61.68it/s][A
 98%|█████████▊| 14064/14292 [03:47<00:03, 63.90it/s][A
 98%|█████████▊| 14071/14292 [03:47<00:03, 58.56it/s][A
 99%|█████████▊| 14079/14292 [03:47<00:03, 64.05it/s][A
 99%|█████████▊| 14086/14292 [03:47<00:03, 58.94it/s][A
 99%|█████████▊| 14094/14292 [03:47<00:03, 64.24it/s][A
 99%|█████████▊| 14101/14292 [03:48<00:03, 59.24it/s][A
 99%|█████████▊| 14108/14292 [03:48<00:02, 61.96it/s][A
 99%|█████████▉| 14115/14292 [03:48<00:02, 64.09it/s][A
 99%|█████████▉| 14122/14292 [03:48<00:02, 58.70it/s][A
 99%|█████████▉| 14129/14292 [03:48<00:02, 61.63it/s][A
 99%|█████████▉| 14137/14292 [03:48<00:02, 59.69it/s][A
 99%|█████████▉| 14145/14292 [03:48<00:02, 64.83it/s][A
 99%|█████████▉| 14152/14292 [0

 11%|█         | 1546/14067 [00:27<03:42, 56.27it/s][A
 11%|█         | 1552/14067 [00:27<03:40, 56.73it/s][A
 11%|█         | 1558/14067 [00:27<03:40, 56.67it/s][A
 11%|█         | 1564/14067 [00:27<03:42, 56.08it/s][A
 11%|█         | 1570/14067 [00:28<03:44, 55.60it/s][A
 11%|█         | 1576/14067 [00:28<03:46, 55.27it/s][A
 11%|█         | 1582/14067 [00:28<03:44, 55.50it/s][A
 11%|█▏        | 1588/14067 [00:28<03:43, 55.72it/s][A
 11%|█▏        | 1594/14067 [00:28<03:43, 55.75it/s][A
 11%|█▏        | 1600/14067 [00:28<03:44, 55.65it/s][A
 11%|█▏        | 1606/14067 [00:28<03:44, 55.62it/s][A
 11%|█▏        | 1612/14067 [00:28<03:51, 53.88it/s][A
 12%|█▏        | 1618/14067 [00:28<03:48, 54.50it/s][A
 12%|█▏        | 1624/14067 [00:29<03:45, 55.28it/s][A
 12%|█▏        | 1630/14067 [00:29<03:47, 54.66it/s][A
 12%|█▏        | 1636/14067 [00:29<03:46, 54.93it/s][A
 12%|█▏        | 1642/14067 [00:29<03:46, 54.87it/s][A
 12%|█▏        | 1648/14067 [00:29<03:44, 55.24i

 23%|██▎       | 3298/14067 [00:59<03:12, 55.90it/s][A
 23%|██▎       | 3304/14067 [00:59<03:13, 55.67it/s][A
 24%|██▎       | 3310/14067 [00:59<03:13, 55.51it/s][A
 24%|██▎       | 3316/14067 [00:59<03:12, 55.73it/s][A
 24%|██▎       | 3322/14067 [00:59<03:12, 55.84it/s][A
 24%|██▎       | 3328/14067 [00:59<03:12, 55.72it/s][A
 24%|██▎       | 3334/14067 [00:59<03:12, 55.84it/s][A
 24%|██▎       | 3340/14067 [00:59<03:11, 55.95it/s][A
 24%|██▍       | 3346/14067 [00:59<03:09, 56.53it/s][A
 24%|██▍       | 3352/14067 [01:00<03:10, 56.29it/s][A
 24%|██▍       | 3358/14067 [01:00<03:10, 56.13it/s][A
 24%|██▍       | 3364/14067 [01:00<03:11, 55.80it/s][A
 24%|██▍       | 3370/14067 [01:00<03:12, 55.71it/s][A
 24%|██▍       | 3376/14067 [01:00<03:12, 55.64it/s][A
 24%|██▍       | 3382/14067 [01:00<03:11, 55.67it/s][A
 24%|██▍       | 3388/14067 [01:00<03:11, 55.71it/s][A
 24%|██▍       | 3394/14067 [01:00<03:11, 55.76it/s][A
 24%|██▍       | 3400/14067 [01:00<03:10, 56.05i

 36%|███▌      | 5051/14067 [01:30<02:46, 54.19it/s][A
 36%|███▌      | 5057/14067 [01:30<02:45, 54.60it/s][A
 36%|███▌      | 5063/14067 [01:30<02:44, 54.60it/s][A
 36%|███▌      | 5069/14067 [01:31<02:44, 54.72it/s][A
 36%|███▌      | 5075/14067 [01:31<02:44, 54.76it/s][A
 36%|███▌      | 5081/14067 [01:31<02:44, 54.77it/s][A
 36%|███▌      | 5087/14067 [01:31<02:42, 55.23it/s][A
 36%|███▌      | 5093/14067 [01:31<02:43, 55.03it/s][A
 36%|███▌      | 5099/14067 [01:31<02:43, 54.72it/s][A
 36%|███▋      | 5105/14067 [01:31<02:41, 55.55it/s][A
 36%|███▋      | 5111/14067 [01:31<02:42, 55.08it/s][A
 36%|███▋      | 5117/14067 [01:31<02:45, 54.22it/s][A
 36%|███▋      | 5123/14067 [01:32<02:45, 53.88it/s][A
 36%|███▋      | 5129/14067 [01:32<02:47, 53.24it/s][A
 37%|███▋      | 5135/14067 [01:32<02:46, 53.59it/s][A
 37%|███▋      | 5141/14067 [01:32<02:46, 53.61it/s][A
 37%|███▋      | 5147/14067 [01:32<02:46, 53.65it/s][A
 37%|███▋      | 5153/14067 [01:32<02:45, 53.74i

 48%|████▊     | 6805/14067 [02:02<02:08, 56.45it/s][A
 48%|████▊     | 6811/14067 [02:02<02:09, 55.91it/s][A
 48%|████▊     | 6817/14067 [02:02<02:09, 56.03it/s][A
 49%|████▊     | 6823/14067 [02:02<02:09, 55.89it/s][A
 49%|████▊     | 6829/14067 [02:03<02:09, 55.85it/s][A
 49%|████▊     | 6835/14067 [02:03<02:09, 55.99it/s][A
 49%|████▊     | 6841/14067 [02:03<02:08, 56.34it/s][A
 49%|████▊     | 6847/14067 [02:03<02:08, 56.10it/s][A
 49%|████▊     | 6853/14067 [02:03<02:08, 55.96it/s][A
 49%|████▉     | 6859/14067 [02:03<02:08, 56.25it/s][A
 49%|████▉     | 6865/14067 [02:03<02:07, 56.49it/s][A
 49%|████▉     | 6871/14067 [02:03<02:06, 56.66it/s][A
 49%|████▉     | 6877/14067 [02:03<02:06, 56.71it/s][A
 49%|████▉     | 6883/14067 [02:04<02:06, 56.69it/s][A
 49%|████▉     | 6889/14067 [02:04<02:07, 56.51it/s][A
 49%|████▉     | 6895/14067 [02:04<02:06, 56.71it/s][A
 49%|████▉     | 6901/14067 [02:04<02:06, 56.70it/s][A
 49%|████▉     | 6907/14067 [02:04<02:06, 56.66i

 61%|██████    | 8563/14067 [02:34<01:45, 52.08it/s][A
 61%|██████    | 8569/14067 [02:34<01:41, 54.03it/s][A
 61%|██████    | 8575/14067 [02:34<01:40, 54.61it/s][A
 61%|██████    | 8581/14067 [02:34<01:39, 55.08it/s][A
 61%|██████    | 8587/14067 [02:34<01:39, 55.16it/s][A
 61%|██████    | 8593/14067 [02:34<01:39, 55.21it/s][A
 61%|██████    | 8599/14067 [02:34<01:38, 55.26it/s][A
 61%|██████    | 8605/14067 [02:34<01:38, 55.50it/s][A
 61%|██████    | 8611/14067 [02:34<01:38, 55.60it/s][A
 61%|██████▏   | 8617/14067 [02:35<01:38, 55.50it/s][A
 61%|██████▏   | 8623/14067 [02:35<01:37, 55.79it/s][A
 61%|██████▏   | 8629/14067 [02:35<01:36, 56.06it/s][A
 61%|██████▏   | 8635/14067 [02:35<01:37, 55.80it/s][A
 61%|██████▏   | 8641/14067 [02:35<01:37, 55.66it/s][A
 61%|██████▏   | 8647/14067 [02:35<01:36, 55.94it/s][A
 62%|██████▏   | 8653/14067 [02:35<01:37, 55.81it/s][A
 62%|██████▏   | 8659/14067 [02:35<01:36, 55.90it/s][A
 62%|██████▏   | 8665/14067 [02:35<01:36, 55.74i

 73%|███████▎  | 10309/14067 [03:06<01:12, 52.00it/s][A
 73%|███████▎  | 10315/14067 [03:06<01:11, 52.37it/s][A
 73%|███████▎  | 10321/14067 [03:06<01:10, 52.98it/s][A
 73%|███████▎  | 10327/14067 [03:06<01:10, 53.02it/s][A
 73%|███████▎  | 10333/14067 [03:06<01:10, 53.25it/s][A
 73%|███████▎  | 10339/14067 [03:06<01:09, 53.29it/s][A
 74%|███████▎  | 10345/14067 [03:06<01:09, 53.50it/s][A
 74%|███████▎  | 10351/14067 [03:07<01:19, 46.75it/s][A
 74%|███████▎  | 10357/14067 [03:07<01:16, 48.30it/s][A
 74%|███████▎  | 10363/14067 [03:07<01:14, 49.83it/s][A
 74%|███████▎  | 10369/14067 [03:07<01:13, 50.41it/s][A
 74%|███████▍  | 10375/14067 [03:07<01:12, 51.25it/s][A
 74%|███████▍  | 10381/14067 [03:07<01:11, 51.81it/s][A
 74%|███████▍  | 10387/14067 [03:07<01:10, 52.19it/s][A
 74%|███████▍  | 10393/14067 [03:07<01:10, 52.36it/s][A
 74%|███████▍  | 10399/14067 [03:07<01:09, 52.52it/s][A
 74%|███████▍  | 10405/14067 [03:08<01:09, 52.57it/s][A
 74%|███████▍  | 10411/14067 [0

 85%|████████▌ | 12025/14067 [03:39<00:41, 49.27it/s][A
 86%|████████▌ | 12031/14067 [03:39<00:40, 49.84it/s][A
 86%|████████▌ | 12037/14067 [03:39<00:40, 49.98it/s][A
 86%|████████▌ | 12043/14067 [03:39<00:40, 49.75it/s][A
 86%|████████▌ | 12049/14067 [03:39<00:40, 50.16it/s][A
 86%|████████▌ | 12055/14067 [03:39<00:39, 50.52it/s][A
 86%|████████▌ | 12061/14067 [03:40<00:39, 50.88it/s][A
 86%|████████▌ | 12067/14067 [03:40<00:39, 50.87it/s][A
 86%|████████▌ | 12073/14067 [03:40<00:39, 50.78it/s][A
 86%|████████▌ | 12079/14067 [03:40<00:38, 51.74it/s][A
 86%|████████▌ | 12085/14067 [03:40<00:38, 51.01it/s][A
 86%|████████▌ | 12091/14067 [03:40<00:38, 51.31it/s][A
 86%|████████▌ | 12097/14067 [03:40<00:38, 51.20it/s][A
 86%|████████▌ | 12103/14067 [03:40<00:37, 52.15it/s][A
 86%|████████▌ | 12109/14067 [03:41<00:43, 44.99it/s][A
 86%|████████▌ | 12115/14067 [03:41<00:41, 46.63it/s][A
 86%|████████▌ | 12121/14067 [03:41<00:40, 47.50it/s][A
 86%|████████▌ | 12127/14067 [0

 98%|█████████▊| 13741/14067 [04:13<00:06, 50.20it/s][A
 98%|█████████▊| 13747/14067 [04:13<00:06, 50.45it/s][A
 98%|█████████▊| 13753/14067 [04:13<00:06, 50.73it/s][A
 98%|█████████▊| 13759/14067 [04:14<00:06, 50.58it/s][A
 98%|█████████▊| 13765/14067 [04:14<00:05, 50.76it/s][A
 98%|█████████▊| 13771/14067 [04:14<00:05, 51.05it/s][A
 98%|█████████▊| 13777/14067 [04:14<00:05, 51.21it/s][A
 98%|█████████▊| 13783/14067 [04:14<00:05, 51.29it/s][A
 98%|█████████▊| 13789/14067 [04:14<00:05, 51.25it/s][A
 98%|█████████▊| 13795/14067 [04:14<00:05, 51.23it/s][A
 98%|█████████▊| 13801/14067 [04:14<00:05, 51.57it/s][A
 98%|█████████▊| 13807/14067 [04:15<00:05, 51.32it/s][A
 98%|█████████▊| 13813/14067 [04:15<00:04, 51.21it/s][A
 98%|█████████▊| 13819/14067 [04:15<00:04, 52.41it/s][A
 98%|█████████▊| 13825/14067 [04:15<00:04, 49.88it/s][A
 98%|█████████▊| 13831/14067 [04:15<00:05, 41.13it/s][A
 98%|█████████▊| 13837/14067 [04:15<00:05, 43.79it/s][A
 98%|█████████▊| 13843/14067 [0

 10%|█         | 1423/14040 [00:28<04:07, 51.01it/s][A
 10%|█         | 1429/14040 [00:29<04:06, 51.14it/s][A
 10%|█         | 1435/14040 [00:29<04:07, 51.02it/s][A
 10%|█         | 1441/14040 [00:29<04:08, 50.71it/s][A
 10%|█         | 1447/14040 [00:29<04:08, 50.65it/s][A
 10%|█         | 1453/14040 [00:29<04:08, 50.56it/s][A
 10%|█         | 1459/14040 [00:29<04:06, 51.00it/s][A
 10%|█         | 1465/14040 [00:29<04:05, 51.20it/s][A
 10%|█         | 1471/14040 [00:29<04:07, 50.72it/s][A
 11%|█         | 1477/14040 [00:29<04:09, 50.41it/s][A
 11%|█         | 1483/14040 [00:30<04:04, 51.46it/s][A
 11%|█         | 1489/14040 [00:30<04:05, 51.16it/s][A
 11%|█         | 1495/14040 [00:30<04:07, 50.76it/s][A
 11%|█         | 1501/14040 [00:30<04:08, 50.49it/s][A
 11%|█         | 1507/14040 [00:30<04:08, 50.45it/s][A
 11%|█         | 1513/14040 [00:30<04:07, 50.63it/s][A
 11%|█         | 1519/14040 [00:30<04:27, 46.89it/s][A
 11%|█         | 1525/14040 [00:30<04:25, 47.22i

 23%|██▎       | 3175/14040 [01:03<03:32, 51.13it/s][A
 23%|██▎       | 3181/14040 [01:03<03:33, 50.92it/s][A
 23%|██▎       | 3187/14040 [01:04<03:34, 50.70it/s][A
 23%|██▎       | 3193/14040 [01:04<03:32, 51.10it/s][A
 23%|██▎       | 3199/14040 [01:04<03:33, 50.69it/s][A
 23%|██▎       | 3205/14040 [01:04<03:35, 50.36it/s][A
 23%|██▎       | 3211/14040 [01:04<03:36, 50.06it/s][A
 23%|██▎       | 3217/14040 [01:04<03:33, 50.79it/s][A
 23%|██▎       | 3223/14040 [01:04<03:34, 50.46it/s][A
 23%|██▎       | 3229/14040 [01:04<03:33, 50.58it/s][A
 23%|██▎       | 3235/14040 [01:04<03:34, 50.43it/s][A
 23%|██▎       | 3241/14040 [01:05<03:33, 50.48it/s][A
 23%|██▎       | 3247/14040 [01:05<03:34, 50.41it/s][A
 23%|██▎       | 3253/14040 [01:05<03:33, 50.55it/s][A
 23%|██▎       | 3259/14040 [01:05<03:33, 50.46it/s][A
 23%|██▎       | 3265/14040 [01:05<03:34, 50.30it/s][A
 23%|██▎       | 3271/14040 [01:05<03:33, 50.53it/s][A
 23%|██▎       | 3277/14040 [01:05<03:34, 50.13i

 35%|███▌      | 4927/14040 [01:38<03:04, 49.50it/s][A
 35%|███▌      | 4933/14040 [01:38<03:04, 49.24it/s][A
 35%|███▌      | 4939/14040 [01:38<03:04, 49.28it/s][A
 35%|███▌      | 4945/14040 [01:38<03:04, 49.36it/s][A
 35%|███▌      | 4951/14040 [01:39<03:01, 49.97it/s][A
 35%|███▌      | 4957/14040 [01:39<03:00, 50.41it/s][A
 35%|███▌      | 4963/14040 [01:39<02:59, 50.67it/s][A
 35%|███▌      | 4969/14040 [01:39<03:00, 50.29it/s][A
 35%|███▌      | 4975/14040 [01:39<03:00, 50.27it/s][A
 35%|███▌      | 4981/14040 [01:39<02:59, 50.47it/s][A
 36%|███▌      | 4987/14040 [01:39<03:00, 50.26it/s][A
 36%|███▌      | 4993/14040 [01:39<02:59, 50.51it/s][A
 36%|███▌      | 4999/14040 [01:40<02:59, 50.27it/s][A
 36%|███▌      | 5005/14040 [01:40<02:58, 50.66it/s][A
 36%|███▌      | 5011/14040 [01:40<02:57, 50.74it/s][A
 36%|███▌      | 5017/14040 [01:40<02:58, 50.45it/s][A
 36%|███▌      | 5023/14040 [01:40<03:00, 50.02it/s][A
 36%|███▌      | 5029/14040 [01:40<02:59, 50.14i

 48%|████▊     | 6679/14040 [02:13<02:31, 48.72it/s][A
 48%|████▊     | 6685/14040 [02:13<02:29, 49.24it/s][A
 48%|████▊     | 6691/14040 [02:14<02:29, 49.13it/s][A
 48%|████▊     | 6697/14040 [02:14<02:29, 49.24it/s][A
 48%|████▊     | 6703/14040 [02:14<02:29, 49.17it/s][A
 48%|████▊     | 6709/14040 [02:14<02:29, 49.14it/s][A
 48%|████▊     | 6715/14040 [02:14<02:28, 49.17it/s][A
 48%|████▊     | 6721/14040 [02:14<02:29, 48.99it/s][A
 48%|████▊     | 6727/14040 [02:14<02:28, 49.17it/s][A
 48%|████▊     | 6733/14040 [02:14<02:29, 48.81it/s][A
 48%|████▊     | 6739/14040 [02:15<02:28, 49.22it/s][A
 48%|████▊     | 6745/14040 [02:15<02:27, 49.33it/s][A
 48%|████▊     | 6751/14040 [02:15<02:27, 49.40it/s][A
 48%|████▊     | 6757/14040 [02:15<02:27, 49.37it/s][A
 48%|████▊     | 6763/14040 [02:15<02:25, 49.95it/s][A
 48%|████▊     | 6769/14040 [02:15<02:24, 50.42it/s][A
 48%|████▊     | 6775/14040 [02:15<02:24, 50.39it/s][A
 48%|████▊     | 6781/14040 [02:15<02:23, 50.47i

 60%|██████    | 8431/14040 [02:49<01:51, 50.38it/s][A
 60%|██████    | 8437/14040 [02:49<01:51, 50.04it/s][A
 60%|██████    | 8443/14040 [02:49<01:53, 49.52it/s][A
 60%|██████    | 8449/14040 [02:49<01:53, 49.09it/s][A
 60%|██████    | 8455/14040 [02:49<01:53, 49.41it/s][A
 60%|██████    | 8461/14040 [02:49<01:53, 49.24it/s][A
 60%|██████    | 8467/14040 [02:49<01:52, 49.58it/s][A
 60%|██████    | 8473/14040 [02:50<01:52, 49.57it/s][A
 60%|██████    | 8479/14040 [02:50<01:52, 49.27it/s][A
 60%|██████    | 8485/14040 [02:50<01:52, 49.28it/s][A
 60%|██████    | 8491/14040 [02:50<01:52, 49.31it/s][A
 61%|██████    | 8497/14040 [02:50<01:51, 49.50it/s][A
 61%|██████    | 8503/14040 [02:50<01:52, 49.11it/s][A
 61%|██████    | 8509/14040 [02:50<01:50, 49.90it/s][A
 61%|██████    | 8515/14040 [02:50<01:51, 49.58it/s][A
 61%|██████    | 8521/14040 [02:51<01:51, 49.47it/s][A
 61%|██████    | 8527/14040 [02:51<01:51, 49.47it/s][A
 61%|██████    | 8533/14040 [02:51<01:51, 49.44i

 72%|███████▏  | 10177/14040 [03:24<01:18, 49.12it/s][A
 73%|███████▎  | 10183/14040 [03:24<01:17, 49.53it/s][A
 73%|███████▎  | 10189/14040 [03:24<01:17, 49.82it/s][A
 73%|███████▎  | 10195/14040 [03:24<01:17, 49.52it/s][A
 73%|███████▎  | 10201/14040 [03:25<01:17, 49.27it/s][A
 73%|███████▎  | 10207/14040 [03:25<01:17, 49.15it/s][A
 73%|███████▎  | 10213/14040 [03:25<01:18, 48.98it/s][A
 73%|███████▎  | 10219/14040 [03:25<01:18, 48.96it/s][A
 73%|███████▎  | 10225/14040 [03:25<01:17, 49.18it/s][A
 73%|███████▎  | 10231/14040 [03:25<01:17, 49.24it/s][A
 73%|███████▎  | 10237/14040 [03:25<01:16, 49.44it/s][A
 73%|███████▎  | 10243/14040 [03:25<01:16, 49.80it/s][A
 73%|███████▎  | 10249/14040 [03:25<01:16, 49.53it/s][A
 73%|███████▎  | 10255/14040 [03:26<01:16, 49.37it/s][A
 73%|███████▎  | 10261/14040 [03:26<01:15, 50.19it/s][A
 73%|███████▎  | 10267/14040 [03:26<01:15, 49.80it/s][A
 73%|███████▎  | 10273/14040 [03:26<01:15, 49.65it/s][A
 73%|███████▎  | 10279/14040 [0

 85%|████████▍ | 11893/14040 [03:59<00:43, 49.31it/s][A
 85%|████████▍ | 11899/14040 [03:59<00:43, 49.13it/s][A
 85%|████████▍ | 11905/14040 [03:59<00:43, 48.99it/s][A
 85%|████████▍ | 11911/14040 [03:59<00:43, 49.42it/s][A
 85%|████████▍ | 11917/14040 [03:59<00:43, 49.12it/s][A
 85%|████████▍ | 11923/14040 [04:00<00:42, 49.26it/s][A
 85%|████████▍ | 11929/14040 [04:00<00:42, 49.10it/s][A
 85%|████████▌ | 11935/14040 [04:00<00:43, 48.70it/s][A
 85%|████████▌ | 11941/14040 [04:00<00:43, 48.57it/s][A
 85%|████████▌ | 11947/14040 [04:00<00:43, 48.45it/s][A
 85%|████████▌ | 11953/14040 [04:00<00:43, 48.35it/s][A
 85%|████████▌ | 11959/14040 [04:00<00:43, 48.26it/s][A
 85%|████████▌ | 11965/14040 [04:00<00:42, 48.93it/s][A
 85%|████████▌ | 11971/14040 [04:01<00:42, 48.82it/s][A
 85%|████████▌ | 11977/14040 [04:01<00:42, 48.35it/s][A
 85%|████████▌ | 11983/14040 [04:01<00:42, 48.30it/s][A
 85%|████████▌ | 11989/14040 [04:01<00:42, 48.36it/s][A
 85%|████████▌ | 11995/14040 [0

 97%|█████████▋| 13606/14040 [04:34<00:09, 47.79it/s][A
 97%|█████████▋| 13612/14040 [04:34<00:08, 48.10it/s][A
 97%|█████████▋| 13618/14040 [04:34<00:08, 47.51it/s][A
 97%|█████████▋| 13624/14040 [04:35<00:08, 47.73it/s][A
 97%|█████████▋| 13630/14040 [04:35<00:08, 47.44it/s][A
 97%|█████████▋| 13636/14040 [04:35<00:08, 47.38it/s][A
 97%|█████████▋| 13642/14040 [04:35<00:08, 47.62it/s][A
 97%|█████████▋| 13648/14040 [04:35<00:08, 47.34it/s][A
 97%|█████████▋| 13654/14040 [04:35<00:08, 46.85it/s][A
 97%|█████████▋| 13660/14040 [04:35<00:08, 47.21it/s][A
 97%|█████████▋| 13666/14040 [04:35<00:07, 47.11it/s][A
 97%|█████████▋| 13672/14040 [04:36<00:07, 47.11it/s][A
 97%|█████████▋| 13678/14040 [04:36<00:07, 47.05it/s][A
 97%|█████████▋| 13684/14040 [04:36<00:07, 47.19it/s][A
 98%|█████████▊| 13690/14040 [04:36<00:07, 47.48it/s][A
 98%|█████████▊| 13696/14040 [04:36<00:07, 47.42it/s][A
 98%|█████████▊| 13702/14040 [04:36<00:07, 47.26it/s][A
 98%|█████████▊| 13708/14040 [0

  9%|▉         | 1294/14166 [00:30<05:24, 39.66it/s][A
  9%|▉         | 1300/14166 [00:30<05:20, 40.09it/s][A
  9%|▉         | 1306/14166 [00:30<05:12, 41.17it/s][A
  9%|▉         | 1312/14166 [00:30<05:04, 42.24it/s][A
  9%|▉         | 1318/14166 [00:30<04:59, 42.86it/s][A
  9%|▉         | 1324/14166 [00:30<04:55, 43.52it/s][A
  9%|▉         | 1330/14166 [00:31<04:53, 43.79it/s][A
  9%|▉         | 1336/14166 [00:31<04:47, 44.65it/s][A
  9%|▉         | 1342/14166 [00:31<04:45, 44.93it/s][A
 10%|▉         | 1348/14166 [00:31<04:46, 44.80it/s][A
 10%|▉         | 1354/14166 [00:31<04:45, 44.95it/s][A
 10%|▉         | 1360/14166 [00:31<04:47, 44.58it/s][A
 10%|▉         | 1366/14166 [00:31<04:47, 44.56it/s][A
 10%|▉         | 1372/14166 [00:31<04:46, 44.69it/s][A
 10%|▉         | 1378/14166 [00:32<04:45, 44.77it/s][A
 10%|▉         | 1384/14166 [00:32<04:46, 44.54it/s][A
 10%|▉         | 1390/14166 [00:32<04:47, 44.37it/s][A
 10%|▉         | 1396/14166 [00:32<04:47, 44.36i

 21%|██▏       | 3028/14166 [01:10<04:07, 44.99it/s][A
 21%|██▏       | 3034/14166 [01:10<04:09, 44.54it/s][A
 21%|██▏       | 3040/14166 [01:11<04:10, 44.34it/s][A
 22%|██▏       | 3046/14166 [01:11<04:09, 44.63it/s][A
 22%|██▏       | 3052/14166 [01:11<04:08, 44.81it/s][A
 22%|██▏       | 3058/14166 [01:11<04:05, 45.17it/s][A
 22%|██▏       | 3064/14166 [01:11<04:04, 45.37it/s][A
 22%|██▏       | 3070/14166 [01:11<04:03, 45.53it/s][A
 22%|██▏       | 3076/14166 [01:11<04:02, 45.65it/s][A
 22%|██▏       | 3082/14166 [01:12<04:03, 45.49it/s][A
 22%|██▏       | 3088/14166 [01:12<04:04, 45.23it/s][A
 22%|██▏       | 3094/14166 [01:12<04:05, 45.18it/s][A
 22%|██▏       | 3100/14166 [01:12<04:03, 45.42it/s][A
 22%|██▏       | 3106/14166 [01:12<04:02, 45.62it/s][A
 22%|██▏       | 3112/14166 [01:12<04:01, 45.73it/s][A
 22%|██▏       | 3118/14166 [01:12<04:02, 45.59it/s][A
 22%|██▏       | 3124/14166 [01:12<04:01, 45.75it/s][A
 22%|██▏       | 3130/14166 [01:13<04:02, 45.42i

 34%|███▎      | 4777/14166 [01:49<03:23, 46.06it/s][A
 34%|███▍      | 4783/14166 [01:49<03:24, 45.77it/s][A
 34%|███▍      | 4789/14166 [01:49<03:25, 45.60it/s][A
 34%|███▍      | 4795/14166 [01:49<03:25, 45.70it/s][A
 34%|███▍      | 4801/14166 [01:49<03:24, 45.71it/s][A
 34%|███▍      | 4807/14166 [01:50<03:25, 45.58it/s][A
 34%|███▍      | 4813/14166 [01:50<03:25, 45.50it/s][A
 34%|███▍      | 4819/14166 [01:50<03:25, 45.54it/s][A
 34%|███▍      | 4825/14166 [01:50<03:25, 45.55it/s][A
 34%|███▍      | 4831/14166 [01:50<03:24, 45.71it/s][A
 34%|███▍      | 4837/14166 [01:50<03:24, 45.71it/s][A
 34%|███▍      | 4843/14166 [01:50<03:24, 45.69it/s][A
 34%|███▍      | 4849/14166 [01:50<03:24, 45.55it/s][A
 34%|███▍      | 4855/14166 [01:51<03:25, 45.27it/s][A
 34%|███▍      | 4861/14166 [01:51<03:25, 45.23it/s][A
 34%|███▍      | 4867/14166 [01:51<03:25, 45.17it/s][A
 34%|███▍      | 4873/14166 [01:51<03:24, 45.46it/s][A
 34%|███▍      | 4879/14166 [01:51<03:24, 45.50i

 46%|████▌     | 6526/14166 [02:27<02:47, 45.49it/s][A
 46%|████▌     | 6532/14166 [02:28<02:47, 45.46it/s][A
 46%|████▌     | 6538/14166 [02:28<02:47, 45.46it/s][A
 46%|████▌     | 6544/14166 [02:28<02:47, 45.55it/s][A
 46%|████▌     | 6550/14166 [02:28<02:47, 45.58it/s][A
 46%|████▋     | 6556/14166 [02:28<02:46, 45.58it/s][A
 46%|████▋     | 6562/14166 [02:28<02:47, 45.34it/s][A
 46%|████▋     | 6568/14166 [02:28<02:47, 45.39it/s][A
 46%|████▋     | 6573/14166 [02:28<02:50, 44.63it/s][A
 46%|████▋     | 6578/14166 [02:29<02:57, 42.72it/s][A
 46%|████▋     | 6583/14166 [02:29<03:02, 41.53it/s][A
 47%|████▋     | 6589/14166 [02:29<02:56, 42.82it/s][A
 47%|████▋     | 6595/14166 [02:29<02:53, 43.64it/s][A
 47%|████▋     | 6601/14166 [02:29<02:50, 44.27it/s][A
 47%|████▋     | 6607/14166 [02:29<02:49, 44.59it/s][A
 47%|████▋     | 6613/14166 [02:29<02:48, 44.85it/s][A
 47%|████▋     | 6619/14166 [02:30<02:47, 44.95it/s][A
 47%|████▋     | 6625/14166 [02:30<02:47, 45.15i

 58%|█████▊    | 8275/14166 [03:06<02:12, 44.39it/s][A
 58%|█████▊    | 8281/14166 [03:07<02:12, 44.52it/s][A
 58%|█████▊    | 8287/14166 [03:07<02:12, 44.38it/s][A
 59%|█████▊    | 8293/14166 [03:07<02:11, 44.60it/s][A
 59%|█████▊    | 8299/14166 [03:07<02:10, 44.79it/s][A
 59%|█████▊    | 8305/14166 [03:07<02:11, 44.51it/s][A
 59%|█████▊    | 8311/14166 [03:07<02:11, 44.51it/s][A
 59%|█████▊    | 8317/14166 [03:07<02:12, 44.30it/s][A
 59%|█████▉    | 8323/14166 [03:08<02:12, 44.23it/s][A
 59%|█████▉    | 8329/14166 [03:08<02:11, 44.36it/s][A
 59%|█████▉    | 8335/14166 [03:08<02:11, 44.41it/s][A
 59%|█████▉    | 8341/14166 [03:08<02:10, 44.74it/s][A
 59%|█████▉    | 8347/14166 [03:08<02:09, 44.93it/s][A
 59%|█████▉    | 8353/14166 [03:08<02:09, 44.79it/s][A
 59%|█████▉    | 8359/14166 [03:08<02:09, 44.90it/s][A
 59%|█████▉    | 8365/14166 [03:08<02:09, 44.76it/s][A
 59%|█████▉    | 8371/14166 [03:09<02:09, 44.78it/s][A
 59%|█████▉    | 8377/14166 [03:09<02:10, 44.43i

 71%|███████   | 10024/14166 [03:46<01:36, 42.91it/s][A
 71%|███████   | 10030/14166 [03:46<01:35, 43.09it/s][A
 71%|███████   | 10036/14166 [03:47<01:35, 43.42it/s][A
 71%|███████   | 10042/14166 [03:47<01:34, 43.75it/s][A
 71%|███████   | 10048/14166 [03:47<01:33, 43.99it/s][A
 71%|███████   | 10054/14166 [03:47<01:33, 44.11it/s][A
 71%|███████   | 10060/14166 [03:47<01:32, 44.16it/s][A
 71%|███████   | 10066/14166 [03:47<01:32, 44.11it/s][A
 71%|███████   | 10072/14166 [03:47<01:32, 44.09it/s][A
 71%|███████   | 10078/14166 [03:48<01:32, 44.20it/s][A
 71%|███████   | 10084/14166 [03:48<01:32, 44.36it/s][A
 71%|███████   | 10090/14166 [03:48<01:31, 44.47it/s][A
 71%|███████▏  | 10096/14166 [03:48<01:31, 44.29it/s][A
 71%|███████▏  | 10102/14166 [03:48<01:31, 44.31it/s][A
 71%|███████▏  | 10108/14166 [03:48<01:32, 44.10it/s][A
 71%|███████▏  | 10114/14166 [03:48<01:32, 43.91it/s][A
 71%|███████▏  | 10120/14166 [03:48<01:31, 44.15it/s][A
 71%|███████▏  | 10126/14166 [0

 83%|████████▎ | 11737/14166 [04:25<00:53, 45.35it/s][A
 83%|████████▎ | 11743/14166 [04:25<00:53, 45.32it/s][A
 83%|████████▎ | 11749/14166 [04:25<00:53, 45.11it/s][A
 83%|████████▎ | 11755/14166 [04:25<00:53, 44.97it/s][A
 83%|████████▎ | 11761/14166 [04:26<00:53, 44.82it/s][A
 83%|████████▎ | 11767/14166 [04:26<00:53, 44.44it/s][A
 83%|████████▎ | 11773/14166 [04:26<00:53, 44.36it/s][A
 83%|████████▎ | 11779/14166 [04:26<00:53, 44.32it/s][A
 83%|████████▎ | 11785/14166 [04:26<00:53, 44.18it/s][A
 83%|████████▎ | 11791/14166 [04:26<00:52, 44.91it/s][A
 83%|████████▎ | 11797/14166 [04:26<00:52, 44.96it/s][A
 83%|████████▎ | 11803/14166 [04:26<00:52, 44.73it/s][A
 83%|████████▎ | 11809/14166 [04:27<00:53, 44.36it/s][A
 83%|████████▎ | 11815/14166 [04:27<00:52, 44.41it/s][A
 83%|████████▎ | 11821/14166 [04:27<00:52, 44.28it/s][A
 83%|████████▎ | 11827/14166 [04:27<00:52, 44.25it/s][A
 84%|████████▎ | 11833/14166 [04:27<00:52, 44.39it/s][A
 84%|████████▎ | 11839/14166 [0

 95%|█████████▍| 13453/14166 [05:04<00:15, 44.95it/s][A
 95%|█████████▌| 13459/14166 [05:04<00:15, 44.80it/s][A
 95%|█████████▌| 13465/14166 [05:04<00:15, 44.69it/s][A
 95%|█████████▌| 13471/14166 [05:04<00:15, 44.35it/s][A
 95%|█████████▌| 13477/14166 [05:04<00:15, 44.07it/s][A
 95%|█████████▌| 13483/14166 [05:05<00:15, 43.88it/s][A
 95%|█████████▌| 13489/14166 [05:05<00:15, 43.84it/s][A
 95%|█████████▌| 13495/14166 [05:05<00:15, 43.80it/s][A
 95%|█████████▌| 13501/14166 [05:05<00:15, 44.27it/s][A
 95%|█████████▌| 13507/14166 [05:05<00:14, 44.03it/s][A
 95%|█████████▌| 13513/14166 [05:05<00:14, 43.83it/s][A
 95%|█████████▌| 13519/14166 [05:05<00:14, 43.75it/s][A
 95%|█████████▌| 13525/14166 [05:06<00:14, 43.84it/s][A
 96%|█████████▌| 13531/14166 [05:06<00:14, 43.85it/s][A
 96%|█████████▌| 13537/14166 [05:06<00:14, 44.08it/s][A
 96%|█████████▌| 13543/14166 [05:06<00:14, 44.12it/s][A
 96%|█████████▌| 13549/14166 [05:06<00:13, 44.15it/s][A
 96%|█████████▌| 13555/14166 [0

  8%|▊         | 1015/13527 [00:26<05:03, 41.18it/s][A
  8%|▊         | 1021/13527 [00:26<05:05, 40.95it/s][A
  8%|▊         | 1027/13527 [00:26<05:06, 40.85it/s][A
  8%|▊         | 1033/13527 [00:26<05:04, 41.05it/s][A
  8%|▊         | 1039/13527 [00:26<05:04, 40.99it/s][A
  8%|▊         | 1045/13527 [00:26<05:02, 41.22it/s][A
  8%|▊         | 1051/13527 [00:27<05:06, 40.64it/s][A
  8%|▊         | 1057/13527 [00:27<05:00, 41.50it/s][A
  8%|▊         | 1062/13527 [00:27<05:22, 38.70it/s][A
  8%|▊         | 1066/13527 [00:27<05:55, 35.06it/s][A
  8%|▊         | 1072/13527 [00:27<05:39, 36.73it/s][A
  8%|▊         | 1078/13527 [00:27<05:22, 38.59it/s][A
  8%|▊         | 1084/13527 [00:27<05:17, 39.18it/s][A
  8%|▊         | 1090/13527 [00:28<05:14, 39.58it/s][A
  8%|▊         | 1096/13527 [00:28<05:08, 40.27it/s][A
  8%|▊         | 1102/13527 [00:28<05:05, 40.63it/s][A
  8%|▊         | 1108/13527 [00:28<05:05, 40.64it/s][A
  8%|▊         | 1114/13527 [00:28<05:02, 41.02i

 20%|██        | 2728/13527 [01:10<04:30, 39.95it/s][A
 20%|██        | 2734/13527 [01:10<04:36, 39.04it/s][A
 20%|██        | 2740/13527 [01:10<05:10, 34.72it/s][A
 20%|██        | 2746/13527 [01:10<05:00, 35.90it/s][A
 20%|██        | 2752/13527 [01:10<04:48, 37.38it/s][A
 20%|██        | 2758/13527 [01:11<04:44, 37.85it/s][A
 20%|██        | 2764/13527 [01:11<04:41, 38.25it/s][A
 20%|██        | 2770/13527 [01:11<04:38, 38.56it/s][A
 21%|██        | 2776/13527 [01:11<04:37, 38.79it/s][A
 21%|██        | 2782/13527 [01:11<04:34, 39.21it/s][A
 21%|██        | 2788/13527 [01:11<04:32, 39.35it/s][A
 21%|██        | 2794/13527 [01:12<04:32, 39.36it/s][A
 21%|██        | 2800/13527 [01:12<04:33, 39.29it/s][A
 21%|██        | 2806/13527 [01:12<04:33, 39.23it/s][A
 21%|██        | 2812/13527 [01:12<04:33, 39.17it/s][A
 21%|██        | 2818/13527 [01:12<04:32, 39.35it/s][A
 21%|██        | 2824/13527 [01:12<04:31, 39.46it/s][A
 21%|██        | 2828/13527 [01:12<04:50, 36.87i

 33%|███▎      | 4447/13527 [01:52<03:30, 43.11it/s][A
 33%|███▎      | 4453/13527 [01:53<03:30, 43.11it/s][A
 33%|███▎      | 4459/13527 [01:53<03:30, 43.02it/s][A
 33%|███▎      | 4465/13527 [01:53<03:31, 42.80it/s][A
 33%|███▎      | 4471/13527 [01:53<03:30, 42.97it/s][A
 33%|███▎      | 4477/13527 [01:53<03:30, 42.99it/s][A
 33%|███▎      | 4483/13527 [01:53<03:30, 42.97it/s][A
 33%|███▎      | 4489/13527 [01:53<03:30, 42.92it/s][A
 33%|███▎      | 4495/13527 [01:54<03:30, 43.01it/s][A
 33%|███▎      | 4501/13527 [01:54<03:26, 43.62it/s][A
 33%|███▎      | 4507/13527 [01:54<03:27, 43.42it/s][A
 33%|███▎      | 4513/13527 [01:54<03:28, 43.16it/s][A
 33%|███▎      | 4519/13527 [01:54<03:32, 42.36it/s][A
 33%|███▎      | 4525/13527 [01:54<03:30, 42.84it/s][A
 33%|███▎      | 4531/13527 [01:54<03:30, 42.78it/s][A
 34%|███▎      | 4537/13527 [01:55<03:30, 42.71it/s][A
 34%|███▎      | 4543/13527 [01:55<03:30, 42.75it/s][A
 34%|███▎      | 4549/13527 [01:55<03:30, 42.62i

 46%|████▌     | 6196/13527 [02:34<02:50, 43.05it/s][A
 46%|████▌     | 6202/13527 [02:34<02:50, 43.03it/s][A
 46%|████▌     | 6208/13527 [02:34<02:51, 42.79it/s][A
 46%|████▌     | 6214/13527 [02:34<02:50, 42.77it/s][A
 46%|████▌     | 6220/13527 [02:34<02:48, 43.49it/s][A
 46%|████▌     | 6226/13527 [02:34<02:48, 43.34it/s][A
 46%|████▌     | 6232/13527 [02:34<02:48, 43.33it/s][A
 46%|████▌     | 6238/13527 [02:34<02:49, 43.11it/s][A
 46%|████▌     | 6244/13527 [02:35<02:49, 43.06it/s][A
 46%|████▌     | 6250/13527 [02:35<02:49, 43.02it/s][A
 46%|████▌     | 6256/13527 [02:35<02:49, 43.00it/s][A
 46%|████▋     | 6262/13527 [02:35<02:49, 42.94it/s][A
 46%|████▋     | 6268/13527 [02:35<02:48, 43.02it/s][A
 46%|████▋     | 6274/13527 [02:35<02:48, 43.08it/s][A
 46%|████▋     | 6280/13527 [02:35<02:46, 43.60it/s][A
 46%|████▋     | 6286/13527 [02:36<02:46, 43.39it/s][A
 47%|████▋     | 6292/13527 [02:36<02:47, 43.09it/s][A
 47%|████▋     | 6298/13527 [02:36<02:50, 42.52i

 59%|█████▊    | 7945/13527 [03:15<02:10, 42.71it/s][A
 59%|█████▉    | 7951/13527 [03:15<02:10, 42.78it/s][A
 59%|█████▉    | 7957/13527 [03:15<02:10, 42.56it/s][A
 59%|█████▉    | 7963/13527 [03:15<02:10, 42.65it/s][A
 59%|█████▉    | 7969/13527 [03:15<02:10, 42.68it/s][A
 59%|█████▉    | 7975/13527 [03:15<02:08, 43.29it/s][A
 59%|█████▉    | 7981/13527 [03:16<02:08, 43.31it/s][A
 59%|█████▉    | 7987/13527 [03:16<02:08, 43.10it/s][A
 59%|█████▉    | 7993/13527 [03:16<02:10, 42.54it/s][A
 59%|█████▉    | 7999/13527 [03:16<02:09, 42.70it/s][A
 59%|█████▉    | 8005/13527 [03:16<02:09, 42.74it/s][A
 59%|█████▉    | 8011/13527 [03:16<02:09, 42.70it/s][A
 59%|█████▉    | 8017/13527 [03:16<02:07, 43.25it/s][A
 59%|█████▉    | 8023/13527 [03:17<02:07, 43.09it/s][A
 59%|█████▉    | 8029/13527 [03:17<02:08, 42.88it/s][A
 59%|█████▉    | 8035/13527 [03:17<02:09, 42.53it/s][A
 59%|█████▉    | 8041/13527 [03:17<02:09, 42.35it/s][A
 59%|█████▉    | 8047/13527 [03:17<02:09, 42.28i

 72%|███████▏  | 9697/13527 [03:56<01:33, 40.76it/s][A
 72%|███████▏  | 9703/13527 [03:56<01:34, 40.29it/s][A
 72%|███████▏  | 9709/13527 [03:57<01:34, 40.42it/s][A
 72%|███████▏  | 9715/13527 [03:57<01:33, 40.68it/s][A
 72%|███████▏  | 9721/13527 [03:57<01:33, 40.76it/s][A
 72%|███████▏  | 9727/13527 [03:57<01:33, 40.79it/s][A
 72%|███████▏  | 9733/13527 [03:57<01:33, 40.58it/s][A
 72%|███████▏  | 9739/13527 [03:57<01:33, 40.69it/s][A
 72%|███████▏  | 9745/13527 [03:57<01:32, 40.85it/s][A
 72%|███████▏  | 9751/13527 [03:58<01:32, 40.82it/s][A
 72%|███████▏  | 9757/13527 [03:58<01:32, 40.82it/s][A
 72%|███████▏  | 9763/13527 [03:58<01:32, 40.82it/s][A
 72%|███████▏  | 9769/13527 [03:58<01:32, 40.74it/s][A
 72%|███████▏  | 9775/13527 [03:58<01:32, 40.70it/s][A
 72%|███████▏  | 9781/13527 [03:58<01:31, 40.92it/s][A
 72%|███████▏  | 9787/13527 [03:58<01:30, 41.35it/s][A
 72%|███████▏  | 9793/13527 [03:59<01:30, 41.24it/s][A
 72%|███████▏  | 9799/13527 [03:59<01:29, 41.61i

 84%|████████▍ | 11416/13527 [04:37<00:49, 42.28it/s][A
 84%|████████▍ | 11422/13527 [04:37<00:50, 41.98it/s][A
 84%|████████▍ | 11428/13527 [04:37<00:50, 41.86it/s][A
 85%|████████▍ | 11434/13527 [04:38<00:49, 41.89it/s][A
 85%|████████▍ | 11440/13527 [04:38<00:49, 41.86it/s][A
 85%|████████▍ | 11446/13527 [04:38<00:49, 42.45it/s][A
 85%|████████▍ | 11452/13527 [04:38<00:49, 42.27it/s][A
 85%|████████▍ | 11458/13527 [04:38<00:48, 42.30it/s][A
 85%|████████▍ | 11464/13527 [04:38<00:48, 42.39it/s][A
 85%|████████▍ | 11470/13527 [04:38<00:48, 42.23it/s][A
 85%|████████▍ | 11476/13527 [04:39<00:48, 42.02it/s][A
 85%|████████▍ | 11482/13527 [04:39<00:48, 42.16it/s][A
 85%|████████▍ | 11488/13527 [04:39<00:48, 42.12it/s][A
 85%|████████▍ | 11494/13527 [04:39<00:48, 42.13it/s][A
 85%|████████▌ | 11500/13527 [04:39<00:48, 42.11it/s][A
 85%|████████▌ | 11506/13527 [04:39<00:48, 42.04it/s][A
 85%|████████▌ | 11512/13527 [04:39<00:48, 41.84it/s][A
 85%|████████▌ | 11518/13527 [0

 97%|█████████▋| 13129/13527 [05:18<00:09, 41.02it/s][A
 97%|█████████▋| 13135/13527 [05:18<00:09, 41.34it/s][A
 97%|█████████▋| 13141/13527 [05:18<00:09, 41.45it/s][A
 97%|█████████▋| 13147/13527 [05:19<00:09, 41.56it/s][A
 97%|█████████▋| 13153/13527 [05:19<00:09, 41.37it/s][A
 97%|█████████▋| 13159/13527 [05:19<00:08, 41.34it/s][A
 97%|█████████▋| 13165/13527 [05:19<00:08, 41.52it/s][A
 97%|█████████▋| 13171/13527 [05:19<00:08, 41.73it/s][A
 97%|█████████▋| 13177/13527 [05:19<00:08, 42.36it/s][A
 97%|█████████▋| 13183/13527 [05:19<00:08, 42.10it/s][A
 98%|█████████▊| 13189/13527 [05:20<00:07, 42.29it/s][A
 98%|█████████▊| 13195/13527 [05:20<00:07, 42.10it/s][A
 98%|█████████▊| 13201/13527 [05:20<00:07, 41.89it/s][A
 98%|█████████▊| 13207/13527 [05:20<00:07, 41.85it/s][A
 98%|█████████▊| 13213/13527 [05:20<00:07, 41.84it/s][A
 98%|█████████▊| 13219/13527 [05:20<00:07, 41.63it/s][A
 98%|█████████▊| 13225/13527 [05:20<00:07, 41.42it/s][A
 98%|█████████▊| 13231/13527 [0

 10%|▉         | 1348/14121 [00:36<05:35, 38.03it/s][A
 10%|▉         | 1354/14121 [00:36<05:35, 38.04it/s][A
 10%|▉         | 1360/14121 [00:36<05:34, 38.19it/s][A
 10%|▉         | 1366/14121 [00:37<05:36, 37.95it/s][A
 10%|▉         | 1372/14121 [00:37<05:35, 38.00it/s][A
 10%|▉         | 1378/14121 [00:37<05:36, 37.86it/s][A
 10%|▉         | 1384/14121 [00:37<05:36, 37.80it/s][A
 10%|▉         | 1390/14121 [00:37<05:34, 38.04it/s][A
 10%|▉         | 1396/14121 [00:37<05:33, 38.12it/s][A
 10%|▉         | 1402/14121 [00:37<05:33, 38.16it/s][A
 10%|▉         | 1408/14121 [00:38<05:33, 38.15it/s][A
 10%|█         | 1414/14121 [00:38<05:33, 38.11it/s][A
 10%|█         | 1420/14121 [00:38<05:31, 38.29it/s][A
 10%|█         | 1426/14121 [00:38<05:32, 38.16it/s][A
 10%|█         | 1432/14121 [00:38<05:33, 38.07it/s][A
 10%|█         | 1438/14121 [00:38<05:33, 38.06it/s][A
 10%|█         | 1444/14121 [00:39<05:32, 38.09it/s][A
 10%|█         | 1450/14121 [00:39<05:31, 38.19i

 22%|██▏       | 3094/14121 [01:23<05:35, 32.84it/s][A
 22%|██▏       | 3100/14121 [01:23<05:31, 33.28it/s][A
 22%|██▏       | 3106/14121 [01:23<05:28, 33.56it/s][A
 22%|██▏       | 3112/14121 [01:24<05:27, 33.63it/s][A
 22%|██▏       | 3118/14121 [01:24<05:23, 34.02it/s][A
 22%|██▏       | 3124/14121 [01:24<05:21, 34.23it/s][A
 22%|██▏       | 3129/14121 [01:24<04:54, 37.32it/s][A
 22%|██▏       | 3133/14121 [01:24<05:34, 32.87it/s][A
 22%|██▏       | 3137/14121 [01:24<05:46, 31.71it/s][A
 22%|██▏       | 3142/14121 [01:24<06:01, 30.38it/s][A
 22%|██▏       | 3146/14121 [01:25<06:11, 29.58it/s][A
 22%|██▏       | 3151/14121 [01:25<06:18, 28.97it/s][A
 22%|██▏       | 3157/14121 [01:25<05:55, 30.82it/s][A
 22%|██▏       | 3163/14121 [01:25<05:43, 31.93it/s][A
 22%|██▏       | 3169/14121 [01:25<05:34, 32.76it/s][A
 22%|██▏       | 3175/14121 [01:25<05:28, 33.29it/s][A
 23%|██▎       | 3180/14121 [01:26<04:58, 36.60it/s][A
 23%|██▎       | 3184/14121 [01:26<05:36, 32.50i

 33%|███▎      | 4682/14121 [02:12<04:45, 33.12it/s][A
 33%|███▎      | 4686/14121 [02:12<04:45, 33.06it/s][A
 33%|███▎      | 4690/14121 [02:12<05:13, 30.08it/s][A
 33%|███▎      | 4696/14121 [02:12<04:58, 31.53it/s][A
 33%|███▎      | 4702/14121 [02:13<04:47, 32.79it/s][A
 33%|███▎      | 4708/14121 [02:13<04:41, 33.40it/s][A
 33%|███▎      | 4714/14121 [02:13<04:39, 33.66it/s][A
 33%|███▎      | 4720/14121 [02:13<04:35, 34.15it/s][A
 33%|███▎      | 4726/14121 [02:13<04:30, 34.74it/s][A
 34%|███▎      | 4732/14121 [02:13<04:29, 34.89it/s][A
 34%|███▎      | 4738/14121 [02:14<04:28, 34.93it/s][A
 34%|███▎      | 4744/14121 [02:14<04:28, 34.93it/s][A
 34%|███▎      | 4750/14121 [02:14<04:27, 34.99it/s][A
 34%|███▎      | 4756/14121 [02:14<04:28, 34.88it/s][A
 34%|███▎      | 4762/14121 [02:14<05:05, 30.60it/s][A
 34%|███▍      | 4768/14121 [02:15<04:56, 31.58it/s][A
 34%|███▍      | 4774/14121 [02:15<04:46, 32.59it/s][A
 34%|███▍      | 4780/14121 [02:15<04:40, 33.35i

 45%|████▌     | 6376/14121 [03:01<03:24, 37.96it/s][A
 45%|████▌     | 6382/14121 [03:01<03:24, 37.83it/s][A
 45%|████▌     | 6388/14121 [03:01<03:23, 37.96it/s][A
 45%|████▌     | 6394/14121 [03:01<03:22, 38.09it/s][A
 45%|████▌     | 6400/14121 [03:02<03:22, 38.21it/s][A
 45%|████▌     | 6406/14121 [03:02<03:22, 38.11it/s][A
 45%|████▌     | 6412/14121 [03:02<03:22, 38.01it/s][A
 45%|████▌     | 6418/14121 [03:02<03:23, 37.91it/s][A
 45%|████▌     | 6424/14121 [03:02<03:23, 37.86it/s][A
 46%|████▌     | 6430/14121 [03:02<03:22, 37.93it/s][A
 46%|████▌     | 6436/14121 [03:02<03:23, 37.72it/s][A
 46%|████▌     | 6442/14121 [03:03<03:23, 37.81it/s][A
 46%|████▌     | 6448/14121 [03:03<03:22, 37.81it/s][A
 46%|████▌     | 6454/14121 [03:03<03:23, 37.75it/s][A
 46%|████▌     | 6460/14121 [03:03<03:23, 37.61it/s][A
 46%|████▌     | 6464/14121 [03:03<03:22, 37.76it/s][A
 46%|████▌     | 6469/14121 [03:03<03:33, 35.86it/s][A
 46%|████▌     | 6475/14121 [03:04<03:29, 36.46i

 58%|█████▊    | 8122/14121 [03:47<02:38, 37.97it/s][A
 58%|█████▊    | 8128/14121 [03:47<02:39, 37.66it/s][A
 58%|█████▊    | 8134/14121 [03:48<02:39, 37.60it/s][A
 58%|█████▊    | 8140/14121 [03:48<02:39, 37.59it/s][A
 58%|█████▊    | 8146/14121 [03:48<02:38, 37.67it/s][A
 58%|█████▊    | 8152/14121 [03:48<02:39, 37.48it/s][A
 58%|█████▊    | 8158/14121 [03:48<02:39, 37.46it/s][A
 58%|█████▊    | 8164/14121 [03:48<02:39, 37.38it/s][A
 58%|█████▊    | 8170/14121 [03:49<02:39, 37.41it/s][A
 58%|█████▊    | 8176/14121 [03:49<02:38, 37.40it/s][A
 58%|█████▊    | 8182/14121 [03:49<02:37, 37.62it/s][A
 58%|█████▊    | 8188/14121 [03:49<02:37, 37.71it/s][A
 58%|█████▊    | 8194/14121 [03:49<02:37, 37.52it/s][A
 58%|█████▊    | 8200/14121 [03:49<02:38, 37.44it/s][A
 58%|█████▊    | 8206/14121 [03:50<02:37, 37.48it/s][A
 58%|█████▊    | 8212/14121 [03:50<02:37, 37.47it/s][A
 58%|█████▊    | 8218/14121 [03:50<02:37, 37.59it/s][A
 58%|█████▊    | 8224/14121 [03:50<02:37, 37.49i

 70%|██████▉   | 9871/14121 [04:34<01:53, 37.54it/s][A
 70%|██████▉   | 9877/14121 [04:34<01:52, 37.70it/s][A
 70%|██████▉   | 9883/14121 [04:34<01:52, 37.58it/s][A
 70%|███████   | 9889/14121 [04:34<01:52, 37.48it/s][A
 70%|███████   | 9895/14121 [04:35<01:52, 37.51it/s][A
 70%|███████   | 9901/14121 [04:35<01:52, 37.41it/s][A
 70%|███████   | 9907/14121 [04:35<01:52, 37.40it/s][A
 70%|███████   | 9913/14121 [04:35<01:52, 37.28it/s][A
 70%|███████   | 9919/14121 [04:35<01:52, 37.39it/s][A
 70%|███████   | 9925/14121 [04:35<01:52, 37.44it/s][A
 70%|███████   | 9931/14121 [04:36<01:51, 37.50it/s][A
 70%|███████   | 9937/14121 [04:36<01:51, 37.38it/s][A
 70%|███████   | 9943/14121 [04:36<01:51, 37.51it/s][A
 70%|███████   | 9949/14121 [04:36<01:50, 37.62it/s][A
 70%|███████   | 9955/14121 [04:36<01:51, 37.44it/s][A
 71%|███████   | 9961/14121 [04:36<01:49, 38.01it/s][A
 71%|███████   | 9967/14121 [04:37<01:49, 37.92it/s][A
 71%|███████   | 9973/14121 [04:37<01:50, 37.46i

 82%|████████▏ | 11584/14121 [05:21<01:07, 37.37it/s][A
 82%|████████▏ | 11590/14121 [05:21<01:08, 37.05it/s][A
 82%|████████▏ | 11596/14121 [05:21<01:08, 37.09it/s][A
 82%|████████▏ | 11602/14121 [05:21<01:07, 37.07it/s][A
 82%|████████▏ | 11608/14121 [05:21<01:07, 37.08it/s][A
 82%|████████▏ | 11614/14121 [05:21<01:07, 37.20it/s][A
 82%|████████▏ | 11620/14121 [05:22<01:07, 37.18it/s][A
 82%|████████▏ | 11626/14121 [05:22<01:06, 37.27it/s][A
 82%|████████▏ | 11632/14121 [05:22<01:07, 36.78it/s][A
 82%|████████▏ | 11638/14121 [05:22<01:07, 37.00it/s][A
 82%|████████▏ | 11644/14121 [05:22<01:06, 37.00it/s][A
 83%|████████▎ | 11650/14121 [05:22<01:06, 37.22it/s][A
 83%|████████▎ | 11656/14121 [05:22<01:06, 37.30it/s][A
 83%|████████▎ | 11662/14121 [05:23<01:05, 37.31it/s][A
 83%|████████▎ | 11668/14121 [05:23<01:05, 37.21it/s][A
 83%|████████▎ | 11674/14121 [05:23<01:05, 37.12it/s][A
 83%|████████▎ | 11680/14121 [05:23<01:05, 37.29it/s][A
 83%|████████▎ | 11686/14121 [0

 94%|█████████▍| 13294/14121 [06:07<00:22, 37.01it/s][A
 94%|█████████▍| 13300/14121 [06:07<00:22, 37.04it/s][A
 94%|█████████▍| 13306/14121 [06:07<00:22, 37.01it/s][A
 94%|█████████▍| 13312/14121 [06:08<00:21, 37.05it/s][A
 94%|█████████▍| 13318/14121 [06:08<00:21, 37.14it/s][A
 94%|█████████▍| 13324/14121 [06:08<00:21, 37.04it/s][A
 94%|█████████▍| 13330/14121 [06:08<00:21, 37.00it/s][A
 94%|█████████▍| 13336/14121 [06:08<00:21, 37.08it/s][A
 94%|█████████▍| 13342/14121 [06:08<00:21, 36.90it/s][A
 95%|█████████▍| 13348/14121 [06:09<00:20, 36.96it/s][A
 95%|█████████▍| 13354/14121 [06:09<00:20, 36.88it/s][A
 95%|█████████▍| 13360/14121 [06:09<00:20, 36.87it/s][A
 95%|█████████▍| 13366/14121 [06:09<00:20, 36.98it/s][A
 95%|█████████▍| 13372/14121 [06:09<00:20, 37.03it/s][A
 95%|█████████▍| 13378/14121 [06:09<00:20, 36.87it/s][A
 95%|█████████▍| 13384/14121 [06:10<00:20, 36.76it/s][A
 95%|█████████▍| 13390/14121 [06:10<00:19, 36.75it/s][A
 95%|█████████▍| 13396/14121 [0

# Save GIGN graphs for each ligand

In [4]:
# conda activate base
import os, re
import pandas as pd
import numpy as np
import pickle
from scipy.spatial import distance_matrix
import multiprocessing
from itertools import repeat
import networkx as nx
import torch 
from torch.utils.data import Dataset, DataLoader
from rdkit import Chem
from rdkit import RDLogger
from rdkit import Chem
from torch_geometric.data import Batch, Data
from tqdm import tqdm
import warnings
RDLogger.DisableLog('rdApp.*')
np.set_printoptions(threshold=np.inf)
warnings.filterwarnings('ignore')
from torch_geometric.data import Batch

# %%
def one_of_k_encoding(k, possible_values):
    if k not in possible_values:
        raise ValueError(f"{k} is not a valid value in {possible_values}")
    return [k == e for e in possible_values]


def one_of_k_encoding_unk(x, allowable_set):
    if x not in allowable_set:
        x = allowable_set[-1]
    return list(map(lambda s: x == s, allowable_set))


def atom_features(mol, graph, atom_symbols=['C', 'N', 'O', 'S', 'F', 'P', 'Cl', 'Br', 'I'], explicit_H=True):

    for atom in mol.GetAtoms():
        results = one_of_k_encoding_unk(atom.GetSymbol(), atom_symbols + ['Unknown']) + \
                one_of_k_encoding_unk(atom.GetDegree(),[0, 1, 2, 3, 4, 5, 6]) + \
                one_of_k_encoding_unk(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6]) + \
                one_of_k_encoding_unk(atom.GetHybridization(), [
                    Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
                    Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.
                                        SP3D, Chem.rdchem.HybridizationType.SP3D2
                    ]) + [atom.GetIsAromatic()]
        # In case of explicit hydrogen(QM8, QM9), avoid calling `GetTotalNumHs`
        if explicit_H:
            results = results + one_of_k_encoding_unk(atom.GetTotalNumHs(),
                                                    [0, 1, 2, 3, 4])

        atom_feats = np.array(results).astype(np.float32)

        graph.add_node(atom.GetIdx(), feats=torch.from_numpy(atom_feats))

def get_edge_index(mol, graph):
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()

        graph.add_edge(i, j)

def mol2graph(mol):
    graph = nx.Graph()
    atom_features(mol, graph)
    get_edge_index(mol, graph)

    graph = graph.to_directed()
    x = torch.stack([feats['feats'] for n, feats in graph.nodes(data=True)])
    if not graph.edges(data=False):
        return [], [], True
    edge_index = torch.stack([torch.LongTensor((u, v)) for u, v in graph.edges(data=False)]).T

    return x, edge_index, False

def inter_graph(ligand, pocket, dis_threshold = 5.):
    atom_num_l = ligand.GetNumAtoms()
    atom_num_p = pocket.GetNumAtoms()

    graph_inter = nx.Graph()
    pos_l = ligand.GetConformers()[0].GetPositions()
    pos_p = pocket.GetConformers()[0].GetPositions()
    dis_matrix = distance_matrix(pos_l, pos_p)
    node_idx = np.where(dis_matrix < dis_threshold)
    for i, j in zip(node_idx[0], node_idx[1]):
        graph_inter.add_edge(i, j+atom_num_l) 

    graph_inter = graph_inter.to_directed()
    edge_index_inter = torch.stack([torch.LongTensor((u, v)) for u, v in graph_inter.edges(data=False)]).T

    return edge_index_inter

# %%
def mols2graphs(complex_path_list, label, vina_score_list, save_path, dis_threshold):
    data_list = []
    fail_path = []
    for i, complex_path in enumerate(complex_path_list):
        if os.path.exists(complex_path):
            with open(complex_path, 'rb') as f:
                ligand, pocket = pickle.load(f)
        else:
            print('Complex file not found:', complex_path)
            fail_path.append(complex_path)
            continue

        atom_num_l = ligand.GetNumAtoms()
        atom_num_p = pocket.GetNumAtoms()

        pos_l = torch.FloatTensor(ligand.GetConformers()[0].GetPositions())
        pos_p = torch.FloatTensor(pocket.GetConformers()[0].GetPositions())
        x_l, edge_index_l, fail_l = mol2graph(ligand)
        x_p, edge_index_p, fail_p = mol2graph(pocket)
        if fail_l or fail_p:
            print('Failed to read complex file:', complex_path)
            fail_path.append(complex_path)
            continue

        x = torch.cat([x_l, x_p], dim=0)
        edge_index_intra = torch.cat([edge_index_l, edge_index_p+atom_num_l], dim=-1)
        try:
            edge_index_inter = inter_graph(ligand, pocket, dis_threshold=dis_threshold)
        except:
            print('Failed to read complex edges:', complex_path)
            fail_path.append(complex_path)
            continue
            
        y = torch.FloatTensor([label])
        vina_score = torch.FloatTensor([vina_score_list[i]])
        pos = torch.concat([pos_l, pos_p], dim=0)
        split = torch.cat([torch.zeros((atom_num_l, )), torch.ones((atom_num_p,))], dim=0)
        pocket = complex_path.split('/')[-1].split('_')[2]
        
        data = Data(x=x, edge_index_intra=edge_index_intra, edge_index_inter=edge_index_inter, y=y, vina_score=vina_score, pos=pos, pocket=pocket, split=split)
        data_list.append(data)
        
    if len(fail_path) == len(complex_path_list):
        return complex_path_list
    else:
        merged_data = Batch.from_data_list(data_list)
        torch.save(merged_data, save_path)
        return fail_path

# %%
class PLIDataLoader(DataLoader):
    def __init__(self, data, **kwargs):
        super().__init__(data, collate_fn=data.collate_fn, **kwargs)

class GraphDataset(Dataset):
    """
    This class is used for generating graph objects using multi process
    """
    def __init__(self, data_dir, data_df, dis_threshold=5, num_pose=1, graph_type='Graph_GIGN', assay_type='pIC50', num_process=8, create=False):
        self.data_dir = data_dir
        self.data_df = data_df
        self.dis_threshold = dis_threshold
        self.num_pose = num_pose
        self.graph_type = graph_type
        self.create = create
        self.graph_paths = None
        self.complex_ids = None
        self.assay_type = assay_type
        self.num_process = num_process
        self.mean, self.std = 0, 1
        self._pre_process()

    def _pre_process(self):
        data_dir = self.data_dir
        data_df = self.data_df
        graph_type = self.graph_type
        pocket_num = len(os.listdir(data_dir))

        complex_path_list, complex_id_list, pIC50_list, score_list, graph_path_list, dis_threshold_list = [], [], [], [], [], []
        file_list = os.listdir(data_dir)
        pocket_list_all = [x for x in file_list if x.split('_')[-1] == f'{self.dis_threshold}A.rdkit']
        not_found_list = []
        for i, row in data_df.iterrows():
            cid, pIC50 = row['ChEMBL_Compound_ID'], float(row[self.assay_type])
            complex_path_list_cid, complex_id_list_cid, score_list_cid = [], [], []
            graph_path = os.path.join(data_dir, f"{cid}_{graph_type}_{self.dis_threshold}A.pyg")
            pocket_list = [x for x in pocket_list_all if x.split('_')[0] == cid]
            if pocket_list:
                for pocket in pocket_list:
                    pocket_idx = pocket.split('_')[2]
                    score = float(row[f'Pocket_{pocket_idx}_Vina_Score'])
                    complex_path = os.path.join(data_dir, f"{cid}_Complex_{pocket_idx}_{self.dis_threshold}A.rdkit")
                    if len(pocket_idx.split('-')) > 1 and int(pocket_idx.split('-')[1]) <= self.num_pose or len(pocket_idx.split('-')) == 1:
                        complex_path_list_cid.append(complex_path)
                        score_list_cid.append(score)
                complex_path_list.append(complex_path_list_cid)
                score_list.append(score_list_cid)
                complex_id_list.append(cid)
                pIC50_list.append(pIC50)
                graph_path_list.append(graph_path)
                dis_threshold_list.append(self.dis_threshold)
            else:  
                not_found_list.append(graph_path)

        self.mean, self.std = np.mean(pIC50_list), np.std(pIC50_list)
        if self.create:
            print('Generate complex graph...')
            # multi-thread processing
            pool = multiprocessing.Pool(self.num_process)
            for complex_path, pIC50, vina_score ,graph_path, dis_threshold in zip(complex_path_list, pIC50_list, score_list, graph_path_list, dis_threshold_list):
                not_found_path = mols2graphs(complex_path, pIC50, vina_score ,graph_path, dis_threshold)
                if len(not_found_path) == len(complex_path):
                    not_found_list.append(graph_path)
            with open(data_dir + f'/not_found_list.pkl', 'wb') as f:
                pickle.dump(not_found_list, f)
            pool.close()
            pool.join()
        
        with open(data_dir + '/not_found_list.pkl', 'rb') as f:
            not_found_list = pickle.load(f)
        self.complex_ids = complex_id_list
        self.graph_paths = [x for x in graph_path_list if x not in not_found_list]

    def __getitem__(self, idx):
        data = torch.load(self.graph_paths[idx])
        match = re.search(r'CHEMBL(\d+)_', self.graph_paths[idx])
        chembl_id = match.group(0)
        data['idx'] = chembl_id[:-1]
        return data

    def collate_fn(self, batch):
        return Batch.from_data_list(batch)

    def __len__(self):
        return len(self.graph_paths)

task_list = ['E3', 'I4', 'I5']
task_dict = {'I1': ('CHEMBL202', 'pIC50', '1boz', 7, 1), 'E3': ('CHEMBL235', 'pEC50', '1zgy', 4, 2), 
             'I5': ('CHEMBL279', 'pIC50', '1ywn',3, 3), 'I4': ('CHEMBL2971', 'pIC50', '3ugc', 3, 3), 
             'I3': ('CHEMBL333', 'pIC50', '1ck7', 6, 2), 'E1': ('CHEMBL3820', 'pEC50', '3f9m', 6 ,1), 
             'I2': ('CHEMBL3976', 'pIC50', '4ebb', 2, 4), 'E2': ('CHEMBL4422', 'pEC50', '5tzr', 3, 3)}
for task_id in task_list:
    protein_name, assay_type, pdb_name, pocket_num, pose_num = task_dict[task_id]
    data_root = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}/{pdb_name}/vina/'
    set_list = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']
    for set_name in set_list:
        data_dir = data_root + set_name + '/'
        data_df = pd.read_csv(os.path.join(data_root, f'{protein_name}_{assay_type}_{set_name}.csv'))
        dataset = GraphDataset(data_dir, data_df, graph_type='Graph_GIGN', assay_type=assay_type, dis_threshold=5, create=True)
        print('Dataset size:', len(dataset))
        data_loader = PLIDataLoader(dataset, batch_size=256, shuffle=True, num_workers=4)
        for data in data_loader:
            # print(data) --> DataBatch(x=[2481, 35], y=[8], pos=[2481, 3], edge_index_intra=[2, 4884], edge_index_inter=[2, 4456], split=[2481], pocket=[8], batch=[2481], ptr=[9])
            data, pocket, idx, label, socre = data, data.pocket, data.idx, data.y, data.vina_score
            print(f'Loading {len(pocket)} data successfully')

Generate complex graph...
Dataset size: 162
Loading 162 data successfully
Generate complex graph...
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_2_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_5_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_1_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_3_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_6_5A.rdkit
Failed

Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL1800948_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_6_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_1_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_5_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_7_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_2_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vin

Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_3_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_1_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_7_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_5_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_2_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vin

Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_3_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_1_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_2_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_7_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_6_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vin

# Save GIGN graphs for each ligand with bond features

In [8]:
# conda activate base
import os, re
import pandas as pd
import numpy as np
import pickle
from scipy.spatial import distance_matrix
import multiprocessing
from itertools import repeat
import networkx as nx
import torch 
from torch.utils.data import Dataset, DataLoader
from rdkit import Chem
from rdkit import RDLogger
from rdkit import Chem
from torch_geometric.data import Batch, Data
from tqdm import tqdm
import warnings
RDLogger.DisableLog('rdApp.*')
np.set_printoptions(threshold=np.inf)
warnings.filterwarnings('ignore')
from torch_geometric.data import Batch

graph_type = ['Graph_GIGN', 'Graph_Bond'][1]
task_list = ['I1']
task_dict = {'I1': ('CHEMBL202', 'pIC50', '1boz', 7, 1), 'E3': ('CHEMBL235', 'pEC50', '1zgy', 4, 2), 
             'I5': ('CHEMBL279', 'pIC50', '1ywn',3, 3), 'I4': ('CHEMBL2971', 'pIC50', '3ugc', 3, 3), 
             'I3': ('CHEMBL333', 'pIC50', '1ck7', 6, 2), 'E1': ('CHEMBL3820', 'pEC50', '3f9m', 6 ,1), 
             'I2': ('CHEMBL3976', 'pIC50', '4ebb', 2, 4), 'E2': ('CHEMBL4422', 'pEC50', '5tzr', 3, 3)}

# %%
def one_of_k_encoding(k, possible_values):
    if k not in possible_values:
        raise ValueError(f"{k} is not a valid value in {possible_values}")
    return [k == e for e in possible_values]


def one_of_k_encoding_unk(x, allowable_set):
    if x not in allowable_set:
        x = allowable_set[-1]
    return list(map(lambda s: x == s, allowable_set))


def atom_features(mol, graph, atom_symbols=['C', 'N', 'O', 'S', 'F', 'P', 'Cl', 'Br', 'I'], explicit_H=True):

    for atom in mol.GetAtoms():
        results = one_of_k_encoding_unk(atom.GetSymbol(), atom_symbols + ['Unknown']) + \
                one_of_k_encoding_unk(atom.GetDegree(),[0, 1, 2, 3, 4, 5, 6]) + \
                one_of_k_encoding_unk(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6]) + \
                one_of_k_encoding_unk(atom.GetHybridization(), [
                    Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
                    Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.
                                        SP3D, Chem.rdchem.HybridizationType.SP3D2
                    ]) + [atom.GetIsAromatic()]
        # In case of explicit hydrogen(QM8, QM9), avoid calling `GetTotalNumHs`
        if explicit_H:
            results = results + one_of_k_encoding_unk(atom.GetTotalNumHs(),
                                                    [0, 1, 2, 3, 4])

        atom_feats = np.array(results).astype(np.float32)

        graph.add_node(atom.GetIdx(), feats=torch.from_numpy(atom_feats))

def bond_features(bond, use_chirality=True):
    bt = bond.GetBondType()
    bond_feats = [
        bt == Chem.rdchem.BondType.SINGLE, bt == Chem.rdchem.BondType.DOUBLE,
        bt == Chem.rdchem.BondType.TRIPLE, bt == Chem.rdchem.BondType.AROMATIC,
        bond.GetIsConjugated(),
        bond.IsInRing()
    ]
    if use_chirality:
        bond_feats = bond_feats + one_of_k_encoding_unk(
            str(bond.GetStereo()),
            ["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"])
    return np.array(bond_feats)
        
def get_edge_index(mol, graph):
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        bond_feats = bond_features(bond)
        graph.add_edge(i, j, weight=bond_feats)

def mol2graph(mol):
    graph = nx.Graph()
    atom_features(mol, graph)
    get_edge_index(mol, graph)

    graph = graph.to_directed()
    x = torch.stack([feats['feats'] for n, feats in graph.nodes(data=True)])
    x_bond = torch.tensor([graph[u][v]['weight'] for u, v in graph.edges()])
    if not graph.edges(data=False):
        return [], [], [], True
    edge_index = torch.stack([torch.LongTensor((u, v)) for u, v in graph.edges(data=False)]).T

    return x, x_bond, edge_index, False

def inter_graph(ligand, pocket, dis_threshold = 5.):
    atom_num_l = ligand.GetNumAtoms()
    atom_num_p = pocket.GetNumAtoms()

    graph_inter = nx.Graph()
    pos_l = ligand.GetConformers()[0].GetPositions()
    pos_p = pocket.GetConformers()[0].GetPositions()
    dis_matrix = distance_matrix(pos_l, pos_p)
    node_idx = np.where(dis_matrix < dis_threshold)
    for i, j in zip(node_idx[0], node_idx[1]):
        graph_inter.add_edge(i, j+atom_num_l) 

    graph_inter = graph_inter.to_directed()
    edge_index_inter = torch.stack([torch.LongTensor((u, v)) for u, v in graph_inter.edges(data=False)]).T

    return edge_index_inter

# %%
def mols2graphs(complex_path_list, label, vina_score_list, save_path, dis_threshold):
    data_list = []
    fail_path = []
    for i, complex_path in enumerate(complex_path_list):
        if os.path.exists(complex_path):
            with open(complex_path, 'rb') as f:
                ligand, pocket = pickle.load(f)
        else:
            print('Complex file not found:', complex_path)
            fail_path.append(complex_path)
            continue

        atom_num_l = ligand.GetNumAtoms()
        atom_num_p = pocket.GetNumAtoms()

        pos_l = torch.FloatTensor(ligand.GetConformers()[0].GetPositions())
        pos_p = torch.FloatTensor(pocket.GetConformers()[0].GetPositions())
        x_l, x_l_bond, edge_index_l, fail_l = mol2graph(ligand)
        x_p, x_p_bond, edge_index_p, fail_p = mol2graph(pocket)
        if fail_l or fail_p:
            print('Failed to read complex file:', complex_path)
            fail_path.append(complex_path)
            continue

        x = torch.cat([x_l, x_p], dim=0)
        x_bond = torch.cat([x_l_bond, x_p_bond], dim=0)
        edge_index_intra = torch.cat([edge_index_l, edge_index_p+atom_num_l], dim=-1)
        try:
            edge_index_inter = inter_graph(ligand, pocket, dis_threshold=dis_threshold)
        except:
            print('Failed to read complex edges:', complex_path)
            fail_path.append(complex_path)
            continue
            
        y = torch.FloatTensor([label])
        vina_score = torch.FloatTensor([vina_score_list[i]])
        pos = torch.concat([pos_l, pos_p], dim=0)
        split = torch.cat([torch.zeros((atom_num_l, )), torch.ones((atom_num_p,))], dim=0)
        pocket = complex_path.split('/')[-1].split('_')[2]
        
        data = Data(x=x, x_bond=x_bond, edge_index_intra=edge_index_intra, edge_index_inter=edge_index_inter, y=y, vina_score=vina_score, pos=pos, pocket=pocket, split=split)
        data_list.append(data)
        
    if len(fail_path) == len(complex_path_list):
        return complex_path_list
    else:
        merged_data = Batch.from_data_list(data_list)
        torch.save(merged_data, save_path)
        return fail_path

# %%
class PLIDataLoader(DataLoader):
    def __init__(self, data, **kwargs):
        super().__init__(data, collate_fn=data.collate_fn, **kwargs)

class GraphDataset(Dataset):
    """
    This class is used for generating graph objects using multi process
    """
    def __init__(self, data_dir, data_df, dis_threshold=5, num_pose=1, graph_type='Graph_GIGN', assay_type='pIC50', num_process=8, create=False):
        self.data_dir = data_dir
        self.data_df = data_df
        self.dis_threshold = dis_threshold
        self.num_pose = num_pose
        self.graph_type = graph_type
        self.create = create
        self.graph_paths = None
        self.complex_ids = None
        self.assay_type = assay_type
        self.num_process = num_process
        self.mean, self.std = 0, 1
        self._pre_process()

    def _pre_process(self):
        data_dir = self.data_dir
        data_df = self.data_df
        graph_type = self.graph_type
        pocket_num = len(os.listdir(data_dir))

        complex_path_list, complex_id_list, pIC50_list, score_list, graph_path_list, dis_threshold_list = [], [], [], [], [], []
        file_list = os.listdir(data_dir)
        pocket_list_all = [x for x in file_list if x.split('_')[-1] == f'{self.dis_threshold}A.rdkit']
        not_found_list = []
        for i, row in data_df.iterrows():
            cid, pIC50 = row['ChEMBL_Compound_ID'], float(row[self.assay_type])
            complex_path_list_cid, complex_id_list_cid, score_list_cid = [], [], []
            graph_path = os.path.join(data_dir, f"{cid}_{graph_type}_{self.dis_threshold}A.pyg")
            pocket_list = [x for x in pocket_list_all if x.split('_')[0] == cid]
            if pocket_list:
                for pocket in pocket_list:
                    pocket_idx = pocket.split('_')[2]
                    score = float(row[f'Pocket_{pocket_idx}_Vina_Score'])
                    complex_path = os.path.join(data_dir, f"{cid}_Complex_{pocket_idx}_{self.dis_threshold}A.rdkit")
                    if len(pocket_idx.split('-')) > 1 and int(pocket_idx.split('-')[1]) <= self.num_pose or len(pocket_idx.split('-')) == 1:
                        complex_path_list_cid.append(complex_path)
                        score_list_cid.append(score)
                complex_path_list.append(complex_path_list_cid)
                score_list.append(score_list_cid)
                complex_id_list.append(cid)
                pIC50_list.append(pIC50)
                graph_path_list.append(graph_path)
                dis_threshold_list.append(self.dis_threshold)
            else:  
                not_found_list.append(graph_path)

        self.mean, self.std = np.mean(pIC50_list), np.std(pIC50_list)
        if self.create:
            print('Generate complex graph...')
            # multi-thread processing
            pool = multiprocessing.Pool(self.num_process)
            for complex_path, pIC50, vina_score ,graph_path, dis_threshold in zip(complex_path_list, pIC50_list, score_list, graph_path_list, dis_threshold_list):
                not_found_path = mols2graphs(complex_path, pIC50, vina_score ,graph_path, dis_threshold)
                if len(not_found_path) == len(complex_path):
                    not_found_list.append(graph_path)
            with open(data_dir + f'/not_found_list.pkl', 'wb') as f:
                pickle.dump(not_found_list, f)
            pool.close()
            pool.join()
        
        with open(data_dir + '/not_found_list.pkl', 'rb') as f:
            not_found_list = pickle.load(f)
        self.complex_ids = complex_id_list
        self.graph_paths = [x for x in graph_path_list if x not in not_found_list]

    def __getitem__(self, idx):
        data = torch.load(self.graph_paths[idx])
        match = re.search(r'CHEMBL(\d+)_', self.graph_paths[idx])
        chembl_id = match.group(0)
        data['idx'] = chembl_id[:-1]
        return data

    def collate_fn(self, batch):
        return Batch.from_data_list(batch)

    def __len__(self):
        return len(self.graph_paths)

for task_id in task_list:
    protein_name, assay_type, pdb_name, pocket_num, pose_num = task_dict[task_id]
    data_root = f'/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/{protein_name}/{pdb_name}/vina/'
    set_list = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']
    for set_name in set_list:
        data_dir = data_root + set_name + '/'
        data_df = pd.read_csv(os.path.join(data_root, f'{protein_name}_{assay_type}_{set_name}.csv'))
        dataset = GraphDataset(data_dir, data_df, graph_type=graph_type, assay_type=assay_type, dis_threshold=5, create=True)
        print('Dataset size:', len(dataset))
        data_loader = PLIDataLoader(dataset, batch_size=256, shuffle=True, num_workers=4)
        for data in data_loader:
            # print(data) --> DataBatch(x=[2481, 35], y=[8], pos=[2481, 3], edge_index_intra=[2, 4884], edge_index_inter=[2, 4456], split=[2481], pocket=[8], batch=[2481], ptr=[9])
            x, pocket, idx, label, socre, x_bond = data.x, data.pocket, data.idx, data.y, data.vina_score, data.x_bond
            print(f'Loading {len(pocket)} data successfully, atom feature dim: {x.shape}, bond feature dim: {x_bond.shape}')

Generate complex graph...
Dataset size: 162
Loading 162 data successfully, atom feature dim: torch.Size([328815, 35]), bond feature dim: torch.Size([646708, 10])
Generate complex graph...
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_2_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_5_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_1_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_1/CHEMBL4160822_Complex_3_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines

Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL1800948_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_6_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_1_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_5_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_7_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3943671_Complex_2_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vin

Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3912587_Complex_3_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3912587_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_6_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_3_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_1_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_2/CHEMBL3974488_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vin

Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824772_Complex_6_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_3_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_1_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_2_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_7_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vina/train_3/CHEMBL1824534_Complex_4_5A.rdkit
Failed to read complex file: /home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/vin

# Count pocket number for each ligand ready to read

In [None]:
import os, json, re
from tqdm import tqdm

folder_path = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/6yg9'
save_file = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/6yg9.json'
count_dict, file_count = {}, 0

# 获取文件夹中所有文件
files = os.listdir(folder_path)

for file in tqdm(files):
    cid = file.split('_')[0]
    match = re.search(r'Graph_GIGN_Pocket_(\d+)_5A.pyg', file)
    if match:
        pocket_idx = int(match.group(1))
        # 检查文件名是否包含目标 ID
        if cid in count_dict.keys():
            count_dict[cid].append(pocket_idx)
        else:
            count_dict[cid] = [pocket_idx]

with open(save_file, 'w') as f:
    json.dump(count_dict, f, indent=4, separators=(",", ": "))

# Do QVina-W on ligands with activity

In [None]:
import subprocess
import pandas as pd
from tqdm import tqdm

# 定义输入输出文件路径列表
receptor_file = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/protein/pdb1boz.pdbqt'
input_dir_ligand = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/ligand/pdbqt/'
output_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/qvina/'
config_file = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/protein/1boz_config.txt'
qvina_w_path = '/home/yueming/Drug_Discovery/Baselines/qvina'
input_sets = os.listdir(input_dir_ligand)
# 循环处理每个文件路径
for set_name in tqdm(input_sets):
    input_ligands = os.listdir(input_dir_ligand + set_name)
    for input_file in tqdm(input_ligands):
        if not os.path.exists(output_dir + set_name):
            os.makedirs(output_dir + set_name)
        if not os.path.exists(f'{output_dir + set_name}/{input_file}'):
            input_file_path = input_dir_ligand + set_name + '/' + input_file
            # 更改配置文件中的 receptor 路径为当前的 input_file
            with open(config_file, 'r') as f:
                config_lines = f.readlines()
            config_lines[0] = f'receptor = {receptor_file}\n'
            config_lines[1] = f'ligand = {input_dir_ligand + set_name}/{input_file}\n'
            config_lines[2] = f'out  = {output_dir + set_name}/{input_file}\n'
            config_lines[3] = f'log  = {output_dir + set_name}/{input_file[:-6]}.txt\n'
            with open(config_file, 'w') as f:
                f.writelines(config_lines)

        # 执行命令
        command = f'{qvina_w_path}/qvina-w_serial --config {config_file}'
        os.system(command)

# Read and save QVina-W results

In [None]:
import os, re
from tqdm import tqdm
from rdkit import Chem
import pandas as pd

# List of input SDF files
result_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/qvina/'
data_sets = os.listdir(result_dir)
csv_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/activity/CHEMBL202_IC50_in_nM_'
output_file = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/qvina/CHEMBL202_pIC50_'
    
# 定义正则表达式模式
pattern = r'^\s+(\d+)\s+(-?\d+\.\d+)'
pattern_ = r'^\s+(\d+)\s+(-?\d+)'
new_column_names = {'value': 'pIC50', 'smiles': 'SMILES', 'ChEMBL_Compound_ID': 'ChEMBL_Compound_ID'}

for dataset in tqdm(data_sets):
    if dataset[-4:] != '.csv':
        dataset_path = result_dir + dataset + '/'
        dataset_df = pd.read_csv(csv_dir + dataset + '.csv')
        dataset_df = dataset_df.rename(columns=new_column_names)

        # Loop through input files
        for input_file in tqdm(os.listdir(dataset_path)):
            if input_file[-4:] == '.txt':
                # 读取文本文件
                file_path = dataset_path + input_file
                with open(file_path, 'r') as file:
                    lines = file.readlines()

                # 提取符合模式的行，并保存到列表中
                for line in lines:
                    match_ = re.match(pattern, line)
                    match__ = re.match(pattern_, line)
                    match = match_ if match_ else match__
                    if match:
                        number = int(match.group(1))
                        affinity = float(match.group().rsplit(maxsplit=1)[-1])
                        # 根据文件名索引，将数据添加到CSV表格中
                        file_index = input_file[:-4]  # 文件名索引
                        # 设置条件
                        condition = (dataset_df['ChEMBL_Compound_ID'] == file_index)  # 示例条件，可根据实际情况修改
                        # 根据条件筛选满足条件的行索引
                        row_indices = dataset_df[condition].index
                        column_name = f'Pocket_{number}_QVina-W_Score'  # 列名，根据文件名索引生成
                        dataset_df.loc[row_indices, column_name] = affinity

    # 保存更新后的CSV表格
    dataset_df.to_csv(output_file + dataset + '.csv', index=False)


# Prepare ground truth ligand files

In [None]:
# Desktop workstation
import os, shutil
from rdkit import Chem

ground_dir = '/data1/Drug_Discovery/Docking/data/pdb_with_activity/CHEMBL202'
file_list = os.listdir(ground_dir)
set_name_list = [x for x in file_list if len(x.split(".")) < 2]
for set_name in set_name_list:
    set_path = os.path.join(ground_dir, set_name)
    set_file_list = os.listdir(set_path)
    pdb_list = [x.split(".")[0] for x in set_file_list if x.split(".")[-1] == 'pdb']
    sdf_list = [x for x in set_file_list if x.split(".")[-1] == 'sdf']
    for pdb in pdb_list:
        ligand_name = [elem for elem in sdf_list if pdb in elem]
        input_file_path = os.path.join(set_path, ligand_name[0])
        output_file_path = os.path.join(set_path, ligand_name[0].replace('sdf', 'pdbqt'))
        if os.path.exists(output_file_path):
            continue
        try:
            # Load the molecule from the SDF file
            mol = Chem.SDMolSupplier(input_file_path)[0]
            # Add explicit hydrogens
#             sanitizedmol_mol = Chem.SanitizeMol(mol)
#             mol = Chem.Kekulize(mol)
            mol = Chem.AddHs(mol)
        except:
            print(input_file_path)
            continue
        # Save the modified molecule with explicit hydrogens
        Chem.SDWriter(input_file_path).write(mol)
        command = f"mk_prepare_ligand.py -i {input_file_path} -o {output_file_path}"
        print(command)
        !{command}

# Preprocessing docking data

In [None]:
# conda activate rdkit
import os
import pickle
from rdkit import Chem
import pandas as pd
from tqdm import tqdm
import pymol
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')

def pymol_operations(lig_native_path, protein_path, pocket_prefix, pocket_suffix, pocket_id):
    pymol.cmd.delete('all')
    pymol.cmd.load(lig_native_path)
    pymol.cmd.remove('hydrogens')
    pymol.cmd.load(protein_path)
    pymol.cmd.remove('resn HOH')
    object_list = pymol.cmd.get_object_list()  # 获取所有对象列表
    obj_ligand, obj_protein = object_list[0], object_list[1]
    num_states = pymol.cmd.count_states(obj_ligand)

    for state in range(1, num_states + 1):
        pocket_path = pocket_prefix + f'{state}' + pocket_suffix if num_states > 1 else pocket_prefix + f'{pocket_id}' + pocket_suffix
        if not os.path.exists(pocket_path):
            pymol.cmd.create(f"state_{state}", obj_ligand, state, 1)
            pymol.cmd.select('Pocket', f'byres {obj_protein} within {distance} of state_{state}')
            pymol.cmd.save(pocket_path, 'Pocket')
            pymol.cmd.delete('Pocket')
        
def generate_pocket(data_dir, protein_path, ground_dir, distance=5):
    complex_id = os.listdir(data_dir)
    for file in complex_id:
        if file[-6:] == '.pdbqt':
            cid = file[:-6]
            lig_native_path = os.path.join(data_dir, file)
            protein_name = protein_path.split('/')[-1].split('.')[0]
            pocket_prefix = os.path.join(data_dir, f'{cid}_Pocket_')
            pocket_suffix = f'_{distance}A.pdb'
            pymol_operations(lig_native_path, protein_path, pocket_prefix, pocket_suffix, pocket_id)
                
    ground_files = os.listdir(ground_dir)
    ground_pdb_list = [x for x in ground_files if x.split(".")[-1] == 'pdb' and len(x.split("_")) < 2]
    ground_pdbqt_list = [x for x in ground_files if x.split(".")[-1] == 'pdbqt']
    ligand_counter = {}
    for pdb_file in ground_pdb_list:
        pdb = pdb_file.split(".")[0]
        ligand_name = [elem for elem in ground_pdbqt_list if pdb in elem]
        try:
            ligand_id = ligand_name[0].split(".")[0].split("_")[1]
        except:
            print(f'Cannot read pdbqt ligand of protein: {pdb}')
            continue
        ligand_counter[ligand_id] = ligand_counter[ligand_id] + 1 if ligand_id in ligand_counter.keys() else 1
        lig_native_path = os.path.join(ground_dir, ligand_name[0])
        protein_path = os.path.join(ground_dir, pdb_file)
        pocket_prefix = os.path.join(ground_dir, f'{ligand_id}_Pocket_G')
        pocket_suffix = f'_{pdb}_{distance}A.pdb'
        pocket_id = ligand_counter[ligand_id]
        pymol_operations(lig_native_path, protein_path, pocket_prefix, pocket_suffix, pocket_id)


def generate_complex(data_dir, distance=5, input_ligand_format='mol2'):
    file_list = os.listdir(data_dir)
    pdb_list = [x for x in file_list if x.split('_')[-1] == f'{distance}A.pdb']
    pdbqt_list = [x for x in file_list if x.split('.')[-1] == f'pdbqt']
    pbar = tqdm(total=len(pdb_list))
    for pdb_file in pdb_list:
        cid, pocket = pdb_file.split('_')[0], pdb_file.split('_')[2]
        pocket_path = data_dir + pdb_file
        ligand_name = [x for x in pdbqt_list if x.split('.')[0].split('_')[-1] == cid]
        ligand_input_path = os.path.join(data_dir, ligand_name[0])
        ligand_path = ligand_input_path.replace(f".{input_ligand_format}", ".pdb")
        if not os.path.exists(ligand_input_path):
            if os.path.exists(ligand_path):
                os.remove(ligand_path)
            continue
        if input_ligand_format != 'pdb' and not os.path.exists(ligand_path):
            os.system(f'obabel {ligand_input_path} -O {ligand_path} -d')
        else:
            ligand_path = os.path.join(data_dir, ligand_name[0].replace(f".{input_ligand_format}", ".pdb"))

        save_path = os.path.join(data_dir, f"{cid}_Complex_{pocket}_{distance}A.rdkit")
        ligand = Chem.MolFromPDBFile(ligand_path, removeHs=True)
        if ligand == None:
            print(f"Unable to process ligand of {cid}")
            continue
            
        pocket = Chem.MolFromPDBFile(pocket_path, removeHs=True)
        if pocket == None:
            print(f"Unable to process protein of {pocket_path}")
            continue

        complex = (ligand, pocket)
        with open(save_path, 'wb') as f:
            pickle.dump(complex, f)
        os.remove(ligand_path)
        pbar.update(1)

distance = 5
input_ligand_format = 'pdbqt'
protein_path = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/protein/pdb1boz.pdbqt'
ground_root = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/pdb_with_activity/'
data_root = './data/CHEMBL202/1boz/'
set_list = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']
for dataset in tqdm(set_list):
    data_df = pd.read_csv(os.path.join(data_root, f'CHEMBL202_pIC50_{dataset}.csv'))
    dataset_path = data_root + dataset + '/'
    ground_dir = ground_root + dataset + '/'
    ## generate pocket within 5 Ångström around ligand 
    generate_pocket(data_dir=dataset_path, protein_path=protein_path, ground_dir=ground_dir, distance=distance)
    generate_complex(data_dir=dataset_path, distance=distance, input_ligand_format=input_ligand_format)
    generate_complex(data_dir=ground_dir, distance=distance, input_ligand_format=input_ligand_format)

# Process activity and structure data for model reading

In [None]:
# conda activate base
import os, re
import pandas as pd
import numpy as np
import pickle
from scipy.spatial import distance_matrix
import multiprocessing
from itertools import repeat
import networkx as nx
import torch 
from torch.utils.data import Dataset, DataLoader
from rdkit import Chem
from rdkit import RDLogger
from rdkit import Chem
from torch_geometric.data import Batch, Data
from tqdm import tqdm
import warnings
RDLogger.DisableLog('rdApp.*')
np.set_printoptions(threshold=np.inf)
warnings.filterwarnings('ignore')
from torch_geometric.data import Batch

# %%
def one_of_k_encoding(k, possible_values):
    if k not in possible_values:
        raise ValueError(f"{k} is not a valid value in {possible_values}")
    return [k == e for e in possible_values]


def one_of_k_encoding_unk(x, allowable_set):
    if x not in allowable_set:
        x = allowable_set[-1]
    return list(map(lambda s: x == s, allowable_set))


def atom_features(mol, graph, atom_symbols=['C', 'N', 'O', 'S', 'F', 'P', 'Cl', 'Br', 'I'], explicit_H=True):

    for atom in mol.GetAtoms():
        results = one_of_k_encoding_unk(atom.GetSymbol(), atom_symbols + ['Unknown']) + \
                one_of_k_encoding_unk(atom.GetDegree(),[0, 1, 2, 3, 4, 5, 6]) + \
                one_of_k_encoding_unk(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6]) + \
                one_of_k_encoding_unk(atom.GetHybridization(), [
                    Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
                    Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.
                                        SP3D, Chem.rdchem.HybridizationType.SP3D2
                    ]) + [atom.GetIsAromatic()]
        # In case of explicit hydrogen(QM8, QM9), avoid calling `GetTotalNumHs`
        if explicit_H:
            results = results + one_of_k_encoding_unk(atom.GetTotalNumHs(),
                                                    [0, 1, 2, 3, 4])

        atom_feats = np.array(results).astype(np.float32)

        graph.add_node(atom.GetIdx(), feats=torch.from_numpy(atom_feats))

def get_edge_index(mol, graph):
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()

        graph.add_edge(i, j)

def mol2graph(mol):
    graph = nx.Graph()
    atom_features(mol, graph)
    get_edge_index(mol, graph)

    graph = graph.to_directed()
    x = torch.stack([feats['feats'] for n, feats in graph.nodes(data=True)])
    if not graph.edges(data=False):
        return [], [], True
    edge_index = torch.stack([torch.LongTensor((u, v)) for u, v in graph.edges(data=False)]).T

    return x, edge_index, False

def inter_graph(ligand, pocket, dis_threshold = 5.):
    atom_num_l = ligand.GetNumAtoms()
    atom_num_p = pocket.GetNumAtoms()

    graph_inter = nx.Graph()
    pos_l = ligand.GetConformers()[0].GetPositions()
    pos_p = pocket.GetConformers()[0].GetPositions()
    dis_matrix = distance_matrix(pos_l, pos_p)
    node_idx = np.where(dis_matrix < dis_threshold)
    for i, j in zip(node_idx[0], node_idx[1]):
        graph_inter.add_edge(i, j+atom_num_l) 

    graph_inter = graph_inter.to_directed()
    edge_index_inter = torch.stack([torch.LongTensor((u, v)) for u, v in graph_inter.edges(data=False)]).T

    return edge_index_inter

# %%
def mols2graphs(complex_path_list, label, vina_score_list, save_path, dis_threshold):
    data_list = []
    fail_path = []
    for i, complex_path in enumerate(complex_path_list):
        if os.path.exists(complex_path):
            with open(complex_path, 'rb') as f:
                ligand, pocket = pickle.load(f)
        else:
            print('Complex file not found:', complex_path)
            fail_path.append(complex_path)
            continue

        atom_num_l = ligand.GetNumAtoms()
        atom_num_p = pocket.GetNumAtoms()

        pos_l = torch.FloatTensor(ligand.GetConformers()[0].GetPositions())
        pos_p = torch.FloatTensor(pocket.GetConformers()[0].GetPositions())
        x_l, edge_index_l, fail_l = mol2graph(ligand)
        x_p, edge_index_p, fail_p = mol2graph(pocket)
        if fail_l or fail_p:
            print('Failed to read complex file:', complex_path)
            fail_path.append(complex_path)
            continue

        x = torch.cat([x_l, x_p], dim=0)
        edge_index_intra = torch.cat([edge_index_l, edge_index_p+atom_num_l], dim=-1)
        try:
            edge_index_inter = inter_graph(ligand, pocket, dis_threshold=dis_threshold)
        except:
            print('Failed to read complex edges:', complex_path)
            fail_path.append(complex_path)
            continue
            
        y = torch.FloatTensor([label])
        vina_score = torch.FloatTensor([vina_score_list[i]])
        pos = torch.concat([pos_l, pos_p], dim=0)
        split = torch.cat([torch.zeros((atom_num_l, )), torch.ones((atom_num_p,))], dim=0)
        pocket = complex_path.split('/')[-1].split('_')[2]
        
        data = Data(x=x, edge_index_intra=edge_index_intra, edge_index_inter=edge_index_inter, y=y, vina_score=vina_score, pos=pos, pocket=pocket, split=split)
        data_list.append(data)
        
    if len(fail_path) == len(complex_path_list):
        return complex_path_list
    else:
        merged_data = Batch.from_data_list(data_list)
        torch.save(merged_data, save_path)
        return fail_path

# %%
class PLIDataLoader(DataLoader):
    def __init__(self, data, **kwargs):
        super().__init__(data, collate_fn=data.collate_fn, **kwargs)

class GraphDataset(Dataset):
    """
    This class is used for generating graph objects using multi process
    """
    def __init__(self, data_dir, ground_dir, data_df, dis_threshold=5, graph_type='Graph_GIGN', num_process=8, create=False):
        self.data_dir = data_dir
        self.ground_dir = ground_dir
        self.data_df = data_df
        self.dis_threshold = dis_threshold
        self.graph_type = graph_type
        self.create = create
        self.graph_paths = None
        self.complex_ids = None
        self.num_process = num_process
        self.mean, self.std = 0, 1
        self._pre_process()

    def _pre_process(self):
        data_dir = self.data_dir
        ground_dir = self.ground_dir
        data_df = self.data_df
        graph_type = self.graph_type

        complex_path_list, complex_id_list, pIC50_list, vina_score_list, graph_path_list, dis_threshold_list = [], [], [], [], [], []
        file_list = os.listdir(data_dir)
        pocket_list_all = [x for x in file_list if x.split('_')[-1] == f'{self.dis_threshold}A.rdkit']
        ground_file_list = os.listdir(ground_dir)
        ground_pocket_list_all = [x for x in ground_file_list if x.split('_')[-1] == f'{self.dis_threshold}A.rdkit']
        not_found_list = []
        for i, row in data_df.iterrows():
            cid, pIC50 = row['ChEMBL_Compound_ID'], float(row['pIC50'])
            complex_path_list_cid, complex_id_list_cid, pIC50_list_cid, vina_score_list_cid, graph_path_list_cid, dis_threshold_list_cid = [], [], [], [], [], []
            graph_path = os.path.join(data_dir, f"{cid}_{graph_type}_{self.dis_threshold}A.pyg")
            pocket_list = [x for x in pocket_list_all if x.split('_')[0] == cid]
            ground_pocket_list = [x for x in ground_pocket_list_all if x.split('_')[0] == cid]
            if pocket_list:
                for pocket in pocket_list:
                    pocket_idx = pocket.split('_')[2]
#                     if int(pocket_idx) <= 3:
                    complex_path = os.path.join(data_dir, f"{cid}_Complex_{pocket_idx}_{self.dis_threshold}A.rdkit")
                    complex_path_list_cid.append(complex_path)
                    vina_score_list_cid.append(row[f'Pocket_{pocket_idx}_QVina-W_Score'])
                if ground_pocket_list:
                    for pocket in ground_pocket_list:
                        pocket_idx = pocket.split('_')[2]
                        complex_path = os.path.join(ground_dir, f"{cid}_Complex_{pocket_idx}_{self.dis_threshold}A.rdkit")
                        complex_path_list_cid.append(complex_path)
                        vina_score_list_cid.append(-1e2)
                complex_path_list.append(complex_path_list_cid)
                vina_score_list.append(vina_score_list_cid)
                complex_id_list.append(cid)
                pIC50_list.append(pIC50)
                graph_path_list.append(graph_path)
                dis_threshold_list.append(self.dis_threshold)
            else:  
                not_found_list.append(graph_path)

        self.mean, self.std = np.mean(pIC50_list), np.std(pIC50_list)
        if self.create:
            print('Generate complex graph...')
            # multi-thread processing
            pool = multiprocessing.Pool(self.num_process)
            complex_paths_list = [len(x) for x in complex_path_list]
            vina_scores_list = [len(x) for x in vina_score_list]
            print(len(complex_paths_list), len(pIC50_list), len(vina_scores_list), len(graph_path_list), len(dis_threshold_list))
            for complex_path, pIC50, vina_score ,graph_path, dis_threshold in zip(complex_path_list, pIC50_list, vina_score_list, graph_path_list, dis_threshold_list):
                not_found_path = mols2graphs(complex_path, pIC50, vina_score ,graph_path, dis_threshold)
                if len(not_found_path) == len(complex_path):
                    not_found_list.append(graph_path)
            with open(data_dir + f'/not_found_list.pkl', 'wb') as f:
                pickle.dump(not_found_list, f)
            pool.close()
            pool.join()
        
        with open(data_dir + '/not_found_list.pkl', 'rb') as f:
            not_found_list = pickle.load(f)
        self.complex_ids = complex_id_list
        self.graph_paths = graph_path_list
        self.graph_paths = [x for x in graph_path_list if x not in not_found_list]

    def __getitem__(self, idx):
        data = torch.load(self.graph_paths[idx])
        match = re.search(r'CHEMBL(\d+)_', self.graph_paths[idx])
        chembl_id = match.group(0)
        data['idx'] = chembl_id[:-1]
        return data

    def collate_fn(self, batch):
        return Batch.from_data_list(batch)

    def __len__(self):
        return len(self.graph_paths)


data_root = './data/CHEMBL202/1boz/qvina/'
ground_root = './data/CHEMBL202/pdb_with_activity/'
set_list = ['test', 'train_1', 'train_2', 'train_3', 'train_4', 'train_5']
for dataset in tqdm(set_list):
    data_df = pd.read_csv(os.path.join(data_root, f'CHEMBL202_pIC50_{dataset}.csv'))
    dataset_path = data_root + dataset + '/'
    ground_path = ground_root + dataset + '/'
    data_set = GraphDataset(dataset_path, ground_path, data_df, graph_type='Graph_GIGN', dis_threshold=5, create=True)
    print(len(data_set))
    data_loader = PLIDataLoader(data_set, batch_size=10, shuffle=True, num_workers=4)
    for data in data_loader:
        # print(data) --> DataBatch(x=[2481, 35], y=[8], pos=[2481, 3], edge_index_intra=[2, 4884], edge_index_inter=[2, 4456], split=[2481], pocket=[8], batch=[2481], ptr=[9])
        data, pocket, idx, vina_score = data, data.pocket, data.idx, data.vina_score
        print(f'Loading {len(idx)} compounds with pockets: {pocket}')

# Assemble the same compound with different pockets

In [None]:
from tqdm import tqdm
import shutil

data_dir = '/home/yueming/Drug_Discovery/Baselines/GIGN-main/GIGN/data/CHEMBL202/1boz/'
input_set_paths = os.listdir(data_dir)
input_sets = [x for x in input_set_paths if x.split('.')[-1] != 'pkl']
for set_name in tqdm(input_sets):
    set_path = data_dir + set_name + '/'
    pocket_names = os.listdir(set_path)
    for pocket_name in pocket_names:
        pocket = pocket_name.split('_')[-1]
        pocket_dir = data_dir + set_name + '/' + pocket_name + '/'
        file_paths = os.listdir(pocket_dir)
        input_rdkit = [file for file in file_paths if file.split('.')[-1] == 'rdkit']
        for input_complex in tqdm(input_rdkit):
            # 提取文件名（不包含扩展名）
            shutil.move(pocket_dir + input_complex, set_path + input_complex.replace('Complex', f'Complex_{pocket}'))