In [5]:
import os
import Bio
from Bio.PDB import PDBParser
import nglview
import process
import pandas as pd

# Base directory that holds the input list; outputs go into a subfolder of it.
DESKTOP = '/mnt/c/users/serge/desktop'
OUT_DIR = os.path.join(DESKTOP, 'test_structures')

# Tunable settings; the names mirror the keyword arguments used in do_it.
num_atoms = 10
mol_weight = 150
distance_threshold = 1.7

# The list file is whitespace-separated structure IDs; normalise them to
# lower case while reading.
structures_file = os.path.join(DESKTOP, 'test_structs.txt')
with open(structures_file, 'r') as listing:
    test_structures = [sid.lower() for sid in listing.read().split()]

# Drop this one structure from the batch. list.remove raises ValueError if the
# ID is not present in the file.
test_structures.remove('2vrv')

In [6]:
import time
from multiprocessing import Pool, Process
# file_name = 'str_' + f'num_atoms={num_atoms}_mol_weight={mol_weight}_dt{distance_threshold}'
# df = pd.DataFrame(columns=['Structure', 'Num ligands', 'Ligand ID', ])

def do_it(name) -> tuple[str, float]:
    """Run the ligand-extraction pipeline for one structure and time it.

    Steps, all delegated to the ``process`` module: load the structure, collect
    its hetero residues, build a ``StructureGraph`` over them, cluster the
    graph, combine each cluster into one residue, filter the candidates, turn
    the survivors into structures and save them under ``OUT_DIR``.

    The thresholds are read from the notebook-level settings
    ``distance_threshold``, ``num_atoms`` and ``mol_weight`` (looked up at call
    time, the same way ``OUT_DIR`` is) instead of being repeated as literals,
    so changing the configuration cell changes what the workers do.

    Args:
        name: Structure identifier handed to ``process.get_structure``.

    Returns:
        ``(name, seconds)`` where ``seconds`` is the elapsed
        ``time.perf_counter`` time for this structure.
    """
    start_job = time.perf_counter()

    original_structure = process.get_structure(name, process.PDB_PATH)
    hetero_residues = process.get_hetero_residues(original_structure)

    structure = process.StructureGraph(hetero_residues)
    structure.create_graph(distance_threshold=distance_threshold)

    # One combined residue per cluster found in the graph.
    residues = [process.combine(cluster) for cluster in structure.get_clusters()]

    # rmsd has no notebook-level setting, so it stays a literal here.
    ligands = process.filter_ligands(
        residues, num_atoms=num_atoms, mol_weight=mol_weight, rmsd=2.0
    )
    ligand_structures = [
        process.resi_to_struct(ligand, original_structure=original_structure)
        for ligand in ligands
    ]
    process.save_ligand_to_file(ligand_structures, OUT_DIR, original_structure, debug=True)

    # df.loc[len(df)] = [original_structure.id, len(ligands), [ligand.id[0] for ligand in ligands]]

    job_duration = time.perf_counter() - start_job
    return name, job_duration

# Wall-clock timer for the whole batch, pool start-up and shutdown included.
start_time = time.perf_counter()

# Pool.map blocks until every structure has been processed and returns the
# (name, duration) pairs in the same order as test_structures.
with Pool() as pool:
    results = pool.map(do_it, test_structures)

# results is already a complete list, so the per-structure report does not
# need the pool to be open.
for structure_id, job_duration in results:
    print(f'Structure {structure_id} completed in {job_duration:.2f}s')

# df.to_excel(excel_writer=OUT_DIR + '/' + file_name + '.xlsx')

end_time = time.perf_counter()
total_duration = end_time - start_time

print(f"{len(test_structures)} structures took {total_duration:.2f}s total")

Structure 1a28 completed in 2.52s
Structure 1phg completed in 0.41s
Structure 2jap completed in 16.53s
Structure 1a4l completed in 11.81s
Structure 1pk9 completed in 4.17s
Structure 2no6 completed in 3.04s
Structure 1avn completed in 0.17s
Structure 1q0y completed in 0.23s
Structure 2noa completed in 3.30s
Structure 1ax9 completed in 0.29s
Structure 1q7a completed in 0.13s
Structure 2pnc completed in 8.66s
Structure 1azm completed in 0.22s
Structure 1qzr completed in 3.64s
Structure 2q6h completed in 8.57s
Structure 1bzm completed in 0.23s
Structure 1rqj completed in 4.50s
Structure 2qm9 completed in 0.98s
Structure 1c14 completed in 3.35s
Structure 1s9p completed in 7.30s
Structure 2qwx completed in 3.70s
Structure 1cea completed in 0.13s
Structure 1tnl completed in 0.14s
Structure 2uvn completed in 5.18s
Structure 1ceb completed in 0.15s
Structure 1tyl completed in 0.08s
Structure 2v95 completed in 0.34s
Structure 1cil completed in 0.22s
Structure 1uv6 completed in 1.24s
Structure 2v