In [1]:
import MDAnalysis as mda
import numpy as np
import matplotlib.pyplot as plt
import sys,os
import math
from tqdm.autonotebook import tqdm as tq
import psutil
from concurrent.futures import ProcessPoolExecutor, as_completed

In [2]:
### Pin this process to the first 16 CPUs of the computer.
### On the authors' machine the first 16 CPUs are the fastest. If you do not need this, comment these lines out.
p = psutil.Process(os.getpid())
# cpu_affinity() does not exist on every platform, and it rejects CPU ids the
# machine does not have, so only request CPUs that are actually present.
if hasattr(p, 'cpu_affinity'):
    p.cpu_affinity(list(range(min(16, os.cpu_count() or 1))))

### Read in pdb and dcd files with MDAnalysis, replace with your trajectory files
u = mda.Universe('trajectory/1jwp_oh2.pdb', 
                 'trajectory/1jwp_noaligned_oh2.dcd')

### Create directory for separating PDBs
directory = '/scratch/users/tq19b/analysis/crypwater_github/separate_pdbs/'
# exist_ok avoids the check-then-create race and makes the cell safe to re-run
os.makedirs(directory, exist_ok=True)

### Write one PDB per trajectory frame, named by zero-padded frame counter (0000.pdb, 0001.pdb, ...)
# The 'all' selection does not depend on the frame, so build the AtomGroup once;
# its coordinates are taken from whichever frame is current when write() is called.
Atoms = u.select_atoms('all')
for frame_index, ts in enumerate(tq(u.trajectory)):
    file_path = os.path.join(directory, '{:04d}.pdb'.format(frame_index))
    Atoms.write(file_path)




  0%|          | 0/3205 [00:00<?, ?it/s]



In [3]:
# Box length taken from the first entry of the current frame's dimensions
dim_float = u.trajectory.ts.dimensions[0]

# Smallest even integer >= the box length: round up, then bump odd values by one
dim_rounded = math.ceil(dim_float)
dim_rounded += dim_rounded % 2

dim = dim_rounded

# Input folder (one PDB per frame) and output folder (PDBs with copied waters
# for the later density calculation)
pdb_dir = '/scratch/users/tq19b/analysis/crypwater_github/separate_pdbs/'
output_dir = '/scratch/users/tq19b/analysis/crypwater_github/copied_pdbs/pdbs'

# Make sure the output folder is there before anything is written to it
os.makedirs(output_dir, exist_ok=True)

# Collect the per-frame PDBs in numeric order of the file stem (0000, 0001, ...)
pdb_files = sorted(
    (entry for entry in os.listdir(pdb_dir) if entry.endswith('.pdb')),
    key=lambda entry: int(entry.split('.')[0]),
)

# Six translation vectors of length `dim`, ordered +x, -x, +y, -y, +z, -z
directions = [sign * dim * axis
              for axis in np.eye(3, dtype=int)
              for sign in (1, -1)]

def process_pdb_file(pdb_file_path):
    """Write a copy of one PDB with translated duplicates of its OH2 atoms appended.

    The file is loaded, the atoms named ``OH2`` are selected (presumably the
    water oxygens -- confirm against the topology), and one shifted copy of
    that selection is produced for every vector in the module-level
    ``directions`` list. The original atoms followed by all shifted copies are
    placed in a fresh universe carrying only ``type`` and ``name`` attributes,
    which is written to ``output_dir`` as ``expanded_<original file name>``.

    Parameters
    ----------
    pdb_file_path : str
        Path of the PDB file to expand.

    Returns
    -------
    None
        The result is the file written to ``output_dir``.
    """
    source = mda.Universe(pdb_file_path)
    oxygens = source.select_atoms('name OH2')

    # One translated coordinate set per direction, in the order of `directions`
    shifted_coords = [oxygens.positions + shift for shift in directions]
    n_copies = len(shifted_coords)

    # Original atoms first, then each shifted copy of the selection
    merged_positions = np.concatenate([source.atoms.positions, *shifted_coords])
    merged_types = np.concatenate([source.atoms.types] + [oxygens.types] * n_copies)
    merged_names = np.concatenate([source.atoms.names] + [oxygens.names] * n_copies)

    # Bare universe sized for every atom, then attach attributes and coordinates
    expanded = mda.Universe.empty(merged_positions.shape[0], trajectory=True)
    expanded.add_TopologyAttr('type', merged_types)
    expanded.add_TopologyAttr('name', merged_names)
    expanded.load_new(merged_positions, order='fac')

    # Keep the source file name, prefixed so the output is recognisable
    target_name = f"expanded_{os.path.basename(pdb_file_path)}"
    expanded.atoms.write(os.path.join(output_dir, target_name))
    
# The multiprocessing part
# os.cpu_count() reports every CPU in the machine, not the CPUs this process is
# allowed to run on. When the process has a restricted CPU affinity (as set
# earlier in this notebook), sizing the pool from cpu_count() would start more
# workers than usable cores, so prefer the affinity mask where the platform has one.
if hasattr(os, 'sched_getaffinity'):
    num_processes = len(os.sched_getaffinity(0))
else:
    num_processes = os.cpu_count()  # Or set to a specific number, e.g., 32
pdb_file_paths = [os.path.join(pdb_dir, f) for f in pdb_files]  # Full paths

with ProcessPoolExecutor(max_workers=num_processes) as executor:
    # Submit all the tasks and get back Futures
    futures = [executor.submit(process_pdb_file, pdb_path) for pdb_path in pdb_file_paths]
    
    # Iterate over the futures as they complete and wrap in tqdm for the progress bar
    for future in tq(as_completed(futures), total=len(pdb_file_paths), desc="Processing PDB files"):
        # result() re-raises any exception from the worker, so failures are not silently lost
        _ = future.result()

Processing PDB files:   0%|          | 0/3205 [00:00<?, ?it/s]















