# In [2]:
import MDAnalysis as mda
import numpy as np
import pandas as pd
import os
import warnings
from MDAnalysis.analysis import align

# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning, module="MDAnalysis.topology.PDBParser")
warnings.filterwarnings("ignore", category=DeprecationWarning, module="MDAnalysis.coordinates.DCD")

def process_trajectory(traj_dir, pdb_file, dcd_file, residues_of_interest):
    """Return trajectory-averaged coordinates for the atoms of selected residues.

    The trajectory is loaded together with its topology, every frame is
    RMSD-aligned on all atoms (to remove global rotation/translation), and
    the positions of the requested residues' atoms are averaged over frames.

    Args:
        traj_dir: Directory that contains ``dcd_file``.
        pdb_file: Path to the PDB topology file.
        dcd_file: File name of the DCD trajectory inside ``traj_dir``.
        residues_of_interest: Container of ``"<resname> <resid>"`` labels
            (e.g. ``"CYS 49"``); an atom is kept when its label is ``in``
            this container.

    Returns:
        A ``pandas.DataFrame`` with columns ``Residue``, ``Residue_ID``,
        ``Atom``, ``X``, ``Y``, ``Z`` (one row per distinct
        residue-name / residue-id / atom-name combination, sorted by those
        keys), or ``None`` when either input file is missing.
    """
    dcd_path = os.path.join(traj_dir, dcd_file)

    # Bail out early (with a message) when an input file is missing.
    if not os.path.exists(pdb_file):
        print(f"PDB file not found: {pdb_file}")
        return None

    if not os.path.exists(dcd_path):
        print(f"DCD file not found: {dcd_path}")
        return None

    # Load the trajectory and topology
    u = mda.Universe(pdb_file, dcd_path)

    # RMSD alignment on all atoms to remove global rotation/translation.
    # in_memory=True makes the aligned coordinates available on `u` itself.
    align.AlignTraj(u, u, select="all", in_memory=True).run()

    # Pick the atoms of interest once, instead of recording every atom of
    # every frame and filtering afterwards.  The label format matches the
    # entries expected in `residues_of_interest`.
    all_atoms = u.atoms
    keep = np.array(
        [
            f"{resname} {resid}" in residues_of_interest
            for resname, resid in zip(all_atoms.resnames, all_atoms.resids)
        ],
        dtype=bool,
    )
    picked = all_atoms[keep]

    # Accumulate in float64 so the sum over many frames does not lose
    # precision.
    coord_sum = np.zeros((picked.n_atoms, 3), dtype=np.float64)
    frame_count = 0
    for _ in u.trajectory:
        coord_sum += picked.positions
        frame_count += 1

    if frame_count:
        mean_xyz = coord_sum / frame_count
    else:
        # No frames were read: report no atoms rather than fabricated zeros.
        picked, mean_xyz = picked[:0], coord_sum[:0]

    per_atom = pd.DataFrame({
        'Residue': picked.resnames,
        'Residue_ID': picked.resids,
        'Atom': picked.names,
        'X': mean_xyz[:, 0],
        'Y': mean_xyz[:, 1],
        'Z': mean_xyz[:, 2],
    })

    # Atoms that share residue name, residue id and atom name are merged
    # into one row; every atom contributes the same number of frames, so the
    # mean of the per-atom means equals the mean over all their samples.
    avg_df = per_atom.groupby(['Residue', 'Residue_ID', 'Atom']).mean().reset_index()

    return avg_df

def automate_trajectory_processing(
    base_dir,
    residues_of_interest,
    *,
    n_traj=100,
    dcd_file="fep_050_0.000.dcd",
    output_name="product_qatoms.csv",
):
    """Run ``process_trajectory`` over numbered ``trajNN`` folders.

    For each folder ``traj00`` .. ``traj{n_traj - 1}`` under ``base_dir``
    that contains ``dcd_file``, the averaged coordinates are computed using
    ``<base_dir>/minim.pdb`` as topology and written as CSV to
    ``<folder>/<output_name>``.  Progress and skipped folders are reported
    on stdout.

    Args:
        base_dir: Directory holding ``minim.pdb`` and the ``trajNN`` folders.
        residues_of_interest: Passed through to ``process_trajectory``.
        n_traj: Number of consecutive folders to visit, starting at
            ``traj00`` (default 100, i.e. ``traj00`` to ``traj99``).
        dcd_file: Trajectory file name looked up in every folder.
        output_name: File name of the CSV written into every folder.

    Returns:
        None.
    """
    pdb_file = os.path.join(base_dir, "minim.pdb")

    # Without the shared topology nothing can be processed.
    if not os.path.exists(pdb_file):
        print(f"PDB file not found: {pdb_file}")
        return

    for i in range(n_traj):
        traj_dir = os.path.join(base_dir, f"traj{i:02d}")

        if not os.path.exists(os.path.join(traj_dir, dcd_file)):
            print(f"File not found: {traj_dir}/{dcd_file}")
            continue

        print(f"Processing: {traj_dir}/{dcd_file}")
        avg_df = process_trajectory(traj_dir, pdb_file, dcd_file, residues_of_interest)

        if avg_df is None:
            print(f"Skipping: {traj_dir} due to missing files or processing error.")
            continue

        # Save the result to a CSV file next to the trajectory
        output_csv = os.path.join(traj_dir, output_name)
        avg_df.to_csv(output_csv, index=False)
        print(f"Saved: {output_csv}")

# Example usage: root folder holding minim.pdb and the traj00..traj99
# sub-folders, plus the residues ("<resname> <resid>") whose averaged
# coordinates are wanted.
base_dir = "/home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54"
residues_of_interest = ["CYS 49", "GLN 83", "PRX 196"]

# Run the automated processing across all traj00 to traj99 folders.
# Guarded so that importing this file does not start the batch run; when the
# file is executed as a script (or as a notebook cell, where __name__ is
# also "__main__") it runs as before.
if __name__ == "__main__":
    automate_trajectory_processing(base_dir, residues_of_interest)


Processing: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54/traj00/fep_050_0.000.dcd


Saved: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54/traj00/product_qatoms.csv
Processing: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54/traj01/fep_050_0.000.dcd
Saved: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54/traj01/product_qatoms.csv
Processing: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54/traj02/fep_050_0.000.dcd
Saved: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54/traj02/product_qatoms.csv
Processing: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54/traj03/fep_050_0.000.dcd
Saved: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/combination_4/mousecys/1-prep/47_48_52_54/traj03/product_qatoms.csv
Processi