In [4]:
import csv
from Bio.PDB import PDBParser
import numpy as np

# Structure file that the distances are measured in
pdb_file = "/home/hp/nayanika/github/GPX6/prep_structures/original_mousecys.pdb"

# Where the resulting distance table is written
csv_file_path = "/home/hp/nayanika/github/GPX6/table/mouse_dist.csv"

# Residue mapping table, one entry per position:
#     (position, residue in human Cys, residue in mouse Cys)
# NOTE(review): '-' in the mouse column presumably marks a position with no
# mouse residue (alignment gap) -- confirm against the source alignment.
selected_residues = [
    (3, 'N', 'K'),
    (4, 'R', 'S'),
    (16, 'I', 'V'),
    (22, 'L', 'N'),
    (24, 'L', 'I'),
    (25, 'N', 'D'),
    (27, 'E', 'G'),
    (29, 'Y', 'F'),
    (30, 'I', 'V'),
    (31, 'Q', 'N'),
    (33, 'K', 'Q'),
    (35, 'F', 'Y'),
    (40, 'V', 'I'),
    (47, 'A', 'S'),
    (48, 'Y', 'F'),
    (52, 'A', 'T'),
    (54, 'Q', 'T'),
    (60, 'A', 'T'),
    (67, 'N', 'P'),
    (69, 'G', 'N'),
    (71, 'I', 'T'),
    (74, 'A', 'G'),
    (87, 'T', 'K'),
    (99, 'C', 'R'),
    (102, 'S', 'G'),
    (104, 'F', 'Y'),
    (107, 'S', 'N'),
    (119, 'E', 'D'),
    (120, 'K', 'N'),
    (126, 'T', 'S'),
    (137, 'D', 'E'),
    (139, 'L', 'F'),
    (142, 'S', 'P'),
    (143, 'S', 'E'),
    (144, 'Q', 'H'),
    (148, 'E', 'D'),
    (173, 'H', 'R'),
    (177, 'Q', 'H'),
    (178, 'A', 'T'),
    (181, 'S', 'R'),
    (182, 'T', 'I'),
    (184, 'K', 'Q'),
    (188, 'L', 'M'),
    (192, 'K', 'N'),
    (194, 'F', 'T'),
    (195, 'N', 'S'),
    (196, 'T', '-'),
    (197, 'H', '-'),
]

def get_residue_ca_coord(structure, chain_id, res_id):
    """Return the C-alpha (CA) position of one residue as a NumPy array.

    The atom is looked up in the first model of the structure
    (``structure[0]``), then by ``chain_id``, ``res_id`` and the atom
    name ``'CA'``.

    Args:
        structure: Object indexable as ``structure[0][chain_id][res_id]['CA']``
            whose result exposes a ``coord`` attribute.
        chain_id: Key of the chain that holds the residue.
        res_id: Key of the residue within that chain.

    Returns:
        ``np.array`` built from the atom's ``coord``, or ``None`` when any
        step of the lookup raises ``KeyError`` (a message is printed in
        that case).
    """
    try:
        alpha_carbon = structure[0][chain_id][res_id]['CA']
    except KeyError:
        # Any missing level (model, chain, residue or atom) ends up here.
        print(f"Residue {res_id} not found or does not have a CA atom.")
        return None
    return np.array(alpha_carbon.coord)

# Load the PDB structure
parser = PDBParser(QUIET=True)
structure = parser.get_structure("protein", pdb_file)

# Chain used for the reference residue and for every residue in the table
# (assuming they are all on Chain X)
chain_id = 'X'
reference_residue_id = 49  # Residue to measure distances from (its C-alpha)

# Get the C-alpha coordinate of the reference residue. Without it no distance
# can be computed, so stop here instead of writing an empty table.
ref_coord = get_residue_ca_coord(structure, chain_id, reference_residue_id)
if ref_coord is None:
    # Report the configured residue id rather than a hard-coded number so the
    # message stays correct if reference_residue_id is changed.
    raise ValueError(
        f"Could not find the C-alpha coordinate for residue {reference_residue_id}."
    )

# Calculate the CA-CA distance from the reference residue to each selected
# residue. Residues whose coordinate cannot be found are left out of the
# table; get_residue_ca_coord has already printed a message for them.
distances = []
for pos, human_res, mouse_res in selected_residues:
    target_coord = get_residue_ca_coord(structure, chain_id, pos)
    if target_coord is not None:
        distance = np.linalg.norm(ref_coord - target_coord)
        distances.append((pos, human_res, mouse_res, distance))

# Save the distances to a CSV file.
# The header contains the non-ASCII character "Å", so the encoding is set
# explicitly instead of depending on the platform's default locale encoding.
# newline='' is what the csv module expects for files it writes to.
with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Position", "Human Cys", "Mouse Cys", "Distance (Å)"])
    for pos, human_res, mouse_res, dist in distances:
        # Distances are written rounded to two decimal places.
        writer.writerow([pos, human_res, mouse_res, f"{dist:.2f}"])

print(f"Distances saved to {csv_file_path}")


Residue 196 not found or does not have a CA atom.
Residue 197 not found or does not have a CA atom.
Distances saved to /home/hp/nayanika/github/GPX6/table/mouse_dist.csv
