In [3]:
# Location of the plain-text file holding one sequence per line.
sequence_path = "seqs.dat"

# Slurp the whole file first, then split it into lines outside the
# `with` block so the handle is released as early as possible.
with open(sequence_path, 'r') as handle:
    raw_text = handle.read()
sequences = raw_text.splitlines()

# Echo every sequence, one per output line.
for seq in sequences:
    print(seq)

AAAAAAAAAAAAAAAAAAAAA
GGGGGGGGGGGGGGGGGGGGG
ATATATATATATATATATATA
ACACACACACACACACACACA
AGAGAGAGAGAGAGAGAGAGA
GCGCGCGCGCGCGCGCGCGCG
GTGTGTGTGTGTGTGTGTGTG
GAGAGAGAGAGAGAGAGAGAG
TAGAAAAGACTTTTCAGATGC
CCGTGGCGGCCAATCCCCACC
AAGGGCGGGCTATACGGCGTC
GTATTTTACCTTTATACGCGC
GGTTCTAGTATAGCGCCATTG
TTGTAAAAGGTGGAGGCGAAA
TCAACGAGCAGCACGCATTCT
TATGGCTTGCCAGGAGGGAGT
TTGACACTGTATAGCATTATC
TATAAAAGTAACCCATATGCT
ATACGGTGGACCCTGTTCGTT
TTACGACTTACTTCACTTTGC
CCACTAATCGTCGGTCGACAG
TGTACGTGACTGAGAGACAAC
GGCATATATATTAGACGGCCA
CGCGGCAGTTGATCTTCAGCG
GGTAGTTTGTAATTTCGAGGG
TCGCGTTCACGTGGTTGGGCC
AGCAACCCGATAGGAGTAAGA
CTACAGTGCTGAGACCGATAA
ACGAGCTAATCGCGATGTTGG
ACGTAAATGCATTTGTATAGT
AGTGTGATAAGTTGTACGTGT
CTAAAGCACTGACGTCGCCGG
TGCTTCCATACCGTGTCTGGT
CGAGAGCGTCCCTTACCGACC
GGAACGTCGCTGCTTTGGAAG
CGACTGACTTGTTTCCCCAGG
CGAAGGCTGGCGGTTGCACCG
CGATAAGCGGGTAACGCCCGC
CTTGCATCCAAGGCGATTTCT
GGAACTGCTGTCCCACGCTTC
GGGGCGTTACGAAGCGGATGC
TCGCTATTATCACCTAATGCC
CCTCAGATCACCAACCATAGC
CCAGGGGTCCCAGACTCAGCC
TTTGCCTGTGTAGGTGCAATG
GGTAACATTT

In [16]:
from Bio.PDB import PDBParser, PDBIO, Superimposer
import sys

# Constants
# Atom names a residue must contain to count as having a "full backbone";
# the helper functions below look these up by name on every residue.
BACKBONE_ATOMS = [
    "P",
    "O5'",
    "C5'",
    "C4'",
    "C3'",
    "O3'",
]
# Adjust as needed
# Residue numbers used for the alignment: 2..15 on the complex side and
# 5..18 on the mutant side (both ranges hold 14 residues).
RESIDUES_COMPLEX = [*range(2, 16)]
RESIDUES_MUTANT = [*range(5, 19)]

# Utility: Get residues with full backbone atoms
def get_residues_with_full_backbone(structure, chain_id, allowed_residues):
    """Return the numbers of the allowed residues that have every backbone atom.

    Args:
        structure: object indexable as ``structure[0][chain_id]`` that yields
            residues when iterated (the script passes Bio.PDB structures).
        chain_id: identifier of the chain to scan inside the first model.
        allowed_residues: container of residue numbers to consider; residues
            whose ``id[1]`` is not in it are ignored.

    Returns:
        A list of residue numbers (``residue.id[1]``), in chain iteration
        order, for residues that contain every name in ``BACKBONE_ATOMS``.
    """
    chain = structure[0][chain_id]
    return [
        res.id[1]
        for res in chain
        if res.id[1] in allowed_residues
        and all(name in res for name in BACKBONE_ATOMS)
    ]

# Utility: Get ordered backbone atoms for selected residues
def get_backbone_atoms_filtered(structure, chain_id, residue_ids):
    """Collect the backbone atoms of the selected residues as one flat list.

    Args:
        structure: object indexable as ``structure[0][chain_id]`` that yields
            residues when iterated (the script passes Bio.PDB structures).
        chain_id: identifier of the chain to scan inside the first model.
        residue_ids: container of residue numbers to keep; residues whose
            ``id[1]`` is not in it are skipped.

    Returns:
        A list of atom objects. Residues appear in chain iteration order (not
        in the order of ``residue_ids``), and within each residue the atoms
        follow the order of ``BACKBONE_ATOMS``. Residues missing any backbone
        atom contribute nothing.
    """
    selected = []
    for res in structure[0][chain_id]:
        is_wanted = res.id[1] in residue_ids
        if is_wanted and all(name in res for name in BACKBONE_ATOMS):
            selected.extend(res[name] for name in BACKBONE_ATOMS)
    return selected

# Load structures
parser = PDBParser(QUIET=True)
complex_structure = parser.get_structure("complex", "2ff0.pdb")
mutant_structure = parser.get_structure("mutant", "dna_pdbs/AAAAAAAAAAAAAAAAAAAAA.pdb")

print("Chains in complex structure:", [chain.id for chain in complex_structure[0]])
print("Chains in mutant structure:", [chain.id for chain in mutant_structure[0]])

# Get residues with full backbone
complex_res_B = set(get_residues_with_full_backbone(complex_structure, "B", RESIDUES_COMPLEX))
mutant_res_A = set(get_residues_with_full_backbone(mutant_structure, "A", RESIDUES_MUTANT))

print(f"Complex chain B residues with full backbone: {sorted(complex_res_B)}")
print(f"Mutant chain A residues with full backbone: {sorted(mutant_res_A)}")

# Pair residues by POSITION in the two ranges (the two structures use different
# residue numbering, so intersecting the residue numbers would be wrong), and
# keep only the pairs that are complete on both sides. Comparing total atom
# counts alone is not enough: if a different residue is incomplete in each
# structure the counts still match while every later atom is paired with the
# wrong partner.
if len(RESIDUES_COMPLEX) != len(RESIDUES_MUTANT):
    sys.exit(
        f"Residue ranges differ in length: {len(RESIDUES_COMPLEX)} vs {len(RESIDUES_MUTANT)}"
    )

paired_residues = [
    (res_complex, res_mutant)
    for res_complex, res_mutant in zip(RESIDUES_COMPLEX, RESIDUES_MUTANT)
    if res_complex in complex_res_B and res_mutant in mutant_res_A
]
if not paired_residues:
    sys.exit("Error: No common residues with full backbone found for alignment.")

paired_complex_ids = [res_complex for res_complex, _ in paired_residues]
paired_mutant_ids = [res_mutant for _, res_mutant in paired_residues]

# Get backbone atoms
# NOTE(review): atoms come back in chain iteration order, so the pairing below
# assumes both chains list these residues in the same relative order and that
# each residue number occurs once per chain -- confirm for new inputs.
complex_atoms_B = get_backbone_atoms_filtered(complex_structure, "B", paired_complex_ids)
mutant_atoms_A = get_backbone_atoms_filtered(mutant_structure, "A", paired_mutant_ids)

# Safety net: the superimposer needs two equally long atom lists.
if len(complex_atoms_B) != len(mutant_atoms_A):
    sys.exit(f"Backbone atom counts differ: {len(complex_atoms_B)} vs {len(mutant_atoms_A)}")

# Superimpose: the complex atoms are the fixed reference, and the fitted
# transformation is applied to every atom of the mutant structure.
sup = Superimposer()
sup.set_atoms(complex_atoms_B, mutant_atoms_A)
sup.apply(mutant_structure.get_atoms())

print("RMSD:", sup.rms)

# Replace chains B and C in complex with mutant A and B
def replace_chain(complex_struct, mutant_struct, chain_id_complex, chain_id_mutant, new_chain_id):
    """Swap one chain of the complex for a copy of a chain from the mutant.

    Only the first model (index 0) of each structure is touched. The mutant
    chain is copied, so the mutant structure itself is left unchanged.

    Args:
        complex_struct: structure whose first model receives the new chain.
        mutant_struct: structure whose first model provides the chain.
        chain_id_complex: id of the chain to remove from the complex; if the
            complex has no such chain nothing is removed and the copy is
            simply added.
        chain_id_mutant: id of the chain to copy from the mutant.
        new_chain_id: id given to the copied chain inside the complex.

    Raises:
        KeyError: if the mutant's first model has no chain ``chain_id_mutant``.
            This is checked before anything is detached, so the complex is
            never left with a chain removed and no replacement added.
    """
    model_c = complex_struct[0]
    model_m = mutant_struct[0]

    # Validate the source first: detaching before this check would silently
    # delete the complex chain whenever the mutant chain is missing.
    if chain_id_mutant not in model_m:
        raise KeyError(
            f"Chain {chain_id_mutant!r} not found in mutant structure; "
            f"chain {chain_id_complex!r} of the complex was left untouched"
        )

    # Copy and rename the mutant chain
    new_chain = model_m[chain_id_mutant].copy()
    new_chain.id = new_chain_id

    if chain_id_complex in model_c:
        model_c.detach_child(chain_id_complex)
    model_c.add(new_chain)


# Swap the complex's chains B and C for copies of the mutant's chains A and B.
replace_chain(complex_structure, mutant_structure, "B", "A", "B")
replace_chain(complex_structure, mutant_structure, "C", "B", "C")


# Save output
# Keep the path in one place so the confirmation message always names the
# file that was actually written (the message used to report a different name).
output_path = "complex_with_mutant_dna2.pdb"
io = PDBIO()
io.set_structure(complex_structure)
io.save(output_path)
print(f"Saved: {output_path}")


Chains in complex structure: ['B', 'C', 'A']
Chains in mutant structure: ['A', 'B']
Complex chain B residues with full backbone: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
Mutant chain A residues with full backbone: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
RMSD: 0.8931652040943187
Saved: complex_with_mutant_dna.pdb


In [22]:
import os

# Listing a bytes path yields bytes entries, so each one is decoded back to
# text before its type is reported.
directory = os.fsencode("dna_pdbs")
for raw_name in os.listdir(directory):
    file = os.fsdecode(raw_name)
    print(type(file))


<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

In [23]:
# Display the value left in `file` by the directory loop above, i.e. the
# name of the last entry it visited.
file

'GCTGCTGGAGTTCATGCCCTA.pdb'