In [138]:
from os import listdir
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB import PPBuilder
from Bio import AlignIO
from Bio.Align import MultipleSeqAlignment
from Bio.SeqRecord import SeqRecord
from Bio.PDB import Superimposer, StructureAlignment
from utils.sequence import padding, seq_print
from Bio.Align.Applications import MuscleCommandline
from numpy import argmax
from Bio.PDB.Polypeptide import Polypeptide

In [6]:
# Shared mmCIF parser instance; later cells call parser.get_structure() on it.
parser = MMCIFParser()

In [21]:
# Local mmCIF files to compare. The four characters before the ".cif"
# extension are used as the structure id (here "1eum" and "2jd7").
locs = [
    "pdb_structures/eu/1eum.cif",
    "pdb_structures/jd/2jd7.cif",
]
# One parsed structure per entry in `locs`, in the same order.
structures = [
    parser.get_structure(cif_path[-8:-4], cif_path)
    for cif_path in locs
]


In [130]:
# For every structure, pick its longest polypeptide, remember both the
# peptide object and its sequence, then write the (padded) sequences to a
# FASTA file that the alignment cell reads.
ppb = PPBuilder()
sequences = []      # one SeqRecord per structure (longest peptide's sequence)
best_peptides = []  # the matching Polypeptide objects, same order as `locs`

for loc, structure in zip(locs, structures):
    pdb_id = loc[-8:-4]  # same id slice that was used when parsing the file
    print(pdb_id)
    peptides = ppb.build_peptides(structure)
    if not peptides:
        # argmax() on an empty list fails with an unhelpful message; be explicit.
        raise ValueError(f"No polypeptides could be built for structure {pdb_id!r}")
    seqs = [peptide.get_sequence() for peptide in peptides]

    # Index of the longest sequence (first one wins on ties).
    longest_seq = argmax([len(seq) for seq in seqs])
    # Index `seqs` (not the previously undefined `sorted_seqs`) so the stored
    # sequence and the stored peptide always refer to the same chain.
    sequences.append(SeqRecord(seqs[longest_seq], id=pdb_id))
    best_peptides.append(peptides[longest_seq])

# `padding` comes from utils.sequence; presumably it pads the records to a
# common length, which MultipleSeqAlignment requires -- TODO confirm.
padded = padding(sequences)
AlignIO.write(MultipleSeqAlignment(padded), "structure_alignment.fasta", "fasta")

1eum
2jd7


1

In [67]:

# Align the sequences written to structure_alignment.fasta with the external
# MUSCLE executable, then load the aligned result back in.
muscle_cline = MuscleCommandline(
    "muscle.exe",
    input="structure_alignment.fasta",
    out="structure_alignment_results.fasta",
)
stdout, stderr = muscle_cline()  # runs the tool; captured streams are not used below
aligned = AlignIO.read("structure_alignment_results.fasta", "fasta")

# Show every aligned record using the project's seq_print helper.
print("Aligned: ")
for seq in aligned:
    seq_print(seq)

Aligned: 
1eum|-LKPEMIEKLNEQMNLELYSSLLYQQMSAWCSYHTFEGAAAFLRRHAQEEMTHMQRLFDYLTDTGNLPRINTVESPFAEYSSLDELFQETYKHEQLITQKINELAHAAMTNQDYPTFNFLQWYVSEQHEEEKLFKSIIDKLSLAGKSGEGLYFIDKELSTLD-----
2jd7|MLSERMLKALNDQLNRELYSAYLYFAMAAYFEDLGLEGFANWMKAQAEEEIGHALRFYNYIYDRNGRVELDEIPKPPKEWESPLKAFEAAYEHEKFISKSIYELAALAEEEKDYSTRAFLEWFINEQVEEEASVKKILDKLKFAKDSPQILFMLDKELSARAPKLPG


In [139]:
# Trim both longest peptides to 161 residues each so they can be paired
# residue-by-residue. In the printed alignment 1eum starts with one gap
# column, hence 2jd7 is taken from index 1 while 1eum is taken from index 0.
# NOTE(review): the offsets are hard-coded from that alignment output --
# re-check them if the input structures change.
ec_chain = Polypeptide(best_peptides[0][:161])
pf_chain = Polypeptide(best_peptides[1][1:1 + 161])

In [140]:
# C-alpha atom lists of the two trimmed chains, in residue order.
ec_backbone, pf_backbone = (
    ec_chain.get_ca_list(),
    pf_chain.get_ca_list(),
)

In [144]:
# Fit the two C-alpha traces onto each other. The superimposer was never
# created in any visible cell, so build it here to keep the notebook
# runnable from a fresh kernel.
# NOTE(review): set_atoms(fixed, moving) -- ec_backbone is assumed to be the
# fixed set; swapping the arguments leaves the RMSD unchanged but yields the
# inverse rotation/translation. Confirm against the originally intended order.
superimposer = Superimposer()
superimposer.set_atoms(ec_backbone, pf_backbone)
# RMSD of the fit and the (rotation matrix, translation vector) pair.
superimposer.rms, superimposer.rotran

(1.2201657820892462, (array([[ 0.99437227,  0.07716325, -0.07259219],
         [ 0.04690803, -0.93505502, -0.35138547],
         [-0.09499174,  0.34600281, -0.93341235]]),
  array([52.45706762, 89.81185317, 29.96649629])))