In [None]:
# Do not re-run this cell once you have filtered the templates you want to use.
# !"./2.download_pdb.sh"

- Note that each code name in `build_profile.prf` (e.g. "5ef9A") is actually the protein code ("5ef9") followed by the chain ("A").

- To filter out a template, set its status (the third column of its line) to "FAILED" in the `pdb_codes.txt` file.

- To select the most appropriate template for our query sequence from among the similar structures, we will use the `Alignment.compare_structures()` command to assess the structural and sequence similarity between the candidate templates.

In [None]:
from modeller import *
from pathlib import Path


# Directory that holds `pdb_codes.txt` and the template structure files used by the
# cells below. The path is relative, so it is resolved against the notebook's
# working directory.
pdb_dir = Path('../data/pdb')

In [None]:
# Read the template list from pdb_codes.txt.
# Each non-blank line must hold at least three whitespace-separated fields; the
# first three are kept and later unpacked as (pdb code, chain, status).
# Entries whose third field is exactly 'FAILED' are dropped.
codes_path = pdb_dir / 'pdb_codes.txt'

pdbs = []
for line_no, line in enumerate(codes_path.read_text().splitlines(), start=1):
    fields = line.split()
    if not fields:
        # Blank or whitespace-only line: nothing to parse.
        continue
    if len(fields) < 3:
        # Fail with the offending line instead of a bare IndexError on fields[2].
        raise ValueError(
            '{}, line {}: expected at least 3 whitespace-separated fields, got {!r}'
            .format(codes_path, line_no, line)
        )
    if fields[2] != 'FAILED':
        pdbs.append(fields[:3])

print(len(pdbs))
pdbs


In [None]:
# Start from a fresh Modeller environment and an empty alignment, then add one
# entry per (code, chain, status) triple kept in `pdbs`.
env = Environ()
aln = Alignment(env)

for code, chain_id, _ in pdbs:
    # Structure file path is built from the shared PDB directory and the bare code.
    structure_path = str(pdb_dir.joinpath(code))
    # Restrict the model to the requested chain only.
    chain_span = ('FIRST:{}'.format(chain_id), 'LAST:{}'.format(chain_id))
    template = Model(env, file=structure_path, model_segment=chain_span)
    aln.append_model(
        template,
        atom_files=structure_path,
        align_codes=code + chain_id,  # e.g. "5ef9" + "A" -> "5ef9A"
    )


In [None]:
# Improve the alignment by computing a multiple sequence alignment over every
# entry appended to `aln` so far. The return value is not used; the following
# cells keep working with `aln` itself.
aln.malign()


In [None]:
# Do a least-squares superposition of the 3D structures, using the multiple
# sequence alignment from the previous cell as its starting point. This cell must
# therefore run after `aln.malign()`.
aln.malign3d()

In [None]:

# Assess structural and sequence similarity between the candidate templates.
# The same matrix file is handed to `id_table` and then to `dendrogram`, so it is
# named once here to keep the two calls in sync.
matrix_path = 'family.mat'

aln.compare_structures()
aln.id_table(matrix_file=matrix_path)
env.dendrogram(matrix_file=matrix_path, cluster_cut=-1.0)

In [None]:
# Run the same comparison pipeline on six hard-coded structures.
# NOTE(review): these codes are fixed here rather than read from pdb_codes.txt —
# presumably a reference set; confirm that this cell is still needed.
reference_entries = (('1b8p', 'A'), ('1bdm', 'A'), ('1civ', 'A'),
                     ('5mdh', 'A'), ('7mdh', 'A'), ('1smk', 'A'))

# Build the set in its own alignment. Appending to `aln` would mix these entries
# into the template alignment that was already aligned, superposed and compared
# above, and every re-run of this cell would append the same six entries again.
reference_aln = Alignment(env)

for (pdb, chain) in reference_entries:
    # NOTE(review): unlike the template cell, the file is referenced by bare code
    # (not via pdb_dir) — verify where these structure files are expected to be.
    m = Model(env, file=pdb, model_segment=('FIRST:'+chain, 'LAST:'+chain))
    reference_aln.append_model(m, atom_files=pdb, align_codes=pdb+chain)

reference_aln.malign()
reference_aln.malign3d()
reference_aln.compare_structures()
# NOTE(review): this reuses 'family.mat', the same matrix file name the earlier
# comparison cell passes to id_table.
reference_aln.id_table(matrix_file='family.mat')
env.dendrogram(matrix_file='family.mat', cluster_cut=-1.0)