In [32]:
from Bio import pairwise2
from glob import glob
from math import sqrt
import numpy as np
import pickle
from pyrosetta import *
from pyrosetta.rosetta.core.scoring import rmsd_atoms
from pyrosetta.rosetta.core.scoring import superimpose_pose
from pyrosetta.rosetta.core.select.residue_selector import ResidueIndexSelector
from pyrosetta.rosetta.core.simple_metrics.per_residue_metrics import \
    PerResidueRMSDMetric
from pyrosetta.rosetta.core.pack.task import TaskFactory
from pyrosetta.rosetta.core.pack.task.operation import \
    OperateOnResidueSubset, RestrictAbsentCanonicalAASRLT
from pyrosetta.rosetta.protocols.grafting import CCDEndsGraftMover
from pyrosetta.rosetta.protocols.minimization_packing import PackRotamersMover
from pyrosetta.rosetta.core.util import ChainbreakUtil

# General utility functions

In [2]:
def get_distance(c1, c2):
    """
    Returns the Euclidean distance between two Rosetta XYZ coordinate vectors.
    Each argument only needs to expose numeric x, y and z attributes.
    """
    # Per-axis offsets going from c1 to c2
    dx = c2.x - c1.x
    dy = c2.y - c1.y
    dz = c2.z - c1.z

    return sqrt(dx ** 2 + dy ** 2 + dz ** 2)

In [3]:
def find_res_coords(pose, resnum, atom_type='CA'):
    """
    Looks up one atom of one residue in a pose and returns its coordinates.
    The residue is addressed by resnum and the atom by name (CA by default).
    """
    return pose.residue(resnum).atom(atom_type).xyz()

In [4]:
def list_pose_coords(pose, atom_type='CA'):
    """
    For a given pose, list the coordinates of one atom type (CA by default)
    for every residue, ordered from residue 1 through the last residue.
    """
    # Residue numbering starts at 1, hence the shifted range
    residue_numbers = range(1, pose.total_residue() + 1)

    return [find_res_coords(pose, number, atom_type=atom_type)
            for number in residue_numbers]

In [5]:
def check_pose_continuity(pose):
    """
    Walks through the pose measuring the distance between the backbone C of 
    each residue and the backbone N of the residue that follows it. A chain 
    break is flagged wherever that distance deviates more than 10% from the 
    ideal 1.33 A. (This is how the ChainBreakFilter worked.) 

    Returns three values:
      * a bool that is True when no breaks were found,
      * the list of C-N distances, in sequence order,
      * the list of break positions. Each entry is the zero-based index of the 
        offending distance, so entry i refers to the bond between pose 
        residues i + 1 and i + 2.
    """
    # Acceptance window around the ideal peptide bond length
    shortest_ok = 0.9 * 1.33
    longest_ok = 1.1 * 1.33

    # Backbone N and C coordinates for every residue, in pose order
    n_coords = list_pose_coords(pose, atom_type='N')
    c_coords = list_pose_coords(pose, atom_type='C')

    c_n_distances = []
    break_sites = []

    # Pair each residue's C with the N of the residue after it
    for site, (this_c, next_n) in enumerate(zip(c_coords, n_coords[1:])):
        bond_length = get_distance(this_c, next_n)
        c_n_distances.append(bond_length)

        # Anything outside the window counts as a break
        if not shortest_ok <= bond_length <= longest_ok:
            break_sites.append(site)

    # The chain is continuous exactly when nothing was flagged
    return not break_sites, c_n_distances, break_sites

In [54]:
def align_protein_sections(pose_1, selector_1, pose_2, selector_2, mode='CA'):
    """
    Superimposes pose_2 onto pose_1 using only the selected regions of each 
    pose, and returns the result of superimpose_pose (described by the 
    original author as the RMSD value). 

    Each selection may be a Rosetta selector object, or a list/range of pose 
    numbers, which is converted to a ResidueIndexSelector. The mode picks the 
    atoms used for fitting: 'CA' (default) for alpha carbons, or 'BB' for the 
    full backbone.
    """
    def as_selector(selection):
        # Only plain lists and ranges are converted; anything else is assumed 
        # to already be a selector and is passed through untouched
        if type(selection) in (list, range):
            return ResidueIndexSelector(','.join(str(num) for num in selection))
        return selection

    reference_selector = as_selector(selector_1)
    moving_selector = as_selector(selector_2)

    # Only two fitting modes are supported
    assert mode in ['CA', 'BB']
    if mode == 'CA':
        rmsd_type = rmsd_atoms.rmsd_protein_bb_ca
    else:
        rmsd_type = rmsd_atoms.rmsd_protein_bb_heavy

    # The per-residue RMSD metric is used here only to build the atom map 
    # pairing selected atoms of pose_2 with those of pose_1
    metric = PerResidueRMSDMetric()
    metric.set_rmsd_type(rmsd_type)
    metric.set_comparison_pose(pose_1)
    metric.set_residue_selector_reference(reference_selector)
    metric.set_residue_selector(moving_selector)
    atom_map = metric.create_atom_id_map(pose_2)

    return superimpose_pose(pose_2, pose_1, atom_map)

In [52]:
def get_b_factor(pose, residue):
    """ 
    Returns the mean B-factor over the three backbone atoms (N, CA, C) of one 
    residue. The residue must be given as a pose number, as opposed to a PDB 
    number. 
    """
    # Bound lookups: B-factor by (residue, atom index), atom index by name
    bfactor_of = pose.pdb_info().bfactor
    index_of = pose.residue(residue).atom_index

    n_b, ca_b, c_b = (bfactor_of(residue, index_of(name)) 
                      for name in ('N', 'CA', 'C'))

    # Average of the three backbone atoms
    return (0.0 + n_b + ca_b + c_b) / 3

In [8]:
def variable_sliding_window(inp, min_size=0, max_size=0):
    """
    Collects every contiguous frame of a string or list, smallest frames 
    first. For each frame size the window slides from the start of the input 
    to the end, so an input of 'str' produces 
    ['s', 't', 'r', 'st', 'tr', 'str']. By default frame sizes run from 1 up 
    to the full length of the input; min_size and max_size (when non-zero) 
    restrict that range. 
    """
    total = len(inp)

    # A zero bound means "not set", so fall back to the defaults
    smallest = min_size or 1
    largest = max_size or total + 1

    frames = []
    for width in range(smallest, largest + 1):
        # Valid start positions are those where the frame still fits
        frames.extend(inp[start:start + width] 
                      for start in range(total - width + 1))

    return frames

# Functions used by protease_info class

In [9]:
class protease_info():
    """
    Data storage structure that encompasses the relationship between two 
    proteins based on their Dali alignments and structural info calculated 
    using Rosetta. The query is assumed to be the reference protein, and the 
    subject a match identified by Dali search. Included are their names, the 
    comparison statistics collected by Dali (Z_SCORE, RMSD, LALI, NRES, PID, 
    and PDB description of the subject), a list of aligned_residue objects 
    corresponding to each residue in the alignment, an identification of 
    catalytic triad residues in the subject, and a dict of matched_loop objects 
    reflecting structural comparisons between identified loop regions of the 
    query and subject, which might be exchanged in design applications.

    Throughout the class, `report` is either False (no reporting) or an open, 
    writable text handle to which a plain-text summary is appended.
    """
    def __init__(self, query_name, subject_name, dali_file, align_file, 
        query_pose, subject_pose, catalytic_residues, structure_map, auto=True, report=False, verbose=False):
        """
        Stores the upper-cased protein names and initializes every other 
        attribute to an empty value. When auto is True, all attributes are 
        then filled in immediately by auto_calculate.
        """
        # Protein names
        self.query_name = query_name.upper()
        self.subject_name = subject_name.upper()

        # Dali info -- update_dali_info
        self.Z_score = None 
        self.rmsd = None
        self.lali = None
        self.nres = None
        self.pID = None
        self.description = None

        # Alignment -- update_aligned_residues
        self.aligned_residues = []

        # Catalytic triad -- update_catalytic_residues
        self.catalytic_nuc = None
        self.catalytic_his = None
        self.catalytic_acid = None

        # Loops -- update_loop_maps
        self.loop_maps = {}

        if auto:
            self.auto_calculate(dali_file, align_file, query_pose, subject_pose, 
                catalytic_residues, structure_map, report=report, verbose=verbose)

    def auto_calculate(self, dali_file, align_file, query_pose, subject_pose, 
        catalytic_residues, structure_map, report=False, verbose=False):
        """
        Runs all update methods in order (Dali info, aligned residues, 
        catalytic residues, loop maps). If a report handle is given, a text 
        section is written before/after each step.
        """
        # Two-column template: left-aligned label, right-aligned value
        lr = '{:<50}{:>50}\n'

        if report:
            report.write('HEADER: ' + self.subject_name + '\n')
            report.write(lr.format('Query Name:', self.query_name))
            report.write(lr.format('Subject Name:', self.subject_name))
            report.write('\n')
            
        self.update_dali_info(dali_file)
        if report:
            dali_fields = [('Z score:', self.Z_score), 
                           ('RMSD:', self.rmsd), 
                           ('LALI:', self.lali), 
                           ('NRES:', self.nres), 
                           ('pID:', self.pID), 
                           ('Description:', self.description)]
            for label, value in dali_fields:
                # Values are None when the subject was not found in the Dali 
                # summary; None cannot take an alignment format spec, so 
                # convert to str before formatting
                report.write(lr.format(label, str(value)))
            report.write('\n')

        self.update_aligned_residues(align_file, query_pose, subject_pose, 
                                     report=report, verbose=verbose)

        self.update_catalytic_residues(catalytic_residues)
        if report:
            res_line = '{:<8}' * 12 + '\n'
            report.write('\n')
            report.write('Catalytic residues\n')
            report.write(res_line.format('Equal', 'Aligned', 
                                         'AA_type', 'PDB_no', 'Pose_no', 'DSSP', 'Bfactor', 
                                         'AA_type', 'PDB_no', 'Pose_no', 'DSSP', 'Bfactor'))
            triad = [('Nucleophile: \n', self.catalytic_nuc), 
                     ('Histidine: \n', self.catalytic_his), 
                     ('Acid: \n', self.catalytic_acid)]
            for label, residue in triad:
                report.write(label)
                if residue is not None:
                    report.write(residue.get_attributes_printout())
                else:
                    # Terminate the line so the next label starts on its own
                    report.write('None\n')
            report.write('\n')
            report.write('Loops\n')

        self.update_loop_maps(structure_map, query_pose, subject_pose, report=report, verbose=verbose)
        if report:
            report.write('\n')
            report.write('END SUBJECT ' + self.subject_name + '\n')
            report.write('\n'*2)

        return

    def update_dali_info(self, dali_file):
        """
        Update Z_score, rmsd, lali, nres, pID, and description attributes, based 
        on a given dali data file. All six are left as None if the subject is 
        not listed in the file.
        """
        dali_info = get_dali_info(dali_file, self.subject_name)
        self.Z_score = dali_info['Z_score'] 
        self.rmsd = dali_info['rmsd']
        self.lali = dali_info['lali']
        self.nres = dali_info['nres']
        self.pID = dali_info['pID']
        self.description = dali_info['description']
        return

    def update_aligned_residues(self, align_file, query_pose, subject_pose, 
                                report=False, verbose=False):
        """
        Updates aligned_residues property with a list populated from a Dali 
        alignment file and query and subject poses. The latter are necessary 
        to determine pose numbers and B-factors.
        """
        self.aligned_residues = map_aligned_residues(align_file, 
            self.subject_name, query_pose, subject_pose, 
            report=report, verbose=verbose)
        return

    def update_catalytic_residues(self, catalytic_residues, mode='pdb'):
        """
        Updates the three properties identifying the subject protein's 
        catalytic residues (catalytic_nuc, catalytic_his, catalytic_acid), 
        based on a dict of the catalytic residues in the query, in the form of 
        {H: histidine, A: acid residue, N: nucleophile}. Default behavior 
        expects that the input dict identifies the catalytic residues by PDB 
        number, not pose number, though this can be altered with mode='pose'.

        A property is only set when the aligned subject residue has an 
        accepted amino acid type; otherwise it keeps its current value.
        """
        # Make sure mode is either pdb or pose
        assert mode.lower() in ['pdb', 'pose']

        # Identify the catalytic residues in the subject
        matched = map_cat_res(self.aligned_residues, catalytic_residues, 
            mode=mode)

        # (map key, attribute to set, accepted subject residue types)
        # NOTE(review): 'A' is accepted as a nucleophile, presumably to admit 
        # alanine-substituted inactive variants -- confirm intent
        accepted = [('N', 'catalytic_nuc', ['A', 'C', 'S']), 
                    ('H', 'catalytic_his', ['H']), 
                    ('A', 'catalytic_acid', ['D', 'E'])]

        for key, attribute, allowed_types in accepted:
            hit = matched[key]
            if hit and hit.subject_res_type in allowed_types:
                setattr(self, attribute, hit)

        return

    def update_loop_maps(self, structure_map, query_pose, subject_pose, report=False, verbose=False):
        """
        Updates loop_maps attribute with a dict of matched_loop objects keyed 
        by loop, based on a structure map and query and subject poses. The 
        structure map should be a dict of the form 
        {'N': range(<N-term_start>, <N-term_end>), 
        1:range(<loop_1_start>,<loop_1_end>), ..., 'C': range(<C-term_start>, 
        <C-term_end>)}. The ranges are assumed to be in PDB numbers.
        """
        self.loop_maps = map_structure_elements(structure_map, query_pose, 
            subject_pose, self.aligned_residues, self.subject_name, report=report, verbose=verbose)
        return

In [10]:
def get_dali_info(dali_file, subject):
    """
    Looks up one protein in a Dali summary download and returns its statistics 
    as a dict with the keys Z_score (structural similarity to the query 
    structure), rmsd (RMSD to TEV protease, the original query), lali (the 
    number of structurally equivalent CA atoms), nres (the total number of 
    residues in the chain), pID (percentage of identical amino acids in 
    equivalent residues), and description (PDB description).

    The first line whose upper-cased text contains `subject` is used. Numeric 
    fields are returned as the strings found in the file. If no line matches, 
    a message is printed and every value in the dict is None.

    Header line:
    Chain   Z   rmsd lali nres  %id Description
    """
    # Whitespace-separated columns 1-5 map onto these keys, in this order
    column_keys = ['Z_score', 'rmsd', 'lali', 'nres', 'pID']

    # Output dict starts out with every value unset
    dali_info = dict.fromkeys(column_keys + ['description'])

    # Read in Dali summary
    with open(dali_file, 'r') as handle:
        summary_rows = handle.readlines()

    # First row naming the subject wins; None if there is no such row
    fields = next((row.split() for row in summary_rows 
                   if subject in row.upper()), None)

    if fields is None:
        print("No matching protein identified in Dali summary")
        return dali_info

    for column, key in enumerate(column_keys, start=1):
        dali_info[key] = fields[column]

    # Everything after the numeric columns is the free-text description
    dali_info['description'] = ' '.join(fields[6:])

    return dali_info

In [11]:
def extract_alignment_block(align_file, subject):
    """
    Reads in a text file with many Dali alignments, and extracts the relevant 
    lines for a given subject. Each alignment block starts with a line that 
    includes 'Z-score', and that string does not appear anywhere else in the 
    blocks but the start line, hence that is used to delineate blocks. 

    The block returned is the first one whose start line (upper-cased) 
    contains `subject`; it runs from that start line up to, but not including, 
    the start line of the following block (or the end of the file). 

    Raises ValueError if no block start line names the subject. Without this 
    check the slice bounds would both be None and the entire file would be 
    returned as if it were a single alignment block.
    """
    # Read in sequence alignment
    with open(align_file, 'r') as r:
        seq_aligns = r.readlines()

    # Line indices delimiting the wanted block
    begin_block = None
    end_block = None

    for n, sa in enumerate(seq_aligns):
        # Only block start lines are of interest
        if 'Z-score' not in sa:
            continue

        # Once the start is known, the next start line closes the block
        if begin_block is not None:
            end_block = n 
            break

        # Find beginning of block, where start line includes name
        if subject in sa.upper():
            begin_block = n

    if begin_block is None:
        raise ValueError(
            'No alignment block found for subject {!r} in {}'.format(
                subject, align_file))

    # end_block stays None when the block is the last one in the file, in 
    # which case the slice runs to the end
    return seq_aligns[begin_block:end_block]

In [12]:
def get_dali_alignment(align_file, subject):
    """
    Read in the Dali alignment for one subject as a set of contiguous strings.
    Hacky--may need tweaking to generalize.

    Alignment file has sets of five lines, with each set covering 60 residues 
    in the alignment. 
    The first line (0) is the secondary structure of the query (TEV). 
    The second (1) is the query sequence. 
    The third (2) is the identity match (indicated as positive by |). 
    The fourth (3) is the subject sequence. 
    The fifth (4) is the subject secondary structure.

    Returns a dict holding the five concatenated strings under the keys 
    query_secstruct, query_sequence, identity, subject_sequence and 
    subject_secstruct, plus their common length under the key length.
    """
    # Names of the five rows of each set, in file order
    row_names = ['query_secstruct', 'query_sequence', 'identity', 
                 'subject_sequence', 'subject_secstruct']

    # Pull out the subject's block, drop its two non-alignment header lines 
    # and every blank line, and trim the remaining lines
    block = extract_alignment_block(align_file, subject)
    rows = [raw.strip() for raw in block[2:] if raw != '\n']

    # The rows must form complete sets of five
    assert len(rows) % 5 == 0 

    # One list of text chunks per row type
    chunks = [[] for _ in row_names]
    for position, row in enumerate(rows):
        slot = position % 5

        # The first row of each set fixes the width, which cuts off the 
        # trailing residue numbers on the other rows
        if slot == 0:
            set_width = len(row)

        # Skip the 6-character row label and pad short rows with spaces
        chunks[slot].append(row[6:set_width].ljust(set_width - 6))

    joined = [''.join(parts) for parts in chunks]

    # All five strings must come out the same length
    common_length = len(joined[0])
    assert all([len(text) == common_length for text in joined])

    align_lines = {'length': common_length}
    align_lines.update(zip(row_names, joined))

    return align_lines

In [13]:
def get_posenums_from_dali_str(pose, dali_string, report=False):
    """
    For a given pose and matching string from a Dali alignment, returns a list 
    of pose numbers, one entry per character of the Dali string. Where there 
    are gaps, the list will include a None entry. Only the first chain of the 
    pose is considered, and the comparison is case-insensitive.

    Uses Biopython's global alignment with maximum similarity function, with 
    scores from https://towardsdatascience.com 
    Matching characters: +2
    Mismatching character: -1
    Opening a gap: -0.5
    Extending a gap: -0.1 

    If report is a writable handle, the original string and the alignment are 
    written to it. 

    Fails an assertion if the aligner returns anything other than exactly one 
    best alignment, if an aligned character disagrees with the pose residue 
    at the running pose number, or if the aligned Dali string does not begin 
    with the original Dali string (i.e. gaps were inserted into it).
    """ 
    # If pose has multiple chains, only take first one
    pose_chain_1 = pose.split_by_chain()[1]

    # Get sequence strings from pose and Dali
    ps = pose_chain_1.sequence().upper()    # Pose sequence
    ds = dali_string.upper()        # Dali sequence

    # Aligning pose sequence and Dali sequence
    # Positional scores: match, mismatch, gap open, gap extend
    alignments = pairwise2.align.globalms(ps, ds, 2, -1, -0.5, -0.1)
    
    # Verify that there is only one best alignment
    assert len(alignments) == 1

    # Initializing pose numbering and empty list
    posnum = 1
    pnlist = []

    # Filling in list; the first element in the alignment is the pose sequence
    # with dashes inserted to align it with the Dali string
    for a in alignments[0][0]:
        if a == '-':
            # Position exists in the Dali string but not in the pose
            pnlist.append(None)
        else:
            assert pose_chain_1.residue(posnum).name1() == a # Verify correct residue 
            pnlist.append(posnum)
            posnum += 1

    # Warning for weird cases like 1CU1, where residues in the pose are placed 
    # differently from the alignment in Dali, possibly due to a reordering or 
    # circular permutation, or a detached chain
    if len(pnlist) > len(dali_string):
        print("Warning: Pose includes residues beyond Dali alignment.")

    # For other edge case, if pose had more residues on the N-term side than 
    # the Dali alignment. If this were to happen, pose numbers would be 
    # incorrect. Break if that happens. The result of such an input would be 
    # extra dashes inserted at the beginning of the second element in the 
    # alignment
    assert alignments[0][1][:len(ds)] == ds 

    if report:
        report.write('Aligning for pose numbering:\n')
        report.write('Original string:\n')
        report.write(ds)
        report.write('\n')
        report.write('Alignment:\n')
        # Writes every field of the alignment record, one per line
        for a in alignments[0]:
            report.write(str(a))
            report.write('\n')
        report.write('\n')

    # Trim any trailing entries beyond the length of the Dali string
    return pnlist[:len(ds)] 

In [14]:
def map_aligned_residues(align_file, subject, query_pose, subject_pose, 
                         report=False, verbose=False):
    """
    Builds the list of aligned_residue objects for one subject protein from 
    its Dali alignment. Each object carries the corresponding information 
    about one alignment position in both the query and the subject. Positions 
    at which neither protein has a pose-numbered residue are left out.

    If report is a writable handle, a table header is written and each 
    aligned_residue adds its own row. The pose-numbering alignments are only 
    included in the report when verbose is also set.
    """
    # Get alignment 
    alignment = get_dali_alignment(align_file, subject)

    # Pose-numbering details are only reported in verbose mode
    numbering_report = report if verbose else False

    # Get pose number lists, add to alignment
    alignment['query_pose_numbers'] = get_posenums_from_dali_str(query_pose, 
        alignment['query_sequence'], report=numbering_report)
    alignment['subject_pose_numbers'] = get_posenums_from_dali_str(subject_pose, 
        alignment['subject_sequence'], report=numbering_report)

    if report:
        column_titles = ['AA_type', 'PDB_no', 'Pose_no', 'DSSP', 'Bfactor']
        res_line = '{:<8}' * 12 + '\n'
        report.write('Residue alignments:\n')
        report.write(' ' * 16 + '{:<40}{:<40}\n'.format('Query', 'Subject'))
        # The same five columns appear once for the query, once for the subject
        report.write(res_line.format('Equal', 'Aligned', 
                                     *(column_titles + column_titles)))

    # One aligned_residue per alignment position that has a residue in at 
    # least one of the two poses
    aligned_residues = []
    for position in range(alignment['length']):
        pose_numbers = (alignment['query_pose_numbers'][position], 
                        alignment['subject_pose_numbers'][position])
        if any(pose_numbers):
            aligned_residues.append(aligned_residue(position, alignment, 
                query_pose, subject_pose, report=report))

    return aligned_residues

In [15]:
def map_cat_res(aligned_residues, catalytic_map, mode='pdb'):
    """
    Picks out, from a list of aligned residues, the entries that correspond 
    to the query's catalytic triad. The triad is given as a dict, 
    {H: histidine, A: acid residue, N: nucleophile}, using PDB (not pose) 
    numbering by default; pass mode='pose' to use pose numbering instead. 

    Returns a dict with the same keys whose values are the matching 
    aligned_residue objects. A key stays None when its residue number is not 
    found or when the residue found is not structurally aligned.
    """
    # Make sure mode is either pdb or pose
    chosen_mode = mode.lower()
    assert chosen_mode in ['pdb', 'pose']

    # Which query numbering to compare against the map
    if chosen_mode == 'pdb':
        number_attribute = 'query_pdb_number'
    else:
        number_attribute = 'query_pose_number'

    # Nothing matched to begin with
    catalytic_residues = dict.fromkeys(['H', 'A', 'N'])

    wanted_numbers = catalytic_map.values()
    for candidate in aligned_residues:
        res_number = getattr(candidate, number_attribute)

        # Skip anything that is not a structurally aligned catalytic residue
        if res_number not in wanted_numbers or not candidate.residues_align:
            continue

        # Store under every role that names this residue number
        for role, number in catalytic_map.items():
            if res_number == number:
                catalytic_residues[role] = candidate

    return catalytic_residues

In [16]:
def map_structure_elements(structure_map, query_pose, subject_pose, 
    aligned_residues, subject_name, mode='pdb', target='query', report=False, verbose=False):
    """
    Assemble a dict of matched_loop objects, based on the given structure map.
    The structure map should be a dict whose integer keys number the 
    identified loops consecutively, plus the keys 'N' and 'C' for the 
    unstructured terminal regions; each value is the residue range of that 
    element. The matched_loop generation will look in the regions between 
    loops for potential splicing sites, so for every loop this function 
    identifies the residue range within which the matched_loop might look for 
    those sites: from one residue past the end of the upstream element 
    through one residue before the start of the downstream element. For the 
    first loop the upstream element is the 'N' region, and for the last loop 
    the downstream element is the 'C' region. 

    The terminal regions themselves are not loops and get no matched_loop. 
    Default behavior assumes that the structure_map is in PDB numbers (mode) 
    and that the map is of the query (not subject) protein.

    Returns a dict mapping each loop key to its matched_loop object. The dict 
    is empty when the map contains no numbered loops.
    """
    # Make sure mode is either pdb or pose
    assert mode.lower() in ['pdb', 'pose']

    # Make sure target is either query or subject
    assert target.lower() in ['query', 'subject']

    # Initialize collection dict
    loop_maps = {}

    # Determine which loops to match, based on map
    loop_numbers = [x for x in structure_map.keys() if isinstance(x, int)]
    if not loop_numbers:
        return loop_maps
    first_loop = min(loop_numbers)
    last_loop = max(loop_numbers)

    # Generate a matched_loop for each loop identified in the structure_map
    for loop in structure_map:
        # Terminal regions only serve as neighbors of the outermost loops
        if loop in ('N', 'C'):
            continue

        # Neighboring elements are chosen independently on each side, so a 
        # map with a single loop (both first and last) is bounded by the two 
        # terminal regions
        if loop == first_loop:
            upstream = structure_map['N']
        else:
            upstream = structure_map[loop - 1]

        if loop == last_loop:
            downstream = structure_map['C']
        else:
            downstream = structure_map[loop + 1]

        # One residue past the last residue of the upstream element through
        # one residue before the first residue of the downstream element
        n_bound = upstream[-1] + 1
        c_bound = downstream[0] - 1

        # Get subset of aligned_residues between determined bounds
        ar_subset = partition_aligned_residues_list(aligned_residues, 
            n_bound, c_bound, mode=mode, target=target)[1]

        # Make matched_loop object and add it to the dict
        loop_map = matched_loop(subject_name, loop, ar_subset, 
            structure_map[loop], query_pose, subject_pose, report=report, verbose=verbose)
        loop_maps[loop] = loop_map

    return loop_maps

# Functions used by aligned_residue class or on lists of them

In [17]:
class aligned_residue():
    """
    Data storage structure for a single residue. Includes information about 
    both the target residue in the subject protein and the corresponding aligned 
    residue in the query structure. Information includes secondary structure, 
    whether residues are structurally matched (as opposed to unaligned), and 
    whether the residues are identical. Also stores residue numbers (both PDB
    and pose) for both residues, and B-factor info if available.
    """
    def __init__(self, index, alignment, query_pose, subject_pose, auto=True, report=False):
        """
        Initializes every attribute to an empty value. When auto is True, the 
        attributes are then filled in immediately by auto_populate.
        """
        # Query attributes 
        self.query_res_type = None
        self.query_sec_struct = None
        self.query_pdb_number = None
        self.query_pose_number = None
        self.query_b_factor = None

        # Subject attributes
        self.subject_res_type = None
        self.subject_sec_struct = None
        self.subject_pdb_number = None
        self.subject_pose_number = None
        self.subject_b_factor = None

        # Determine whether residues are structurally aligned
        self.residues_align = False

        # Determine whether residues are identical
        self.residues_equal = False

        if auto:
            self.auto_populate(index, alignment, query_pose, subject_pose, report=report)

    def auto_populate(self, index, alignment, query_pose, subject_pose, report=False):
        """
        Populates all attributes of the aligned_residue object, based on a 
        given index in an alignment (a dict including strings from a Dali 
        alignment including secondary structure and sequence for both query and 
        subject proteins, and whether the residues are identical; the dict also 
        includes lists of pose numbers for query and subject--see 
        get_dali_alignment and map_aligned_residues), and query and subject 
        poses.

        Two residues count as structurally aligned when both are present and 
        both are written in upper case in the alignment; two lower-case 
        residues are present but unaligned. A mix of cases is treated as an 
        error and raises AssertionError. If report is a writable handle, the 
        populated attributes are written to it as one row.
        """
        # Generate dicts of residue properties for subject and query
        query_res = get_numbers_dss_bfac(index, 
            alignment['query_pose_numbers'], alignment['query_sequence'], 
            alignment['query_secstruct'], query_pose)
        subject_res = get_numbers_dss_bfac(index, 
            alignment['subject_pose_numbers'], alignment['subject_sequence'], 
            alignment['subject_secstruct'], subject_pose)

        # Collect residue identity
        res_identity = alignment['identity'][index]

        # Determine if both query and subject residues are present
        query_present = bool(query_res['pose_num'])
        subject_present = bool(subject_res['pose_num'])

        # Determine whether residues are structurally aligned, based on case
        if all([query_present, subject_present]):
            if   all([i['sequence'].isupper() for i in [query_res, subject_res]]):
                residues_align = True
            elif all([i['sequence'].islower() for i in [query_res, subject_res]]):
                residues_align = False
            else:
                print('Residue cases do not match')
                print(query_res['pose_num'], query_res['sequence'], 
                    subject_res['pose_num'], subject_res['sequence'])
                # Raised explicitly so the check also holds when assertions 
                # are disabled (python -O)
                raise AssertionError('Residue cases do not match')
        else:
            residues_align = False

        # Determine res identity, based on whether connection line was drawn
        if res_identity == '|':
            residues_equal = True
            assert query_res['sequence'] == subject_res['sequence']
        else:
            residues_equal = False

        # Update attributes
        if query_present:
            self.query_res_type = query_res['sequence'].upper()
            self.query_sec_struct = query_res['secstruct']
            self.query_pdb_number = query_res['pdb_num']
            self.query_pose_number = query_res['pose_num']
            self.query_b_factor = query_res['b_factor']

        if subject_present:
            self.subject_res_type = subject_res['sequence'].upper()
            self.subject_sec_struct = subject_res['secstruct']
            self.subject_pdb_number = subject_res['pdb_num']
            self.subject_pose_number = subject_res['pose_num']
            self.subject_b_factor = subject_res['b_factor']

        self.residues_align = residues_align
        self.residues_equal = residues_equal
        
        # Report
        if report:
            report.write(self.get_attributes_printout())

        return

    def get_attributes_printout(self):
        """ 
        Makes a single string with all residue attributes, as twelve 
        left-aligned columns of width 8 followed by a newline. 
        Column 1:  Whether the residues are equivalent
        Column 2:  Whether the residues are aligned
        Column 3:  Query amino acid type
        Column 4:  Query PDB number
        Column 5:  Query Pose number
        Column 6:  Query secondary structure
        Column 7:  Query B factor
        Column 8:  Subject amino acid type
        Column 9:  Subject PDB number
        Column 10: Subject Pose number
        Column 11: Subject secondary structure
        Column 12: Subject B factor

        Unset attributes are printed as None. B factors are rounded to three 
        decimal places.
        """
        # Template
        res_line = '{:<8}' * 12 + '\n'

        # Round B factors, if they are set. The test is against None rather 
        # than truthiness so that a B factor of exactly 0.0 is still printed 
        # as a number
        if self.query_b_factor is not None:
            qb = round(self.query_b_factor, 3)
        else:
            qb = None
        if self.subject_b_factor is not None:
            sb = round(self.subject_b_factor, 3)
        else:
            sb = None

        # List attributes
        attribs = [bool(self.residues_equal), bool(self.residues_align),
            self.query_res_type, self.query_pdb_number, 
            self.query_pose_number, self.query_sec_struct, qb,
            self.subject_res_type, self.subject_pdb_number, 
            self.subject_pose_number,self.subject_sec_struct, sb]

        # Convert all attributes to a string
        out_as_str = [str(i) for i in attribs]

        # Return formatted output
        return res_line.format(*out_as_str)

In [18]:
def get_numbers_dss_bfac(index, pose_numbers, seq_string, secstruct_string, 
    pose):
    """
    Collects the per-residue details for one position of an alignment. 
    pose_numbers, seq_string and secstruct_string are all indexed by the same 
    alignment position. Returns a dict with five keys: 'sequence' (the letter 
    at that position, case untouched), 'secstruct' (the secondary structure 
    letter, upper-cased), 'pdb_num' (int), 'pose_num', and 'b_factor'.

    A position whose pose number is falsy (e.g. None, as for an alignment 
    gap) has no counterpart in the pose, so every value in the returned dict 
    is None.
    """
    residue_number = pose_numbers[index]

    # Gap in the alignment: nothing to look up in the pose
    if not residue_number:
        return {'sequence': None, 'secstruct': None, 'pdb_num': None, 
            'pose_num': None, 'b_factor': None}

    # Residue letter keeps its case, which callers use to check alignment
    letter = seq_string[index]
    dssp_letter = secstruct_string[index].upper()

    # The first whitespace-separated token of the pose2pdb output is the PDB 
    # residue number
    pdb_label = pose.pdb_info().pose2pdb(residue_number)
    pdb_number = int(pdb_label.split()[0])

    b_factor = get_b_factor(pose, residue_number)

    return {'sequence': letter, 'secstruct': dssp_letter, 
        'pdb_num': pdb_number, 'pose_num': residue_number, 
        'b_factor': b_factor}

In [19]:
def partition_aligned_residues_list(aligned_residues, n_bound, c_bound, 
    mode='pdb', target='query'):
    """
    Returns subsets of a list of aligned_residue objects, partitioned with a 
    given set of boundaries. The boundaries can be either PDB or pose numbers 
    (mode), and can correspond to residues in either the query protein or the 
    subject (target); both options are case-insensitive. Returns a list of 
    three lists: the set before, the set between, and the set after. The 
    bounds are included in the middle set.

    A bound of None means "no boundary on that side": with n_bound=None the 
    set before is empty, and with c_bound=None the set after is empty. If 
    several residues carry the same number, the last one in the list is used.

    Raises ValueError if a bound that is not None does not occur in the list.
    """
    mode = mode.lower()
    target = target.lower()

    # Make sure mode is either pdb or pose
    assert mode in ['pdb', 'pose']

    # Make sure target is either query or subject
    assert target in ['query', 'subject']

    # The four mode/target combinations map onto the attributes 
    # query_pdb_number, query_pose_number, subject_pdb_number and 
    # subject_pose_number
    number_attribute = '{}_{}_number'.format(target, mode)

    # Loop through list to find boundary residues for given mode and target
    start_index = None
    end_index = None
    for n, ar in enumerate(aligned_residues):
        number = getattr(ar, number_attribute)
        if number == n_bound:
            start_index = n
        if number == c_bound:
            end_index = n

    # Address edge cases where no boundary is given, overwriting anything 
    # the scan matched (a None bound compares equal to gap residues)
    if n_bound is None:
        start_index = 0
    elif start_index is None:
        raise ValueError('N-side bound {} not found among {} {} numbers'.format(
            n_bound, target, mode))

    if c_bound is None: 
        end_index = len(aligned_residues)
    elif end_index is None:
        raise ValueError('C-side bound {} not found among {} {} numbers'.format(
            c_bound, target, mode))

    # Define lists
    set_before = aligned_residues[:start_index]
    middle_set = aligned_residues[start_index : end_index + 1]
    set_after = aligned_residues[end_index + 1:]

    return [set_before, middle_set, set_after]

In [20]:
def find_nearest_matches(aligned_residues):
    """
    Walks a list of aligned_residue objects from its beginning and picks out 
    the first uninterrupted run of residues flagged as aligned 
    (residues_align). Unaligned residues ahead of that run are skipped, and 
    the first unaligned residue after it ends the search. Returns a dict with 
    the first and last residue of the run ('first_match', 'last_match') and 
    the first and last residue of the run whose subject secondary structure 
    is 'E', i.e. B-sheet ('first_b_match', 'last_b_match'). Any entry for 
    which nothing was found is None.
    """
    first_hit = None
    last_hit = None
    first_sheet_hit = None
    last_sheet_hit = None

    for residue in aligned_residues:
        if not residue.residues_align:
            # An unaligned residue closes the run once one has started; 
            # before that it is simply skipped
            if first_hit:
                break
            continue

        # Every aligned residue pushes the end of the run forward; only the 
        # first one is kept as its start
        last_hit = residue
        if not first_hit:
            first_hit = residue

        # Same bookkeeping, restricted to B-sheet residues
        if residue.subject_sec_struct == 'E':
            last_sheet_hit = residue
            if not first_sheet_hit:
                first_sheet_hit = residue

    return {'first_match': first_hit, 'first_b_match': first_sheet_hit,
            'last_match': last_hit, 'last_b_match': last_sheet_hit}

In [21]:
def get_residue_list(aligned_residues, mode='pdb', target='query'):
    """
    For a given list of aligned_residue objects, returns a list with one 
    value per residue, in the same order. Default behavior is to return the 
    PDB numbers of the query protein, though the subject protein can be 
    searched instead (target='subject'), and the pose numbers (mode='pose') 
    or B factors (mode='b') can be collected instead. Both options are 
    case-insensitive. Values are returned as stored, so gaps show up as None.
    """
    # Normalise once, so that the validation below and the lookup agree on 
    # what e.g. 'PDB' means
    mode = mode.lower()
    target = target.lower()

    # Make sure mode is pdb, pose, or b
    assert mode in ['pdb', 'pose', 'b']

    # Make sure target is either query or subject
    assert target in ['query', 'subject']

    # Name of the attribute holding the desired value
    suffixes = {'pdb': 'pdb_number', 'pose': 'pose_number', 'b': 'b_factor'}
    attribute = '{}_{}'.format(target, suffixes[mode])

    # Collect the desired attribute from every residue
    return [getattr(ar, attribute) for ar in aligned_residues]

# Functions used by matched_loop class

In [22]:
class matched_loop():
    """
    Data storage structure for loops. When taking in a loop of a query 
    structure, finds the edges bordering it (usually B-sheets) and looks for 
    residue matches within the given boundaries, which should be the starts of
    the next loops. Input residues should use PDB (not pose) numbers.

    The attributes are filled in stages by the update_* methods, which 
    auto_calculate runs in order. Each attribute keeps its initial value 
    (None, False or an empty list) until the corresponding method has run 
    and had something to record.
    """
    def __init__(self, source, l_name, local_residues, l_range, query_pose, 
        subject_pose, auto=True, report=False, verbose=False, dump=False):
        """
        Initializes all attributes and, if auto is set, runs auto_calculate.

        source:         label of the structure the loop comes from (written 
                        to the report as 'Source PDB')
        l_name:         label of the loop site; 'N' and 'C' are treated 
                        specially by update_suitability
        local_residues: list of aligned_residue objects around the loop
        l_range:        sequence whose first and last items are the query 
                        PDB numbers of the loop start and end
        query_pose, subject_pose: poses of the two proteins
        auto:           whether to run auto_calculate immediately
        report:         False, or an object with a write() method that 
                        receives a plain-text report
        verbose, dump:  passed on to find_splice_sites
        """
        # Loop source and name
        self.loop_source = source
        self.loop_name = l_name

        # Flanking residues -- update_matches
        self.n_nearest_match = None
        self.n_farthest_match = None
        self.c_nearest_match = None
        self.c_farthest_match = None
        self.flanking_matches_found = False

        # Best matched residues for loop swap -- update_splice_sites
        self.N_splice_residue = None
        self.C_splice_residue = None
        self.N_loop_end_residue = None
        self.C_loop_end_residue = None
        self.N_outside_overlap_residue = None
        self.C_outside_overlap_residue = None
        self.N_overlap_size = None
        self.C_overlap_size = None
        self.query_loop_size = None
        self.subject_loop_size = None
        self.loop_size_change = None

        # Loop RMSD if query and subject are the same size -- update_loop_rmsd
        self.rmsd = None

        # Average B factor for the loop -- update_b_factor
        self.b_factor = None

        # Check for discontinuities -- update_continuity
        self.is_continuous = None

        # Loop proximity to peptide substrate -- update_proximity_to_substrate
        self.closest_residue_distance = None
        self.close_substrate_residues = []

        # Evaluate whether loop is a suitable target -- update_suitability
        self.is_n_match = None
        self.is_c_match = None
        self.is_near_target = None
        self.is_not_domain = None
        self.is_different_from_original = None
        self.is_ordered = None
        self.is_possible_target = None
        
        if auto:
            self.auto_calculate(local_residues, l_range, query_pose, subject_pose, 
                                report=report, verbose=verbose, dump=dump)

    @staticmethod
    def _write_residue_or_none(report, residue):
        """ Writes a residue's attribute printout, or 'None' if there is none """
        if residue:
            report.write(residue.get_attributes_printout())
        else:
            report.write('None\n')

    def auto_calculate(self, local_residues, l_range, query_pose, subject_pose, 
        report=False, verbose=False, dump=False):
        """ 
        Runs all update methods, in order: update_matches, 
        update_splice_sites, update_loop_rmsd, update_b_factor, 
        update_continuity, update_proximity_to_substrate (only if splice 
        ranges were found) and update_suitability. If report is given, a 
        plain-text account of each stage is written to it as the stages run.
        """
        # Report templates: label/value rows, and 12-column residue rows
        lr = '{:<50}{:>50}\n'
        res_line = '{:<8}' * 12 + '\n'

        if report:
            report.write(lr.format('Source PDB:', self.loop_source))
            report.write(lr.format('Loop site:', self.loop_name))
            report.write('\n')
            report.write('Local residues:\n')
            report.write(' ' * 16 + '{:<40}{:<40}\n'.format('Query', 'Subject'))
            report.write(res_line.format('Equal', 'Aligned', 
                                         'AA_type', 'PDB_no', 'Pose_no', 'DSSP', 'Bfactor', 
                                         'AA_type', 'PDB_no', 'Pose_no', 'DSSP', 'Bfactor'))
            for res in local_residues:
                report.write(res.get_attributes_printout())
            report.write('\n')
            
        res_sets = self.update_matches(local_residues, l_range[0], l_range[-1])
        if report:
            report.write(lr.format('Flanking matches found:', str(self.flanking_matches_found)))
            report.write('Farthest N-terminal match:\n')
            self._write_residue_or_none(report, self.n_farthest_match)
            report.write('Nearest N-terminal match:\n')
            self._write_residue_or_none(report, self.n_nearest_match)
            report.write('Nearest C-terminal match:\n')
            self._write_residue_or_none(report, self.c_nearest_match)
            report.write('Farthest C-terminal match:\n')
            self._write_residue_or_none(report, self.c_farthest_match)
            report.write('\n')

        if report:
            if not self.flanking_matches_found:
                report.write('Skipped alignment window check due to lack of matched residues.\n')
                report.write('Skipped RMSD comparison due to lack of matched residues.\n')
                report.write('Skipped B factor calculation due to lack of matched residues.\n')
            else:    
                report.write('Alignment window check:\n')
        n_splice_range, c_splice_range = self.update_splice_sites(res_sets, query_pose, subject_pose, 
                                                                  report=report, verbose=verbose, dump=dump)
        if report:
            if self.flanking_matches_found:
                if self.loop_size_change != 0:
                    report.write('Skipped RMSD comparison since subject loop is not the same size as query.\n')
                    report.write('\n')
                
        self.update_loop_rmsd(query_pose, subject_pose)
        self.update_b_factor(local_residues)
        self.update_continuity(subject_pose, report=report)

        # Both splice ranges are None when no flanking matches were found
        if all([n_splice_range, c_splice_range]):
            self.update_proximity_to_substrate(query_pose, subject_pose, 
                                               n_splice_range + c_splice_range)

        self.update_suitability()
        if report:
            report.write(lr.format('LOOP IS CANDIDATE FOR EXCHANGE:', self.is_possible_target))
            report.write('\n')
            report.write('N splice site:\n')
            self._write_residue_or_none(report, self.N_splice_residue)
            report.write('C splice site:\n')
            self._write_residue_or_none(report, self.C_splice_residue)
            report.write(lr.format('N-side overlap length:', str(self.N_overlap_size)))
            report.write(lr.format('C-side overlap length:', str(self.C_overlap_size)))
            report.write('\n')

            # In the checks below, only a value that was never set is 
            # reported as 'None'. Testing truthiness instead would also hide 
            # legitimate zero and False values.
            if self.closest_residue_distance is not None:
                crd = round(self.closest_residue_distance, 3)
            else:
                crd = 'None'
            report.write(lr.format('Closest CA distance to substrate:', crd))
            if self.close_substrate_residues:
                cr_list = ','.join([str(i) for i in self.close_substrate_residues])
            else:
                cr_list = 'None'
            report.write(lr.format('Substrate residues within 8A of loop:', cr_list))
            report.write(lr.format('Loop has residues within 8A of substrate:', self.is_near_target))
            report.write('\n')
            report.write(lr.format('Query loop length:', str(self.query_loop_size)))
            report.write(lr.format('Subject loop length:', str(self.subject_loop_size)))
            report.write(lr.format('Loop is not a full domain (>50):', self.is_not_domain))
            report.write('\n')
            report.write(lr.format('Difference in subject vs query length:', str(self.loop_size_change)))
            report.write(lr.format('Loop RMSD:', str(self.rmsd)))
            report.write(lr.format('Loop is structurally asimilar:', self.is_different_from_original))
            report.write('\n')
            if self.b_factor is not None: 
                bf = round(self.b_factor, 3)
            else:
                bf = 'None'
            report.write(lr.format('Loop average B factor:', bf))
            if self.is_ordered is not None: 
                ordered = self.is_ordered
            else:
                ordered = 'None'
            report.write(lr.format('Average B factor is low:', ordered))
            report.write('\n')
            if self.is_continuous is not None: 
                continuous = self.is_continuous
            else:
                continuous = 'None'
            report.write(lr.format('Loop is not missing residues:', continuous))
            report.write('\n'*2)

        return

    def update_matches(self, local_residues, loop_start, loop_end):
        """
        Update n_ and c_ nearest_match and farthest_match properties, 
        identifying the closest and farthest matching residues flanking the 
        loop. Inputs include a list of aligned_residue objects that should be 
        trimmed down from the full alignment to just the region between the 
        adjacent loops, and the PDB numbers of the start and end of the 
        target loop, which should be within the listed residues. Sets 
        flanking_matches_found if a match exists on both sides. Returns the 
        three-way partition of local_residues (before, loop, after).
        """
        # Partition local residue set at loop start and end points
        res_sets = partition_aligned_residues_list(local_residues,  
            loop_start, loop_end, mode='pdb', target='query')

        # Collect matching residues, reversing the C-side set so as to count 
        # toward the loop.
        n_matches = find_nearest_matches(res_sets[0])
        c_matches = find_nearest_matches(res_sets[2][::-1])

        # Update properties. Both scans run toward the loop, so the first 
        # match is the one farthest from it.
        self.n_farthest_match = n_matches['first_match']
        self.n_nearest_match = n_matches['last_match']
        self.c_nearest_match = c_matches['last_match']
        self.c_farthest_match = c_matches['first_match']

        # Indicate if matches were found
        if self.n_nearest_match and self.c_nearest_match:
            self.flanking_matches_found = True

        return res_sets
        
    def update_splice_sites(self, res_sets, query_pose, subject_pose, 
                            report=False, verbose=False, dump=False, out_name=None):
        """
        Updates splice residues, where residues from the query protein should be 
        replaced with residues from the subject protein. If no overlapping 
        residues have been found for this loop between the query and subject, 
        splice sites will remain None. If matches have been found, the ranges of 
        matched residues (both on the N-side and C-side) will be compared in 
        sliding windows to determine the set of residues that yield the lowest 
        backbone RMSD between the subject and query. The set that yields the 
        lowest RMSD will be taken as the splice residues. Updates the splice 
        N_ and C_ splice_residue and overlap_size attributes, as well as the 
        query_ and subject_loop_size attributes. Returns the chosen N-side 
        and C-side residue ranges, or (None, None) if there were no flanking 
        matches.
        """
        # Nothing to update if no matching residues were identified
        if not self.flanking_matches_found:
            return None, None

        # Generate sub-lists for the overlap windows
        n_start = res_sets[0].index(self.n_farthest_match)
        n_end = res_sets[0].index(self.n_nearest_match) 
        n_window = res_sets[0][n_start: n_end + 1]

        c_start = res_sets[2].index(self.c_nearest_match)
        c_end = res_sets[2].index(self.c_farthest_match) 
        c_window = res_sets[2][c_start: c_end + 1]

        # Find windows of best-fitting residues
        n_splice_range, c_splice_range = find_splice_sites(n_window, c_window, 
            query_pose, subject_pose, report=report, verbose=verbose, dump=dump,
            out_name=out_name)
        
        # Find spliced loop termini
        all_res = res_sets[0] + res_sets[1] + res_sets[2]
        N_splice = n_splice_range[-1]                      # Closest to loop
        C_splice = c_splice_range[0]                       # Closest to loop
        N_loop_end = None
        C_loop_end = None
        for res in all_res:
            if res.subject_pose_number == N_splice.subject_pose_number + 1:
                N_loop_end = res
            if res.subject_pose_number == C_splice.subject_pose_number - 1:
                C_loop_end = res

        # Update attributes 
        self.N_splice_residue = N_splice                    
        self.C_splice_residue = C_splice                    
        self.N_loop_end_residue = N_loop_end
        self.C_loop_end_residue = C_loop_end
        self.N_outside_overlap_residue = n_splice_range[0]  # Farthest from loop
        self.C_outside_overlap_residue = c_splice_range[-1] # Farthest from loop
        self.N_overlap_size = len(n_splice_range)
        self.C_overlap_size = len(c_splice_range)
        self.query_loop_size = self.C_splice_residue.query_pose_number - \
            self.N_splice_residue.query_pose_number - 1
        self.subject_loop_size = self.C_splice_residue.subject_pose_number - \
             self.N_splice_residue.subject_pose_number - 1
        self.loop_size_change = self.subject_loop_size - self.query_loop_size

        return n_splice_range, c_splice_range

    def update_loop_rmsd(self, query_pose, subject_pose):
        """ 
        If the subject and query loops are the same size, updates the rmsd 
        attribute with the value returned by align_protein_sections for the 
        residues from the N splice site through the C splice site (both 
        included), rounded to three places. The subject pose is copied 
        first, so the caller's pose is not the one passed to the alignment.
        """
        # Nothing to update if no matching residues were identified
        if not self.flanking_matches_found:
            return 
        
        # Nothing to update if the loops are not the same size
        if self.loop_size_change != 0:
            return

        # Getting lists of pose numbers for the query and subject loops between 
        # the splice sites
        query_list = list(range(self.N_splice_residue.query_pose_number,
            self.C_splice_residue.query_pose_number + 1))
        subject_list = list(range(self.N_splice_residue.subject_pose_number,
            self.C_splice_residue.subject_pose_number + 1))
        
        # Calculate RMSD
        self.rmsd = round(align_protein_sections(query_pose, query_list, 
            Pose(subject_pose), subject_list), 3)

        return

    def update_b_factor(self, local_residues):
        """
        Updates the b_factor attribute with the average subject B factor of 
        the residues listed strictly between the two splice residues in 
        local_residues. Only float values are averaged, which leaves out the 
        None entries of alignment gaps. If there is nothing to average, 
        b_factor is left unchanged rather than set to nan.
        """
        # Nothing to update if no matching residues were identified
        if not self.flanking_matches_found:
            return 
        
        # Get subset of residue range
        first_loop_res = local_residues.index(self.N_splice_residue) + 1
        last_loop_res = local_residues.index(self.C_splice_residue)
        loop_residues = local_residues[first_loop_res:last_loop_res]

        # Collect list of B factors 
        b_factors = get_residue_list(loop_residues, mode='b', target='subject')

        # Exclude empty values
        b_factors_clean = [i for i in b_factors if isinstance(i, float)]

        # Averaging an empty list would give nan (with a RuntimeWarning), 
        # which update_suitability would then read as a failed B factor check
        if not b_factors_clean:
            return

        # Take average B factor for the loop and update attribute
        self.b_factor = np.average(b_factors_clean)

        return

    def update_continuity(self, subject_pose, report=False):
        """
        Checks whether the loop includes chain breaks. Updates the is_continuous 
        attribute. The check covers the subject residues from the outermost 
        N-side overlap residue through the outermost C-side overlap residue. 
        If report is given, the C-N distances are written to it. Any breaks 
        found are printed to standard output.
        """
        # Nothing to update if no matching residues were identified
        if not self.flanking_matches_found:
            return

        # Create loop subpose
        subpose_start = self.N_outside_overlap_residue.subject_pose_number
        subpose_end = self.C_outside_overlap_residue.subject_pose_number
        subpose = Pose(subject_pose, subpose_start, subpose_end)

        # Check for breaks
        continuous, c_n_distances, break_sites = check_pose_continuity(subpose)
        
        if report:
            # One distance is expected per pair of consecutive residues, so 
            # the final residue of the subpose has no entry of its own
            res_in_subpose = range(subpose_start, subpose_end)
            assert len(c_n_distances) == len(res_in_subpose)
            report.write("Continuity check: C-N distances\n")
            for n, i in enumerate(c_n_distances):
                report.write('{} \t {}\n'.format(res_in_subpose[n], round(i, 3)))
            report.write('\n')

        # Announce identified breaks
        # NOTE(review): break sites are looked up in the subpose's own 
        # pdb_info -- confirm that check_pose_continuity reports them in 
        # subpose numbering
        if not continuous:
            print('Chain broken:')
            for b in break_sites:
                print('\t', subpose.pdb_info().pose2pdb(b).split()[0])

        # Update attribute
        self.is_continuous = continuous

        return

    def update_proximity_to_substrate(self, query_pose, subject_pose, overlaps):
        """
        Updates the attributes relating to the subject loop's proximity to the 
        query's substrate peptide. This is for purposes of weeding out loops 
        that are not close enough to the substrate to directly affect 
        specificity. Determines the coordinates of the substrate in the query 
        and, after aligning the subject with the query on the overlap 
        residues, the coordinates of the loop in the subject. Updates 
        closest_residue_distance and close_substrate_residues. (The 
        is_near_target flag derived from these is set by update_suitability.)
        """
        # Nothing to update if no matching residues were identified
        if not self.flanking_matches_found:
            return 
        
        # Getting coordinates of the residues strictly between the splice sites
        loop_start = self.N_splice_residue.subject_pose_number + 1
        loop_end = self.C_splice_residue.subject_pose_number - 1
        substrate_coords, loop_coords = get_substrate_and_loop_coords(
            query_pose, subject_pose, overlaps, loop_start, loop_end)

        # Determine closest proximity and list of nearby substrate residues
        closest_distance, nearby_substrate_residues = \
            find_proximity_to_substrate(substrate_coords, loop_coords)

        # Update attributes
        self.closest_residue_distance = closest_distance
        self.close_substrate_residues = nearby_substrate_residues 

        return

    def update_suitability(self, max_res_count=50, min_rmsd=0.5, 
        max_b_factor=50):
        """
        Runs a set of checks to determine whether the loop may be viable for 
        substitution. Rejects loops on the basis of being too far away from the
        substrate to interact, being too large (avoiding domain insertions), 
        being of the same size as the original loop with a small RMSD, lacking 
        matched residues flanking the loop (cannot graft), or having gaps in the 
        crystal structure. Also rejects loops whose average B factor exceeds 
        max_b_factor. A size, RMSD or B factor that has not been determined 
        does not count against the loop. Updates the is_* attributes.
        """
        # Check that there is a matched residue on the N-terminal side
        if self.loop_name == 'N':
            n_match_check = True
        else:
            n_match_check = bool(self.N_splice_residue)

        # Check that there is a matched residue on the C-terminal side
        if self.loop_name == 'C':
            c_match_check = True
        else:
            c_match_check = bool(self.C_splice_residue)

        # Check that there are residues within range of substrate
        proximity_check = bool(self.close_substrate_residues)

        # Check that loop residue count is not too large
        if self.subject_loop_size:
            res_count_check = self.subject_loop_size <= max_res_count
        else: 
            res_count_check = True

        # Check that if loop is the same size as the query, that it is 
        # structurally different, based on backbone RMSD. Without an RMSD 
        # there is nothing to compare, so the loop is not rejected here.
        if self.loop_size_change == 0 and self.rmsd is not None:
            similarity_check = self.rmsd > min_rmsd
        else:
            similarity_check = True

        # B factor check
        if self.b_factor:
            b_factor_check = self.b_factor <= max_b_factor
        else:
            b_factor_check = True

        # Updating attributes
        self.is_n_match = n_match_check
        self.is_c_match = c_match_check
        self.is_near_target = proximity_check
        self.is_not_domain = res_count_check
        self.is_different_from_original = similarity_check
        self.is_ordered = b_factor_check
        self.is_possible_target = all([n_match_check, c_match_check, 
            proximity_check, res_count_check, similarity_check, b_factor_check, 
            self.is_continuous])

        return

In [23]:
def find_splice_sites(n_window, c_window, query_pose, subject_pose, 
                      report=False, verbose=False, dump=False, out_name=None):
    """
    Chooses the residues to splice at on either side of a loop. Each window 
    (N-side and C-side) is expanded into its subframes by 
    variable_sliding_window, and every N-side subframe is paired with every 
    C-side subframe. For each pair, a fresh copy of the subject is passed to 
    align_protein_sections together with the pair's query and subject pose 
    numbers, and the pair with the lowest resulting RMSD is returned as 
    (best N-side subframe, best C-side subframe). If no pair scores below 
    1000, the last subframe of each window is returned.

    If report is given, the windows and the best pair are written to it; 
    with verbose, every pair is written as well. With dump, the aligned 
    subject residues spanning each pair are written to a PDB file whose name 
    starts with out_name (or 'aligned_loop' if out_name is not given).
    """
    def pose_numbers(frame, target):
        """ Pose numbers of a frame's residues for 'query' or 'subject' """
        return get_residue_list(frame, mode='pose', target=target)

    def comma_joined(values):
        """ Comma-separated text form of a list """
        return ','.join([str(i) for i in values])

    # Report row: left frame, right frame, RMSD
    lf = '{:<45}{:<45}{:>10}\n'

    if report:
        window_n_text = comma_joined(pose_numbers(n_window, 'subject'))
        window_c_text = comma_joined(pose_numbers(c_window, 'subject'))
        report.write(lf.format(window_n_text, window_c_text, ''))
        report.write('\n')
        report.write(lf.format('Left Frame', 'Right Frame', 'RMSD'))

    # NOTE: an earlier variant narrowed each window to its B-sheet residues 
    # (subject_sec_struct == 'E') before building subframes. It is disabled, 
    # so the full windows are used.

    # List subframes within overlap windows
    n_subframes = variable_sliding_window(n_window)
    c_subframes = variable_sliding_window(c_window)

    # Start from the last subframe of each window and an arbitrarily high 
    # RMSD that any real alignment is expected to beat
    lowest_rmsd = 1000
    winning_n = n_subframes[-1]
    winning_c = c_subframes[-1]

    # Every N-side subframe against every C-side subframe, N-side outermost
    frame_pairs = ((ns, cs) for ns in n_subframes for cs in c_subframes)
    for n_frame, c_frame in frame_pairs:
        # Pose numbers of the residues to align on
        query_n = pose_numbers(n_frame, 'query')
        query_c = pose_numbers(c_frame, 'query')
        subject_n = pose_numbers(n_frame, 'subject')
        subject_c = pose_numbers(c_frame, 'subject')

        # Align a fresh copy of the subject on these residues and get RMSD
        aligned_subject = Pose(subject_pose)
        rmsd = align_protein_sections(query_pose, query_n + query_c, 
            aligned_subject, subject_n + subject_c)

        # Keep this pair only if it is strictly better than the best so far
        if rmsd < lowest_rmsd:
            lowest_rmsd, winning_n, winning_c = rmsd, n_frame, c_frame

        if report and verbose:
            report.write(lf.format(comma_joined(subject_n), 
                comma_joined(subject_c), round(rmsd,5)))

        if dump:
            # Residues from the start of the N-side frame through the end of 
            # the C-side frame, as positioned by the alignment
            subpose = Pose(aligned_subject, subject_n[0], subject_c[-1])
            bounds = [str(i) for i in 
                [subject_n[0], subject_n[-1], subject_c[0], subject_c[-1]]]
            prefix = out_name if out_name else 'aligned_loop'
            subpose.dump_pdb(prefix + '_N{}-{}_C{}-{}.pdb'.format(*bounds))

    if report:
        best_n_text = comma_joined(pose_numbers(winning_n, 'subject'))
        best_c_text = comma_joined(pose_numbers(winning_c, 'subject'))
        report.write('Best frames:\n')
        report.write(lf.format(best_n_text, best_c_text, round(lowest_rmsd, 3)))
        report.write('\n')

    return winning_n, winning_c

In [24]:
def get_substrate_and_loop_coords(query_pose, subject_pose, overlaps, 
    loop_start, loop_end):
    """
    Collects the coordinates needed to judge how close a subject loop would 
    sit to the query's substrate. The query pose must have two chains, the 
    second being the substrate. overlaps is the list of aligned residues 
    used to superimpose the subject onto the query, and loop_start and 
    loop_end are the subject pose numbers of the first and last loop 
    residue. Returns two lists of CA coordinates: those of the substrate, 
    and those of the subject loop after superposition. The subject pose 
    passed in is copied before being aligned.
    """
    # CA coordinates of the substrate peptide (second chain of the query)
    substrate_coords = list_pose_coords(query_pose.split_by_chain()[2])

    # Pose numbers of the overlap residues, query first and then subject
    overlap_numbers = {
        target: get_residue_list(overlaps, mode='pose', target=target) 
        for target in ('query', 'subject')}

    # Superimpose a copy of the subject onto the query. The alignment is 
    # relied on to reposition aligned_subject in place; the RMSD it returns 
    # is not needed here.
    aligned_subject = Pose(subject_pose)
    align_protein_sections(query_pose, overlap_numbers['query'], 
        aligned_subject, overlap_numbers['subject'])

    # CA coordinates of the loop alone, cut from the repositioned subject
    loop_coords = list_pose_coords(Pose(aligned_subject, loop_start, loop_end))

    return substrate_coords, loop_coords

In [25]:
def find_proximity_to_substrate(substrate_coords, loop_coords, cutoff=8):
    """
    Finds all distances between the coordinates of the substrate peptide and 
    the candidate loop, if it were to be spliced into the query structure.

    substrate_coords and loop_coords are iterables of coordinate objects 
    accepted by get_distance. cutoff is the distance (same units as the 
    coordinates; 8 by default, matching the original hard-coded 8 A) at or 
    below which a substrate residue counts as being near the loop.

    Returns a tuple (closest_distance, nearby_substrate_residues):
    closest_distance is the smallest substrate-loop distance found, or the 
    sentinel 1000 if there are no pairs (or none closer than 1000); 
    nearby_substrate_residues is the list of 1-based positions in 
    substrate_coords that have at least one loop coordinate within cutoff, in 
    ascending order.
    """
    closest_distance = 1000 # Arbitrary large number, kept as the "no pairs" value
    nearby_substrate_residues = []

    for position, substrate_xyz in enumerate(substrate_coords, start=1):
        within_cutoff = False

        # Every pair is visited (no early exit) so that the overall minimum 
        # distance is still found after a residue is already known to be near
        for loop_xyz in loop_coords:
            pair_distance = get_distance(substrate_xyz, loop_xyz)
            if pair_distance < closest_distance:
                closest_distance = pair_distance
            if pair_distance <= cutoff:
                within_cutoff = True

        # One entry per substrate residue, however many loop residues are near
        if within_cutoff:
            nearby_substrate_residues.append(position)

    return closest_distance, nearby_substrate_residues

# Execute

In [26]:
# Initialize PyRosetta before any pose is loaded
init('-mute all -ignore_zero_occupancy false')

# Global data
# Catalytic residue numbers keyed by a one-letter code (presumably 
# H = histidine, A = acid, N = nucleophile -- confirm against protease_info)
hcv_cat_res = dict(H=72, A=96, N=154)
tev_cat_res = dict(H=46, A=81, N=151) # In pose: H39, D74, C144

# Maps list all loop regions, all in PDB (not pose) numbers
# Had to generate manually since loops might contain beta-fingers
# Each row is (key, first residue, range stop); as with the built-in range, 
# the stop value itself is not part of the region. Keys 'N' and 'C' are the 
# terminal regions, integer keys are the numbered loops.
tev_map = {region: range(first, stop) for region, first, stop in (
    ('N',   8,  18),
    (1,    26,  28),
    (2,    38,  40),
    (3,    44,  54),
    (4,    59,  63),
    (5,    67,  74),
    (6,    78,  82),
    (7,    87, 109),
    (8,   117, 121),
    (9,   126, 139),
    (10,  143, 152),
    (11,  158, 161),
    (12,  172, 176),
    ('C', 182, 221),
)}

htra1_map = {region: range(first, stop) for region, first, stop in (
    ('N', 160, 183),
    (1,   192, 198),
    (2,   209, 214),
    (3,   218, 226),
    (4,   233, 235),
    (5,   240, 241),
    (6,   247, 250),
    (7,   256, 276),
    (8,   284, 290),
    (9,   300, 317),
    (10,  320, 329),
    (11,  335, 337),
    (12,  349, 351),
    ('C', 357, 370),
)}

# Input files passed to protease_info
dali_file = 'aligned_pdbs/0000_dali_pdb90_tev.txt'
align_file = 'aligned_pdbs/0000_seq_align.txt'

# Query structure that every subject is compared against
query_name = 'TEV'
query_pose = pose_from_pdb('tev.pdb')

PyRosetta-4 2019 [Rosetta PyRosetta4.conda.linux.CentOS.python36.Release 2019.23+release.b0c18bd01c0dd36a5d94675dd95403c4077125d2 2019-06-07T13:40:47] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.


In [186]:
# Single-structure run of protease_info, with the report written to a small 
# text file. Subjects previously tried here by swapping subject_name:
# 1A0J, 1CU1, 1EZX, 1GPZ, 6BJR, 1FI8, 5JB8, 2AS9, 1A5I, 1B0F
subject_name = '1lvm'
subject_pdb = 'aligned_pdbs/{}.pdb'.format(subject_name)
subject_pose = pose_from_pdb(subject_pdb)

with open('small_data_dump.txt', 'w') as report_file:
    test_case = protease_info(
        query_name, subject_name, dali_file, align_file,
        query_pose, subject_pose, tev_cat_res, tev_map,
        report=report_file, verbose=False)

print('Done')

Done


In [181]:
# Build the protease database: run protease_info on every aligned PDB, keep 
# the results keyed by subject name, and pickle the dictionary at the end.
pdb_list = glob('aligned_pdbs/*.pdb')
pdb_list.sort()

# The master PDB is not a subject; skip it if it is present rather than 
# raising when the file has been moved or renamed
master_pdb = 'aligned_pdbs/0000_master_pdb.pdb'
if master_pdb in pdb_list:
    pdb_list.remove(master_pdb)

db_dict = {}    # subject name -> protease_info result
fail_list = []  # paths of the PDBs that could not be processed

with open('alignment_data_dump_in.txt', 'w') as w:
    for i in pdb_list:
        print(i)
        subj_name = i.replace('aligned_pdbs/','').replace('.pdb', '')
        try:
            # Loading is inside the try so that one unreadable PDB is 
            # recorded as a failure instead of aborting the whole batch
            subj_pose = pose_from_pdb(i)
            # Note: ran with auto_calculate on
            pinf = protease_info(query_name, subj_name, dali_file, align_file, 
                                 query_pose, subj_pose, tev_cat_res, tev_map, report=w)
        except Exception as err:
            # Exception (not a bare except) lets KeyboardInterrupt and 
            # SystemExit stop the run; the reason is printed so failures 
            # can be diagnosed afterwards
            fail_list.append(i)
            print(i, 'fail')
            print('\t{}: {}'.format(type(err).__name__, err))
        else:
            db_dict[subj_name] = pinf

# Summary of failures
print(len(fail_list))
for i in fail_list:
    print('\t', i)

# Save the database for later sessions
outfile = 'protease_db_3.pkl'
with open(outfile, 'wb') as o:
    pickle.dump(db_dict, o)

aligned_pdbs/1A0J.pdb
aligned_pdbs/1A5I.pdb
aligned_pdbs/1AGJ.pdb
aligned_pdbs/1AMH.pdb
aligned_pdbs/1AN1.pdb
Chain broken:
	 113
aligned_pdbs/1AO5.pdb
Chain broken:
	 146
aligned_pdbs/1AZZ.pdb
aligned_pdbs/1B0F.pdb
aligned_pdbs/1BDA.pdb
aligned_pdbs/1BRU.pdb
aligned_pdbs/1C5M.pdb
aligned_pdbs/1CU1.pdb
aligned_pdbs/1CVW.pdb
Chain broken:
	 193
aligned_pdbs/1EKB.pdb
aligned_pdbs/1EQ9.pdb
aligned_pdbs/1EZX.pdb
Chain broken:
	 108
aligned_pdbs/1EZX.pdb fail
aligned_pdbs/1FDP.pdb
aligned_pdbs/1FDP.pdb fail
aligned_pdbs/1FI8.pdb
Chain broken:
	 36
aligned_pdbs/1FIW.pdb
Chain broken:
	 202
aligned_pdbs/1FIZ.pdb
Chain broken:
	 216
aligned_pdbs/1FUJ.pdb
aligned_pdbs/1FXY.pdb
aligned_pdbs/1FY1.pdb
aligned_pdbs/1G3C.pdb
aligned_pdbs/1GPZ.pdb
Chain broken:
	 491
Chain broken:
	 579
Chain broken:
	 604
aligned_pdbs/1GVL.pdb
aligned_pdbs/1H4W.pdb
aligned_pdbs/1HYL.pdb
aligned_pdbs/1KIG.pdb
aligned_pdbs/1KY9.pdb
Chain broken:
	 40
	 50
	 83
Chain broken:
aligned_pdbs/1KY9.pdb fail
aligned_pdbs/1L1J

In [182]:
# Write one CSV row per protease in db_dict: Dali statistics, catalytic 
# residues, and seven columns for each loop. The same lines are also printed.

def _catalytic_label(cat_res):
    """ Returns the subject residue type followed by its PDB number for a 
    catalytic residue, or the string 'None' if the residue is missing """
    if not cat_res:
        return 'None'
    return cat_res.subject_res_type + str(cat_res.subject_pdb_number)


def _splice_number(splice_res, attribute):
    """ Returns the requested numbering attribute of a splice residue, or an 
    empty string if that splice residue is missing """
    if not splice_res:
        return ''
    return getattr(splice_res, attribute)


# Two header rows: group titles above the individual column names
protease_super_header = ['Structures', '', 
                         'Dali', '', '', '', '', '',
                         'Catalytic', '', '']
protease_header = ['Query', 'Subject', 
                   'Z_score', 'RMSD', 'LALI', 'NRES', '%ID', 'Description',
                   'Nucleophile', 'Histidine', 'Acid']
for i in range(1, 13):
    protease_super_header += ['Loop {}'.format(i)] + [''] * 6
    protease_header += ['Potential Target',  
                        'Query Start', 'Query End', 
                        'Subject Start', 'Subject End', 
                        'Length Change', 'Reasons Rejected']

# Loop attribute that must be true, and the text reported when it is not
rejection_checks = (('is_n_match', 'No N match'),
                    ('is_c_match', 'No C match'),
                    ('is_near_target', 'Not near substrate'),
                    ('is_not_domain', 'Too large'),
                    ('is_different_from_original', 'Similar to original'),
                    ('is_ordered', 'High B factor'),
                    ('is_continuous', 'Residues missing'))

protease_summaries = []
for prot in db_dict.values():
    # Structure names and Dali statistics; commas in the description would 
    # break the hand-built CSV, so they are replaced
    ps = [prot.query_name, prot.subject_name, 
          prot.Z_score, prot.rmsd, prot.lali, prot.nres, prot.pID, 
          prot.description.replace(',', ';')]

    # Catalytic residues, in header order
    for cat_res in (prot.catalytic_nuc, prot.catalytic_his, 
                    prot.catalytic_acid):
        ps.append(_catalytic_label(cat_res))

    for lm in prot.loop_maps.values():
        ps.append(str(lm.is_possible_target))

        # Each column is guarded by the residue it actually reads. The 
        # original tested N_splice_residue for the C-side columns too, which 
        # raised when only the C splice residue was missing and blanked a 
        # valid C residue when only the N one was missing.
        ps.append(_splice_number(lm.N_splice_residue, 'query_pdb_number'))
        ps.append(_splice_number(lm.C_splice_residue, 'query_pdb_number'))
        ps.append(_splice_number(lm.N_splice_residue, 'subject_pdb_number'))
        ps.append(_splice_number(lm.C_splice_residue, 'subject_pdb_number'))

        # A length change of 0 is a real value and is reported; only a 
        # missing (None) value is left blank
        if lm.loop_size_change is None:
            ps.append('')
        else:
            ps.append(lm.loop_size_change)

        # Reasons are only listed for loops that are not possible targets
        if lm.is_possible_target:
            ps.append('')
        else:
            reject_reasons = [text for flag, text in rejection_checks 
                              if not getattr(lm, flag)]
            ps.append('; '.join(reject_reasons))

    ps = [str(i) for i in ps]
    protease_summaries.append(ps)

# Same lines go to the file and to the cell output
csv_lines = [','.join(protease_super_header), ','.join(protease_header)]
csv_lines += [','.join(s) for s in protease_summaries]

with open('protease_database_3.csv', 'w') as w:
    for line in csv_lines:
        w.write(line + '\n')

for line in csv_lines:
    print(line + '\n')

Structures,,Dali,,,,,,Catalytic,,,Loop 1,,,,,,,Loop 2,,,,,,,Loop 3,,,,,,,Loop 4,,,,,,,Loop 5,,,,,,,Loop 6,,,,,,,Loop 7,,,,,,,Loop 8,,,,,,,Loop 9,,,,,,,Loop 10,,,,,,,Loop 11,,,,,,,Loop 12,,,,,,

Query,Subject,Z_score,RMSD,LALI,NRES,%ID,Description,Nucleophile,Histidine,Acid,Potential Target,Query Start,Query End,Subject Start,Subject End,Length Change,Reasons Rejected,Potential Target,Query Start,Query End,Subject Start,Subject End,Length Change,Reasons Rejected,Potential Target,Query Start,Query End,Subject Start,Subject End,Length Change,Reasons Rejected,Potential Target,Query Start,Query End,Subject Start,Subject End,Length Change,Reasons Rejected,Potential Target,Query Start,Query End,Subject Start,Subject End,Length Change,Reasons Rejected,Potential Target,Query Start,Query End,Subject Start,Subject End,Length Change,Reasons Rejected,Potential Target,Query Start,Query End,Subject Start,Subject End,Length Change,Reasons Rejected,Potential Target,Query Start,Query End,Subject Start,S

In [183]:
# Sort every loop in the database into categories and write one CSV per 
# category. Note that the 'target' category holds the loops that ARE 
# possible targets; the other categories hold loops failing that check.
fail_categories = ['distance', 'size', 'rmsd', 'bfactor', 'discontinuous', 
                   'target']
failing_loops = {category: [] for category in fail_categories}
fail_reports = {category: [] for category in fail_categories}
unmatched_loops = []

for prot in db_dict.values():
    for lm in prot.loop_maps.values():
        if lm.is_possible_target:
            failing_loops['target'].append(lm)

        # Loops without flanking matches are only counted, not categorized
        if not lm.flanking_matches_found:
            unmatched_loops.append(lm)
            continue

        # A loop can fail several checks and then appears in several lists
        check_results = (('distance', lm.is_near_target),
                         ('size', lm.is_not_domain),
                         ('rmsd', lm.is_different_from_original),
                         ('bfactor', lm.is_ordered),
                         ('discontinuous', lm.is_continuous))
        for category, passed in check_results:
            if not passed:
                failing_loops[category].append(lm)


def _loop_sort_key(lm):
    """ Orders loops by name, then size change, then source """
    return (lm.loop_name, lm.loop_size_change, lm.loop_source)


for category in failing_loops:
    failing_loops[category].sort(key=_loop_sort_key)

loop_header = ['Loop', 'Source', 'Length', 'Length Change', 
               'Distance to substrate', 'Close substrate residues',
               'Query PDB Start', 'Query PDB End', 
               'Subject PDB Start', 'Subject PDB End', 
               'Query pose Start', 'Query pose End', 
               'Subject pose Start', 'Subject pose End', 
               'Pymol Command']

# PyMOL command template; the {n} fields index into the row built below 
# (0 loop name, 1 source, 6-7 query PDB start/end, 8-9 subject PDB start/end)
com_temp = ''.join([
    "load aligned_pdbs/{1}.pdb; ",
    "create {0}_{1}, {1} and res {8}-{9}; ",
    "delete {1}; ",
    "pair_fit {0}_{1}///{8}+{9}/C+CA+N, tev///{6}+{7}/C+CA+N; ",
    "hide everything, {0}_{1} and res {8}+{9}; "])

for reason, loop_list in failing_loops.items():
    for loop in loop_list:
        n_res = loop.N_splice_residue
        c_res = loop.C_splice_residue
        row = [loop.loop_name,
               loop.loop_source,
               loop.subject_loop_size,
               loop.loop_size_change,
               loop.closest_residue_distance,
               ';'.join(str(i) for i in loop.close_substrate_residues),
               n_res.query_pdb_number,
               c_res.query_pdb_number,
               n_res.subject_pdb_number,
               c_res.subject_pdb_number,
               n_res.query_pose_number,
               c_res.query_pose_number,
               n_res.subject_pose_number,
               c_res.subject_pose_number]
        row = [str(field) for field in row]

        # The command is the last column of the row
        fail_reports[reason].append(row + [com_temp.format(*row)])

for reason, reports in fail_reports.items():
    with open('loops_by_{}_3.csv'.format(reason), 'w') as w:
        w.write(','.join(loop_header) + '\n')
        w.writelines(','.join(row) + '\n' for row in reports)

# Counts per category, unmatched loops, and number of proteases
for reason, loop_list in failing_loops.items():
    print(reason, len(loop_list))
print("Unmatched", len(unmatched_loops))
print(len(db_dict))

distance 589
size 15
rmsd 14
bfactor 194
discontinuous 45
target 457
Unmatched 16
98


In [185]:
# Self-swap test: graft each 1lvm loop, taken from an all-alanine copy of 
# 1lvm, into a fresh copy of the query and dump the results.
subject_name = '1lvm'
subject_pose = pose_from_pdb('aligned_pdbs/{}.pdb'.format(subject_name))
lvm_dat = db_dict['1lvm']

# Alanine-swap the start pose: restrict every residue index of the subject 
# to alanine and repack a copy of the pose
sf = get_fa_scorefxn()
restriction = RestrictAbsentCanonicalAASRLT()
restriction.aas_to_keep('A')
whole_pose_indices = '1-{}'.format(subject_pose.total_residue())
selection = ResidueIndexSelector(whole_pose_indices)
tf = TaskFactory()
tf.push_back(OperateOnResidueSubset(restriction, selection))
pt = tf.create_task_and_apply_taskoperations(subject_pose)
prm = PackRotamersMover(sf, pt)
all_A_pose = Pose(subject_pose)
prm.apply(all_A_pose)
all_A_pose.dump_pdb('self_swaps_1lvm_test/1lvm_all_A.pdb')

# Put in loops
for loop_name, loop in lvm_dat.loop_maps.items():
    print(loop_name)

    # Fresh copy of the query to receive this loop
    swapped_pose = Pose(query_pose)

    # Piece to insert: subject residues between the outer ends of the two 
    # overlap regions, cut from the all-alanine pose
    piece_start = loop.N_outside_overlap_residue.subject_pose_number
    piece_end = loop.C_outside_overlap_residue.subject_pose_number
    loop_pose = Pose(all_A_pose, piece_start, piece_end)
    loop_pose.dump_pdb('self_swaps_1lvm_test/loop_{}_loop_only.pdb'.format(loop_name))

    # Configure the graft: insertion region on the query, then the piece 
    # with the sizes of its N- and C-side overlaps
    insert_start = loop.N_splice_residue.query_pose_number
    insert_end = loop.C_splice_residue.query_pose_number
    ccdgm = CCDEndsGraftMover()
    ccdgm.set_insert_region(insert_start, insert_end)
    ccdgm.set_piece(loop_pose, loop.N_overlap_size, loop.C_overlap_size)

    # Graft, score, and save the result
    ccdgm.apply(swapped_pose)
    sf(swapped_pose)
    swapped_pose.dump_pdb('self_swaps_1lvm_test/loop_{}_tev_insert.pdb'.format(loop_name))


1
2
3
4
5
6
7
8
9
10
11
12


# Testing 1lvm alignments

In [27]:
# Directory prefix for the per-frame loop PDBs dumped in the next cell
dump_dir = '1lvm_loops_all_frames/'

# Recompute the 1lvm data directly (no report file is passed here)
subject_name = '1lvm'
subject_pdb_path = 'aligned_pdbs/{}.pdb'.format(subject_name)
subject_pose = pose_from_pdb(subject_pdb_path)
lvm_dat = protease_info(
    query_name, subject_name, dali_file, align_file,
    query_pose, subject_pose, tev_cat_res, tev_map)

In [31]:
# For each loop, rerun update_splice_sites with dump=True so that every 
# candidate frame is written under dump_dir.
for loop_name, loop in lvm_dat.loop_maps.items():
    print(loop_name)

    # NOTE(review): loop 1 is skipped unconditionally, so the N-terminal 
    # handling below is never reached for it. Confirm whether this skip is 
    # intentional or a leftover from an interrupted run.
    if loop_name == 1:
        continue

    # Neighboring regions in tev_map: the terminal regions 'N' and 'C' 
    # stand in for the missing neighbor of the first and last loops, all 
    # interior loops use the numbered loops on either side
    prev_region = 'N' if loop_name == 1 else loop_name - 1
    next_region = 'C' if loop_name == 12 else loop_name + 1

    # Bounds run from just after the previous region to just before the next
    n_bound = tev_map[prev_region][-1] + 1
    c_bound = tev_map[next_region][0] - 1

    # Second element of the partition (presumably the residues between the 
    # two bounds -- confirm against partition_aligned_residues_list)
    partitions = partition_aligned_residues_list(lvm_dat.aligned_residues, 
            n_bound, c_bound)
    ar_subset = partitions[1]

    # Partition that subset again around the loop itself, in query PDB numbers
    l_range = tev_map[loop_name]
    loop_first, loop_last = l_range[0], l_range[-1]
    res_sets = partition_aligned_residues_list(ar_subset,  
            loop_first, loop_last, mode='pdb', target='query')

    # Output name prefix for the dumped frames of this loop
    dec_name = dump_dir + '1lvm_loop{}'.format(loop_name)
    loop.update_splice_sites(res_sets, query_pose, subject_pose, 
            dump=True, out_name=dec_name)

1
2
3
4
5
6
7
8
9
10
11
12


In [31]:
# Graft every dumped 1lvm loop frame into a copy of the query and save the 
# spliced models.

def _parse_frame_name(model_name):
    """
    Extracts the overlap bounds from a frame name of the form 
    <prefix>_N<a>-<b>_C<c>-<d> and returns ([a, b], [c, d]) as integers.

    Only the last two underscore-separated fields are read, so the prefix 
    may itself contain the letters N or C (for example an upper-case PDB 
    ID), which the earlier first-'N'/first-'C' lookup could not handle. 
    Raises ValueError if the name does not end in the expected fields.
    """
    fields = model_name.rsplit('_', 2)
    if len(fields) != 3 or fields[1][:1] != 'N' or fields[2][:1] != 'C':
        raise ValueError(
            'Unexpected frame name: {}'.format(model_name))
    n_side = [int(i) for i in fields[1][1:].split('-')]
    c_side = [int(i) for i in fields[2][1:].split('-')]
    return n_side, c_side


lvm_pdbs = glob('1lvm_loops_all_frames/*.pdb')
lvm_pdbs.sort()

sf = get_fa_scorefxn()

# Parallel lists of query and subject pose numbers for the aligned residues 
# that have both; used to translate subject numbers into query numbers
tev_res = []
lvm_res = []
for ar in lvm_dat.aligned_residues:
    if ar.query_pose_number and ar.subject_pose_number:
        tev_res.append(ar.query_pose_number)
        lvm_res.append(ar.subject_pose_number)

assert len(tev_res) == len(lvm_res)

for pdb in lvm_pdbs:
    print(pdb)
    model_name = pdb.replace('1lvm_loops_all_frames/','').replace('.pdb', '')

    # Bounds in the file name are looked up in lvm_res, i.e. treated as 
    # subject pose numbers
    n_side_list, c_side_list = _parse_frame_name(model_name)

    # Splice sites on the query: inner end of the N overlap and inner end of 
    # the C overlap, translated from subject to query numbering
    n_splice = tev_res[lvm_res.index(n_side_list[1])] 
    n_side_length = n_side_list[1] - n_side_list[0] + 1
    c_splice = tev_res[lvm_res.index(c_side_list[0])] 
    c_side_length = c_side_list[1] - c_side_list[0] + 1

    print(n_side_length, n_splice, c_splice, c_side_length)

    swapped_pose = Pose(query_pose)
    loop_pose = pose_from_pdb(pdb)

    try:
        # Setting up CCDEndsGraftMover
        ccdgm = CCDEndsGraftMover()
        ccdgm.set_insert_region(n_splice, c_splice)
        ccdgm.set_piece(loop_pose, n_side_length, c_side_length)

        # Applying mover and scoring and dumping the pose
        ccdgm.apply(swapped_pose)
        sf(swapped_pose)
        swapped_pose.dump_pdb('1lvm_loops_all_frames_spliced/{}.pdb'.format(model_name))
        
    except Exception as e: 
        # A failed graft is reported and the remaining frames are still tried
        print(e)  

7292
7292


In [57]:
# Score every spliced model: discard discontinuous ones, compare the rest to 
# the query over the region around the swapped loop, and keep the lowest 
# value per loop.
spliced_pdbs = sorted(glob('1lvm_loops_all_frames_spliced/*.pdb'))

# Parallel lists of query PDB and pose numbers for residues that have both; 
# used to translate tev_map (PDB numbers) into pose numbers
query_numbering = [(ar.query_pdb_number, ar.query_pose_number) 
                   for ar in lvm_dat.aligned_residues 
                   if ar.query_pdb_number and ar.query_pose_number]
tev_pdb = [pdb_number for pdb_number, _ in query_numbering]
tev_pose = [pose_number for _, pose_number in query_numbering]
        
assert len(tev_pdb) == len(tev_pose)

discontinuous = []
# 1000 is a placeholder larger than any expected value; loops 1 through 12
best_rmsd = {loop_index: 1000 for loop_index in range(1, 13)}
best_model = {loop_index: None for loop_index in range(1, 13)}

for pdb in spliced_pdbs:    
    model_name = pdb.replace('1lvm_loops_all_frames_spliced/','').replace('.pdb', '')
    swapped_pose = pose_from_pdb(pdb)
    
    # First element of the check result is True when no chain break was 
    # found in the first chain of the model
    first_chain = swapped_pose.split_by_chain()[1]
    is_continuous = check_pose_continuity(first_chain)[0]
    if not is_continuous:
        print(model_name, "Discontinuous")
        discontinuous.append(pdb)
        continue
    
    # Second underscore-separated field of the name is 'loop<number>'
    loop_field = model_name.split('_')[1]
    loop_num = int(loop_field.replace('loop',''))
    
    # Neighboring regions in tev_map; the terminal regions replace the 
    # missing neighbor of the first and last loops
    prev_region = 'N' if loop_num == 1 else loop_num - 1
    next_region = 'C' if loop_num == 12 else loop_num + 1
    n_bound = tev_map[prev_region][-1] + 1
    c_bound = tev_map[next_region][0] - 1
        
    # PDB bounds translated to query pose numbers
    n_pose = tev_pose[tev_pdb.index(n_bound)]
    c_pose = tev_pose[tev_pdb.index(c_bound)]
    res_range = range(n_pose, c_pose + 1)
    
    # The same pose numbers are used for the query and the spliced model
    rmsd = align_protein_sections(query_pose, res_range, swapped_pose, 
                                  res_range, mode='BB')
    print(model_name, rmsd)
    
    if rmsd < best_rmsd[loop_num]:
        best_rmsd[loop_num], best_model[loop_num] = rmsd, model_name

print('\n')
print('Bests:')
for loop_index in range(1, 13):
    print(loop_index, best_model[loop_index], best_rmsd[loop_index])

print('\n')
print('Broken:')
for broken_pdb in discontinuous:
    print(broken_pdb)

1lvm_loop10_N147-147_C160-161 0.3638447281562183
1lvm_loop10_N147-147_C160-162 0.3707847389519249
1lvm_loop10_N147-147_C160-163 0.3653528527568605
1lvm_loop10_N147-147_C160-164 0.36236189434665744
1lvm_loop10_N147-147_C160-165 0.36514299694475966
1lvm_loop10_N147-147_C161-161 Discontinuous
1lvm_loop10_N147-147_C161-162 0.35655008025088
1lvm_loop10_N147-147_C161-163 0.3708444733255258
1lvm_loop10_N147-147_C161-164 0.37038542388196194
1lvm_loop10_N147-147_C161-165 0.36578583059791203
1lvm_loop10_N147-147_C162-163 0.39008394976780175
1lvm_loop10_N147-147_C162-164 0.3840277719629923
1lvm_loop10_N147-147_C162-165 0.37224389682166203
1lvm_loop10_N147-147_C163-164 0.5370539305534152
1lvm_loop10_N147-147_C163-165 0.5439042357185433
1lvm_loop10_N147-147_C164-164 0.3604396936814916
1lvm_loop10_N147-147_C164-165 0.3773607559343424
1lvm_loop10_N147-148_C160-160 0.3429698075438933
1lvm_loop10_N147-148_C160-161 Discontinuous
1lvm_loop10_N147-148_C160-162 0.3519896565126097
1lvm_loop10_N147-148_C160-

1lvm_loop10_N149-150_C161-165 0.3343856550282524
1lvm_loop10_N149-150_C162-162 0.3706896651520576
1lvm_loop10_N149-150_C162-163 0.3514859483549047
1lvm_loop10_N149-150_C162-164 0.36594451252684457
1lvm_loop10_N149-150_C162-165 Discontinuous
1lvm_loop10_N149-150_C163-163 0.5267835479144398
1lvm_loop10_N149-150_C163-164 0.533968386232345
1lvm_loop10_N149-150_C163-165 0.5274868935665868
1lvm_loop10_N149-150_C164-164 0.37135936253551327
1lvm_loop10_N149-150_C164-165 0.3611050672662238
1lvm_loop10_N149-150_C165-165 0.3807183478867469
1lvm_loop10_N150-150_C160-160 Discontinuous
1lvm_loop10_N150-150_C160-161 0.3459546792971706
1lvm_loop10_N150-150_C160-162 0.3299795422405638
1lvm_loop10_N150-150_C160-163 0.31473504758960635
1lvm_loop10_N150-150_C160-164 0.31254759390702297
1lvm_loop10_N150-150_C160-165 0.31990489121944554
1lvm_loop10_N150-150_C161-162 0.3199876481097529
1lvm_loop10_N150-150_C161-163 0.3137907864096833
1lvm_loop10_N150-150_C161-164 0.33583777232992595
1lvm_loop10_N150-150_C161

1lvm_loop11_N160-162_C170-172 0.1229249976753897
1lvm_loop11_N160-162_C170-173 Discontinuous
1lvm_loop11_N160-162_C170-174 Discontinuous
1lvm_loop11_N160-162_C170-175 Discontinuous
1lvm_loop11_N160-162_C170-176 0.12294614065253505
1lvm_loop11_N160-162_C170-177 Discontinuous
1lvm_loop11_N160-162_C170-178 Discontinuous
1lvm_loop11_N160-162_C170-179 Discontinuous
1lvm_loop11_N160-162_C171-171 0.44500705754902764
1lvm_loop11_N160-162_C171-172 0.12822881359299979
1lvm_loop11_N160-162_C171-173 0.4551368844405072
1lvm_loop11_N160-162_C171-174 0.44553053213113053
1lvm_loop11_N160-162_C171-175 0.13382118626289513
1lvm_loop11_N160-162_C171-176 0.43577052073526484
1lvm_loop11_N160-162_C171-177 0.4349522453480979
1lvm_loop11_N160-162_C171-178 0.4335609391317871
1lvm_loop11_N160-162_C171-179 0.12862377176998596
1lvm_loop11_N160-162_C172-172 0.14565639847682366
1lvm_loop11_N160-162_C172-173 0.14631602314402078
1lvm_loop11_N160-162_C172-174 Discontinuous
1lvm_loop11_N160-162_C172-175 Discontinuous
1l

1lvm_loop11_N160-164_C174-177 0.16825499361689375
1lvm_loop11_N160-164_C174-178 0.15506827645481033
1lvm_loop11_N160-164_C174-179 0.16261491576373438
1lvm_loop11_N160-164_C175-175 Discontinuous
1lvm_loop11_N160-164_C175-176 0.19664099470354962
1lvm_loop11_N160-164_C175-177 0.18329235756683204
1lvm_loop11_N160-164_C175-178 0.20536739394687228
1lvm_loop11_N160-164_C175-179 Discontinuous
1lvm_loop11_N160-164_C176-176 0.2682820391011055
1lvm_loop11_N160-164_C176-177 0.2581503625461831
1lvm_loop11_N160-164_C176-178 0.1734044532986948
1lvm_loop11_N160-164_C176-179 0.17863415828972276
1lvm_loop11_N160-164_C177-177 0.19276287840514755
1lvm_loop11_N160-164_C177-178 0.182243277739744
1lvm_loop11_N160-164_C177-179 0.19472427959727429
1lvm_loop11_N160-164_C178-178 0.2771343549248149
1lvm_loop11_N160-164_C178-179 0.1907806690570641
1lvm_loop11_N160-164_C179-179 Discontinuous
1lvm_loop11_N160-165_C169-169 0.10132740636624928
1lvm_loop11_N160-165_C169-170 0.10032997768380884
1lvm_loop11_N160-165_C169

1lvm_loop11_N161-162_C171-177 0.13020219453380968
1lvm_loop11_N161-162_C171-178 0.4290639201148215
1lvm_loop11_N161-162_C171-179 0.1316831166199306
1lvm_loop11_N161-162_C172-172 0.13433902740838777
1lvm_loop11_N161-162_C172-173 0.21146558266596008
1lvm_loop11_N161-162_C172-174 0.14461600192850707
1lvm_loop11_N161-162_C172-175 0.14878553929810856
1lvm_loop11_N161-162_C172-176 Discontinuous
1lvm_loop11_N161-162_C172-177 0.15751541928011858
1lvm_loop11_N161-162_C172-178 Discontinuous
1lvm_loop11_N161-162_C172-179 Discontinuous
1lvm_loop11_N161-162_C173-173 0.17718752127318713
1lvm_loop11_N161-162_C173-174 0.13826106263583446
1lvm_loop11_N161-162_C173-175 0.15093253523234937
1lvm_loop11_N161-162_C173-176 0.152903045110568
1lvm_loop11_N161-162_C173-177 0.1507783185251459
1lvm_loop11_N161-162_C173-178 0.13935269252239044
1lvm_loop11_N161-162_C173-179 0.14939499693822966
1lvm_loop11_N161-162_C174-174 0.19689761863367972
1lvm_loop11_N161-162_C174-175 0.2008362049308299
1lvm_loop11_N161-162_C17

1lvm_loop11_N161-164_C177-179 0.20337546927016484
1lvm_loop11_N161-164_C178-178 0.27189101308748415
1lvm_loop11_N161-164_C178-179 0.21868196184822797
1lvm_loop11_N161-164_C179-179 Discontinuous
1lvm_loop11_N161-165_C169-169 0.10922906768337244
1lvm_loop11_N161-165_C169-170 0.10007475886737839
1lvm_loop11_N161-165_C169-171 0.09562292951377722
1lvm_loop11_N161-165_C169-172 0.09525597911167255
1lvm_loop11_N161-165_C169-173 0.10648753816169446
1lvm_loop11_N161-165_C169-174 0.10044086747954711
1lvm_loop11_N161-165_C169-175 0.09699005393481032
1lvm_loop11_N161-165_C169-176 0.09119579311918528
1lvm_loop11_N161-165_C169-177 0.08581706300314333
1lvm_loop11_N161-165_C169-178 0.0891585454825911
1lvm_loop11_N161-165_C169-179 0.09251844960761042
1lvm_loop11_N161-165_C170-170 0.09981754607150839
1lvm_loop11_N161-165_C170-171 0.09316406324456994
1lvm_loop11_N161-165_C170-172 0.09023393539606094
1lvm_loop11_N161-165_C170-173 Discontinuous
1lvm_loop11_N161-165_C170-174 0.09635368071007916
1lvm_loop11_N

1lvm_loop11_N162-163_C173-173 0.14948916961033895
1lvm_loop11_N162-163_C173-174 0.1387020347319875
1lvm_loop11_N162-163_C173-175 0.15333480547986483
1lvm_loop11_N162-163_C173-176 0.15268395625533995
1lvm_loop11_N162-163_C173-177 0.15259014063815374
1lvm_loop11_N162-163_C173-178 0.15781712010659527
1lvm_loop11_N162-163_C173-179 0.17121960038730763
1lvm_loop11_N162-163_C174-174 0.22789151880223704
1lvm_loop11_N162-163_C174-175 0.20780160878405107
1lvm_loop11_N162-163_C174-176 0.18044280621406766
1lvm_loop11_N162-163_C174-177 0.1589600595472558
1lvm_loop11_N162-163_C174-178 0.15544187687791222
1lvm_loop11_N162-163_C174-179 0.16402747852505542
1lvm_loop11_N162-163_C175-175 Discontinuous
1lvm_loop11_N162-163_C175-176 Discontinuous
1lvm_loop11_N162-163_C175-177 0.18008919505163856
1lvm_loop11_N162-163_C175-178 Discontinuous
1lvm_loop11_N162-163_C175-179 0.2892366270691714
1lvm_loop11_N162-163_C176-176 0.2667733084601976
1lvm_loop11_N162-163_C176-177 0.22728748009613625
1lvm_loop11_N162-163_C

1lvm_loop11_N163-163_C169-177 0.1194211590191533
1lvm_loop11_N163-163_C169-178 0.12148098371634514
1lvm_loop11_N163-163_C169-179 0.12835598010330806
1lvm_loop11_N163-163_C170-170 Discontinuous
1lvm_loop11_N163-163_C170-171 0.10713589991138131
1lvm_loop11_N163-163_C170-172 0.12044053147975178
1lvm_loop11_N163-163_C170-173 0.14933341240776798
1lvm_loop11_N163-163_C170-174 0.14774111024323933
1lvm_loop11_N163-163_C170-175 0.14196755507531447
1lvm_loop11_N163-163_C170-176 0.1318198644972922
1lvm_loop11_N163-163_C170-177 0.12318474407172499
1lvm_loop11_N163-163_C170-178 0.1259866223033891
1lvm_loop11_N163-163_C170-179 0.12944351338645665
1lvm_loop11_N163-163_C171-171 Discontinuous
1lvm_loop11_N163-163_C171-172 0.1150707004619183
1lvm_loop11_N163-163_C171-173 0.45318111168469966
1lvm_loop11_N163-163_C171-174 0.14169743952013603
1lvm_loop11_N163-163_C171-175 0.14159255418372466
1lvm_loop11_N163-163_C171-176 0.4347994374125846
1lvm_loop11_N163-163_C171-177 0.43086026812249945
1lvm_loop11_N163-

1lvm_loop11_N163-165_C174-174 0.15399707595336198
1lvm_loop11_N163-165_C174-175 0.14505416855799855
1lvm_loop11_N163-165_C174-176 0.1472336040434203
1lvm_loop11_N163-165_C174-177 0.1407411457652711
1lvm_loop11_N163-165_C174-178 0.1408376912137495
1lvm_loop11_N163-165_C174-179 0.14916785079046027
1lvm_loop11_N163-165_C175-175 0.16737814251922767
1lvm_loop11_N163-165_C175-176 0.17513106266933828
1lvm_loop11_N163-165_C175-177 0.1625370260240475
1lvm_loop11_N163-165_C175-178 0.15569667345270957
1lvm_loop11_N163-165_C175-179 0.1665301432842617
1lvm_loop11_N163-165_C176-176 0.25747114353424033
1lvm_loop11_N163-165_C176-177 0.24178491779323005
1lvm_loop11_N163-165_C176-178 0.2308221387351952
1lvm_loop11_N163-165_C176-179 0.2372499068213678
1lvm_loop11_N163-165_C177-177 0.20246556945620126
1lvm_loop11_N163-165_C177-178 0.172980893831462
1lvm_loop11_N163-165_C177-179 0.18115891344029655
1lvm_loop11_N163-165_C178-178 0.21074657431775912
1lvm_loop11_N163-165_C178-179 0.2042457254662019
1lvm_loop1

1lvm_loop11_N165-165_C171-173 0.1324309432227447
1lvm_loop11_N165-165_C171-174 0.1043575156817012
1lvm_loop11_N165-165_C171-175 0.4295513324948297
1lvm_loop11_N165-165_C171-176 0.08756754453842593
1lvm_loop11_N165-165_C171-177 0.08929232330924598
1lvm_loop11_N165-165_C171-178 0.09209883801247856
1lvm_loop11_N165-165_C171-179 0.10256836151930422
1lvm_loop11_N165-165_C172-172 Discontinuous
1lvm_loop11_N165-165_C172-173 0.14206065726166645
1lvm_loop11_N165-165_C172-174 0.10986245922202187
1lvm_loop11_N165-165_C172-175 0.10307550560717679
1lvm_loop11_N165-165_C172-176 0.09505233661472844
1lvm_loop11_N165-165_C172-177 0.0943946232486879
1lvm_loop11_N165-165_C172-178 0.09656754863020069
1lvm_loop11_N165-165_C172-179 0.11103222751843467
1lvm_loop11_N165-165_C173-174 0.18531244503421562
1lvm_loop11_N165-165_C173-175 0.10931332363835818
1lvm_loop11_N165-165_C173-176 0.09854813601242295
1lvm_loop11_N165-165_C173-177 0.09673029471021992
1lvm_loop11_N165-165_C173-178 0.10839319433935957
1lvm_loop1

1lvm_loop12_N169-175_C184-189 0.16859075288892383
1lvm_loop12_N169-175_C185-185 0.42859442598448844
1lvm_loop12_N169-175_C185-186 0.43907696938485863
1lvm_loop12_N169-175_C185-187 0.1518653104281617
1lvm_loop12_N169-175_C185-188 0.4055911565655323
1lvm_loop12_N169-175_C185-189 0.14887964314805563
1lvm_loop12_N169-175_C186-186 0.1543848205725493
1lvm_loop12_N169-175_C186-187 Discontinuous
1lvm_loop12_N169-175_C186-188 0.1605583227777774
1lvm_loop12_N169-175_C186-189 0.15547899134995719
1lvm_loop12_N169-175_C187-187 Discontinuous
1lvm_loop12_N169-175_C187-188 Discontinuous
1lvm_loop12_N169-175_C187-189 Discontinuous
1lvm_loop12_N169-175_C188-188 0.473221133271047
1lvm_loop12_N169-175_C188-189 0.18330693640682535
1lvm_loop12_N169-175_C189-189 0.17845650435865865
1lvm_loop12_N169-176_C184-184 0.15445387560558707
1lvm_loop12_N169-176_C184-185 0.1504339501254533
1lvm_loop12_N169-176_C184-186 0.1510698617636846
1lvm_loop12_N169-176_C184-187 0.16129760419381767
1lvm_loop12_N169-176_C184-188 0.

1lvm_loop12_N170-173_C185-187 0.4146242623452966
1lvm_loop12_N170-173_C185-188 0.17112931896343056
1lvm_loop12_N170-173_C185-189 0.17047509781845457
1lvm_loop12_N170-173_C186-186 0.15765088542993505
1lvm_loop12_N170-173_C186-187 0.1634219843278842
1lvm_loop12_N170-173_C186-188 0.17365884293321837
1lvm_loop12_N170-173_C186-189 0.16274281422639586
1lvm_loop12_N170-173_C187-187 0.179283145903576
1lvm_loop12_N170-173_C187-188 Discontinuous
1lvm_loop12_N170-173_C187-189 0.17598704675893614
1lvm_loop12_N170-173_C188-188 0.18514824241174602
1lvm_loop12_N170-173_C188-189 0.4769028679977431
1lvm_loop12_N170-173_C189-189 0.1851964275917133
1lvm_loop12_N170-174_C184-184 0.1473656213381516
1lvm_loop12_N170-174_C184-185 0.14695545393181825
1lvm_loop12_N170-174_C184-186 0.14743126486684355
1lvm_loop12_N170-174_C184-187 0.14858393778776663
1lvm_loop12_N170-174_C184-188 0.15740938990419906
1lvm_loop12_N170-174_C184-189 0.15350718804244526
1lvm_loop12_N170-174_C185-185 0.433872996440034
1lvm_loop12_N17

1lvm_loop12_N171-172_C185-189 0.18264912632843947
1lvm_loop12_N171-172_C186-186 0.1885812840181704
1lvm_loop12_N171-172_C186-187 Discontinuous
1lvm_loop12_N171-172_C186-188 Discontinuous
1lvm_loop12_N171-172_C186-189 0.18432132528559325
1lvm_loop12_N171-172_C187-187 Discontinuous
1lvm_loop12_N171-172_C187-188 Discontinuous
1lvm_loop12_N171-172_C187-189 0.19429961028684975
1lvm_loop12_N171-172_C188-188 0.19823553151106607
1lvm_loop12_N171-172_C188-189 0.4820441794492208
1lvm_loop12_N171-172_C189-189 0.195011174080614
1lvm_loop12_N171-173_C184-184 0.20073352780011117
1lvm_loop12_N171-173_C184-185 0.18836893616102396
1lvm_loop12_N171-173_C184-186 0.17865194985060365
1lvm_loop12_N171-173_C184-187 0.19037779019251214
1lvm_loop12_N171-173_C184-188 0.19015217589356015
1lvm_loop12_N171-173_C184-189 Discontinuous
1lvm_loop12_N171-173_C185-185 0.1911295467198894
1lvm_loop12_N171-173_C185-186 0.1747388373013773
1lvm_loop12_N171-173_C185-187 0.18401478000127997
1lvm_loop12_N171-173_C185-188 0.1876

1lvm_loop12_N172-172_C186-188 Discontinuous
1lvm_loop12_N172-172_C186-189 0.18959828522131283
1lvm_loop12_N172-172_C187-187 Discontinuous
1lvm_loop12_N172-172_C187-188 0.19401144773274162
1lvm_loop12_N172-172_C187-189 0.19274542188095645
1lvm_loop12_N172-172_C188-189 0.19433354925191715
1lvm_loop12_N172-172_C189-189 0.20582017853629272
1lvm_loop12_N172-173_C184-184 0.17628252496693547
1lvm_loop12_N172-173_C184-185 0.1746459265754475
1lvm_loop12_N172-173_C184-186 0.16827004353996308
1lvm_loop12_N172-173_C184-187 0.1825386146370792
1lvm_loop12_N172-173_C184-188 0.18783694468322043
1lvm_loop12_N172-173_C184-189 0.17971270429190117
1lvm_loop12_N172-173_C185-185 0.17674526048530736
1lvm_loop12_N172-173_C185-186 0.4129973511526639
1lvm_loop12_N172-173_C185-187 0.17214410486722778
1lvm_loop12_N172-173_C185-188 Discontinuous
1lvm_loop12_N172-173_C185-189 0.17586747842627323
1lvm_loop12_N172-173_C186-186 0.16559649749434383
1lvm_loop12_N172-173_C186-187 0.17822031034919292
1lvm_loop12_N172-173_

1lvm_loop12_N173-173_C187-188 0.22128045671873253
1lvm_loop12_N173-173_C187-189 0.20258565116887145
1lvm_loop12_N173-173_C188-188 0.19563714466905305
1lvm_loop12_N173-173_C188-189 Discontinuous
1lvm_loop12_N173-174_C184-184 0.1745410520276276
1lvm_loop12_N173-174_C184-185 0.16780451566087573
1lvm_loop12_N173-174_C184-186 0.14953830708792284
1lvm_loop12_N173-174_C184-187 0.14648766226245918
1lvm_loop12_N173-174_C184-188 0.16255194684855792
1lvm_loop12_N173-174_C184-189 0.14903125645434753
1lvm_loop12_N173-174_C185-185 0.4080382386444089
1lvm_loop12_N173-174_C185-186 0.43592826766010695
1lvm_loop12_N173-174_C185-187 0.43402450971807566
1lvm_loop12_N173-174_C185-188 0.16793768769539302
1lvm_loop12_N173-174_C185-189 0.15396184193568888
1lvm_loop12_N173-174_C186-186 0.20727098388164794
1lvm_loop12_N173-174_C186-187 0.22252025716190926
1lvm_loop12_N173-174_C186-188 0.21654071905981617
1lvm_loop12_N173-174_C186-189 Discontinuous
1lvm_loop12_N173-174_C187-187 0.21401987051386662
1lvm_loop12_N1

1lvm_loop12_N174-176_C184-185 0.1766041535228517
1lvm_loop12_N174-176_C184-186 0.14402559239062504
1lvm_loop12_N174-176_C184-187 0.13853934334031834
1lvm_loop12_N174-176_C184-188 0.14638429945732565
1lvm_loop12_N174-176_C184-189 0.15137606601067463
1lvm_loop12_N174-176_C185-185 0.16454861426212322
1lvm_loop12_N174-176_C185-186 0.4080275569877624
1lvm_loop12_N174-176_C185-187 0.1411526612160218
1lvm_loop12_N174-176_C185-188 0.41052975657653495
1lvm_loop12_N174-176_C185-189 0.14130648475097032
1lvm_loop12_N174-176_C186-186 0.18027684206912045
1lvm_loop12_N174-176_C186-187 0.17581703274892363
1lvm_loop12_N174-176_C186-188 0.1843639505899943
1lvm_loop12_N174-176_C186-189 0.16443682722862543
1lvm_loop12_N174-176_C187-187 Discontinuous
1lvm_loop12_N174-176_C187-188 0.16752195984305238
1lvm_loop12_N174-176_C187-189 Discontinuous
1lvm_loop12_N174-176_C188-188 0.16655540958361875
1lvm_loop12_N174-176_C188-189 0.16765955340499056
1lvm_loop12_N174-176_C189-189 0.16363096165976918
1lvm_loop12_N174

1lvm_loop12_N175-179_C184-189 0.11243492916886269
1lvm_loop12_N175-179_C185-185 Discontinuous
1lvm_loop12_N175-179_C185-186 0.39812561426025106
1lvm_loop12_N175-179_C185-187 0.12323404553353097
1lvm_loop12_N175-179_C185-188 0.4364382703804932
1lvm_loop12_N175-179_C185-189 0.1209177169617759
1lvm_loop12_N175-179_C186-186 Discontinuous
1lvm_loop12_N175-179_C186-187 0.13355854466095451
1lvm_loop12_N175-179_C186-188 0.12981572390181842
1lvm_loop12_N175-179_C186-189 0.12243429930467724
1lvm_loop12_N175-179_C187-187 0.1384034198981135
1lvm_loop12_N175-179_C187-188 0.13932634203323674
1lvm_loop12_N175-179_C187-189 0.12831074219195637
1lvm_loop12_N175-179_C188-188 0.13508548589596245
1lvm_loop12_N175-179_C188-189 0.13566521666449344
1lvm_loop12_N175-179_C189-189 0.1363598099894615
1lvm_loop12_N176-176_C184-184 0.18913624613640873
1lvm_loop12_N176-176_C184-185 0.28565269343071814
1lvm_loop12_N176-176_C184-186 0.11070882196524318
1lvm_loop12_N176-176_C184-187 0.1414755792607428
1lvm_loop12_N176-

1lvm_loop12_N178-178_C185-189 0.12207509364285592
1lvm_loop12_N178-178_C186-186 Discontinuous
1lvm_loop12_N178-178_C186-187 0.12203020981814429
1lvm_loop12_N178-178_C186-188 Discontinuous
1lvm_loop12_N178-178_C186-189 0.1268759063781211
1lvm_loop12_N178-178_C187-188 0.13151232119985687
1lvm_loop12_N178-178_C187-189 0.13159862363297428
1lvm_loop12_N178-178_C188-189 0.47478505853430114
1lvm_loop12_N178-179_C184-184 0.102315700555331
1lvm_loop12_N178-179_C184-185 0.08866286701202916
1lvm_loop12_N178-179_C184-186 0.08940111010723714
1lvm_loop12_N178-179_C184-187 0.08902067512816612
1lvm_loop12_N178-179_C184-188 0.09697362656292684
1lvm_loop12_N178-179_C184-189 0.09884586586983264
1lvm_loop12_N178-179_C185-185 0.4195540374725368
1lvm_loop12_N178-179_C185-186 0.10778729173568365
1lvm_loop12_N178-179_C185-187 0.12224206128059299
1lvm_loop12_N178-179_C185-188 0.11451989779569965
1lvm_loop12_N178-179_C185-189 0.10491995714211491
1lvm_loop12_N178-179_C186-186 0.1122531682341064
1lvm_loop12_N178-

1lvm_loop1_N26-28_C39-45 0.31895826119990506
1lvm_loop1_N26-28_C40-40 0.2579870401741319
1lvm_loop1_N26-28_C40-41 0.2581373359297389
1lvm_loop1_N26-28_C40-42 0.26132636301493906
1lvm_loop1_N26-28_C40-43 0.2846378903920626
1lvm_loop1_N26-28_C40-44 0.3126727055859501
1lvm_loop1_N26-28_C40-45 0.3188670278650893
1lvm_loop1_N26-28_C41-41 Discontinuous
1lvm_loop1_N26-28_C41-42 Discontinuous
1lvm_loop1_N26-28_C41-43 Discontinuous
1lvm_loop1_N26-28_C41-44 0.30843945363552483
1lvm_loop1_N26-28_C41-45 Discontinuous
1lvm_loop1_N26-28_C42-42 0.3515734001408999
1lvm_loop1_N26-28_C42-43 0.2947065956817553
1lvm_loop1_N26-28_C42-44 0.37346227921654124
1lvm_loop1_N26-28_C42-45 0.31946139543873064
1lvm_loop1_N26-28_C43-43 0.2916353440430448
1lvm_loop1_N26-28_C43-44 0.3114030365969821
1lvm_loop1_N26-28_C43-45 0.3221721402107282
1lvm_loop1_N26-28_C44-44 0.45257910758522474
1lvm_loop1_N26-28_C44-45 0.476691319850775
1lvm_loop1_N26-28_C45-45 0.3396268633394543
1lvm_loop1_N26-29_C36-36 0.1712080550085976
1lv

1lvm_loop1_N26-32_C36-41 0.20318272981226268
1lvm_loop1_N26-32_C36-42 0.2074444130013299
1lvm_loop1_N26-32_C36-43 0.2146038960249341
1lvm_loop1_N26-32_C36-44 0.2288129155387407
1lvm_loop1_N26-32_C36-45 0.23864802024957985
1lvm_loop1_N26-32_C37-37 0.2151797491415803
1lvm_loop1_N26-32_C37-38 0.23386851062904723
1lvm_loop1_N26-32_C37-39 0.22566806357703098
1lvm_loop1_N26-32_C37-40 0.23310541766789408
1lvm_loop1_N26-32_C37-41 0.23050981825769107
1lvm_loop1_N26-32_C37-42 0.22723980604315114
1lvm_loop1_N26-32_C37-43 0.23875164560404236
1lvm_loop1_N26-32_C37-44 0.24697458842647835
1lvm_loop1_N26-32_C37-45 0.2533621072765026
1lvm_loop1_N26-32_C38-38 Discontinuous
1lvm_loop1_N26-32_C38-39 0.5178799609572412
1lvm_loop1_N26-32_C38-40 0.49754915693343854
1lvm_loop1_N26-32_C38-41 Discontinuous
1lvm_loop1_N26-32_C38-42 Discontinuous
1lvm_loop1_N26-32_C38-43 Discontinuous
1lvm_loop1_N26-32_C38-44 0.5291405993908582
1lvm_loop1_N26-32_C38-45 0.2958051731454209
1lvm_loop1_N26-32_C39-39 0.538188937353376

1lvm_loop1_N27-28_C42-44 0.3256606534335746
1lvm_loop1_N27-28_C42-45 0.32544548675049845
1lvm_loop1_N27-28_C43-43 0.29527153793965283
1lvm_loop1_N27-28_C43-44 0.32190563839949704
1lvm_loop1_N27-28_C43-45 0.32761840088088245
1lvm_loop1_N27-28_C44-44 Discontinuous
1lvm_loop1_N27-28_C44-45 0.4791605696442163
1lvm_loop1_N27-28_C45-45 0.3455143684408512
1lvm_loop1_N27-29_C36-36 0.18668204941999192
1lvm_loop1_N27-29_C36-37 0.2280622017213679
1lvm_loop1_N27-29_C36-38 0.2490210949309358
1lvm_loop1_N27-29_C36-39 0.230723728352981
1lvm_loop1_N27-29_C36-40 0.23104249162652593
1lvm_loop1_N27-29_C36-41 0.22131548890722585
1lvm_loop1_N27-29_C36-42 0.22624114193636935
1lvm_loop1_N27-29_C36-43 0.23647240041613463
1lvm_loop1_N27-29_C36-44 Discontinuous
1lvm_loop1_N27-29_C36-45 0.26511868545420636
1lvm_loop1_N27-29_C37-37 0.24452040288858626
1lvm_loop1_N27-29_C37-38 0.2499077694561501
1lvm_loop1_N27-29_C37-39 0.24965284325080853
1lvm_loop1_N27-29_C37-40 0.24820184377231572
1lvm_loop1_N27-29_C37-41 0.245

1lvm_loop1_N27-32_C37-45 0.2578932318474071
1lvm_loop1_N27-32_C38-38 Discontinuous
1lvm_loop1_N27-32_C38-39 0.5096003688737675
1lvm_loop1_N27-32_C38-40 Discontinuous
1lvm_loop1_N27-32_C38-41 Discontinuous
1lvm_loop1_N27-32_C38-42 Discontinuous
1lvm_loop1_N27-32_C38-43 Discontinuous
1lvm_loop1_N27-32_C38-44 Discontinuous
1lvm_loop1_N27-32_C38-45 0.28402280948687636
1lvm_loop1_N27-32_C39-39 0.5396079501723592
1lvm_loop1_N27-32_C39-40 Discontinuous
1lvm_loop1_N27-32_C39-41 0.5404273932761883
1lvm_loop1_N27-32_C39-42 0.2579270940158982
1lvm_loop1_N27-32_C39-43 Discontinuous
1lvm_loop1_N27-32_C39-44 0.5552045908727161
1lvm_loop1_N27-32_C39-45 0.560062856116292
1lvm_loop1_N27-32_C40-40 0.2548120364394598
1lvm_loop1_N27-32_C40-41 0.2532092077984209
1lvm_loop1_N27-32_C40-42 0.2556939144255316
1lvm_loop1_N27-32_C40-43 0.26588391463521954
1lvm_loop1_N27-32_C40-44 0.28188583346543894
1lvm_loop1_N27-32_C40-45 0.29358829037030937
1lvm_loop1_N27-32_C41-41 Discontinuous
1lvm_loop1_N27-32_C41-42 0.254

1lvm_loop1_N28-30_C36-39 0.22693017835232504
1lvm_loop1_N28-30_C36-40 0.2273162804486257
1lvm_loop1_N28-30_C36-41 0.21597469738058034
1lvm_loop1_N28-30_C36-42 0.22041956519824282
1lvm_loop1_N28-30_C36-43 0.23408846970743116
1lvm_loop1_N28-30_C36-44 0.25241035634410003
1lvm_loop1_N28-30_C36-45 0.26188198173415206
1lvm_loop1_N28-30_C37-37 0.25563004393114863
1lvm_loop1_N28-30_C37-38 0.25854691748215275
1lvm_loop1_N28-30_C37-39 0.24799306872377863
1lvm_loop1_N28-30_C37-40 0.2497646383254635
1lvm_loop1_N28-30_C37-41 0.24853648287917765
1lvm_loop1_N28-30_C37-42 0.24471464320661895
1lvm_loop1_N28-30_C37-43 0.2573190555483451
1lvm_loop1_N28-30_C37-44 0.2726698693982244
1lvm_loop1_N28-30_C37-45 0.2834496856954447
1lvm_loop1_N28-30_C38-38 0.5218576947975535
1lvm_loop1_N28-30_C38-39 Discontinuous
1lvm_loop1_N28-30_C38-40 Discontinuous
1lvm_loop1_N28-30_C38-41 Discontinuous
1lvm_loop1_N28-30_C38-42 Discontinuous
1lvm_loop1_N28-30_C38-43 Discontinuous
1lvm_loop1_N28-30_C38-44 Discontinuous
1lvm_lo

1lvm_loop1_N28-33_C40-45 0.30500272148034124
1lvm_loop1_N28-33_C41-41 Discontinuous
1lvm_loop1_N28-33_C41-42 0.2564834748224886
1lvm_loop1_N28-33_C41-43 Discontinuous
1lvm_loop1_N28-33_C41-44 0.47583664655325036
1lvm_loop1_N28-33_C41-45 0.28587690617722594
1lvm_loop1_N28-33_C42-42 0.2602759401188676
1lvm_loop1_N28-33_C42-43 0.2648031467193581
1lvm_loop1_N28-33_C42-44 Discontinuous
1lvm_loop1_N28-33_C42-45 0.2971197394579247
1lvm_loop1_N28-33_C43-43 0.2900274904502227
1lvm_loop1_N28-33_C43-44 0.3078570025976635
1lvm_loop1_N28-33_C43-45 0.312228399521104
1lvm_loop1_N28-33_C44-44 0.442992707471599
1lvm_loop1_N28-33_C44-45 0.45935048610040213
1lvm_loop1_N28-33_C45-45 0.33185375404625134
1lvm_loop1_N29-29_C36-36 0.26305848581669683
1lvm_loop1_N29-29_C36-37 0.5885526367227458
1lvm_loop1_N29-29_C36-38 0.28144528420845244
1lvm_loop1_N29-29_C36-39 0.22343245395487105
1lvm_loop1_N29-29_C36-40 0.22226837219558412
1lvm_loop1_N29-29_C36-41 0.21573166956356035
1lvm_loop1_N29-29_C36-42 0.219845030760

1lvm_loop1_N29-32_C37-38 0.2358082689547134
1lvm_loop1_N29-32_C37-39 0.23238716718924607
1lvm_loop1_N29-32_C37-40 0.23627089822701827
1lvm_loop1_N29-32_C37-41 0.23487126901064195
1lvm_loop1_N29-32_C37-42 0.2291233406518651
1lvm_loop1_N29-32_C37-43 0.23670328540034302
1lvm_loop1_N29-32_C37-44 0.25589854920718247
1lvm_loop1_N29-32_C37-45 0.2620272653812214
1lvm_loop1_N29-32_C38-38 Discontinuous
1lvm_loop1_N29-32_C38-39 Discontinuous
1lvm_loop1_N29-32_C38-40 0.505494901746647
1lvm_loop1_N29-32_C38-41 Discontinuous
1lvm_loop1_N29-32_C38-42 0.513622461651883
1lvm_loop1_N29-32_C38-43 Discontinuous
1lvm_loop1_N29-32_C38-44 Discontinuous
1lvm_loop1_N29-32_C38-45 Discontinuous
1lvm_loop1_N29-32_C39-39 0.2449550945497825
1lvm_loop1_N29-32_C39-40 0.24186820710618895
1lvm_loop1_N29-32_C39-41 0.5405291881727696
1lvm_loop1_N29-32_C39-42 Discontinuous
1lvm_loop1_N29-32_C39-43 0.547527168308371
1lvm_loop1_N29-32_C39-44 0.5561751810498499
1lvm_loop1_N29-32_C39-45 0.5590030068023812
1lvm_loop1_N29-32_C4

1lvm_loop1_N30-31_C43-45 0.32801064236146454
1lvm_loop1_N30-31_C44-44 0.48722126846743274
1lvm_loop1_N30-31_C44-45 0.49926108142668557
1lvm_loop1_N30-31_C45-45 0.37539158618493085
1lvm_loop1_N30-32_C36-36 0.1493372648788568
1lvm_loop1_N30-32_C36-37 0.18000732406906944
1lvm_loop1_N30-32_C36-38 0.19915655297479584
1lvm_loop1_N30-32_C36-39 0.18538052401186353
1lvm_loop1_N30-32_C36-40 0.19425426474064325
1lvm_loop1_N30-32_C36-41 0.19752255460806722
1lvm_loop1_N30-32_C36-42 0.20424774316551764
1lvm_loop1_N30-32_C36-43 0.2187661621657727
1lvm_loop1_N30-32_C36-44 0.23657227544518283
1lvm_loop1_N30-32_C36-45 0.246198426980056
1lvm_loop1_N30-32_C37-37 0.2508462527723433
1lvm_loop1_N30-32_C37-38 0.24522737686213952
1lvm_loop1_N30-32_C37-39 0.23478721322988955
1lvm_loop1_N30-32_C37-40 0.2384476238497191
1lvm_loop1_N30-32_C37-41 0.23795597043054112
1lvm_loop1_N30-32_C37-42 0.23047425050602888
1lvm_loop1_N30-32_C37-43 0.2401251732244163
1lvm_loop1_N30-32_C37-44 0.2549155462242603
1lvm_loop1_N30-32_

1lvm_loop1_N31-32_C40-43 0.27277829008588916
1lvm_loop1_N31-32_C40-44 0.29503959156813053
1lvm_loop1_N31-32_C40-45 0.3065522039035268
1lvm_loop1_N31-32_C41-41 0.46410327647823907
1lvm_loop1_N31-32_C41-42 0.25694626000286763
1lvm_loop1_N31-32_C41-43 Discontinuous
1lvm_loop1_N31-32_C41-44 Discontinuous
1lvm_loop1_N31-32_C41-45 Discontinuous
1lvm_loop1_N31-32_C42-42 0.5058490794622661
1lvm_loop1_N31-32_C42-43 Discontinuous
1lvm_loop1_N31-32_C42-44 0.2916400445559912
1lvm_loop1_N31-32_C42-45 0.30901800060413215
1lvm_loop1_N31-32_C43-43 0.5784936627287733
1lvm_loop1_N31-32_C43-44 0.2997275204565805
1lvm_loop1_N31-32_C43-45 0.31293436774940525
1lvm_loop1_N31-32_C44-44 0.5344708432190451
1lvm_loop1_N31-32_C44-45 0.5028020300366128
1lvm_loop1_N31-32_C45-45 0.41632950084621567
1lvm_loop1_N31-33_C37-37 0.26529823317821705
1lvm_loop1_N31-33_C37-38 0.2548182429896713
1lvm_loop1_N31-33_C37-39 0.2727512936591901
1lvm_loop1_N31-33_C37-40 0.2650268662364648
1lvm_loop1_N31-33_C37-41 0.2714915169649464


1lvm_loop1_N33-33_C43-43 Discontinuous
1lvm_loop1_N33-33_C43-44 Discontinuous
1lvm_loop1_N33-33_C43-45 Discontinuous
1lvm_loop1_N33-33_C44-44 Discontinuous
1lvm_loop1_N33-33_C44-45 Discontinuous
1lvm_loop1_N33-33_C45-45 Discontinuous
1lvm_loop2_N36-36_C48-48 Discontinuous
1lvm_loop2_N36-36_C48-49 0.26274175679696343
1lvm_loop2_N36-36_C48-50 0.22937668946301878
1lvm_loop2_N36-36_C48-51 0.2196804493177862
1lvm_loop2_N36-36_C49-49 0.22194787749296566
1lvm_loop2_N36-36_C49-50 0.2269169614106463
1lvm_loop2_N36-36_C49-51 0.22466040510858215
1lvm_loop2_N36-36_C50-51 0.6113157340478855
1lvm_loop2_N36-36_C51-51 Discontinuous
1lvm_loop2_N36-37_C48-48 0.49643790179985536
1lvm_loop2_N36-37_C48-49 0.3450984226076468
1lvm_loop2_N36-37_C48-50 0.21447531630283004
1lvm_loop2_N36-37_C48-51 0.2018493977695393
1lvm_loop2_N36-37_C49-49 0.4251765624971115
1lvm_loop2_N36-37_C49-50 0.21700474818908874
1lvm_loop2_N36-37_C49-51 0.20746470989916735
1lvm_loop2_N36-37_C50-50 0.6223830978113715
1lvm_loop2_N36-37_C5

1lvm_loop2_N38-38_C49-51 0.14585196656866736
1lvm_loop2_N38-38_C50-50 Discontinuous
1lvm_loop2_N38-38_C50-51 0.6019203520791081
1lvm_loop2_N38-39_C48-48 0.1733183819728854
1lvm_loop2_N38-39_C48-49 0.141342174516783
1lvm_loop2_N38-39_C48-50 0.13889444240829352
1lvm_loop2_N38-39_C48-51 0.14117062497889088
1lvm_loop2_N38-39_C49-49 0.14914105236829192
1lvm_loop2_N38-39_C49-50 0.1394185979431488
1lvm_loop2_N38-39_C49-51 0.14154517916734283
1lvm_loop2_N38-39_C50-50 0.6123967079026826
1lvm_loop2_N38-39_C50-51 0.5950405237991314
1lvm_loop2_N38-39_C51-51 0.15469719017555889
1lvm_loop2_N38-40_C48-48 0.16170433384760338
1lvm_loop2_N38-40_C48-49 0.15345512041856543
1lvm_loop2_N38-40_C48-50 0.15375031941453202
1lvm_loop2_N38-40_C48-51 0.1524919750428621
1lvm_loop2_N38-40_C49-49 0.164759491593596
1lvm_loop2_N38-40_C49-50 Discontinuous
1lvm_loop2_N38-40_C49-51 Discontinuous
1lvm_loop2_N38-40_C50-50 Discontinuous
1lvm_loop2_N38-40_C50-51 Discontinuous
1lvm_loop2_N38-40_C51-51 0.1755731479058351
1lvm_l

1lvm_loop2_N40-45_C51-51 0.11262095816287007
1lvm_loop2_N41-41_C48-49 Discontinuous
1lvm_loop2_N41-41_C48-50 Discontinuous
1lvm_loop2_N41-41_C48-51 0.14200878247250182
1lvm_loop2_N41-41_C49-49 Discontinuous
1lvm_loop2_N41-41_C49-50 0.1487024501857987
1lvm_loop2_N41-41_C49-51 Discontinuous
1lvm_loop2_N41-41_C50-51 Discontinuous
1lvm_loop2_N41-42_C48-48 0.1558406032049571
1lvm_loop2_N41-42_C48-49 0.13386079779029408
1lvm_loop2_N41-42_C48-50 0.12842995529169376
1lvm_loop2_N41-42_C48-51 0.13333134078218598
1lvm_loop2_N41-42_C49-49 0.1556902570054207
1lvm_loop2_N41-42_C49-50 0.18229104354233594
1lvm_loop2_N41-42_C49-51 0.15319364171093097
1lvm_loop2_N41-42_C50-50 0.6008891184747265
1lvm_loop2_N41-42_C50-51 0.5961107125315164
1lvm_loop2_N41-42_C51-51 0.15164489612916615
1lvm_loop2_N41-43_C48-48 0.12352188205340205
1lvm_loop2_N41-43_C48-49 0.12670371075405948
1lvm_loop2_N41-43_C48-50 0.12242529144349613
1lvm_loop2_N41-43_C48-51 0.1263241694645969
1lvm_loop2_N41-43_C49-49 0.1259949506131591
1l

1lvm_loop3_N49-49_C66-66 0.49970357432279355
1lvm_loop3_N49-50_C62-62 0.1771322128295861
1lvm_loop3_N49-50_C62-63 0.17634851878244326
1lvm_loop3_N49-50_C62-64 0.14736276732986472
1lvm_loop3_N49-50_C62-65 0.15783617440627615
1lvm_loop3_N49-50_C62-66 0.15586108624305636
1lvm_loop3_N49-50_C63-63 0.19020923272559576
1lvm_loop3_N49-50_C63-64 0.15838161267246625
1lvm_loop3_N49-50_C63-65 0.1621572531540391
1lvm_loop3_N49-50_C63-66 0.1596827844151526
1lvm_loop3_N49-50_C64-64 0.1621858151322804
1lvm_loop3_N49-50_C64-65 0.17789400302576167
1lvm_loop3_N49-50_C64-66 0.1701110889838208
1lvm_loop3_N49-50_C65-65 Discontinuous
1lvm_loop3_N49-50_C65-66 Discontinuous
1lvm_loop3_N49-50_C66-66 0.492902424938003
1lvm_loop3_N49-51_C62-62 0.16539643863338202
1lvm_loop3_N49-51_C62-63 0.16256082839605968
1lvm_loop3_N49-51_C62-64 0.1625108263046084
1lvm_loop3_N49-51_C62-65 0.1652374428015949
1lvm_loop3_N49-51_C62-66 0.1648280072913341
1lvm_loop3_N49-51_C63-63 Discontinuous
1lvm_loop3_N49-51_C63-64 0.16154836297

1lvm_loop4_N65-66_C71-72 0.1846640365461671
1lvm_loop4_N65-66_C71-73 0.18553700585001778
1lvm_loop4_N65-66_C71-74 0.1830476524352079
1lvm_loop4_N65-66_C72-72 0.19357245503577503
1lvm_loop4_N65-66_C72-73 0.4453918139149607
1lvm_loop4_N65-66_C72-74 0.4480855644728288
1lvm_loop4_N65-66_C73-73 Discontinuous
1lvm_loop4_N65-66_C73-74 Discontinuous
1lvm_loop4_N65-66_C74-74 0.19586755376881673
1lvm_loop4_N66-66_C71-72 0.19370808223411964
1lvm_loop4_N66-66_C71-73 0.18913808303522014
1lvm_loop4_N66-66_C71-74 0.19704332018966547
1lvm_loop4_N66-66_C72-73 0.4827747075938462
1lvm_loop4_N66-66_C72-74 0.450962460839227
1lvm_loop4_N66-66_C73-73 0.277494620260132
1lvm_loop4_N66-66_C73-74 0.20934728820067486
1lvm_loop4_N66-66_C74-74 0.25904481819826913
1lvm_loop5_N71-71_C82-82 Discontinuous
1lvm_loop5_N71-71_C82-83 0.3760668370318858
1lvm_loop5_N71-71_C82-84 0.3558564280117998
1lvm_loop5_N71-71_C82-85 0.4796701542943162
1lvm_loop5_N71-71_C83-83 0.37760695399616395
1lvm_loop5_N71-71_C83-84 0.3566120903733

1lvm_loop6_N83-84_C93-94 0.5358035356181259
1lvm_loop6_N83-84_C94-94 0.5440469186205691
1lvm_loop6_N83-85_C90-90 0.1457593680590865
1lvm_loop6_N83-85_C90-91 0.15569129194472636
1lvm_loop6_N83-85_C90-92 0.17912343946599174
1lvm_loop6_N83-85_C90-93 0.17883621767690025
1lvm_loop6_N83-85_C90-94 0.1632305573225944
1lvm_loop6_N83-85_C91-91 0.15391553554415902
1lvm_loop6_N83-85_C91-92 0.19295351227960383
1lvm_loop6_N83-85_C91-93 0.17890428007288744
1lvm_loop6_N83-85_C91-94 0.16894971106521556
1lvm_loop6_N83-85_C92-92 0.19113372427568237
1lvm_loop6_N83-85_C92-93 0.18409220765679185
1lvm_loop6_N83-85_C92-94 0.5154606593633912
1lvm_loop6_N83-85_C93-93 0.5385612358647383
1lvm_loop6_N83-85_C93-94 0.5381082444493515
1lvm_loop6_N83-85_C94-94 0.5307805509864044
1lvm_loop6_N84-84_C90-90 0.16086424956941267
1lvm_loop6_N84-84_C90-91 0.15683752819483554
1lvm_loop6_N84-84_C90-92 0.19423588028668376
1lvm_loop6_N84-84_C90-93 0.18398062628696035
1lvm_loop6_N84-84_C90-94 0.16962208779562996
1lvm_loop6_N84-84_

1lvm_loop7_N90-93_C120-124 0.5710007949927925
1lvm_loop7_N90-93_C121-121 0.5823578337944152
1lvm_loop7_N90-93_C121-122 0.600850416231728
1lvm_loop7_N90-93_C121-123 Discontinuous
1lvm_loop7_N90-93_C121-124 Discontinuous
1lvm_loop7_N90-93_C122-122 0.5896254730420659
1lvm_loop7_N90-93_C122-123 0.6060512773960034
1lvm_loop7_N90-93_C122-124 0.6103869539754869
1lvm_loop7_N90-93_C123-123 0.6182043953919195
1lvm_loop7_N90-93_C123-124 0.6130807072494331
1lvm_loop7_N90-93_C124-124 0.7278673551670346
1lvm_loop7_N90-94_C117-117 0.5656417102895337
1lvm_loop7_N90-94_C117-118 0.5752182527260759
1lvm_loop7_N90-94_C117-119 0.5722182577081177
1lvm_loop7_N90-94_C117-120 0.5670212257117654
1lvm_loop7_N90-94_C117-121 0.5508827365599354
1lvm_loop7_N90-94_C117-122 0.5524369560602238
1lvm_loop7_N90-94_C117-123 0.5561776151634569
1lvm_loop7_N90-94_C117-124 0.5606419007458022
1lvm_loop7_N90-94_C118-118 0.5772592401882901
1lvm_loop7_N90-94_C118-119 0.5680751437084693
1lvm_loop7_N90-94_C118-120 0.5634624599384777

1lvm_loop7_N91-94_C122-123 0.6055055940243947
1lvm_loop7_N91-94_C122-124 0.6026482019068503
1lvm_loop7_N91-94_C123-123 0.6214792133349402
1lvm_loop7_N91-94_C123-124 0.6100362920136609
1lvm_loop7_N91-94_C124-124 Discontinuous
1lvm_loop7_N92-92_C117-118 0.709129269200405
1lvm_loop7_N92-92_C117-119 0.6011502039829459
1lvm_loop7_N92-92_C117-120 0.5776294364272048
1lvm_loop7_N92-92_C117-121 0.5651166320807207
1lvm_loop7_N92-92_C117-122 0.5660110819739298
1lvm_loop7_N92-92_C117-123 0.5737770531769922
1lvm_loop7_N92-92_C117-124 0.5648280790768949
1lvm_loop7_N92-92_C118-118 0.5560542940524789
1lvm_loop7_N92-92_C118-119 0.553745685714915
1lvm_loop7_N92-92_C118-120 0.5567934825303393
1lvm_loop7_N92-92_C118-121 0.6005933889248075
1lvm_loop7_N92-92_C118-122 0.6026737090031958
1lvm_loop7_N92-92_C118-123 0.609625057844352
1lvm_loop7_N92-92_C118-124 0.5799091622623642
1lvm_loop7_N92-92_C119-119 Discontinuous
1lvm_loop7_N92-92_C119-120 0.7044960190490582
1lvm_loop7_N92-92_C119-121 0.7370953400227596
1

1lvm_loop7_N94-94_C117-122 0.5618768053956955
1lvm_loop7_N94-94_C117-123 0.5763013138806897
1lvm_loop7_N94-94_C117-124 0.5645239657507634
1lvm_loop7_N94-94_C118-119 0.5617095480324598
1lvm_loop7_N94-94_C118-120 0.5591845615355178
1lvm_loop7_N94-94_C118-121 0.5854907345997782
1lvm_loop7_N94-94_C118-122 0.5983699861137696
1lvm_loop7_N94-94_C118-123 0.6332573488280578
1lvm_loop7_N94-94_C118-124 0.5900736250487049
1lvm_loop7_N94-94_C119-119 0.6777500327987347
1lvm_loop7_N94-94_C119-120 0.7080151777213706
1lvm_loop7_N94-94_C119-121 0.7287880112370276
1lvm_loop7_N94-94_C119-122 0.7322990888379166
1lvm_loop7_N94-94_C119-123 0.7611482496292795
1lvm_loop7_N94-94_C119-124 0.7186555755469628
1lvm_loop7_N94-94_C120-121 Discontinuous
1lvm_loop7_N94-94_C120-122 Discontinuous
1lvm_loop7_N94-94_C120-123 0.6911869010584181
1lvm_loop7_N94-94_C120-124 0.597083236801771
1lvm_loop7_N94-94_C121-122 0.6150944664182254
1lvm_loop7_N94-94_C121-123 0.6825612722953357
1lvm_loop7_N94-94_C121-124 0.5925817691083308

1lvm_loop8_N118-120_C130-132 1.5687599386832622
1lvm_loop8_N118-120_C130-133 1.5957569494204777
1lvm_loop8_N118-120_C131-131 1.5295040564931728
1lvm_loop8_N118-120_C131-132 1.6468134650041866
1lvm_loop8_N118-120_C131-133 1.6669153818037674
1lvm_loop8_N118-120_C132-132 Discontinuous
1lvm_loop8_N118-120_C132-133 1.617999750619457
1lvm_loop8_N118-120_C133-133 1.662455505142576
1lvm_loop8_N118-121_C129-129 Discontinuous
1lvm_loop8_N118-121_C129-130 Discontinuous
1lvm_loop8_N118-121_C129-131 Discontinuous
1lvm_loop8_N118-121_C129-132 Discontinuous
1lvm_loop8_N118-121_C129-133 Discontinuous
1lvm_loop8_N118-121_C130-130 Discontinuous
1lvm_loop8_N118-121_C130-131 Discontinuous
1lvm_loop8_N118-121_C130-132 Discontinuous
1lvm_loop8_N118-121_C130-133 Discontinuous
1lvm_loop8_N118-121_C131-131 Discontinuous
1lvm_loop8_N118-121_C131-132 Discontinuous
1lvm_loop8_N118-121_C131-133 Discontinuous
1lvm_loop8_N118-121_C132-132 1.627985065037215
1lvm_loop8_N118-121_C132-133 1.6255573426850485
1lvm_loop8_N

1lvm_loop8_N120-121_C131-132 1.7713453488323336
1lvm_loop8_N120-121_C131-133 1.6763779940091819
1lvm_loop8_N120-121_C132-132 1.6772994469195863
1lvm_loop8_N120-121_C132-133 Discontinuous
1lvm_loop8_N120-121_C133-133 1.6581869602910668
1lvm_loop8_N120-122_C129-129 1.4846727128720292
1lvm_loop8_N120-122_C129-130 1.4608456197083743
1lvm_loop8_N120-122_C129-131 1.4556047145582276
1lvm_loop8_N120-122_C129-132 1.5924571888907435
1lvm_loop8_N120-122_C129-133 1.600688012256559
1lvm_loop8_N120-122_C130-130 Discontinuous
1lvm_loop8_N120-122_C130-131 1.5503899387477602
1lvm_loop8_N120-122_C130-132 1.6421209796117757
1lvm_loop8_N120-122_C130-133 1.6375426858721729
1lvm_loop8_N120-122_C131-131 1.5017227661219266
1lvm_loop8_N120-122_C131-132 Discontinuous
1lvm_loop8_N120-122_C131-133 Discontinuous
1lvm_loop8_N120-122_C132-132 1.628917945289387
1lvm_loop8_N120-122_C132-133 1.626666982810873
1lvm_loop8_N120-122_C133-133 Discontinuous
1lvm_loop8_N120-123_C129-129 1.3527064000153846
1lvm_loop8_N120-123_

1lvm_loop8_N124-124_C129-132 Discontinuous
1lvm_loop8_N124-124_C129-133 Discontinuous
1lvm_loop8_N124-124_C130-130 Discontinuous
1lvm_loop8_N124-124_C130-131 Discontinuous
1lvm_loop8_N124-124_C130-132 Discontinuous
1lvm_loop8_N124-124_C130-133 Discontinuous
1lvm_loop8_N124-124_C131-132 Discontinuous
1lvm_loop8_N124-124_C131-133 Discontinuous
1lvm_loop8_N124-124_C132-132 Discontinuous
1lvm_loop8_N124-124_C132-133 Discontinuous
1lvm_loop8_N124-124_C133-133 Discontinuous
1lvm_loop9_N129-129_C147-147 Discontinuous
1lvm_loop9_N129-129_C147-148 0.6576186672936291
1lvm_loop9_N129-129_C147-149 Discontinuous
1lvm_loop9_N129-129_C147-150 0.648745619222728
1lvm_loop9_N129-129_C148-148 Discontinuous
1lvm_loop9_N129-129_C148-149 0.6727048382285955
1lvm_loop9_N129-129_C148-150 Discontinuous
1lvm_loop9_N129-129_C149-150 0.814377253126788
1lvm_loop9_N129-129_C150-150 Discontinuous
1lvm_loop9_N129-130_C147-147 0.6535294353257931
1lvm_loop9_N129-130_C147-148 0.6457379213426376
1lvm_loop9_N129-130_C147-1

# Next

In [197]:
# Re-attempt the graft for every loop listed under failing_loops['target'],
# writing the excised loop and the grafted model to exchanged_loops/ and
# collecting any loop whose graft or scoring raises.
fails = []
for loop in failing_loops['target']:
    loop_id = (loop.loop_name, loop.loop_source)
    print(*loop_id)

    # Start from a fresh copy of the query so grafts never accumulate
    swapped_pose = Pose(query_pose)
    subject_pose = pose_from_pdb('aligned_pdbs/{}.pdb'.format(loop.loop_source))

    # Excise the loop plus its flanking overlap residues from the subject
    n_outer = loop.N_outside_overlap_residue.subject_pose_number
    c_outer = loop.C_outside_overlap_residue.subject_pose_number
    loop_pose = Pose(subject_pose, n_outer, c_outer)
    loop_pose.dump_pdb(
        'exchanged_loops/loop_{}_{}_loop_only.pdb'.format(*loop_id))

    # Configure the mover: insertion window on the query, then the piece
    # together with the sizes of its N- and C-side overlaps
    ccdgm = CCDEndsGraftMover()
    ccdgm.set_insert_region(
        loop.N_splice_residue.query_pose_number,
        loop.C_splice_residue.query_pose_number)
    ccdgm.set_piece(loop_pose, loop.N_overlap_size, loop.C_overlap_size)

    try:
        # Graft, score (sf is defined in an earlier cell), and save the model
        ccdgm.apply(swapped_pose)
        sf(swapped_pose)
        swapped_pose.dump_pdb(
            'exchanged_loops/loop_{}_{}_tev_insert.pdb'.format(*loop_id))
    except Exception as err:
        # Report the error and remember the loop, then move on to the next one
        print(err)
        fails.append(loop)

1 2W7U
1 3WOM
1 4INK
1 4K1T
1 2AS9
1 5JWF
1 5JWG
1 5JXF
1 5JXP
1 5MM8
1 1GPZ
1 1MBM
1 2QY0
1 1A0J
1 1AMH
1 1AN1
1 1CU1
1 1H4W
1 1RGQ
1 5WDX
1 6BQJ
1 1AZZ
1 1EQ9
1 1GVL
1 1MZA
1 1NPM
1 1OP0
1 1OP8
1 1PFX
1 1PQ5
1 1QTF
1 5C2Z
1 5JB8
1 1AGJ
1 1C5M
1 1FXY
1 1SPJ
1 4BXW
1 1KIG
1 2PUX
1 1FUJ
1 1M9U
1 3CP7
1 1HYL
1 2J92
1 2M9P
1 2YOL
1 3LKW
1 4BNR
1 4CRE
1 4M9M
1 1QA7
1 4LK4
1 1BDA
1 1FIW
1 1A5I
2 1CU1
2 1RGQ
2 2M9P
2 2YOL
2 5WDX
2 6BQJ
2 1A0J
2 1A5I
2 1AGJ
2 1AMH
2 1AN1
2 1AZZ
2 1B0F
2 1C5M
2 1CVW
2 1EKB
2 1EQ9
2 1FI8
2 1FUJ
2 1FXY
2 1FY1
2 1G3C
2 1GPZ
2 1GVL
2 1H4W
2 1HYL
2 1KIG
2 1MKW
2 1NPM
2 1OP0
2 1OP8
2 1P3C
2 1PFX
2 1PQ5
2 1QA7
2 1QTF
2 1QY6
2 1SHY
2 1SPJ
2 1TRN
2 1YC0
2 1ZJK
2 2AS9
2 2PUX
2 2QY0
2 2W7U
2 3E0N
2 3F6U
2 3K65
2 3LKW
2 3RP2
2 3W94
2 4BNR
2 4DGJ
2 4INK
2 4K1T
2 4LK4
2 4M9M
2 5C2Z
2 5EDM
2 5LPE
2 5MM8
2 5TO3
2 6U1B
2 1BDA
2 1BRU
2 1FIW
2 1FIZ
2 1M9U
2 2J92
2 2Z9I
2 3WOM
2 4BXW
2 4CRE
2 5JB8
2 5JWF
2 5JWG
2 5JXF
2 5JXP
2 1MBM
2 3CP7
3 1A0J
3 1AMH
3 1AN1
3 1AO5
3 1FI8
3 1G3C

In [198]:
# Repeat the same graft (loop map 10 of 1GPZ into the query) ten times, each
# run starting from fresh copies of the query pose and the loop piece, and
# save every resulting model under 1gpz_loop10_10x_test/.
# NOTE(review): presumably this probes run-to-run variability of the graft
# mover -- confirm intent.
subject_name = '1GPZ'
loop = db_dict[subject_name].loop_maps[10]
subject_pose = pose_from_pdb('aligned_pdbs/{}.pdb'.format(subject_name))

# Excise the loop plus its flanking overlap residues from the subject
loop_pose = Pose(subject_pose,
    loop.N_outside_overlap_residue.subject_pose_number,
    loop.C_outside_overlap_residue.subject_pose_number)
# The output name is fixed for this single-loop test (no placeholders to fill)
loop_pose.dump_pdb('1gpz_loop10_10x_test/1gpz_loop10_loop_only.pdb')

for i in range(10):
    print(i)

    # Setting up poses: copy both so no state is shared between repeats
    swapped_pose = Pose(query_pose)
    lp = Pose(loop_pose)

    # Setting up CCDEndsGraftMover: insertion window on the query, then the
    # piece together with the sizes of its N- and C-side overlaps
    ccdgm = CCDEndsGraftMover()
    ccdgm.set_insert_region(loop.N_splice_residue.query_pose_number,
        loop.C_splice_residue.query_pose_number)
    ccdgm.set_piece(lp, loop.N_overlap_size, loop.C_overlap_size)

    # Applying mover and scoring (sf is defined in an earlier cell), then
    # dumping the pose under a name indexed by the repeat number
    ccdgm.apply(swapped_pose)
    sf(swapped_pose)
    swapped_pose.dump_pdb('1gpz_loop10_10x_test/1gpz_loop10_tev_insert_{}.pdb'.format(i))

0
1
2
3
4
5
6
7
8
9


In [192]:
# Graft each CA-aligned 1GPZ loop-10 fragment into a fresh copy of the query.
# Model file names are expected to end in `_N<first>-<last>_C<first>-<last>`,
# giving the residue ranges of the N-side and C-side overlaps.

# The best-alignment model does not carry the N.../C... range fields parsed
# below, so it is excluded. Filtering (rather than list.remove) keeps the cell
# runnable when that file is not present.
best_align_pdb = '1GPZ_loop10_CA_aligns/1GPZ_loop10_CA_best_align.pdb'
l10_1gpz_pdbs = sorted(
    pdb for pdb in glob('1GPZ_loop10_CA_aligns/*.pdb') if pdb != best_align_pdb)

# Parallel lists: the residue gpz_res[k] is paired with query residue
# tev_res[k] when choosing splice points
gpz_res = [303, 304, 305, 306, 321, 322, 323, 324, 325, 326]
tev_res = [132, 133, 134, 135, 145, 146, 147, 148, 149, 150]

for pdb in l10_1gpz_pdbs:
    print(pdb)
    model_name = pdb.replace('1GPZ_loop10_CA_aligns/','').replace('.pdb', '')

    # Read the ranges from the last two '_'-separated fields so that an
    # uppercase 'N' or 'C' elsewhere in the name cannot shift the parse
    n_field, c_field = model_name.rsplit('_', 2)[-2:]

    # N side: splice at the query residue paired with the LAST overlap residue
    n_side_text = n_field[1:]
    n_side_list = [int(i) for i in n_side_text.split('-')]
    n_splice = tev_res[gpz_res.index(n_side_list[1])]
    n_side_length = n_side_list[1] - n_side_list[0] + 1

    # C side: splice at the query residue paired with the FIRST overlap residue
    c_side_text = c_field[1:]
    c_side_list = [int(i) for i in c_side_text.split('-')]
    c_splice = tev_res[gpz_res.index(c_side_list[0])]
    c_side_length = c_side_list[1] - c_side_list[0] + 1

    print(n_side_length, n_splice, c_splice, c_side_length)

    swapped_pose = Pose(query_pose)
    loop_pose = pose_from_pdb(pdb)

    try:
        # Setting up CCDEndsGraftMover
        ccdgm = CCDEndsGraftMover()
        ccdgm.set_insert_region(n_splice, c_splice)
        ccdgm.set_piece(loop_pose, n_side_length, c_side_length)

        # Applying mover and scoring and dumping the pose
        ccdgm.apply(swapped_pose)
        sf(swapped_pose)
        swapped_pose.dump_pdb('1gpz_loop10_ca_graft/{}.pdb'.format(model_name))

    except Exception as e:
        # Report the failure and continue with the next model
        print(e)
        
# Errors seem to occur where both overlap ranges are 1 (though not every such case fails).

1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C321-321.pdb
1 132 145 1
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C321-322.pdb
1 132 145 2
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C321-323.pdb
1 132 145 3
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C321-324.pdb
1 132 145 4
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C321-325.pdb
1 132 145 5
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C321-326.pdb
1 132 145 6
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C322-322.pdb
1 132 146 1


File: /scratch/benchmark/W.hojo-2/rosetta.Hojo-2/_commits_/main/source/src/core/kinematics/AtomTree.cc:690
[ ERROR ] UtilityExitException
ERROR: AtomTree::torsion_angle_dof_id: angle range error


1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C322-323.pdb
1 132 146 2
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C322-324.pdb
1 132 146 3
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C322-325.pdb
1 132 146 4
1GPZ_loop10_CA_aligns/aligned_loop_N303-303_C322-326.pdb
1 132 146 5
1GPZ_loop10_CA_aligns/aligned_loop_N303-30

1GPZ_loop10_CA_aligns/aligned_loop_N304-304_C325-325.pdb
1 133 149 1


File: /scratch/benchmark/W.hojo-2/rosetta.Hojo-2/_commits_/main/source/src/core/kinematics/AtomTree.cc:690
[ ERROR ] UtilityExitException
ERROR: AtomTree::torsion_angle_dof_id: angle range error


1GPZ_loop10_CA_aligns/aligned_loop_N304-304_C325-326.pdb
1 133 149 2
1GPZ_loop10_CA_aligns/aligned_loop_N304-304_C326-326.pdb
1 133 150 1


File: /scratch/benchmark/W.hojo-2/rosetta.Hojo-2/_commits_/main/source/src/core/kinematics/AtomTree.cc:690
[ ERROR ] UtilityExitException
ERROR: AtomTree::torsion_angle_dof_id: angle range error


1GPZ_loop10_CA_aligns/aligned_loop_N304-305_C321-321.pdb
2 134 145 1
1GPZ_loop10_CA_aligns/aligned_loop_N304-305_C321-322.pdb
2 134 145 2
1GPZ_loop10_CA_aligns/aligned_loop_N304-305_C321-323.pdb
2 134 145 3
1GPZ_loop10_CA_aligns/aligned_loop_N304-305_C321-324.pdb
2 134 145 4
1GPZ_loop10_CA_aligns/aligned_loop_N304-305_C321-325.pdb
2 134 145 5
1GPZ_loop10_CA_aligns/aligned_loop_N304-305_C321-3

1GPZ_loop10_CA_aligns/aligned_loop_N306-306_C324-324.pdb
1 135 148 1
1GPZ_loop10_CA_aligns/aligned_loop_N306-306_C324-325.pdb
1 135 148 2
1GPZ_loop10_CA_aligns/aligned_loop_N306-306_C324-326.pdb
1 135 148 3
1GPZ_loop10_CA_aligns/aligned_loop_N306-306_C325-325.pdb
1 135 149 1


File: /scratch/benchmark/W.hojo-2/rosetta.Hojo-2/_commits_/main/source/src/core/scoring/hbonds/hbonds_geom.cc:1561
[ ERROR ] UtilityExitException
ERROR: NAN occurred in H-bonding calculations!


1GPZ_loop10_CA_aligns/aligned_loop_N306-306_C325-326.pdb
1 135 149 2
1GPZ_loop10_CA_aligns/aligned_loop_N306-306_C326-326.pdb
1 135 150 1


File: /scratch/benchmark/W.hojo-2/rosetta.Hojo-2/_commits_/main/source/src/core/scoring/hbonds/hbonds_geom.cc:1561
[ ERROR ] UtilityExitException
ERROR: NAN occurred in H-bonding calculations!




In [None]:
# Run protease_info on every PDB path in fail_list. Successful results are
# collected in failed_first_db_collect; paths that raise are collected in
# second_fails. The report file is opened in append mode, so output from
# earlier runs is kept.
failed_first_db_collect = []
second_fails = []
with open('alignment_data_dump.txt', 'a') as w:
    for i in fail_list:
        print(i)
        # Strip the directory prefix and extension to get the subject name
        subj_name = i.replace('aligned_pdbs/','').replace('.pdb', '')
        subj_pose = pose_from_pdb(i)
        try:
            # Note: ran with auto_calculate on
            pinf = protease_info(query_name, subj_name, dali_file, align_file, 
                                 query_pose, subj_pose, tev_cat_res, tev_map, report=w)
            failed_first_db_collect.append(pinf)
        except Exception as e:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit can still stop a long run, and
            # show the reason for the failure instead of discarding it.
            second_fails.append(i)
            print(i, 'fail', repr(e))

# Summary: number of paths that failed, followed by the paths themselves
print(len(second_fails))
for i in second_fails:
    print('\t', i)

aligned_pdbs/1A0J.pdb
aligned_pdbs/1A0J.pdb fail
aligned_pdbs/1A5I.pdb
aligned_pdbs/1A5I.pdb fail
aligned_pdbs/1AGJ.pdb
aligned_pdbs/1AGJ.pdb fail
aligned_pdbs/1AMH.pdb
aligned_pdbs/1AMH.pdb fail
aligned_pdbs/1AN1.pdb
aligned_pdbs/1AN1.pdb fail
aligned_pdbs/1AO5.pdb
aligned_pdbs/1AO5.pdb fail
aligned_pdbs/1AZZ.pdb
aligned_pdbs/1AZZ.pdb fail
aligned_pdbs/1B0F.pdb


In [65]:
# Print details of every entry stored under the 'bfactor' key of
# failing_loops, counting in `co` how many have a b_factor below 60.
co = 0
bfactor_entries = failing_loops['bfactor']
for loop in bfactor_entries:
    print(
        loop.loop_name,
        loop.loop_source,
        loop.N_loop_end_residue.subject_pdb_number,
        loop.C_loop_end_residue.subject_pdb_number,
        loop.b_factor,
    )
    co += 1 if loop.b_factor < 60 else 0

# Count below the threshold, then the total number of 'bfactor' entries
print(co, len(bfactor_entries))

# Number of entries stored under each key of failing_loops
for reason, entries in failing_loops.items():
    print(reason, len(entries))

1 1LVO 18 24 57.714285714285715
1 1FIZ 31 37 58.02969696969697
1 6E0U 70 75 59.24166666666667
1 1A5I 31 42 54.619019607843136
1 1AO5 31 38 60.994285714285716
1 5GVT 31 38 73.01400000000001
1 5EDM 322 330 59.15851851851853
1 4IC6 131 139 51.82740740740741
1 5HM2 17 25 125.31555555555555
1 5LC0 1023 1032 75.16399999999999
1 6BJR 341 343 240.0
1 5YVU 23 33 100.18666666666667
1 3NWU 184 201 63.628
1 3NZI 184 201 92.36092592592593
1 3QO6 133 153 99.4395238095238
1 4FLN 119 139 64.41492063492065
1 1L1J 41 76 69.36225490196078
1 5Y28 56 97 94.92333333333333
1 1KY9 28 83 94.12083333333332
2 4FLN 151 152 73.88833333333334
2 5LC0 1037 1044 81.65958333333334
2 5YVU 34 45 100.3886111111111
2 1AO5 39 51 53.09692307692307
2 5GVT 39 52 52.71047619047619
2 5HM2 29 40 105.72472222222223
2 6BJR 345 356 224.34166666666667
2 6E0U 77 88 59.47527777777777
2 1L1J 78 90 58.39410256410255
2 3NWU 201 213 54.64153846153847
2 3NZI 201 215 84.93044444444443
2 3QO6 154 166 78.71846153846154
2 5Y28 98 111 85.0335897