In [None]:
# default_exp display

# Display

> A collection of functions primarily for GUI operations

In [None]:
#hide
from nbdev.showdoc import *

## Sequence coverage

Calculate the coverage of a target protein sequence by peptides in a given list

In [None]:
#export

import re

def calculate_sequence_coverage(target_sequence:str, peptide_list:list)->(int, int, float, list):
    """
    Calculate the percentage of a target protein covered by a list of peptides.

    Before matching, each peptide is reduced to its non-lowercase alphabetic characters,
    so lowercase PTM markers and symbols are ignored. Every occurrence of a peptide in
    the target sequence, including overlapping occurrences, marks the matched residues
    as covered.

    Args:
        target_sequence (str): the protein sequence against which the peptide_list should be compared.
        peptide_list (List[str]): the list of peptides (str) to be compared against the target_sequence.
    Returns:
        int: number of residues in target_sequence.
        int: number of residues in target_sequence covered by peptides in peptide_list.
        float: percentage of residues in target_sequence covered by peptides in peptide_list
            (0.0 when target_sequence is empty).
        list (dict): one dict per residue of target_sequence, in sequence order, of the form
            {'res': <residue character>, 'covered': <bool>} (covered = True, not-covered = False).
    """
    residues = [
        {'res': res, 'covered': False} for res in target_sequence
    ]
    for peptide in peptide_list:
        # keep only the residue letters: drop lowercase PTM markers and non-letter symbols
        peptide = ''.join(_ for _ in peptide if not _.islower() and _.isalpha())
        if not peptide:
            # nothing left to match; an empty pattern would match everywhere and cover nothing
            continue
        # zero-width lookahead so that overlapping occurrences are all reported
        pattern = '(?=%s)' % re.escape(peptide)
        for match in re.finditer(pattern, target_sequence):
            for index in range(match.start(), match.start() + len(peptide)):
                residues[index]['covered'] = True

    total = len(residues)
    total_covered = sum(1 for r in residues if r['covered'])
    # an empty target has no residues to cover; report 0% instead of dividing by zero
    coverage_percent = total_covered / total * 100 if total else 0.0

    return total, total_covered, coverage_percent, residues
    

In [None]:
#hide

import numpy as np

def test_calculate_sequence_coverage():
    """
    Check calculate_sequence_coverage against hand-written coverage maps.

    Each test case pairs a coverage map with a peptide list. The map holds one
    character per residue of the target protein: '+' marks a residue that is
    expected to be covered, '-' one that is expected to stay uncovered.
    """
    test_target_protein     = 'TESTTARGETSEQ'
    
    tests = [
        ['++++------+++', ['TEST', 'SEQ']],              # plain matches
        ['----------+++', ['TESST', 'SEQ']],             # peptide absent from the target
        ['----------+++', ['', 'SEQ']],                  # empty peptide
        ['-------------', ['RANDEMPEP']],                # no match at all
        ['++++------+++', ['TEpST', 'pSEQ']],            # lowercase PTM markers
        ['++++------+++', ['modificationTEST', 'SEQ']],  # lowercase prefix
        ['++++------+++', ['e<^TEST', 'SEQ']],           # lowercase letter and symbols
        ['----------+++', ['SEQ', 'SEQ']],               # duplicated peptide
        ['-------------', []],                           # empty peptide list
    ]
    
    # TODO: iTRAQ 4/8-plex on the peptide N-terminus will make this fail.
    # -- This looks like an inconsistency in the naming of the PTM identifier in modifications.tsv:
    # -- e.g. itraq4K<^ -- should this be itraq4<^, as with the TMT mods?
    
    for test_target_covered_map, test_peptide_list in tests:

        test_total = len(test_target_protein)
        test_total_covered = test_target_covered_map.count('+')
        test_coverage_percent = test_total_covered / test_total * 100
        test_residue_list = [
            {'res': res, 'covered': flag == '+'}
            for res, flag in zip(test_target_protein, test_target_covered_map)
        ]

        total, total_covered, coverage_percent, residue_list = calculate_sequence_coverage(
            test_target_protein, test_peptide_list
        )

        assert total == test_total
        assert total_covered == test_total_covered
        assert np.isclose(test_coverage_percent, coverage_percent)

        # Compare residue by residue so that the positions of the covered
        # residues are verified, not only how many of them there are.
        assert len(residue_list) == len(test_residue_list)
        for i, expected in enumerate(test_residue_list):
            assert residue_list[i]['res'] == expected['res'], (i, test_peptide_list)
            assert residue_list[i]['covered'] == expected['covered'], (i, test_peptide_list)

# Run the test as soon as this cell executes; a failing assertion raises AssertionError here.
test_calculate_sequence_coverage()

In [1]:
#hide
from nbdev.export import *
# nbdev export step; the notebooks it processed are listed in the cell output ("Converted ...").
notebook2script()

Converted 00_settings.ipynb.
Converted 01_chem.ipynb.
Converted 02_io.ipynb.
Converted 03_fasta.ipynb.
Converted 04_feature_finding.ipynb.
Converted 05_search.ipynb.
Converted 06_score.ipynb.
Converted 07_recalibration.ipynb.
Converted 08_quantification.ipynb.
Converted 09_matching.ipynb.
Converted 10_constants.ipynb.
Converted 11_interface.ipynb.
Converted 12_performance.ipynb.
Converted 13_export.ipynb.
Converted 14_display.ipynb.
Converted additional_code.ipynb.
Converted contributing.ipynb.
Converted file_formats.ipynb.
Converted index.ipynb.
