In [5]:
from Bio import SeqIO

In [7]:
with open("P07327.fasta", "r") as f:
    sequence = SeqIO.read(f, format="fasta")

print(sequence.seq)
SeqIO.SeqRecord.
print(len(trypsin(str(sequence.seq))))
for peptide in trypsin(str(sequence.seq)):
    print("{:50s} {:10.4f} {:10.4f}".format(peptide, monoisotopic_mass(peptide), average_mass(peptide)))

MSTAGKVIKCKAAVLWELKKPFSIEEVEVAPPKAHEVRIKMVAVGICGTDDHVVSGTMVTPLPVILGHEAAGIVESVGEGVTTVKPGDKVIPLAIPQCGKCRICKNPESNYCLKNDVSNPQGTLQDGTSRFTCRRKPIHHFLGISTFSQYTVVDENAVAKIDAASPLEKVCLIGCGFSTGYGSAVNVAKVTPGSTCAVFGLGGVGLSAIMGCKAAGAARIIAVDINKDKFAKAKELGATECINPQDYKKPIQEVLKEMTDGGVDFSFEVIGRLDTMMASLLCCHEACGTSVIVGVPPDSQNLSMNPMLLLTGRTWKGAILGGFKSKECVPKLVADFMAKKFSLDALITHVLPFEKINEGFDLLHSGKSIRTILMF
38
MVAVGICGTDDHVVSGTMVTPLPVILGHEAAGIVESVGEGVTTVKPGDK   4841.4652  4844.5893
LDTMMASLLCCHEACGTSVIVGVPPDSQNLSMNPMLLLTGR           4317.0468  4320.1269
KPIHHFLGISTFSQYTVVDENAVAK                           2800.4548  2802.1830
VTPGSTCAVFGLGGVGLSAIMGCK                            2224.1054  2225.6652
VCLIGCGFSTGYGSAVNVAK                                1944.9437  1946.2674
NDVSNPQGTLQDGTSR                                    1687.7761  1688.7288
EMTDGGVDFSFEVIGR                                    1757.7930  1758.9211
FSLDALITHVLPFEK                                     1728.9450  1730.0375
KPFSIEEVEVAPPK                       

In [1]:
def trypsin(seq: str) -> []:
    """
    Function to predict the peptides resulting from a
    tryptic digestion of a protein.
    :param seq: input protein sequence
    :return: List of peptides
    """

    seq = seq.upper()
    peptides = []
    prev_cut = -1
    for i in range(len(seq)):
        aa = seq[i]
        aa_next = seq[i+1] if i+1 < len(seq) else ""
        if (aa == "R" or aa == "K") and aa_next != "P":
            peptides.append(seq[prev_cut+1:i+1])
            prev_cut = i
    if prev_cut+1 != len(seq):
        peptides.append(seq[prev_cut+1:])

    return sorted(peptides, key=lambda x: len(x), reverse=True)

In [2]:
def monoisotopic_mass(peptide: str) -> float:
    """
    Calculates the monoisotopic mass of a peptide.
    :param peptide: Sequence of the peptide
    :return: monoisotopic mass of the peptide
    """
    # Table of monoisotopic masses from https://education.expasy.org/student_projects/isotopident/htdocs/aa-list.html
    aa_mass = {
        "A": 71.03711,
        "R": 156.10111,
        "N": 114.04293,
        "D": 115.02694,
        "C": 103.00919,
        "E": 129.04259,
        "Q": 128.05858,
        "G": 57.02146,
        "H": 137.05891,
        "I": 113.08406,
        "L": 113.08406,
        "K": 128.09496,
        "M": 131.04049,
        "F": 147.06841,
        "P": 97.05276,
        "S": 87.03203,
        "T": 101.04768,
        "W": 186.07931,
        "Y": 163.06333,
        "V": 99.06841
    }

    peptide = peptide.upper()
    mass = 15.99491 + 2*1.00782 # monoisotopic mass of water (C and N-terminus)
    # iterating over amino acids:
    for aa in peptide:
        if aa in aa_mass.keys():
            mass += aa_mass.get(aa)

    return(mass)

In [3]:
def average_mass(peptide: str) -> float:
    """
    Calculates the average mass of a peptide.
    :param peptide: Sequence of the peptide
    :return: average mass of the peptide
    """
    # Table of average masses from https://education.expasy.org/student_projects/isotopident/htdocs/aa-list.html
    aa_mass = {
        "A": 71.0788,
        "R": 156.1875,
        "N": 114.1038,
        "D": 115.0886,
        "C": 103.1388,
        "E": 129.1155,
        "Q": 128.1307,
        "G": 57.0519,
        "H": 137.1411,
        "I": 113.1594,
        "L": 113.1594,
        "K": 128.1741,
        "M": 131.1926,
        "F": 147.1766,
        "P": 97.1167,
        "S": 87.0782,
        "T": 101.1051,
        "W": 186.2132,
        "Y": 163.1760,
        "V": 99.1326
    }

    peptide = peptide.upper()
    mass = 15.99977 + 2*1.00811 # average mass of water (C and N-terminus)
    # iterating over amino acids:
    for aa in peptide:
        if aa in aa_mass.keys():
            mass += aa_mass.get(aa)

    return(mass)