In [5]:
import re
import pyopenms
from collections import defaultdict

In [6]:
def read_protein_sequence(file_path):
    """Read a sequence from a text file, ignoring the file's first line.

    Every line after the first is stripped of surrounding whitespace and the
    pieces are concatenated into one string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The concatenated sequence string; empty if the file has at most one
        line.
    """
    with open(file_path, 'r') as handle:
        all_lines = handle.readlines()
    # The first line is dropped unconditionally (presumably a FASTA-style
    # header -- the code does not check for a '>' prefix).
    body_lines = all_lines[1:]
    return "".join(map(str.strip, body_lines))

# Input file holding the sequence; read_protein_sequence skips its first line.
file_path = "insulin_sequence.txt"

# Load the sequence into the notebook-level name `sequence` and echo it.
sequence = read_protein_sequence(file_path)
print(sequence)

MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN


In [7]:
def digest_protein(sequence, enzyme='trypsin'):
    """Cleave a protein sequence into peptides in silico.

    Only trypsin is implemented: the sequence is cut after every K or R
    unless the following residue is P.

    Args:
        sequence: Protein sequence as a string of one-letter residue codes.
        enzyme: Name of the protease to simulate. Only 'trypsin' is supported.

    Returns:
        List of non-empty peptide strings, in the order they occur in
        ``sequence``. An empty ``sequence`` yields an empty list.

    Raises:
        ValueError: If ``enzyme`` is not a supported protease.
    """
    if enzyme != 'trypsin':
        # Fail with a clear message instead of hitting `return peptides`
        # while the name is still unbound.
        raise ValueError(f"Unsupported enzyme: {enzyme!r}")

    # Zero-width split: lookbehind for K/R, negative lookahead for P.
    fragments = re.split(r'(?<=[KR])(?!P)', sequence)

    # A sequence ending in K/R (or an empty sequence) produces an empty
    # fragment; drop it so it is not treated as a peptide downstream.
    return [fragment for fragment in fragments if fragment]

# Digest the loaded sequence with the default enzyme ('trypsin') and show
# the resulting peptide list.
peptides = digest_protein(sequence)
print("Peptides after trypsin digestion:", peptides)

Peptides after trypsin digestion: ['MALWMR', 'LLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGER', 'GFFYTPK', 'TR', 'R', 'EAEDLQVGQVELGGGPGAGSLQPLALEGSLQK', 'R', 'GIVEQCCTSICSLYQLENYCN']


In [11]:
def calculate_monoisotopic_mass_peptide(peptide_sequence):
    """Return the monoisotopic weight pyOpenMS computes for a peptide.

    Args:
        peptide_sequence: Peptide sequence string, parsed with
            ``pyopenms.AASequence.fromString``.

    Returns:
        The value of ``getMonoWeight()`` for the parsed sequence.
    """
    return pyopenms.AASequence.fromString(peptide_sequence).getMonoWeight()


def store_peptide_and_mass_info(peptides):
    """Group peptide sequences by their monoisotopic mass.

    Args:
        peptides: Iterable of peptide sequence strings.

    Returns:
        A ``defaultdict(list)`` mapping each computed mass to the list of
        peptides that produced it, in input order. Keys are the exact values
        returned by ``calculate_monoisotopic_mass_peptide``, so two peptides
        share an entry only when their masses compare equal.
    """
    by_mass = defaultdict(list)
    for seq in peptides:
        by_mass[calculate_monoisotopic_mass_peptide(seq)].append(seq)
    return by_mass


# Build the mass -> peptides mapping for the digested peptides.
peptide_info_dict = store_peptide_and_mass_info(peptides)

# Report every mass (formatted to four decimals) with the peptides stored under it.
for mass, peptides_list in peptide_info_dict.items():
    print(f"Mass: {mass:.4f} Da ==> Peptides: {peptides_list}")

Mass: 806.3931 Da ==> Peptides: ['MALWMR']
Mass: 4282.2799 Da ==> Peptides: ['LLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGER']
Mass: 858.4276 Da ==> Peptides: ['GFFYTPK']
Mass: 275.1594 Da ==> Peptides: ['TR']
Mass: 174.1117 Da ==> Peptides: ['R', 'R']
Mass: 3146.6096 Da ==> Peptides: ['EAEDLQVGQVELGGGPGAGSLQPLALEGSLQK']
Mass: 2382.0000 Da ==> Peptides: ['GIVEQCCTSICSLYQLENYCN']


In [10]:
def find_isobaric_peptides(peptide_data):
    """Select the masses that are shared by more than one peptide entry.

    Args:
        peptide_data: Mapping of mass to a list of peptide sequences.

    Returns:
        A new plain dict with only the entries whose list holds more than one
        sequence. The lists are the same objects as in ``peptide_data``.
        Repeated occurrences of an identical sequence count as separate
        entries.
    """
    return {
        mass_key: seqs
        for mass_key, seqs in peptide_data.items()
        if len(seqs) > 1
    }

# Keep only the masses that have more than one peptide stored under them.
isobaric_peptides = find_isobaric_peptides(peptide_info_dict)


# Print one entry per shared mass, or a fallback message when the dict is empty.
if isobaric_peptides:
    print("Detected Isobaric Peptides:")
    for mass, sequences in isobaric_peptides.items():
        # Show the sequences of this mass on one line, separated by " | ".
        formatted_sequences = " | ".join(sequences)
        print(f"> Mass: {mass:.4f} Da")
        print(f"  Sequences: {formatted_sequences}")
else:
    print("No isobaric peptides were found.")


Detected Isobaric Peptides:
> Mass: 174.1117 Da
  Sequences: R | R
