In [20]:
! pip install pyteomics





In [21]:
import pyopenms
import re
from collections import defaultdict

In [22]:
from pyteomics import mass, parser

# Input protein sequence
protein_sequence = "MKWVTFISLLFLFSSAYSRGVFRRDTHKSEIAHRFKDLGE"

# Define protease (Trypsin in this case)
cleavage_rule = parser.expasy_rules['trypsin']

# Perform in silico digestion
peptides = parser.cleave(protein_sequence, cleavage_rule)

# Compute monoisotopic mass for each peptide
peptide_masses = {}
for peptide in peptides:
    peptide_mass = mass.calculate_mass(sequence=peptide)
    if peptide_mass not in peptide_masses:
        peptide_masses[peptide_mass] = []
    peptide_masses[peptide_mass].append(peptide)

# Identify isobaric peptides
isobaric_peptides = {mass: seqs for mass, seqs in peptide_masses.items() if len(seqs) > 1}

# Print results
if isobaric_peptides:
    print("Isobaric Peptides:")
    for mass, seqs in isobaric_peptides.items():
        print(f"Mass: {mass:.4f} Da, Sequences: {', '.join(seqs)}")
else:
    print("No isobaric peptides found.")





No isobaric peptides found.


In [23]:
# Function to read the protein sequence from a FASTA file
def load_protein_sequence(file_path):
    with open(file_path, 'r') as file:
        protein_sequence = "".join([line.strip() for line in file.readlines()[1:]])
    return protein_sequence
protein_sequence = load_protein_sequence("Tyroid.fasta")
print(f"Protein Sequence:\n{protein_sequence}\n")  # Print within the function
   



Protein Sequence:
MTPNSMTENGLTAWDKPKHCPDREHDWKLVGMSEACLHRKSHSERRSTLKNEQSSPHLIQTTWTSSIFHLDHDDVNDQSVSSAQTFQTEEKKCKGYIPSYLDKDELCVVCGDKATGYHYRCITCEGCKGFFRRTIQKNLHPSYSCKYEGKCVIDKVTRNQCQECRFKKCIYVGMATDLVLDDSKRLAKRKLIEENREKRRREELQKSIGHKPEPTDEEWELIKTVTEAHVATNAQGSHWKQKRKFLPEDIGQAPIVNAPEGGKVDLEAFSHFTKIITPAITRVVDFAKKLPMFCELPCEDQIILLKGCCMEIMSLRAAVRYDPESETLTLNGEMAVTRGQLKNGGLGVVSDAIFDLGMSLSSFNLDDTEVALLQAVLLMSSDRPGLACVERIEKYQDSFLLAFEHYINYRKHHVTHFWPKLLMKVTDLRMIGACHASRFLHMKVECPTELFPPLFLEVFED



In [24]:
# Function to perform protease digestion
def protease_digest(protein_sequence, enzyme='trypsin'):
    if enzyme == 'trypsin':
        fragments = re.split(r'(?<=[KR])(?!P)', protein_sequence)
    return fragments

# Perform protease digestion
protein_fragments = protease_digest(protein_sequence)

# Print the fragments
print(f"Peptides after digestion:\n{protein_fragments}\n")




Peptides after digestion:
['MTPNSMTENGLTAWDKPK', 'HCPDR', 'EHDWK', 'LVGMSEACLHR', 'K', 'SHSER', 'R', 'STLK', 'NEQSSPHLIQTTWTSSIFHLDHDDVNDQSVSSAQTFQTEEK', 'K', 'CK', 'GYIPSYLDK', 'DELCVVCGDK', 'ATGYHYR', 'CITCEGCK', 'GFFR', 'R', 'TIQK', 'NLHPSYSCK', 'YEGK', 'CVIDK', 'VTR', 'NQCQECR', 'FK', 'K', 'CIYVGMATDLVLDDSK', 'R', 'LAK', 'R', 'K', 'LIEENR', 'EK', 'R', 'R', 'R', 'EELQK', 'SIGHKPEPTDEEWELIK', 'TVTEAHVATNAQGSHWK', 'QK', 'R', 'K', 'FLPEDIGQAPIVNAPEGGK', 'VDLEAFSHFTK', 'IITPAITR', 'VVDFAK', 'K', 'LPMFCELPCEDQIILLK', 'GCCMEIMSLR', 'AAVR', 'YDPESETLTLNGEMAVTR', 'GQLK', 'NGGLGVVSDAIFDLGMSLSSFNLDDTEVALLQAVLLMSSDRPGLACVER', 'IEK', 'YQDSFLLAFEHYINYR', 'K', 'HHVTHFWPK', 'LLMK', 'VTDLR', 'MIGACHASR', 'FLHMK', 'VECPTELFPPLFLEVFED']



In [16]:
# Function to calculate the monoisotopic mass of a peptide
def compute_peptide_mass(sequence_fragment):
    peptide = pyopenms.AASequence.fromString(sequence_fragment)
    return peptide.getMonoWeight()

# Function to map peptides to their respective masses
def map_peptides_to_masses(fragments):
    peptide_mass_mapping = defaultdict(list)
    for fragment in fragments:
        mass = compute_peptide_mass(fragment)
        peptide_mass_mapping[mass].append(fragment)
    return peptide_mass_mapping
peptide_mass_mapping = map_peptides_to_masses(protein_fragments)
print("Peptide Masses and Sequences:")  # Print within the function
for mass, fragments in peptide_mass_mapping.items():
    print(f"Mass: {mass:.4f} Da ==> Fragments: {fragments}")
    

Peptide Masses and Sequences:
Mass: 2019.9394 Da ==> Fragments: ['MTPNSMTENGLTAWDKPK']
Mass: 626.2595 Da ==> Fragments: ['HCPDR']
Mass: 713.3133 Da ==> Fragments: ['EHDWK']
Mass: 1214.5900 Da ==> Fragments: ['LVGMSEACLHR']
Mass: 146.1055 Da ==> Fragments: ['K', 'K', 'K', 'K', 'K', 'K', 'K']
Mass: 614.2772 Da ==> Fragments: ['SHSER']
Mass: 174.1117 Da ==> Fragments: ['R', 'R', 'R', 'R', 'R', 'R', 'R', 'R']
Mass: 447.2693 Da ==> Fragments: ['STLK']
Mass: 4686.1383 Da ==> Fragments: ['NEQSSPHLIQTTWTSSIFHLDHDDVNDQSVSSAQTFQTEEK']
Mass: 249.1147 Da ==> Fragments: ['CK']
Mass: 1054.5335 Da ==> Fragments: ['GYIPSYLDK']
Mass: 1079.4627 Da ==> Fragments: ['DELCVVCGDK']
Mass: 866.4035 Da ==> Fragments: ['ATGYHYR']
Mass: 855.3289 Da ==> Fragments: ['CITCEGCK']
Mass: 525.2700 Da ==> Fragments: ['GFFR']
Mass: 488.2959 Da ==> Fragments: ['TIQK']
Mass: 1047.4808 Da ==> Fragments: ['NLHPSYSCK']
Mass: 495.2329 Da ==> Fragments: ['YEGK']
Mass: 576.2941 Da ==> Fragments: ['CVIDK']
Mass: 374.2278 Da ==> Fr

In [25]:
def identify_isobaric_peptides(mass_mapping):
    isobaric_results = {}
    for mass, fragment_list in mass_mapping.items():
        if len(fragment_list) > 1:
            isobaric_results[mass] = fragment_list
    return isobaric_results

isobaric_results = identify_isobaric_peptides(peptide_mass_mapping)

if isobaric_results:
        
    print("\nIsobaric Peptides Detected:")  # Print within the function
    for mass, fragments in isobaric_results.items():
        formatted_fragments = " | ".join(fragments)
        print(f"Mass: {mass:.4f} Da")
        print(f"Fragments: {formatted_fragments}")
else:
    print("\nNo isobaric peptides were detected.")  # Print within the function
   



Isobaric Peptides Detected:
Mass: 146.1055 Da
Fragments: K | K | K | K | K | K | K
Mass: 174.1117 Da
Fragments: R | R | R | R | R | R | R | R
