# Section 2

In [1]:
from pyopenms import *

In [4]:
# Build a peptide from its one-letter string, then derive the pieces first
# and print everything afterwards.
new_seq = AASequence.fromString("DFPIANGER")
prefix = new_seq.getPrefix(4)  # leading four residues
suffix = new_seq.getSuffix(5)  # trailing five residues
concat = new_seq + new_seq     # `+` joins two sequences end to end

print("Sequence:", new_seq)
print("Prefix:", prefix)
print("Suffix:", suffix)
print("Concatenated:", concat)


Sequence: DFPIANGER
Prefix: DFPI
Suffix: ANGER
Concatenated: DFPIANGERDFPIANGER


In [6]:
# Monoisotopic weight of the whole peptide, using the method's default arguments.
mfull = new_seq.getMonoWeight()
print(
    "Monoisotopic mass of peptide [M] is",
    mfull,
)

Monoisotopic mass of peptide [M] is 1017.4879641373001


In [8]:
# Monoisotopic weight requested for the full peptide with a charge of 2.
mprecursor = new_seq.getMonoWeight(
    Residue.ResidueType.Full,
    2,
)
print(
    "Monoisotopic mass of peptide precursor [M+2H]2+ is",
    mprecursor,
)

Monoisotopic mass of peptide precursor [M+2H]2+ is 1019.5025170708421


In [11]:
# Mass-to-charge ratio of the peptide at charge 2.
mz = new_seq.getMZ(2)
print(
    "Monoisotopic m/z of [M+2H]2+ is",
    mz,
)

Monoisotopic m/z of [M+2H]2+ is 509.7512585354211


In [12]:
# Iterating over an AASequence yields one residue object per position.
aminoAcid = AASequence.fromString("DFPIANGER")

# print() stringifies its arguments, so the sequence can be passed directly.
print("The peptide", aminoAcid, "consists of the following amino acids:")
for aa in aminoAcid:
    print(
        aa.getName(),
        ":",
        aa.getMonoWeight(),
    )

The peptide DFPIANGER consists of the following amino acids:
Aspartate : 133.0375092233
Phenylalanine : 165.0789793509
Proline : 115.0633292871
Isoleucine : 131.0946294147
Alanine : 89.04767922330001
Asparagine : 132.0534932552
Glycine : 75.0320291595
Glutamate : 147.05315928710002
Arginine : 174.1116764466


In [13]:
# Parse a sequence that carries modifications: a bracketed mass on the first
# cysteine and a named label on one lysine. Printing shows how the parser
# resolved them.
seq = AASequence.fromString(
    "C[143]PKCK(Label:13C(6)15N(2))CR"
)
print(seq)

.(Pyro-carbamidomethyl)CPKCK(Label:13C(6)15N(2))CR


In [14]:
# Report terminal modifications, if any; each terminus is checked independently.
if seq.hasNTerminalModification():
    n_term_mod = seq.getNTerminalModification()
    print("N-Term Modification: ", n_term_mod.getFullId())

if seq.hasCTerminalModification():
    c_term_mod = seq.getCTerminalModification()
    print("C-Term Modification: ", c_term_mod.getFullId())

N-Term Modification:  Pyro-carbamidomethyl (N-term C)


In [15]:
# One line per residue: name and monoisotopic weight, plus the modification
# name when the residue is modified.
for aa in seq:
    fields = [aa.getName(), ":", aa.getMonoWeight()]
    if aa.isModified():
        fields += [":", aa.getModificationName()]
    print(*fields)

Cysteine : 121.0197499533
Proline : 115.0633292871
Lysine : 146.1055284466
Cysteine : 121.0197499533
Lysine : 154.11972844660002 : Label:13C(6)15N(2)
Cysteine : 121.0197499533
Arginine : 174.1116764466


# Molecular formula

In [16]:
# Rebind `seq` to the unmodified peptide and keep its empirical formula;
# later cells reuse both names.
seq = AASequence.fromString("DFPIANGER")
seq_formula = seq.getFormula()
print(
    "Peptide", seq,
    "has molecular formula", seq_formula,
)

Peptide DFPIANGER has molecular formula C44H67N13O15


# Isotope patterns

In [18]:
# Coarse isotope pattern of the peptide formula; the generator is built with
# the argument 7 and the recorded output lists seven peaks.
coarse_generator = CoarseIsotopePatternGenerator(7)
coarse_isotopes = seq_formula.getIsotopeDistribution(coarse_generator)
for iso in coarse_isotopes.getContainer():
    # Intensity is scaled by 100 so it reads as a percentage.
    print("Isotope", iso.getMZ(), "has abundance", 100 * iso.getIntensity(), "%")

Isotope 1017.4879641373 has abundance 56.81047439575195 %
Isotope 1018.4913189751 has abundance 30.52588701248169 %
Isotope 1019.4946738128999 has abundance 9.801062196493149 %
Isotope 1020.4980286507 has abundance 2.328959107398987 %
Isotope 1021.5013834885 has abundance 0.44921189546585083 %
Isotope 1022.5047383263 has abundance 0.07377508445642889 %
Isotope 1023.5080931640999 has abundance 0.010627075243974105 %


In [19]:
# Fine-structure isotope pattern of the same formula; the generator is built
# with the argument 0.01 (presumably a probability cut-off — confirm in the
# pyOpenMS docs).
fine_generator = FineIsotopePatternGenerator(0.01)
fine_isotopes = seq_formula.getIsotopeDistribution(fine_generator)
for iso in fine_isotopes.getContainer():
    # Intensity is scaled by 100 so it reads as a percentage.
    print("Isotope", iso.getMZ(), "has abundance", 100 * iso.getIntensity(), "%")

Isotope 1017.4879641373001 has abundance 56.80969953536987 %
Isotope 1018.4849991373001 has abundance 2.7278145775198936 %
Isotope 1018.4913191373001 has abundance 27.035287022590637 %
Isotope 1018.4921811373001 has abundance 0.3246041014790535 %
Isotope 1018.4942408854 has abundance 0.4377691075205803 %
Isotope 1019.4883541373001 has abundance 1.2981452979147434 %
Isotope 1019.4922181373001 has abundance 1.7511537298560143 %
Isotope 1019.4946741373 has abundance 6.286735832691193 %
Isotope 1019.4975958854001 has abundance 0.2083308296278119 %
Isotope 1020.4917091373001 has abundance 0.3018683288246393 %
Isotope 1020.4955731373002 has abundance 0.833360105752945 %
Isotope 1020.4980291373 has abundance 0.9519387967884541 %
Isotope 1021.4989281373 has abundance 0.1937880413606763 %


In [20]:
# Sanity check: show the peptide currently bound to `new_seq`.
print(new_seq)

DFPIANGER


In [26]:
# Sanity check: show the peptide currently bound to `seq`.
print(seq)

DFPIANGER


In [30]:
# The last three residues of the peptide; this rebinds `suffix`, which the
# fragment-ion cells below read.
suffix = seq.getSuffix(3)
print(
    "y3 ion sequence:",
    suffix,
)

y3 ion sequence: GER


In [29]:
# Empirical formula of the 3-residue suffix, then of the full peptide.
for peptide in (suffix, seq):
    print(peptide.getFormula())

C13H24N6O6
C44H67N13O15


# Fragment ions

In [39]:
# Compare the formula and monoisotopic weight reported for `suffix` under four
# residue types, each requested with a charge argument of 2.
# `y3_formula` is reused as a scratch name for every type, not only the y ion.
# NOTE(review): the "Weight" labels are inconsistent — the YIon and XIon values
# are divided by 2.0 (so they are m/z), the Full and BIon values are not.

# Full residue type.
y3_formula = suffix.getFormula(Residue.ResidueType.Full, 2) # Full type, charge 2 (not a y ion)
print("formula & Full ",y3_formula)
print("Weight & full",suffix.getMonoWeight(Residue.ResidueType.Full, 2))
# y-ion residue type.
y3_formula = suffix.getFormula(Residue.ResidueType.YIon, 2) # y3++ ion
print("formula & YIon ",y3_formula)
print("Weight & YIon",suffix.getMonoWeight(Residue.ResidueType.YIon, 2) / 2.0 )# CORRECT: weight divided by the charge (2) gives m/z
# x-ion residue type.
y3_formula = suffix.getFormula(Residue.ResidueType.XIon, 2)
print("formula & XIon ",y3_formula)
print("Weight & XIon",suffix.getMonoWeight(Residue.ResidueType.XIon, 2) / 2.0 )# CORRECT: weight divided by the charge (2) gives m/z
# b-ion residue type.
# NOTE(review): `suffix` comes from getSuffix(), i.e. the C-terminal end of the
# peptide — confirm that applying BIon to it is intended.
y3_formula = suffix.getFormula(Residue.ResidueType.BIon, 2)
print("formula &BIon ",y3_formula)
print("Weight & BIon",suffix.getMonoWeight(Residue.ResidueType.BIon, 2)) 

formula & Full  C13H24N6O6
Weight & full 362.19028769914206
formula & YIon  C13H24N6O6
Weight & YIon 181.09514384957103
formula & XIon  C14H22N6O7
Weight & XIon 194.08477631767101
formula &BIon  C13H22N6O5
Weight & BIon 344.17972263534205


In [40]:
# y3 ion at charge 2: the m/z is the charged monoisotopic weight divided by
# the charge; the formula is requested with the same type and charge.
print(
    "y3 mz:",
    suffix.getMonoWeight(Residue.ResidueType.YIon, 2) / 2.0,
)
y3_formula = suffix.getFormula(Residue.ResidueType.YIon, 2)
print("y3 molecular formula:", y3_formula)

y3 mz: 181.09514384957103
y3 molecular formula: C13H24N6O6


# Modified sequences

In [42]:
 # Parse a peptide with an oxidised methionine and print it in each string
 # notation AASequence offers. The whole cell shares one indentation level so
 # it dedents to valid Python instead of relying on IPython to strip a stray
 # indent from the first line only.
 seq = AASequence.fromString("PEPTIDESEKUEM(Oxidation)CER")
 print(seq.toUnmodifiedString())      # residues only, modification dropped
 print(seq.toString())                # modification by name
 print(seq.toUniModString())          # modification as a UniMod accession
 print(seq.toBracketString())         # bracketed mass, printed as an integer
 print(seq.toBracketString(False))    # bracketed mass, printed as a float

 # Five input notations for the same oxidation (name/accession, mass delta,
 # absolute residue mass); the recorded output shows they all parse to the
 # same modified sequence.
 print(AASequence.fromString("DFPIAM(UniMod:35)GER"))
 print(AASequence.fromString("DFPIAM[+16]GER"))
 print(AASequence.fromString("DFPIAM[+15.99]GER"))
 print(AASequence.fromString("DFPIAM[147]GER"))
 print(AASequence.fromString("DFPIAM[147.035405]GER"))

PEPTIDESEKUEMCER
PEPTIDESEKUEM(Oxidation)CER
PEPTIDESEKUEM(UniMod:35)CER
PEPTIDESEKUEM[147]CER
PEPTIDESEKUEM[147.03540001709996]CER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER


# Proteins and FASTA files

In [44]:
# Two hand-built FASTA records (partial albumin sequences), written to disk.
bsa = FASTAEntry()
bsa.identifier = "BSA"
bsa.description = "BSA Bovine Albumin (partial sequence)"
bsa.sequence = "MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGE"

alb = FASTAEntry()
alb.identifier = "ALB"
alb.description = "ALB Human Albumin (partial sequence)"
alb.sequence = "MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGE"

# Records are stored in list order: BSA first, then ALB.
entries = [bsa, alb]
f = FASTAFile()
f.store("example.fasta", entries)

In [47]:
    # Read the FASTA file back; load() fills the list that is passed in.
    f = FASTAFile()
    entries = []
    f.load("example.fasta", entries)

    print(len(entries))
    for e in entries:
        print(e.identifier, e.sequence, " ", e.description)


2
BSA MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGE   BSA Bovine Albumin (partial sequence)
ALB MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGE   ALB Human Albumin (partial sequence)
