In [1]:

from pyopenms import *
# Build a peptide from its one-letter string and derive sub-sequences from it.
seq = AASequence.fromString("ALTAYEB")
prefix = seq.getPrefix(6)  # leading six residues
suffix = seq.getSuffix(2)  # trailing two residues
concat = seq + seq  # "+" joins two sequences end to end

print("Seq:", seq)
print("Prefix:", prefix)
print("Suffix:", suffix)
print("ConcaT:", concat)

# Mass calculations.
full = seq.getMonoWeight()  # monoisotopic weight with default arguments
mpre = seq.getMonoWeight(Residue.ResidueType.Full, 2)  # full peptide, charge argument 2

# Mass-over-charge for charge 2. getMZ(2) gives the same number as the manual
# form `mpre / 2.0`; the earlier manual assignment omitted the division and was
# overwritten right away, so only the direct call is kept.
mz = seq.getMZ(2)

print()
print(full)
print(mpre)
print(mz)

Seq: ALTAYEB
Prefix: ALTAYE
Suffix: EB
ConcaT: ALTAYEBALTAYEB

648.3118954036001
650.326448337142
325.163224168571


In [3]:

# Parse the peptide, echo it, then list every residue with its monoisotopic weight.
# NOTE(review): in the recorded run the trailing "B" is reported as
# "Asparagine/Aspartate" with weight 0.0.
seq = AASequence.fromString("ALTAYEB")

print(seq)
for residue in seq:
    name, weight = residue.getName(), residue.getMonoWeight()
    print(f"{name} : {weight}")



ALTAYEB
Alanine : 89.0476792233
Leucine : 131.0946294147
Threonine : 119.0582442871
Alanine : 89.0476792233
Tyrosine : 181.07389435090002
Glutamate : 147.0531592871
Asparagine/Aspartate : 0.0


In [4]:

# Parse a peptide written with a bracketed mass and a named label modification,
# then report terminal modifications followed by one line per residue.
seq = AASequence.fromString("C[143]PKCK(Label:13C(6)15N(2))CR")

# Terminal modifications are queried on the sequence itself.
if seq.hasNTerminalModification():
    n_term_id = seq.getNTerminalModification().getFullId()
    print("N-Term Modification: ", n_term_id)
if seq.hasCTerminalModification():
    c_term_id = seq.getCTerminalModification().getFullId()
    print("C-Term Modification: ", c_term_id)

# Per-residue report: name and weight, plus the modification name when present.
for residue in seq:
    fields = [residue.getName(), residue.getMonoWeight()]
    if residue.isModified():
        fields.append(residue.getModificationName())
    print(*fields, sep=" : ")


N-Term Modification:  Pyro-carbamidomethyl (N-term C)
Cysteine : 121.01974995329999
Proline : 115.06332928709999
Lysine : 146.1055284466
Cysteine : 121.01974995329999
Lysine : 154.1197284466 : Label:13C(6)15N(2)
Cysteine : 121.01974995329999
Arginine : 174.1116764466


In [5]:

# Derive the molecular formula of the peptide; `seq_formula` is reused by the
# isotope-distribution cell that follows.
seq = AASequence.fromString("ALTAYEB")
seq_formula = seq.getFormula()
print(f"Peptide {seq} has molecular formula {seq_formula}")



Peptide ALTAYEB has molecular formula C30H46N6O11


In [6]:

# Isotope pattern of `seq_formula` (set by an earlier cell) from a coarse
# generator constructed with 6; the recorded run lists six peaks.
generator = CoarseIsotopePatternGenerator(6)
isotopes = seq_formula.getIsotopeDistribution(generator)
for peak in isotopes.getContainer():
    percent = peak.getIntensity() * 100  # fraction -> percent
    print("Isotope", peak.getMZ(), "has abundance", percent, "%")


Isotope 666.3224604674 has abundance 68.59981417655945 %
Isotope 667.3258153052 has abundance 24.4293674826622 %
Isotope 668.3291701429999 has abundance 5.777261406183243 %
Isotope 669.3325249808 has abundance 1.0249597951769829 %
Isotope 670.3358798186 has abundance 0.14983362052589655 %
Isotope 671.3392346564 has abundance 0.018762362014967948 %


In [10]:
# Doubly charged y3 fragment of the current peptide.
# NOTE(review): relies on `seq` from an earlier cell; in the recorded run the
# three-residue suffix is "YEB".
suffix = seq.getSuffix(3)  # sequence of the y3 ion
print("="*35)
print("y3 ion sequence:", suffix)
y3_formula = suffix.getFormula(Residue.ResidueType.YIon, 2)  # y3++ ion

# For a suffix, the YIon and XIon residue types are the valid choices when
# calling getMonoWeight(...); using BIon on a suffix is incorrect. The m/z is
# the charged weight divided by the charge, computed once and reused below.
y3_mz = suffix.getMonoWeight(Residue.ResidueType.YIon, 2) / 2.0

print("y3 mz:", y3_mz)
print("y3 molecular formula:", y3_formula)

y3 ion sequence: YEB
y3 mz: 147.06023822197102
y3 molecular formula: C14H18N2O6


In [11]:
# One modified peptide rendered through each available string form.
seq = AASequence.fromString("PEPTIDESEKUEM(Oxidation)CER")
renderings = (
    seq.toUnmodifiedString(),
    seq.toString(),
    seq.toUniModString(),
    seq.toBracketString(),
    seq.toBracketString(False),  # recorded output shows a non-integer bracket mass here
)
for text in renderings:
    print(text)

# Several input notations for the same modified methionine; in the recorded
# run every one of them prints as "DFPIAM(Oxidation)GER".
oxidation_notations = (
    "DFPIAM(UniMod:35)GER",
    "DFPIAM[+16]GER",
    "DFPIAM[+15.99]GER",
    "DFPIAM[147]GER",
    "DFPIAM[147.035405]GER",
)
for notation in oxidation_notations:
    print(AASequence.fromString(notation))

PEPTIDESEKUEMCER
PEPTIDESEKUEM(Oxidation)CER
PEPTIDESEKUEM(UniMod:35)CER
PEPTIDESEKUEM[147]CER
PEPTIDESEKUEM[147.035400017100017]CER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER


In [12]:
# Terminal modifications written with the "." terminus markers.

# Modification placed before the first residue, after the leading ".".
s = AASequence.fromString(".(Dimethyl)DFPIAMGER.")
has_mod = s.hasNTerminalModification()
print(f"{s} {has_mod}")

# Modification placed after the trailing ".".
s = AASequence.fromString(".DFPIAMGER.(Label:18O(2))")
has_mod = s.hasCTerminalModification()
print(f"{s} {has_mod}")

# Modification placed before the trailing "."; the recorded run reports no
# C-terminal modification for this form.
s = AASequence.fromString(".DFPIAMGER(Phospho).")
has_mod = s.hasCTerminalModification()
print(f"{s} {has_mod}")

.(Dimethyl)DFPIAMGER True
DFPIAMGER.(Label:18O(2)) True
DFPIAMGER(Phospho) False


In [13]:
def _fasta_entry(identifier, description, sequence):
    """Return a FASTAEntry with its identifier, description and sequence set."""
    entry = FASTAEntry()
    entry.identifier = identifier
    entry.description = description
    entry.sequence = sequence
    return entry


# Two partial albumin sequences to be written to disk.
bsa = _fasta_entry(
    "BSA",
    "BSA Bovine Albumin (partial sequence)",
    "MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGE",
)
alb = _fasta_entry(
    "ALB",
    "ALB Human Albumin (partial sequence)",
    "MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGE",
)

entries = [bsa, alb]

# Store both entries in a FASTA file in the working directory.
f = FASTAFile()
f.store("example.fasta", entries)

In [14]:
# Read the FASTA file back; load() fills the list passed to it.
entries = []
f = FASTAFile()
f.load("example.fasta", entries)

print(len(entries))
for entry in entries:
    print(f"{entry.identifier} {entry.sequence}")

2
BSA MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGE
ALB MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGE
