## Amino acid sequences [Prefix & Suffix]

In [15]:
from pyopenms import *
# Parse a peptide, then slice and join it: a 4-residue prefix, a 5-residue
# suffix, and the sequence appended to itself.
seq = AASequence.fromString("DFPIANGER")
pre, suf = seq.getPrefix(4), seq.getSuffix(5)
con = seq + seq
# Each label after the first carries its own leading "\n", so one print call
# emits one labelled value per line.
summary = ("Sequence:", seq, "\nPrefix:", pre, "\nSuffix:", suf, "\nConcatenated:", con)
print(*summary)

Sequence: DFPIANGER 
Prefix: DFPI 
Suffix: ANGER 
Concatenated: DFPIANGERDFPIANGER


## Monoisotopic weight

In [16]:
# Monoisotopic mass of the peptide [M].
totalM = seq.getMonoWeight()
# NOTE: despite its name, `proton` holds the monoisotopic mass of the doubly
# charged precursor [M+2H]2+ (full peptide, charge 2), not a proton mass.
proton = seq.getMonoWeight(Residue.ResidueType.Full, 2)
# m/z is that charged mass divided by the charge. The original cell notes
# seq.getMZ(2) as the second way to calculate it.
mz = proton / 2.0

for label, value in (
    ("Monoisotopic mass of peptide [M] is", totalM),
    ("Monoisotopic mass of peptide precursor [M+2H]2+ is", proton),
    ("Monoisotopic m/z of [M+2H]2+ is", mz),
):
    print(label, value)

Monoisotopic mass of peptide [M] is 1017.4879641373001
Monoisotopic mass of peptide precursor [M+2H]2+ is 1019.5025170708419
Monoisotopic m/z of [M+2H]2+ is 509.75125853542096


In [19]:
# Walk the peptide residue by residue and report each residue's name and
# monoisotopic weight.
seq = AASequence.fromString("DFPIANGER")
print("The peptide", str(seq), "consists of the following amino acids:")
for residue in seq:
    name, mass = residue.getName(), residue.getMonoWeight()
    print(name, ":", mass)

The peptide DFPIANGER consists of the following amino acids:
Aspartate : 133.0375092233
Phenylalanine : 165.0789793509
Proline : 115.06332928709999
Isoleucine : 131.0946294147
Alanine : 89.0476792233
Asparagine : 132.0534932552
Glycine : 75.0320291595
Glutamate : 147.0531592871
Arginine : 174.1116764466


## Molecular formula

In [20]:
# Parse the peptide and ask it for its molecular formula.
peptide_text = "DFPIANGER"
seq = AASequence.fromString(peptide_text)
seq_formula = seq.getFormula()
print("Peptide", seq, "has molecular formula", seq_formula)

Peptide DFPIANGER has molecular formula C44H67N13O15


In [23]:
# The last three residues form the y3 ion ("GER").
suffix = seq.getSuffix(3)
print("y3 ion sequence:", suffix)

# Formula and m/z are both requested for the y-ion residue type at charge 2.
ion_type = Residue.ResidueType.YIon
charge = 2
y3_formula = suffix.getFormula(ion_type, charge)
print("y3 molecular formula:", y3_formula)
y3_mz = suffix.getMonoWeight(ion_type, charge) / 2.0
print("y3 mz:", y3_mz)


y3 ion sequence: GER
y3 molecular formula: C13H24N6O6
y3 mz: 181.09514384957103


## Modified Sequences

In [25]:
# A peptide carrying an oxidised methionine, rendered in each available
# string notation.
seq = AASequence.fromString("PEPTIDESEKUEM(Oxidation)CER")
for render in (
    seq.toUnmodifiedString,
    seq.toString,
    seq.toUniModString,
    seq.toBracketString,
):
    print(render())
# Passing False switches the bracket notation from the integer mass to the
# full-precision mass (compare the two bracket lines in the recorded output).
print(seq.toBracketString(False))

# Several input spellings of the same modification; each is parsed and
# printed back so the parsed forms can be compared.
for notation in (
    "DFPIAM(UniMod:35)GER",
    "DFPIAM[+16]GER",
    "DFPIAM[+15.99]GER",
    "DFPIAM[147]GER",
    "DFPIAM[147.035405]GER",
):
    print(AASequence.fromString(notation))

PEPTIDESEKUEMCER
PEPTIDESEKUEM(Oxidation)CER
PEPTIDESEKUEM(UniMod:35)CER
PEPTIDESEKUEM[147]CER
PEPTIDESEKUEM[147.035400017100017]CER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER


## Write to a FASTA file

In [30]:
# Build two partial albumin records; each gets an identifier, a description
# and a sequence.
bsa = FASTAEntry()
bsa.identifier = "BSA"
bsa.description = "BSA Bovine Albumin (partial sequence)"
bsa.sequence = "MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGE"

alb = FASTAEntry()
alb.identifier = "ALB"
alb.description = "ALB Human Albumin (partial sequence)"
alb.sequence = "MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGE"

entries = [bsa, alb]

# Store both records in example.fasta (path is relative to the working directory).
file = FASTAFile()
file.store("example.fasta", entries)

## Read a FASTA file

In [31]:
# Start from an empty list; load() fills it with the records read from
# example.fasta (the recorded output shows two entries).
entries = []
file = FASTAFile()
file.load("example.fasta", entries)

print(len(entries))
for entry in entries:
    print(entry.identifier, entry.sequence)

2
BSA MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGE
ALB MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGE
