1. Val-Gly-Ala-Leu-Ala-Val-Val-Val-Trp-Trp-Trp-Leu

  99-57--71--113-71--99--99--99--186-186-186-113

In [None]:
99+57+71+113+71+99+99+99+186+186+186+113

1379

2. 1379

3. Масса пептида = масса последнего субпептида, который равен всему пептиду (mass[-1])

4. $\frac{n(n+1)}{2}$

In [38]:
import numpy as np

# Integer mass of each amino-acid residue, keyed by its one-letter code
# (20 entries).  Two pairs share a mass -- I/L (113) and K/Q (128) -- so
# a spectrum built from this table cannot tell those residues apart.
# Insertion order matters: expand() iterates the keys in this order.
AMINO_ACID_MASSES = {
    'A': 71,
    'C': 103,
    'D': 115,
    'E': 129,
    'F': 147,
    'G': 57,
    'H': 137,
    'I': 113,  # same mass as 'L'
    'K': 128,  # same mass as 'Q'
    'L': 113,  # same mass as 'I'
    'M': 131,
    'N': 114,
    'P': 97,
    'Q': 128,  # same mass as 'K'
    'R': 156,
    'S': 87,
    'T': 101,
    'V': 99,
    'W': 186,
    'Y': 163,
}


def cyclospectrum(peptide):
    """Return the sorted theoretical spectrum of a cyclic peptide.

    The result holds 0 plus the mass of every contiguous fragment of the
    ring, including fragments that wrap around the end of ``peptide``.

    Args:
        peptide: Sequence of one-letter residue codes (e.g. a ``str``).

    Returns:
        A sorted list of integer masses.

    Raises:
        KeyError: If a residue code is not in ``AMINO_ACID_MASSES``.
    """
    # cumulative[k] is the mass of the first k residues.
    cumulative = [0]
    for residue in peptide:
        cumulative.append(cumulative[-1] + AMINO_ACID_MASSES[residue])

    size = len(peptide)
    total = cumulative[-1]
    masses = [0]
    for start in range(size):
        for stop in range(start + 1, size + 1):
            fragment = cumulative[stop] - cumulative[start]
            masses.append(fragment)
            # A fragment touching neither end of the linear string has a
            # complement that wraps around the ring; record it as well.
            if start and stop != size:
                masses.append(total - fragment)
    masses.sort()
    return masses

def linearspectrum(peptide):
    """Return the sorted theoretical spectrum of a linear peptide.

    The result holds 0 plus the mass of every contiguous substring of
    ``peptide`` (no wrap-around fragments).

    Args:
        peptide: Sequence of one-letter residue codes (e.g. a ``str``).

    Returns:
        A sorted list of integer masses.

    Raises:
        KeyError: If a residue code is not in ``AMINO_ACID_MASSES``.
    """
    # cumulative[k] is the mass of the first k residues.
    cumulative = [0]
    for residue in peptide:
        cumulative.append(cumulative[-1] + AMINO_ACID_MASSES[residue])

    size = len(peptide)
    fragments = [
        cumulative[stop] - cumulative[start]
        for start in range(size)
        for stop in range(start + 1, size + 1)
    ]
    return sorted([0] + fragments)

def mass(peptide):
    """Return the total integer mass of ``peptide``.

    Raises:
        KeyError: If a residue code is not in ``AMINO_ACID_MASSES``.
    """
    return sum(AMINO_ACID_MASSES[residue] for residue in peptide)


def cyclopeptide_sequencing(spectrum):
    """Return every peptide whose cyclic spectrum equals ``spectrum``.

    Branch-and-bound search: candidates are grown one residue at a time
    with ``expand``.  A candidate whose mass reaches the parent mass (the
    largest value in the spectrum) is accepted if its cyclospectrum equals
    the spectrum and is dropped either way; any other candidate survives
    to the next round only if its linear spectrum is consistent with the
    spectrum (see ``match``).

    Args:
        spectrum: Iterable of integer masses.  It does not need to be
            sorted; a sorted copy is used internally.

    Returns:
        A list of peptide strings in discovery order.  Empty if
        ``spectrum`` is empty or no peptide fits.
    """
    # Work on a sorted copy: the parent mass is its last element and
    # cyclospectrum() returns a sorted list, so equality is meaningful.
    target = sorted(spectrum)
    if not target:
        # No peptide has an empty spectrum (even "" yields [0]).
        return []
    parent_mass = target[-1]

    final_peptides = []
    candidate_peptides = [""]
    while candidate_peptides:
        # Build the survivor list directly instead of removing from the
        # list being scanned (list.remove is linear per call).
        survivors = []
        for peptide in expand(candidate_peptides):
            if mass(peptide) == parent_mass:
                if cyclospectrum(peptide) == target and peptide not in final_peptides:
                    final_peptides.append(peptide)
                # Full-mass peptides are never extended further.
            elif match(peptide, target):
                survivors.append(peptide)
        candidate_peptides = survivors
    return final_peptides


def expand(candidate_peptides: list) -> list:
    """Extend every candidate by each residue in ``AMINO_ACID_MASSES``.

    Args:
        candidate_peptides: Peptide strings to extend.

    Returns:
        A new list with one entry per (candidate, residue) pair, ordered
        by candidate first and then by the table's key order.
    """
    return [
        prefix + residue
        for prefix in candidate_peptides
        for residue in AMINO_ACID_MASSES
    ]

def match(peptide, spectrum):
    """Tell whether ``peptide`` is consistent with ``spectrum``.

    A peptide is consistent when every mass in its linear spectrum occurs
    in ``spectrum`` at least as many times as it occurs in the linear
    spectrum (multiset containment).

    Args:
        peptide: Sequence of one-letter residue codes.
        spectrum: Iterable of integer masses (any order).

    Returns:
        ``True`` if the linear spectrum is contained in ``spectrum``,
        ``False`` otherwise.
    """
    from collections import Counter

    # Counting both sides once replaces the repeated list scan/remove.
    available = Counter(spectrum)
    required = Counter(linearspectrum(peptide))
    return all(available[m] >= count for m, count in required.items())

In [39]:
# Round-trip check: rebuild peptides from the cyclic spectrum of a known one.
lst = cyclopeptide_sequencing(cyclospectrum("WFNQYVK"))

In [40]:
lst

['FNKYVKW',
 'FNKYVQW',
 'FNQYVKW',
 'FNQYVQW',
 'FWKVYKN',
 'FWKVYQN',
 'FWQVYKN',
 'FWQVYQN',
 'KNFWKVY',
 'KNFWQVY',
 'KVYKNFW',
 'KVYQNFW',
 'KWFNKYV',
 'KWFNQYV',
 'KYVKWFN',
 'KYVQWFN',
 'NFWKVYK',
 'NFWKVYQ',
 'NFWQVYK',
 'NFWQVYQ',
 'NKYVKWF',
 'NKYVQWF',
 'NQYVKWF',
 'NQYVQWF',
 'QNFWKVY',
 'QNFWQVY',
 'QVYKNFW',
 'QVYQNFW',
 'QWFNKYV',
 'QWFNQYV',
 'QYVKWFN',
 'QYVQWFN',
 'VKWFNKY',
 'VKWFNQY',
 'VQWFNKY',
 'VQWFNQY',
 'VYKNFWK',
 'VYKNFWQ',
 'VYQNFWK',
 'VYQNFWQ',
 'WFNKYVK',
 'WFNKYVQ',
 'WFNQYVK',
 'WFNQYVQ',
 'WKVYKNF',
 'WKVYQNF',
 'WQVYKNF',
 'WQVYQNF',
 'YKNFWKV',
 'YKNFWQV',
 'YQNFWKV',
 'YQNFWQV',
 'YVKWFNK',
 'YVKWFNQ',
 'YVQWFNK',
 'YVQWFNQ']