In [25]:
import numpy as np

amino_mass = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]


def expand(peptides):
    expanded = []
    for peptide in peptides:
        for amino in amino_mass:
            expanded.append(peptide + [amino])
    return expanded


def parent_mass(spectrum):
    return spectrum[-1]


def mass(peptide):
    return sum(peptide)


# page 191
def cyclospectrum(peptide):
    cyclic_peptide = peptide + peptide

    # add mass = 0
    theo_spectrum = [0]
    # add mass of entire peptide
    theo_spectrum.append(mass(peptide))

    for k in range(1, len(peptide)):
        for i in range(len(peptide)):
            theo_spectrum.append(mass(cyclic_peptide[i:i + k]))

    return sorted(theo_spectrum)


def cyclopeptide_sequencing(spectrum):
    result = []
    peptides = [[]]

    while peptides:

        peptides = expand(peptides)
        next_peptides = []

        for peptide in peptides:
            cyclos_spec = cyclospectrum(peptide)

            if mass(peptide) == parent_mass(spectrum):
                if cyclos_spec == spectrum:
                    result.append('-'.join([str(m) for m in peptide]))
            elif np.all(np.isin(cyclos_spec, spectrum)):
                next_peptides.append(peptide)

        peptides = next_peptides
        
    return '\n'.join(result)

def main():
    file = open('rosalind_ba4e.txt', 'r')
    
    spectrum = [int(n) for n in next(file).split()]
    
    print(cyclopeptide_sequencing(spectrum))

In [26]:
if __name__ == '__main__':
   main()




In [27]:
import numpy as np
import numba
from tqdm import tqdm

aminoacid_lst = np.array([
    57, 71, 87, 97, 99, 101, 103, 
    113, 114, 115, 128, 129, 131, 
    137, 147, 156, 163, 186 
])  



#@numba.jit(nopython=True)
def expand(peptide_lst):
    expanded_peptide_lst = []
    for peptide in peptide_lst:
        for aminoacid in aminoacid_lst:
            expanded_peptide_lst.append(peptide + [aminoacid])
    return expanded_peptide_lst


#@numba.jit(nopython=True)
def mass(peptide):
    return sum(peptide)


#@numba.jit(nopython=True)
def parent_mass(spectrum):
    return spectrum[-1]


#@numba.jit(nopython=True)
def cyclospectrum(peptide):
    selfconcat = peptide + peptide
    spectrum = [0]
    for l in range(1, len(peptide)):
        for i in range(len(peptide)):
            spectrum.append(mass(selfconcat[i : i + l]))
    spectrum.append(mass(peptide))
    return sorted(spectrum)


#@numba.jit(nopython=True)
def cyclopeptide_sequencing(spectrum):
    peptide_lst = [[]]
    result_lst = []
    curr_iter = 0
    while peptide_lst:
        #print(len(peptide_lst))
        peptide_lst = expand(peptide_lst)
        peptide_to_try_lst = []
        for peptide in tqdm(peptide_lst, desc="iter {}".format(curr_iter + 1)):
            if mass(peptide) == parent_mass(spectrum):
                #print(peptide, cyclospectrum(peptide))
                if cyclospectrum(peptide) == spectrum:
                    result_lst.append('-'.join([str(mass) for mass in peptide]))
                    print(result_lst[-1])
            elif np.all(np.isin(cyclospectrum(peptide), spectrum)):
                peptide_to_try_lst.append(peptide)
        peptide_lst = peptide_to_try_lst
        #print(peptide_lst)
        curr_iter += 1
#        if curr_iter == 2:
#            break
    return result_lst

def main():
    file = open('rosalind_ba4e.txt', 'r')
    
    spectrum = [int(n) for n in next(file).split()]
    
    print(cyclopeptide_sequencing(spectrum))

In [28]:
if __name__ == '__main__':
   main()

iter 1: 100%|██████████| 18/18 [00:00<00:00, 3915.64it/s]
iter 2: 100%|██████████| 144/144 [00:00<00:00, 7354.49it/s]
iter 3: 100%|██████████| 378/378 [00:00<00:00, 5040.48it/s]
iter 4: 100%|██████████| 216/216 [00:00<00:00, 2253.82it/s]


[]


In [None]:
d
103-137-71-131-114-113-113-115-99-97
103-97-99-115-113-113-114-131-71-137
113-113-114-131-71-137-103-97-99-115
113-113-115-99-97-103-137-71-131-114
113-114-131-71-137-103-97-99-115-113
113-115-99-97-103-137-71-131-114-113
114-113-113-115-99-97-103-137-71-131
114-131-71-137-103-97-99-115-113-113
115-113-113-114-131-71-137-103-97-99
115-99-97-103-137-71-131-114-113-113
131-114-113-113-115-99-97-103-137-71
131-71-137-103-97-99-115-113-113-114
137-103-97-99-115-113-113-114-131-71
137-71-131-114-113-113-115-99-97-103
71-131-114-113-113-115-99-97-103-137
71-137-103-97-99-115-113-113-114-131
97-103-137-71-131-114-113-113-115-99
97-99-115-113-113-114-131-71-137-103
99-115-113-113-114-131-71-137-103-97
99-97-103-137-71-131-114-113-113-115


In [30]:
import sys


def main():
    file = open('rosalind_ba4e.txt', 'r')
    
    spectrum = [int(n) for n in next(file).split()]
    
    result = sequence_peptide(spectrum)
    print(' '.join(['-'.join(list(map(str, peptide))) for peptide in result]))


AMINO_MASSES = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]


def _attach_amino_mass(peptide: list) -> list:
    """
    "Attach" all possible amino acid masses to the given peptide and return the resulting peptides
    """
    result = [peptide[:] for _ in range(len(AMINO_MASSES))]
    for i in range(len(result)):
        result[i].append(AMINO_MASSES[i])
    return result


def _is_peptide_consistent_with_spectrum(peptide: list, spectrum: set) -> bool:
    """
    Check if the given peptide consistent with the given spectrum
    """
    for amino in peptide:
        if amino not in spectrum:
            return False
    return True


def _is_peptide_cyclospectrum_equal_to_spectrum(peptide: list, spectrum: set) -> bool:
    """
    Check if the given peptide consistent with the given spectrum
    """
    cyclospectrum = set()
    for cycle_len in range(1, len(peptide)):
        peptide_extended = peptide + peptide[:(cycle_len - 1)]
        for cycle_start_pos in range(len(peptide)):
            cyclospectrum.add(_peptide_mass(peptide_extended[cycle_start_pos:(cycle_start_pos + cycle_len)]))

    cyclospectrum.add(0)
    cyclospectrum.add(_peptide_mass(peptide))

    return cyclospectrum == spectrum


def _peptide_mass(peptide: list):
    """
    Calculate peptide mass
    """
    return sum(peptide)


def sequence_peptide(spectrum: list) -> list:
    """
    Sequence a peptide from its ideal spectrum
    """
    parent_mass = spectrum[-1]
    spectrum = set(spectrum)

    peptides = [[]]
    result = []

    while len(peptides) > 0:
        print(len(peptides[0]), len(peptides), file=sys.stderr)
        next_peptides = []
        for peptide in peptides:
            new_peptides = _attach_amino_mass(peptide)
            for new_peptide in new_peptides:
                new_peptide_mass = _peptide_mass(new_peptide)
                if new_peptide_mass < parent_mass:
                    if _is_peptide_consistent_with_spectrum(new_peptide, spectrum):
                        next_peptides.append(new_peptide)
                elif new_peptide_mass == parent_mass:
                    if _is_peptide_cyclospectrum_equal_to_spectrum(new_peptide, spectrum):
                        result.append(new_peptide)

        peptides = next_peptides

    return result




In [31]:
if __name__ == '__main__':
    main()

0 1
1 8
2 64
3 512
4 4096
5 32768
6 262144
7 2097152
8 16777179


KeyboardInterrupt: 

In [44]:
amino_acid_masses = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115,
                     128, 129, 131, 137, 147, 156, 163, 186]


def extend_peptides(peptides):
    new_peptides = set()
    for peptide in peptides:
        for amino_acid_mass in amino_acid_masses:
            new_peptides.add(peptide + (amino_acid_mass,))
    return new_peptides


def theoretical_spectrum(peptide):
    spectrum = [0]
    double_peptide = peptide
    double_peptide += peptide
    peptide_length = len(peptide)
    for i in range(peptide_length):
        for j in range(1, peptide_length):
            spectrum.append(sum(double_peptide[i: i + j]))
    spectrum.append(sum(peptide))
    return sorted(spectrum)


def consistent(peptide, spectrum):
    spectrum_set = set(spectrum)
    peptide_length = len(peptide)
    for i in range(peptide_length):
        for j in range(1, peptide_length + 1):
            if sum(peptide[i:i + j]) not in spectrum_set:
                return False
    return True


def mass(peptide):
    total = 0
    for mass in peptide:
        total += mass
    return total


def find_cyclopeptides(spectrum):
    peptides = set([()])
    parent_mass = max(spectrum)
    while peptides:
        peptides = extend_peptides(peptides)
        old_peptides = peptides.copy()
        for peptide in old_peptides:
            peptide_spectrum = theoretical_spectrum(peptide)
            if mass(peptide) == parent_mass:
                if peptide_spectrum == spectrum:
                    yield peptide
                peptides.remove(peptide)
            elif not consistent(peptide, spectrum):
                peptides.remove(peptide)

def main():
    spectrum = list(map(int, next(open("rosalind_ba4e.txt", "r")).split()))
    for item in find_cyclopeptides(spectrum):
        print("-".join(map(str, item)), end="\n")

In [45]:
if __name__ == "__main__":
    main()

103-114-186-163-114-71-163-113-87-114-71
114-163-186-114-103-71-114-87-113-163-71
186-114-103-71-114-87-113-163-71-114-163
163-186-114-103-71-114-87-113-163-71-114
114-71-163-113-87-114-71-103-114-186-163
113-87-114-71-103-114-186-163-114-71-163
114-186-163-114-71-163-113-87-114-71-103
186-163-114-71-163-113-87-114-71-103-114
163-114-71-163-113-87-114-71-103-114-186
114-103-71-114-87-113-163-71-114-163-186
71-103-114-186-163-114-71-163-113-87-114
71-114-87-113-163-71-114-163-186-114-103
114-71-103-114-186-163-114-71-163-113-87
113-163-71-114-163-186-114-103-71-114-87
87-113-163-71-114-163-186-114-103-71-114
103-71-114-87-113-163-71-114-163-186-114
87-114-71-103-114-186-163-114-71-163-113
163-113-87-114-71-103-114-186-163-114-71
163-71-114-163-186-114-103-71-114-87-113
71-114-163-186-114-103-71-114-87-113-163
114-87-113-163-71-114-163-186-114-103-71
71-163-113-87-114-71-103-114-186-163-114


In [46]:

"""
Idea: BnB algorithm in pages 195-196.
"""

amino_masses = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115,128, 129, 131, 137, 147, 156, 163, 186]


def expand(peptides):
    new_peptides = set()
    for peptide in peptides:
        for amino_acid_mass in amino_masses:
            new_peptides.add(peptide + (amino_acid_mass,))
    return new_peptides


def theoretical_spectrum(peptide):
    spectrum = [0]
    spectrum.append(sum(peptide))

    double_peptide = peptide + peptide
    peptide_length = len(peptide)
    for k in range(peptide_length):
        for i in range(1, peptide_length):
            spectrum.append(sum(double_peptide[k: k + i]))

    return sorted(spectrum)


def consistent(peptide, spectrum):

    spectrum_set = set(spectrum)

    for k in range(len(peptide)):
        for i in range(1, len(peptide) + 1):
            if sum(peptide[k:k + i]) not in spectrum_set:
                return False

    return True


def mass(peptide):
    return sum(peptide)


def cyclopeptide_sequencing(spectrum):

    peptides = set([()])
    parent_mass = max(spectrum)
    result = []

    while peptides:
        peptides = expand(peptides)
        old_peptides = peptides.copy()

        for peptide in old_peptides:

            cyclo_spectrum = theoretical_spectrum(peptide)

            if mass(peptide) == parent_mass:
                if cyclo_spectrum == spectrum:
                    result.append('-'.join([str(m) for m in peptide]))
                peptides.remove(peptide)
            elif not consistent(peptide, spectrum):
                peptides.remove(peptide)

    return '\n'.join(result)

def main():
    spectrum = list(map(int, next(open("rosalind_ba4e.txt", "r")).split()))
    print(cyclopeptide_sequencing(spectrum))

In [47]:
if __name__ == "__main__":
    main()

103-114-186-163-114-71-163-113-87-114-71
114-163-186-114-103-71-114-87-113-163-71
186-114-103-71-114-87-113-163-71-114-163
163-186-114-103-71-114-87-113-163-71-114
114-71-163-113-87-114-71-103-114-186-163
113-87-114-71-103-114-186-163-114-71-163
114-186-163-114-71-163-113-87-114-71-103
186-163-114-71-163-113-87-114-71-103-114
163-114-71-163-113-87-114-71-103-114-186
114-103-71-114-87-113-163-71-114-163-186
71-103-114-186-163-114-71-163-113-87-114
71-114-87-113-163-71-114-163-186-114-103
114-71-103-114-186-163-114-71-163-113-87
113-163-71-114-163-186-114-103-71-114-87
87-113-163-71-114-163-186-114-103-71-114
103-71-114-87-113-163-71-114-163-186-114
87-114-71-103-114-186-163-114-71-163-113
163-113-87-114-71-103-114-186-163-114-71
163-71-114-163-186-114-103-71-114-87-113
71-114-163-186-114-103-71-114-87-113-163
114-87-113-163-71-114-163-186-114-103-71
71-163-113-87-114-71-103-114-186-163-114
