In [69]:
"""
Idea: Mostly the same as in problem 18, but instead of checking consistency we select (cut)
the best n peptides candidates according to the scores. Score(peptide,spectrum) is done in
O(min(theoretical_spectrum(peptide), spectrum)) using sets.
"""

amino_masses = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]


def expand(peptides):
    new_peptides = set() # a set of tuples is expected
    for peptide in peptides:
        for amino_acid_mass in amino_masses:
            new_peptides.add(peptide + (amino_acid_mass,))
    return new_peptides


def theoretical_spectrum(peptide):
    spectrum = set([0])
    spectrum.add(sum(peptide))

    double_peptide = peptide + peptide
    for k in range( len(peptide)):
        for i in range(1,  len(peptide)):
            spectrum.add(sum(double_peptide[k: k + i]))

    return spectrum

def mass(peptide):
    return sum(peptide)


def score(peptide_spectrum: set, spectrum: set):
    # size of set intersection
    return len(peptide_spectrum & spectrum)


def trim(leaderboard, scores, n):

    new_leaderboard = set()
    last_peptide = None

    sorted_leaderboard = sorted(leaderboard, key=lambda x: scores[x], reverse=True)
    for idx, peptide in enumerate(sorted_leaderboard):
        # page 199: Leaderboard should be trimmed down
        # to the “N highest-scoring linear peptides including ties”,
        if idx >= n and peptide != last_peptide:
            break
        new_leaderboard.add(peptide)
        last_peptide = peptide

    return new_leaderboard


def leaderboard_cyclopeptide_sequencing(spectrum, n):
    leaderboard = set([()]) # set of tuples. The elements (masses) in a tuple form a peptide
    parent_mass = max(spectrum)

    leader_peptide = ()

    # theoretical_spectrum(peptide) and spectrum match at least at mass = 0
    leader_score = 1

    while leaderboard:
        leaderboard = expand(leaderboard)
        old_leaderboard = leaderboard.copy()

        old_scores = dict()
        for peptide in old_leaderboard:

            cyclo_spectrum = theoretical_spectrum(peptide)
            old_scores[peptide] = score(cyclo_spectrum, spectrum)
            peptide_mass = mass(peptide)

            if peptide_mass == parent_mass:
                if old_scores[peptide] >= leader_score:
                    leader_peptide = '-'.join(map(str, peptide))
                    leader_score = old_scores[peptide]
            elif peptide_mass > parent_mass:
                leaderboard.remove(peptide)

        leaderboard = trim(leaderboard, old_scores, n)

    return leader_peptide


def main():
    
    file = open('rosalind_ba4g.txt', 'r')
    
    n = int(next(file))
    
    spectrum = next(file).strip()
    spectrum = set((map(int, spectrum.split())))
    print(spectrum)
    
    print(leaderboard_cyclopeptide_sequencing(spectrum, n))

In [70]:
if __name__ == "__main__":
    main()

{0, 516, 524, 1039, 528, 1040, 532, 533, 1044, 1048, 1056, 1572, 549, 553, 1069, 559, 1071, 1082, 1087, 71, 587, 1114, 604, 1120, 97, 1122, 99, 101, 613, 621, 625, 1140, 629, 1141, 631, 632, 1145, 1147, 128, 1153, 131, 644, 650, 652, 1168, 1170, 660, 1172, 1184, 163, 703, 1219, 199, 200, 712, 202, 715, 718, 722, 726, 1239, 1242, 732, 1244, 1245, 1248, 225, 1250, 227, 741, 232, 749, 753, 1269, 759, 760, 1271, 1273, 256, 1281, 260, 1312, 291, 1316, 299, 812, 301, 813, 303, 819, 823, 1340, 831, 1345, 322, 1347, 324, 327, 840, 328, 330, 333, 846, 850, 854, 857, 1370, 860, 1372, 1373, 353, 869, 1409, 388, 400, 912, 402, 404, 920, 922, 928, 1441, 419, 1444, 425, 940, 941, 431, 432, 943, 947, 951, 959, 1471, 1473, 450, 1475, 452, 968, 458, 985, 1501, 485, 490, 501, 1013, 503, 1019, 1023}
97-163-128-128-71-131-101-101-99-101-99-57-71-97-71-57


In [14]:
Output:
97-129-97-147-99-71-186-71-113-163-115-71-113-128-103-87-128-101-137-163-114


SyntaxError: invalid syntax (<ipython-input-14-105b10c5d4be>, line 1)

In [44]:
from collections import Counter, defaultdict
import numpy as np
import numba
from tqdm import tqdm

aminoacid_lst = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]


#@numba.jit(nopython=True)
def expand(leaderboard):
    expanded_leaderboard = []
    for peptide in leaderboard:
        for aminoacid in aminoacid_lst:
            expanded_leaderboard.append(peptide + [aminoacid])
    return expanded_leaderboard


#@numba.jit(nopython=True)
def mass(peptide):
    return sum(peptide)


#@numba.jit(nopython=True)
def parent_mass(spectrum):
    return spectrum[-1]


#@numba.jit(nopython=True)
def cyclospectrum(peptide):
    selfconcat = peptide + peptide
    spectrum = [0]
    for l in range(1, len(peptide)):
        for i in range(len(peptide)):
            spectrum.append(mass(selfconcat[i : i + l]))
    spectrum.append(mass(peptide))
    return sorted(spectrum)


def linspectrum(peptide):
    spectrum = [0] 
    for l in range(1, len(peptide)):
        for i in range(len(peptide) - l + 1):
            spectrum.append(mass(peptide[i : i + l]))
    spectrum.append(sum(peptide))
    return sorted(spectrum)


def score(peptide, spectrum):
    peptide_counts = Counter(cyclospectrum(peptide))
    theor_counts = Counter(spectrum)
    return np.sum([
        min(peptide_counts[base], theor_counts[base])
        for base in peptide_counts.keys()
    ])


def cut(leaderboard, spectrum, n):
    return sorted(
        leaderboard, 
        key=lambda peptide: score(peptide, spectrum), 
        reverse=True
    )[:n]

def peptide_repr(peptide):
    return '-'.join([str(x) for x in peptide])


def leaderboard_cyclopeptide_sequencing(spectrum, n):
    leaderboard = [[]]
    leader_peptide = []

    parent_mass = max(spectrum)
    used = defaultdict(bool)

    while leaderboard:
        #        print(len(leaderboard))
        leaderboard = expand(leaderboard)
        candidate_lst = []
        for peptide in tqdm(leaderboard):
            #print(peptide_repr)
            #used[peptide_repr(peptide)] = True
            #            print(mass(peptide))
            if mass(peptide) == parent_mass:
                #                print(peptide, theoretical_spectrum(peptide))
                if score(peptide, spectrum) > score(leader_peptide, spectrum):
                    leader_peptide = peptide
                    print(leader_peptide)
            elif mass(peptide) < parent_mass and not used[peptide_repr(peptide)]:
                candidate_lst.append(peptide)
        leaderboard = cut(candidate_lst, spectrum, n)
        # print(leaderboard)
    return leader_peptide


def main():
    
    file = open('leaderboard_cyclopeptide_sequencing.txt', 'r')
    
    n = int(next(file))
    
    spectrum = next(file).strip()
    spectrum = list((map(int, spectrum.split())))
    print('-'.join([str(mass) for mass in leaderboard_cyclopeptide_sequencing(spectrum, n)]))

In [45]:
if __name__ == "__main__":
    main()

100%|██████████| 18/18 [00:00<00:00, 17873.45it/s]
100%|██████████| 324/324 [00:00<00:00, 65124.57it/s]
100%|██████████| 5832/5832 [00:00<00:00, 145367.78it/s]
100%|██████████| 5850/5850 [00:00<00:00, 206720.40it/s]
100%|██████████| 5850/5850 [00:00<00:00, 76016.49it/s]
100%|██████████| 5850/5850 [00:00<00:00, 126651.38it/s]
100%|██████████| 5850/5850 [00:00<00:00, 88184.67it/s]
100%|██████████| 5850/5850 [00:00<00:00, 133199.49it/s]
100%|██████████| 5850/5850 [00:00<00:00, 158326.69it/s]
100%|██████████| 5850/5850 [00:00<00:00, 86020.57it/s]
100%|██████████| 5850/5850 [00:00<00:00, 142464.60it/s]
100%|██████████| 5850/5850 [00:00<00:00, 127018.53it/s]
100%|██████████| 5850/5850 [00:00<00:00, 132903.69it/s]
100%|██████████| 5850/5850 [00:00<00:00, 120407.10it/s]
100%|██████████| 5850/5850 [00:00<00:00, 116315.69it/s]
100%|██████████| 5850/5850 [00:00<00:00, 112847.08it/s]
100%|██████████| 5850/5850 [00:00<00:00, 91141.57it/s]
100%|██████████| 5850/5850 [00:00<00:00, 102593.96it/s]
100%

[186, 71, 113, 163, 186, 113, 128, 103, 87, 128, 101, 137, 163, 114, 97, 129, 97, 131, 186]


 53%|█████▎    | 3122/5850 [00:00<00:00, 14636.25it/s]

[113, 163, 186, 113, 128, 103, 87, 128, 101, 137, 163, 114, 97, 129, 97, 147, 99, 71, 186, 71]


100%|██████████| 5850/5850 [00:00<00:00, 17695.10it/s]
 36%|███▌      | 2090/5850 [00:00<00:00, 9577.20it/s]

[163, 113, 71, 186, 71, 99, 147, 97, 129, 97, 114, 163, 137, 101, 128, 87, 103, 128, 113, 115, 71]
[113, 163, 115, 71, 113, 128, 103, 87, 128, 101, 137, 163, 114, 97, 129, 97, 147, 99, 71, 186, 71]


100%|██████████| 5850/5850 [00:00<00:00, 11336.61it/s]
100%|██████████| 5850/5850 [00:00<00:00, 22733.38it/s]
100%|██████████| 5850/5850 [00:00<00:00, 541768.13it/s]
100%|██████████| 432/432 [00:00<00:00, 335420.09it/s]

113-163-115-71-113-128-103-87-128-101-137-163-114-97-129-97-147-99-71-186-71





In [None]:
163-101-137-97-128-113-129-131-114-128-131-147-131

In [None]:
97-129-97-147-99-71-186-71-113-163-115-71-113-128-103-87-128-101-137-163-114

In [48]:



AMINO_MASSES = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]


def _attach_amino_mass(peptide: tuple) -> list:
    """
    "Attach" all possible amino acid masses to the given peptide and return the resulting peptides
    """
    return [peptide + (mass,) for mass in AMINO_MASSES]


def _cyclospectrum_score(peptide: tuple, spectrum: set) -> int:
    """
    Calculate the cyclospectrum score for the given peptide
    """
    cyclospectrum = set()
    for cycle_len in range(1, len(peptide)):
        peptide_extended = peptide + peptide[:(cycle_len - 1)]
        for cycle_start_pos in range(len(peptide)):
            cyclospectrum.add(_peptide_mass(peptide_extended[cycle_start_pos:(cycle_start_pos + cycle_len)]))

    cyclospectrum.add(0)
    cyclospectrum.add(_peptide_mass(peptide))

    return len(spectrum.intersection(cyclospectrum))


def _peptide_mass(peptide: tuple):
    """
    Calculate peptide mass
    """
    return sum(peptide)


def _expand_leaderboard(leaderboard: list, spectrum: set):
    """
    Expand the leaderboard
    :param leaderboard: leaderboard of peptides
    :return: a new leaderboard (noncut), ordered DESC
    """
    lb_len = len(leaderboard)
    for i in range(lb_len):
        p_pair = leaderboard[i]
        new_peptides = _attach_amino_mass(p_pair[1])
        for new_peptide in new_peptides:
            leaderboard.append([_cyclospectrum_score(new_peptide, spectrum), new_peptide])

    for i in range(lb_len):
        leaderboard.pop(0)

    leaderboard.sort(key=lambda p: p[0], reverse=True)


def sequence_peptide(spectrum: list, lb_size: int) -> list:
    """
    Sequence a peptide using a leaderboad algorithm
    :param spectrum: spectrum of a peptide (not an ideal one)
    :param lb_size: leaderboard size
    :return: the leading peptide
    """
    parent_mass = spectrum[-1]
    spectrum = set(spectrum)

    leaderboard = [[0, ()]]
    leader = [0, ()]

    while len(leaderboard) > 0:
        _expand_leaderboard(leaderboard, spectrum)

        i = 0
        while True:
            if i >= len(leaderboard) or i >= lb_size:
                break

            p_pair = leaderboard[i]
            peptide_mass = _peptide_mass(p_pair[1])

            if peptide_mass == parent_mass:
                if p_pair[0] > leader[0]:
                    leader = p_pair
                else:
                    leaderboard.pop(i)
                    continue
            elif peptide_mass > parent_mass:
                leaderboard.pop(i)
                continue

            i += 1

        leaderboard = leaderboard[:lb_size]

    return leader[1]


def main():
    file = open('leaderboard_cyclopeptide_sequencing.txt', 'r')
    
    lb_size = int(next(file))
    
    spectrum = next(file).strip()
    spectrum = list((map(int, spectrum.split())))
    result = sequence_peptide(spectrum, lb_size)
    print('-'.join(list(map(str, result))))

In [49]:
if __name__ == "__main__":
    main()

115-163-113-71-186-71-99-147-97-129-97-114-163-137-101-128-87-103-128-113-71


In [62]:
import numpy as np


def do_range_expand(peptides):
    all_mass = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]
    expand_peptides = []
    for p in range(len(peptides)):
        for j in range(len(all_mass)):
            cur_pep = []
            for t in range(len(peptides[p])):
                cur_pep.append(peptides[p][t])
            cur_pep.append(all_mass[j])
            expand_peptides.append(cur_pep)
    return expand_peptides


def count_peptide_mass(peptide):
    return np.sum(peptide)


def count_parent_mass(spec):
    if (len(spec) == 0):
        return 0
    return np.max(spec)


def generate_sybpeptides(peptide):
    res = []
    res.append(0)
    n = len(peptide)
    extra_peptide = peptide + peptide
    for j in range(1, n):
        for i in range(n):
            cur_subpeptide = sum(extra_peptide[i : i + j])
            res.append(cur_subpeptide)
    res.append(np.sum(peptide))
    return res


def find_score(peptide, spec):
    if (len(peptide) == 0):
        return len(spec)
    pep_spec = generate_sybpeptides(peptide)
    ex_spec = []
    for i in range(len(spec)):
        ex_spec.append(spec[i])
    scor = 0
    for elem in pep_spec:
        if (elem in ex_spec):
            ex_spec.remove(elem)
            scor += 1
    return scor


def do_trim(leaders, spec, N):
    if (len(leaders) <= N):
        return leaders

    sorted_leaders = sorted(leaders, key=lambda p: find_score(p, spec))
    res = sorted_leaders[len(sorted_leaders) - N:]
    return res


def do_leaderboard_cyclopeptide_sequencing(spec, N):
    leaders = [[]]
    cur_leader = []
    cur_top_score = 0
    parent_mass = max(spec)
    while (len(leaders) > 0):
        #leaders = do_expand(leaders)
        leaders = do_range_expand(leaders)
        next_leaders = []
        for j in range(len(leaders)):
            next_leaders.append(leaders[j])

        for cur_pep in leaders:
            cur_mass = count_peptide_mass(cur_pep)
            if (cur_mass == parent_mass):
                cur_score = find_score(cur_pep, spec)
                if ( cur_score > cur_top_score):
                    cur_leader = cur_pep
                    cur_top_score = cur_score
                    #print("Leader: ", cur_leader, "   ----     ", cur_top_score)
            else:
                if (cur_mass > parent_mass):
                    next_leaders.remove(cur_pep)

        leaders = do_trim(next_leaders, spec, N)
    return cur_leader


def final_result_hw19(N, text):
    spec = []
    cur_num = ""
    for j in range(len(text)):
        if(text[j] == " "):
            spec.append(int(cur_num))
            cur_num = ""
        else:
            cur_num += text[j]
    spec.append(int(cur_num))
    
    print(spec)
    
    res = do_leaderboard_cyclopeptide_sequencing(spec, N)
    print(res)
    
    st_res = ""
    for r in range(len(res)):
        st_res += str(res[r])
        if (r < len(res) - 1):
            st_res += "-"
    print(st_res)


def main():
    file = open('rosalind_ba4g.txt', 'r')
    
    
    N = int(next(file))
    text = next(file).strip()
    final_result_hw19(N, text)

In [63]:
if __name__ == "__main__":
    main()

[0, 71, 97, 97, 99, 99, 101, 101, 101, 128, 128, 128, 128, 131, 163, 199, 200, 200, 200, 202, 202, 225, 225, 225, 227, 232, 256, 260, 291, 299, 301, 301, 303, 322, 324, 327, 328, 330, 333, 353, 388, 388, 400, 402, 404, 419, 425, 431, 432, 450, 452, 458, 485, 490, 501, 503, 516, 516, 524, 528, 532, 533, 549, 553, 559, 587, 604, 613, 613, 621, 625, 629, 631, 632, 644, 650, 652, 660, 703, 712, 715, 718, 722, 726, 732, 741, 741, 749, 753, 759, 760, 812, 813, 819, 823, 831, 831, 840, 846, 850, 854, 857, 860, 869, 912, 920, 922, 928, 940, 941, 943, 947, 951, 959, 959, 968, 985, 1013, 1019, 1023, 1039, 1040, 1044, 1048, 1056, 1056, 1069, 1071, 1082, 1087, 1114, 1120, 1122, 1140, 1141, 1145, 1147, 1153, 1168, 1170, 1172, 1184, 1184, 1219, 1239, 1242, 1244, 1245, 1248, 1250, 1269, 1271, 1271, 1273, 1281, 1312, 1316, 1340, 1345, 1347, 1347, 1347, 1370, 1370, 1372, 1372, 1372, 1373, 1409, 1441, 1444, 1444, 1444, 1444, 1471, 1471, 1471, 1473, 1473, 1475, 1475, 1501, 1572]
[101, 101, 131, 71, 128, 

In [None]:
101-101-131-71-128-128-163-97-128-97-128-99-101-99

In [85]:
import numpy as np


def do_range_expand(peptides):
    all_mass = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]
    expand_peptides = []
    for p in range(len(peptides)):
        for j in range(len(all_mass)):
            cur_pep = []
            for t in range(len(peptides[p])):
                cur_pep.append(peptides[p][t])
            cur_pep.append(all_mass[j])
            expand_peptides.append(cur_pep)
    return expand_peptides


def count_peptide_mass(peptide):
    return np.sum(peptide)


def count_parent_mass(spec):
    if (len(spec) == 0):
        return 0
    return np.max(spec)


def generate_sybpeptides(peptide):
    res = []
    res.append(0)
    n = len(peptide)
    extra_peptide = peptide + peptide
    for j in range(1, n):
        for i in range(n):
            cur_subpeptide = sum(extra_peptide[i : i + j])
            res.append(cur_subpeptide)
    res.append(np.sum(peptide))
    return res


def find_score(peptide, spec):
    if (len(peptide) == 0):
        return len(spec)
    pep_spec = generate_sybpeptides(peptide)
    ex_spec = []
    for i in range(len(spec)):
        ex_spec.append(spec[i])
    scor = 0
    for elem in pep_spec:
        if (elem in ex_spec):
            ex_spec.remove(elem)
            scor += 1
    return scor


def do_trim(leaders, spec, N):
    if (len(leaders) <= N):
        return leaders

    sorted_leaders = sorted(leaders, key=lambda p: find_score(p, spec))
    res = sorted_leaders[len(sorted_leaders) - N:]
    return res


def do_leaderboard_cyclopeptide_sequencing(spec, N):
    leaders = [[]]
    cur_leader = []
    cur_top_score = 0
    parent_mass = max(spec)
    while (len(leaders) > 0):
        #leaders = do_expand(leaders)
        leaders = do_range_expand(leaders)
        next_leaders = []
        for j in range(len(leaders)):
            next_leaders.append(leaders[j])

        for cur_pep in leaders:
            cur_mass = count_peptide_mass(cur_pep)
            if (cur_mass == parent_mass):
                cur_score = find_score(cur_pep, spec)
                if ( cur_score > cur_top_score):
                    cur_leader = cur_pep
                    cur_top_score = cur_score
                    #print("Leader: ", cur_leader, "   ----     ", cur_top_score)
            else:
                if (cur_mass > parent_mass):
                    next_leaders.remove(cur_pep)

        leaders = do_trim(next_leaders, spec, N)
    return '-'.join([str(n) for n in  cur_leader])


def final_result_hw19(N, text):
    
    spectrum = [int(s) for s in text]

    #print(spectrum)
    res = do_leaderboard_cyclopeptide_sequencing(spectrum, N)
    
    print(res)


def main():
    file = open('rosalind_ba4g.txt', 'r')
    
    
    N = int(next(file))
    text = next(file).strip().split()
    final_result_hw19(N, text)

In [86]:
if __name__ == "__main__":
    main()

99-113-156-115-87-97-128-71-114-101-128-147


In [None]:
101-101-131-71-128-128-163-97-128-97-128-99-101-99

In [68]:
def main():
    file = open('rosalind_ba4g.txt', 'r')
    
    
    N = int(next(file))
    text = [int(s) for s in next(file).strip().split()]
    print(text)
    
    
    #final_result_hw19(N, text)
    
if __name__ == "__main__":
    main()

[0, 71, 97, 97, 99, 99, 101, 101, 101, 128, 128, 128, 128, 131, 163, 199, 200, 200, 200, 202, 202, 225, 225, 225, 227, 232, 256, 260, 291, 299, 301, 301, 303, 322, 324, 327, 328, 330, 333, 353, 388, 388, 400, 402, 404, 419, 425, 431, 432, 450, 452, 458, 485, 490, 501, 503, 516, 516, 524, 528, 532, 533, 549, 553, 559, 587, 604, 613, 613, 621, 625, 629, 631, 632, 644, 650, 652, 660, 703, 712, 715, 718, 722, 726, 732, 741, 741, 749, 753, 759, 760, 812, 813, 819, 823, 831, 831, 840, 846, 850, 854, 857, 860, 869, 912, 920, 922, 928, 940, 941, 943, 947, 951, 959, 959, 968, 985, 1013, 1019, 1023, 1039, 1040, 1044, 1048, 1056, 1056, 1069, 1071, 1082, 1087, 1114, 1120, 1122, 1140, 1141, 1145, 1147, 1153, 1168, 1170, 1172, 1184, 1184, 1219, 1239, 1242, 1244, 1245, 1248, 1250, 1269, 1271, 1271, 1273, 1281, 1312, 1316, 1340, 1345, 1347, 1347, 1347, 1370, 1370, 1372, 1372, 1372, 1373, 1409, 1441, 1444, 1444, 1444, 1444, 1471, 1471, 1471, 1473, 1473, 1475, 1475, 1501, 1572]


In [87]:
import numpy as np

amino_masses = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]


def expand(peptides):
    expanded_peptides = []

    for k in range(len(peptides)):
        for i in range(len(amino_masses)):
            cur_pep = []

            for t in range(len(peptides[k])):
                cur_pep.append(peptides[k][t])

            cur_pep.append(amino_masses[i])
            expanded_peptides.append(cur_pep)

    return expanded_peptides


def mass(peptide):
    return np.sum(peptide)


def theoretical_spectrum(peptide):
    result = [0]

    double_peptide = peptide + peptide

    for k in range(1, len(peptide)):
        for i in range(len(peptide)):
            curr_subpeptide = sum(double_peptide[i : i + k])
            result.append(curr_subpeptide)

    result.append(np.sum(peptide))
    return result


def score(peptide, spectrum):

    if len(peptide) == 0:
        return len(spectrum)

    subpeptides_set = theoretical_spectrum(peptide)

    temp_spectrum = spectrum.copy()

    curr_score = 0
    for elem in subpeptides_set:
        if elem in temp_spectrum:
            temp_spectrum.remove(elem)
            curr_score += 1
            
    return curr_score


def trim(leaderboard, spec, n):
    
    if len(leaderboard) <= n:
        return leaderboard

    sorted_leaders = sorted(leaderboard, key=lambda p: score(p, spec))
    trimmed_leaderboard = sorted_leaders[len(sorted_leaders) - n:]
    
    return trimmed_leaderboard


def leaderboard_cyclopeptide_sequencing(spectrum, n):
    leaderboard = [[]]
    leader = []
    leader_score = 1

    parent_mass = max(spectrum)

    while len(leaderboard):

        leaderboard = expand(leaderboard)
        next_leaderboard = leaderboard.copy()

        for peptide in leaderboard:
            if mass(peptide) == parent_mass:
                curr_score = score(peptide, spectrum)
                if curr_score > leader_score:
                    leader = peptide
                    leader_score = curr_score
            elif mass(peptide) > parent_mass:
                next_leaderboard.remove(peptide)

        leaderboard = trim(next_leaderboard, spectrum, n)

    return '-'.join([str(n) for n in leader])


def main():
    file = open('rosalind_ba4g.txt', 'r')

    n = int(next(file))
    spectrum = [int(s) for s in next(file).strip().split()]
    
    print(leaderboard_cyclopeptide_sequencing(spectrum, n))

In [88]:
if __name__ == "__main__":
    main()

99-113-156-115-87-97-128-71-114-101-128-147


In [None]:
99-113-156-115-87-97-128-71-114-101-128-147