# In [1]:
import collections

# Integer masses of the standard amino acids, in ascending order.
# Residues that share an integer mass (I/L at 113, K/Q at 128) appear once.
INTEGER_MASSES = sorted({
    57,   # G
    71,   # A
    87,   # S
    97,   # P
    99,   # V
    101,  # T
    103,  # C
    113,  # I / L
    114,  # N
    115,  # D
    128,  # K / Q
    129,  # E
    131,  # M
    137,  # H
    147,  # F
    156,  # R
    163,  # Y
    186,  # W
})

def get_linear_spectrum(peptide_masses):
    """Return the linear spectrum of a peptide as a mass -> count Counter.

    The linear spectrum contains the mass of every contiguous subpeptide
    (no wrap-around) plus mass 0 for the empty subpeptide.

    Args:
        peptide_masses: Amino-acid masses in peptide order.

    Returns:
        collections.Counter mapping each subpeptide mass to the number of
        subpeptides having that mass.
    """
    # running_totals[k] is the combined mass of the first k residues, so the
    # mass of any contiguous fragment is the difference of two entries.
    running_totals = [0]
    for residue_mass in peptide_masses:
        running_totals.append(running_totals[-1] + residue_mass)

    spectrum = collections.Counter([0])  # the empty subpeptide
    for start, mass_before in enumerate(running_totals):
        for mass_through in running_totals[start + 1:]:
            spectrum[mass_through - mass_before] += 1
    return spectrum

def get_cyclospectrum(peptide_masses):
    """Return the theoretical spectrum of a cyclic peptide as a Counter.

    The cyclospectrum contains mass 0, the mass of every contiguous
    subpeptide of length 1..n-1 starting at each of the n positions
    (wrapping around the end of the peptide), and the full peptide mass once.

    Args:
        peptide_masses: Sequence (list or tuple) of amino-acid masses in
            cyclic order.

    Returns:
        collections.Counter mapping each subpeptide mass to its multiplicity.
        For an empty peptide this is ``Counter({0: 1})``.
    """
    n = len(peptide_masses)
    spectrum = collections.Counter({0: 1})  # mass 0 is always present
    if n == 0:
        return spectrum

    # Prefix sums over the peptide followed by its first n - 1 residues.
    # The repeated head mimics the circle, and differences of prefix sums
    # give each wrapped subpeptide mass in O(1) instead of re-summing a slice.
    prefix_sum = [0]
    for mass in peptide_masses:
        prefix_sum.append(prefix_sum[-1] + mass)
    for mass in peptide_masses[:n - 1]:
        prefix_sum.append(prefix_sum[-1] + mass)

    for length in range(1, n):      # proper subpeptide lengths
        for start in range(n):      # start position in the original peptide
            spectrum[prefix_sum[start + length] - prefix_sum[start]] += 1

    # The full peptide is counted once, not once per rotation.
    spectrum[prefix_sum[n]] += 1
    return spectrum

def expand_peptide_candidates(current_peptides_set, amino_acid_masses_list):
    """Extend every candidate peptide by each amino-acid mass.

    Args:
        current_peptides_set: Iterable of peptides, each a tuple of masses.
        amino_acid_masses_list: Masses that may be appended to a peptide.

    Returns:
        A new set holding every peptide from ``current_peptides_set`` with
        each mass from ``amino_acid_masses_list`` appended at the end.
    """
    return {
        peptide + (appended_mass,)
        for peptide in current_peptides_set
        for appended_mass in amino_acid_masses_list
    }

def is_peptide_consistent_with_spectrum(peptide_masses_tuple, target_spectrum_counter):
    """Tell whether a peptide's linear spectrum fits inside the target spectrum.

    Used as the pruning test: the peptide is consistent when every mass of
    its linear spectrum occurs in ``target_spectrum_counter`` at least as
    many times as it occurs in the linear spectrum.

    Args:
        peptide_masses_tuple: Amino-acid masses of the candidate peptide.
        target_spectrum_counter: Mass -> count mapping of the target
            spectrum; a mass that is absent must look up as 0, as
            ``collections.Counter`` does.

    Returns:
        True if the peptide is consistent with the spectrum, else False.
    """
    fragment_counts = get_linear_spectrum(list(peptide_masses_tuple))
    return all(
        target_spectrum_counter[mass] >= needed
        for mass, needed in fragment_counts.items()
    )

def cyclopeptide_sequencing(ideal_spectrum_string):
    """Find every cyclic peptide whose cyclospectrum equals the given spectrum.

    Branch-and-bound search: candidate peptides grow by one amino acid per
    round, candidates inconsistent with the spectrum are pruned, and
    candidates whose mass reaches the parent mass are checked against the
    full spectrum.

    Args:
        ideal_spectrum_string: Whitespace-separated integers giving the
            ideal spectrum.

    Returns:
        Sorted list of matching peptides, each formatted as its amino-acid
        masses joined by "-". The list is empty when the input holds no
        numbers or no peptide matches. Sorting is on the formatted strings.

    Raises:
        ValueError: If a token of ``ideal_spectrum_string`` is not an integer.
    """
    spectrum_values = list(map(int, ideal_spectrum_string.split()))
    if not spectrum_values:  # nothing to sequence
        return []

    ideal_spectrum_counter = collections.Counter(spectrum_values)
    # The heaviest peak is the mass of the whole peptide.
    parent_mass = max(spectrum_values)

    # Every residue of a solution shows up as a single-residue peak, and a
    # candidate containing a mass absent from the spectrum fails the
    # consistency check anyway. Filtering the alphabet once keeps the result
    # identical while avoiding pointless expansions every round.
    usable_masses = [
        mass for mass in INTEGER_MASSES if mass in ideal_spectrum_counter
    ]

    candidate_peptides = {()}  # start from the empty peptide
    final_solution_peptides = set()  # formatted strings of valid peptides

    while candidate_peptides:
        # Branch: grow each surviving candidate by one amino acid.
        expanded_peptides_set = expand_peptide_candidates(
            candidate_peptides, usable_masses
        )
        candidate_peptides = set()  # survivors for the next round

        for p_tuple in expanded_peptides_set:
            current_peptide_mass = sum(p_tuple)

            if current_peptide_mass == parent_mass:
                # Complete peptide: keep it only if the spectra match exactly.
                # Either way it is not expanded further.
                if get_cyclospectrum(list(p_tuple)) == ideal_spectrum_counter:
                    final_solution_peptides.add("-".join(map(str, p_tuple)))
            elif current_peptide_mass < parent_mass:
                # Bound: keep only candidates still consistent with the spectrum.
                if is_peptide_consistent_with_spectrum(p_tuple, ideal_spectrum_counter):
                    candidate_peptides.add(p_tuple)
            # Heavier than the parent mass: dropped by not keeping it.

    return sorted(final_solution_peptides)


if __name__ == "__main__":
    # Script entry point: read the spectrum from the first line of the input
    # file, sequence it, and print the matching peptides.
    input_path = "../data/rosalind_ba4e.txt"

    with open(input_path, 'r') as handle:
        first_line = handle.readline()

    matching_peptides = cyclopeptide_sequencing(first_line.strip())

    # The problem asks for space-separated output.
    print(" ".join(matching_peptides))

# Output:
# 101-113-97-156-128-113-103-71 101-71-103-113-128-156-97-113 103-113-128-156-97-113-101-71 103-71-101-113-97-156-128-113 113-101-71-103-113-128-156-97 113-103-71-101-113-97-156-128 113-128-156-97-113-101-71-103 113-97-156-128-113-103-71-101 128-113-103-71-101-113-97-156 128-156-97-113-101-71-103-113 156-128-113-103-71-101-113-97 156-97-113-101-71-103-113-128 71-101-113-97-156-128-113-103 71-103-113-128-156-97-113-101 97-113-101-71-103-113-128-156 97-156-128-113-103-71-101-113
