In [1]:
import collections

# Integer mass for each one-letter amino acid code, listed in ascending
# mass order. Note that two pairs of codes share a mass (I/L at 113 and
# K/Q at 128), so a mass alone does not always identify a single code.
AMINO_ACID_MASSES_DICT = {
    "G": 57,  "A": 71,  "S": 87,  "P": 97,  "V": 99,
    "T": 101, "C": 103, "I": 113, "L": 113, "N": 114,
    "D": 115, "K": 128, "Q": 128, "E": 129, "M": 131,
    "H": 137, "F": 147, "R": 156, "Y": 163, "W": 186,
}

def get_cyclospectrum(peptide_masses):  # Same as in BA4E
    """Return the theoretical cyclospectrum of a cyclic peptide.

    The spectrum is a multiset holding the mass 0, the mass of every
    contiguous subpeptide of length 1..n-1 (wrapping around the end of
    the peptide, one entry per starting position), and the mass of the
    whole peptide.

    Args:
        peptide_masses: Iterable of residue masses in peptide order. Any
            iterable is accepted; it is materialised once into a list.

    Returns:
        collections.Counter mapping each mass to the number of times it
        occurs in the spectrum. An empty peptide yields ``{0: 1}``.

    Note:
        Subpeptide masses are computed as differences of prefix sums.
        This is exact for integer masses; with float masses the result
        may differ from a direct summation by rounding error.
    """
    masses = list(peptide_masses)
    spectrum = collections.Counter({0: 1})
    if not masses:
        return spectrum

    n = len(masses)

    # Prefix sums over the peptide followed by its first n - 1 residues.
    # Appending those residues unrolls the circle, so every wrap-around
    # subpeptide becomes a plain window, and each window mass is a single
    # subtraction instead of a fresh sum over a slice.
    prefix = [0]
    for mass in masses + masses[:n - 1]:
        prefix.append(prefix[-1] + mass)

    for length in range(1, n):      # proper subpeptide lengths only
        for start in range(n):      # every starting position on the circle
            spectrum[prefix[start + length] - prefix[start]] += 1

    # The full peptide is counted once (not once per rotation).
    spectrum[prefix[n]] += 1
    return spectrum


def amino_acid_string_to_mass_list(peptide_string):
    """Translate a peptide string into the list of its residue masses.

    Each character of ``peptide_string`` is looked up in
    ``AMINO_ACID_MASSES_DICT``; the masses are returned in the same
    order as the characters.

    Raises:
        ValueError: at the first character that has no entry in
            ``AMINO_ACID_MASSES_DICT``.
    """
    residue_masses = []
    for residue in peptide_string:
        # Guard first so an unknown code fails fast with a clear message.
        if residue not in AMINO_ACID_MASSES_DICT:
            raise ValueError(f"Unknown amino acid: {residue} in peptide {peptide_string}")
        residue_masses.append(AMINO_ACID_MASSES_DICT[residue])
    return residue_masses

def score_cyclic_peptide_against_spectrum(peptide_string, experimental_spectrum_list):
    """Score a cyclic peptide against an experimental spectrum.

    The score is the number of masses shared between the peptide's
    theoretical cyclospectrum and the experimental spectrum, counted
    with multiplicity: each mass contributes the smaller of its two
    occurrence counts.

    Args:
        peptide_string: Peptide as a string of one-letter amino acid codes.
        experimental_spectrum_list: Masses observed in the experiment.

    Returns:
        The total number of shared masses.
    """
    theoretical = get_cyclospectrum(amino_acid_string_to_mass_list(peptide_string))
    observed = collections.Counter(experimental_spectrum_list)

    # A Counter lookup yields 0 for an absent mass, so masses missing from
    # the experimental spectrum simply contribute nothing.
    return sum(
        min(expected_count, observed[mass])
        for mass, expected_count in theoretical.items()
    )

if __name__ == "__main__":
    # Input layout: the first line holds the peptide string, the second
    # line holds the experimental spectrum as whitespace-separated integers.
    file_path = "../data/rosalind_ba4f.txt"

    with open(file_path, 'r') as f:
        peptide_input_str = f.readline().strip()
        experimental_spectrum_input_str = f.readline().strip()

    # Parse the spectrum line into integer masses.
    experimental_spectrum = [
        int(token) for token in experimental_spectrum_input_str.split()
    ]

    final_score = score_cyclic_peptide_against_spectrum(
        peptide_input_str, experimental_spectrum
    )
    print(final_score)

775
