In [8]:
def FindAllKmers(peptide,k):
  """Return the k-mers of `peptide` read as a cyclic string, one per start index.

  Windows that fit inside the string are plain slices; windows that would run
  past the end are completed with characters taken from the beginning of
  `peptide`. For 1 <= k <= len(peptide) the result has len(peptide) entries,
  in order of start position.
  """
  n = len(peptide)
  # Number of start positions whose window fits without wrapping around.
  linear_count = max(n - k + 1, 0)
  kmers = [peptide[start:start + k] for start in range(linear_count)]
  # Remaining start positions: take the tail, then borrow the missing
  # characters from the front of the peptide.
  for start in range(linear_count, n):
    tail = peptide[start:]
    kmers.append(tail + peptide[:k - len(tail)])
  return kmers

In [9]:
def GenerateAllSubpeptides(peptide):
  """Return every cyclic subpeptide of `peptide`, shortest first.

  For each length from 1 to len(peptide) - 1 all cyclic k-mers produced by
  FindAllKmers are collected; the full peptide is then appended exactly once
  (rather than once per rotation).
  """
  collected = [
    kmer
    for length in range(1, len(peptide))
    for kmer in FindAllKmers(peptide, length)
  ]
  collected.append(peptide)
  return collected

In [10]:
def CycloSpectrum(peptide):
  """Draft of CycloSpectrum: generates the subpeptides and returns None.

  A complete definition with the same name appears later in this file and
  replaces this one when it is executed.
  """
  GenerateAllSubpeptides(peptide)
  return None

In [11]:
# Mass of each amino acid, keyed by its one-letter code.
# NOTE(review): the values look like monoisotopic residue masses in daltons --
# confirm against the source table. Despite the variable name, the values are
# floats at this point; a later cell truncates them to integers in place.
aminoacids_integer_masses_dict = {
  'A': 71.03711,
  'R': 156.10111,
  'N': 114.04293,
  'D': 115.02694,
  'C': 103.00919,
  'E': 129.04259,
  'Q': 128.05858,
  'G': 57.02146,
  'H': 137.05891,
  'I': 113.08406,
  'L': 113.08406,
  'K': 128.09496,
  'M': 131.04049,
  'F': 147.06841,
  'P': 97.05276,
  'S': 87.03203,
  'T': 101.04768,
  'W': 186.07931,
  'Y': 163.06333,
  'V': 99.06841,
}

In [12]:
# Replace every mass by its integer part (int() truncates toward zero),
# updating the dictionary in place. Only values change, never the key set,
# so mutating while iterating is safe.
for key, exact_mass in aminoacids_integer_masses_dict.items():
  aminoacids_integer_masses_dict[key] = int(exact_mass)

The theoretical spectrum of a cyclic peptide Peptide is the collection of all of the masses
of its subpeptides, in addition to the mass 0 and the mass of the entire peptide, with
masses ordered from smallest to largest. We will assume that the theoretical spectrum can contain duplicate elements
(two different subpeptides with the same mass contribute that mass twice).

In [13]:
def CycloSpectrum(peptide):
  """Return the theoretical spectrum of the cyclic peptide `peptide`.

  The spectrum is a sorted list containing the mass 0 followed by the mass of
  every subpeptide returned by GenerateAllSubpeptides (duplicates are kept).
  Each subpeptide mass is the sum of the per-residue masses looked up in
  aminoacids_integer_masses_dict; a residue missing from that table raises
  KeyError.
  """
  mass_of = aminoacids_integer_masses_dict
  spectrum = [0]
  spectrum.extend(
    sum(mass_of[residue] for residue in subpeptide)
    for subpeptide in GenerateAllSubpeptides(peptide)
  )
  spectrum.sort()
  return spectrum

In [4]:
from collections import Counter

Theoretical: 0 113 114 128 129 227 242 242 257 355 356 370 371 484 --> theoretical

Experimental: 0 99 113 114 128 227 257 299 355 356 370 371 484 --> experimental

99, 299 --> false masses (present in the experimental spectrum but not in the theoretical spectrum)

129, 242, 242 --> missing masses (present in the theoretical spectrum but not in the experimental spectrum)

Given a cyclic peptide Peptide and a spectrum Spectrum, we define SCORE(Peptide, Spectrum) as the number of masses shared between
CYCLOSPECTRUM(Peptide) and Spectrum, counted with multiplicity --> a mass that occurs a times in the theoretical spectrum and b times in the experimental spectrum adds min(a, b) to the score. In particular, if a mass occurs the same number of times in both spectra, that number is added to the score; if it is absent from either spectrum, nothing is added.

If a mass occurs the same number of times in the theoretical spectrum and in the experimental spectrum, the two spectra have the same multiplicity for that mass, i.e. they share every occurrence of it. If the counts differ, either the theoretical or the experimental spectrum has more occurrences of that mass. When the experimental spectrum has more occurrences than the theoretical spectrum, the surplus occurrences are false masses. When the experimental spectrum has fewer occurrences than the theoretical spectrum, the difference (count in the theoretical spectrum - count in the experimental spectrum) is the number of occurrences of that mass that are missing masses.

In [27]:
def CyclicPeptideScoring(peptide,spectrum):
  """Score `spectrum` against the theoretical spectrum of cyclic `peptide`.

  The score is the size of the multiset intersection of the two spectra: a
  mass that occurs a times in CycloSpectrum(peptide) and b times in
  `spectrum` contributes min(a, b).

  Args:
    peptide: amino-acid string, interpreted as a cyclic peptide.
    spectrum: iterable of masses (the experimental spectrum); duplicates are
      meaningful. It is consumed only once, so a one-shot iterator works too.

  Returns:
    The number of masses shared between the two spectra, as an int.
  """
  theoretical_counts = Counter(CycloSpectrum(peptide))
  experimental_counts = Counter(spectrum)
  # Counter intersection keeps, per mass, the minimum of the two counts and
  # drops masses that are absent from either side. This replaces a manual
  # three-way comparison that relied on O(n) list-membership tests.
  shared_counts = theoretical_counts & experimental_counts
  return sum(shared_counts.values())

In [28]:
# Example cyclic peptide used to try out CyclicPeptideScoring.
peptide = 'NQEL'

In [29]:
# Example experimental spectrum (list of integer masses) to be scored.
spectrum = [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484]

In [30]:
# Score the example spectrum against the example peptide (recorded output: 11).
CyclicPeptideScoring(peptide,spectrum)

11

In [32]:
# Read the task file into a list of lines with trailing whitespace removed.
# NOTE(review): judging by the file name this is a Rosalind BA4F dataset --
# confirm the expected line layout.
with open('/content/rosalind_ba4f.txt') as task_file:
  spectrum = list(map(str.rstrip, task_file))

In [34]:
# Keep only the first line read from the file; it is used as the spectrum text.
spectrum = spectrum[0]

In [35]:
# Tokenize on any run of whitespace. split(' ') would produce empty strings
# for repeated spaces (and keep tab-separated values glued together), which
# the subsequent int() conversion cannot parse.
spectrum = spectrum.split()

In [36]:
# Convert every entry of spectrum from text to int, in place.
for i, token in enumerate(spectrum):
  spectrum[i] = int(token)

In [31]:
# Peptide to score against the spectrum loaded from the task file.
peptide = 'NRTWEFVLVYQCTIAYATNRETWCYTLVRWNCMGP'

In [38]:
# Score the loaded spectrum against the task peptide (recorded output: 330).
CyclicPeptideScoring(peptide,spectrum)

330