In [1]:
# Amino-acid masses keyed by one-letter residue code. The values are floats
# at this point; a later cell truncates them to integers in place.
aminoacids_integer_masses_dict = {
  'A': 71.03711,
  'R': 156.10111,
  'N': 114.04293,
  'D': 115.02694,
  'C': 103.00919,
  'E': 129.04259,
  'Q': 128.05858,
  'G': 57.02146,
  'H': 137.05891,
  'I': 113.08406,
  'L': 113.08406,
  'K': 128.09496,
  'M': 131.04049,
  'F': 147.06841,
  'P': 97.05276,
  'S': 87.03203,
  'T': 101.04768,
  'W': 186.07931,
  'Y': 163.06333,
  'V': 99.06841,
}

In [2]:
# Truncate every mass to its integer part. update() rewrites the values on
# the existing dict object, so the conversion is still done in place.
aminoacids_integer_masses_dict.update(
  {residue: int(mass) for residue, mass in aminoacids_integer_masses_dict.items()}
)

In [3]:
def FindAllKmers(peptide,k):
  """Return every contiguous window of length k in peptide, left to right.

  Args:
    peptide: Sliceable sequence (a string in this notebook).
    k: Window length.

  Returns:
    list: The slices peptide[i:i+k] for each start i from 0 up to
    len(peptide) - k inclusive; empty when k exceeds len(peptide).
  """
  last_start = len(peptide) - k
  return [peptide[start:start + k] for start in range(last_start + 1)]

In [4]:
def GenerateAllSubpeptides(peptide):
  """List the subpeptides of a linear peptide, shortest first.

  Collects the k-mers returned by FindAllKmers for every length from 1 up to
  len(peptide) - 1, then appends the full peptide itself as the last entry.
  Because the full peptide is always appended, an empty peptide yields [''].

  Args:
    peptide: The peptide whose subpeptides are enumerated.

  Returns:
    list: Subpeptides grouped by increasing length, ending with peptide.
  """
  pieces = [
    kmer
    for size in range(1, len(peptide))
    for kmer in FindAllKmers(peptide, size)
  ]
  pieces.append(peptide)
  return pieces

In [5]:
def SubpeptideMass(subpeptide):
  """Add up the masses of the residues in subpeptide.

  Each residue is looked up in aminoacids_integer_masses_dict, so a residue
  missing from that table raises KeyError.

  Args:
    subpeptide: Iterable of residue codes.

  Returns:
    The total mass; 0 for an empty subpeptide.
  """
  mass_of = aminoacids_integer_masses_dict
  total = 0
  for residue in subpeptide:
    total += mass_of[residue]
  return total

In [7]:
def LinearSpectrum(peptide):
  """Build the theoretical spectrum of a linear peptide.

  Args:
    peptide: The peptide passed on to GenerateAllSubpeptides.

  Returns:
    list: A leading 0 followed by SubpeptideMass of every subpeptide, in the
    order GenerateAllSubpeptides yields them (the list is not sorted).
  """
  return [0] + [SubpeptideMass(piece) for piece in GenerateAllSubpeptides(peptide)]

In [8]:
from collections import Counter

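Given a cyclic peptide Peptide and a spectrum Spectrum, we define SCORE(Peptide, Spectrum) as the number of masses shared between
CYCLOSPECTRUM(Peptide) and Spectrum. For a linear peptide, as scored below, the theoretical linear spectrum of Peptide takes the place of CYCLOSPECTRUM(Peptide); a mass that occurs several times is counted as many times as it appears in both spectra.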

In [13]:
def LinearPeptideScoring(peptide,spectrum):
  """Score a linear peptide against an experimental spectrum.

  The score is the number of masses shared between the theoretical linear
  spectrum of peptide (from LinearSpectrum) and spectrum, counted with
  multiplicity: a mass that occurs a times in one spectrum and b times in the
  other contributes min(a, b).

  Args:
    peptide: The peptide whose theoretical linear spectrum is scored.
    spectrum: Iterable of masses forming the experimental spectrum.

  Returns:
    int: The number of shared masses; 0 when nothing matches.
  """
  theoretical_counts = Counter(LinearSpectrum(peptide))
  experimental_counts = Counter(spectrum)
  # Counter intersection keeps min(count) for every mass present in both
  # spectra, which is exactly the per-mass contribution to the score. This
  # avoids the quadratic "already scored" list scan and reads spectrum once.
  shared_counts = theoretical_counts & experimental_counts
  return sum(shared_counts.values())

In [14]:
# Sample peptide used to sanity-check LinearPeptideScoring.
peptide = "NQEL"

In [15]:
# Sample experimental spectrum of a linear peptide.
linear_spectrum = [
  0, 99, 113, 114, 128, 227, 257,
  299, 355, 356, 370, 371, 484,
]

In [16]:
# Score the sample peptide against the sample spectrum (recorded output: 8).
LinearPeptideScoring(peptide=peptide, spectrum=linear_spectrum)

8

In [17]:
# Read the task file into a list of lines with trailing whitespace
# (including the newline) stripped from each one.
with open('/content/rosalind_ba4k.txt') as task_file:
  task_arguments = list(map(str.rstrip, task_file))

In [19]:
# The first line of the task file is the peptide that gets scored below.
peptide = task_arguments[0]

In [20]:
# The second line of the task file holds the spectrum as whitespace-separated
# integers. split() with no argument tolerates repeated spaces and tabs,
# whereas split(' ') would produce empty tokens that int() rejects.
linear_spectrum = [int(mass) for mass in task_arguments[1].split()]

In [23]:
# Compute the score before opening the output so a scoring failure cannot
# leave behind a truncated result file, and let the context manager close the
# file even if the write itself raises.
score = LinearPeptideScoring(peptide,linear_spectrum)
with open("task_result.txt","w") as f:
  f.write(str(score))