In [0]:
from collections import defaultdict
from collections import Counter

In [0]:
# Integer mass assigned to each one-letter amino-acid symbol.
# Two pairs share a value: I/L (113) and K/Q (128).
symbol_to_mass = {
    "G": 57,
    "A": 71,
    "S": 87,
    "P": 97,
    "V": 99,
    "T": 101,
    "C": 103,
    "I": 113,
    "L": 113,
    "N": 114,
    "D": 115,
    "K": 128,
    "Q": 128,
    "E": 129,
    "M": 131,
    "H": 137,
    "F": 147,
    "R": 156,
    "Y": 163,
    "W": 186,
}

#Cyclic Spectrum

In [0]:
def generate_theoretical_spectrum(peptide):
  """Return the sorted theoretical spectrum of a cyclic peptide.

  The spectrum holds 0, the mass of the whole peptide, and the mass of every
  contiguous subpeptide of length 1..n-1 starting at each of the n positions,
  where subpeptides are allowed to wrap around the end of the string.

  Args:
    peptide: string of amino-acid symbols; each must be a key of
      ``symbol_to_mass``.

  Returns:
    A sorted list of integer masses. An empty peptide yields ``[0]``.

  Raises:
    KeyError: if ``peptide`` contains a symbol missing from ``symbol_to_mass``.
  """
  masses = [symbol_to_mass[ch] for ch in peptide]
  n = len(masses)
  if n == 0:
    # Without this guard both 0 and the (zero) total mass would be emitted,
    # giving the duplicate-laden spectrum [0, 0].
    return [0]

  # Prefix sums over the doubled mass list: any wrapped subpeptide is a plain
  # slice of the doubled list, and its mass is a difference of two prefixes.
  prefix = [0]
  for mass in masses * 2:
    prefix.append(prefix[-1] + mass)

  spectrum = [0, prefix[n]]  # the empty subpeptide and the whole peptide
  spectrum.extend(
      prefix[pos + length] - prefix[pos]
      for pos in range(n)
      for length in range(1, n)
  )
  return sorted(spectrum)

In [0]:
# Show the cyclic spectrum of a sample peptide as space-separated masses.
print(' '.join(str(mass) for mass in generate_theoretical_spectrum('WRRFRILSTSAKL')))

0 71 87 87 101 113 113 113 128 147 156 156 156 158 186 188 188 199 200 226 241 259 269 275 286 299 301 303 303 312 312 313 342 346 382 387 388 399 414 416 427 455 459 459 459 469 474 498 498 500 501 529 570 572 572 583 585 587 587 611 615 616 645 654 657 685 686 700 700 717 728 728 739 741 758 772 773 801 804 810 813 841 842 856 873 875 886 886 897 914 914 928 929 957 960 969 998 999 1003 1027 1027 1029 1031 1042 1042 1044 1085 1113 1114 1116 1116 1140 1145 1155 1155 1155 1159 1187 1198 1200 1215 1226 1227 1232 1268 1272 1301 1302 1302 1311 1311 1313 1315 1328 1339 1345 1355 1373 1388 1414 1415 1426 1426 1428 1456 1458 1458 1458 1467 1486 1501 1501 1501 1513 1527 1527 1543 1614


#Compute the Number of Peptides of Given Total Mass

In [0]:
def number_of_peptides(mass):
  """Count the ordered sequences of amino-acid masses that sum to ``mass``.

  Sequences are ordered (57+71 and 71+57 count separately) and each of the 18
  distinct masses in ``acids`` may be used any number of times.

  The table is filled bottom-up rather than by recursion, so large masses do
  not exhaust Python's recursion limit.

  Args:
    mass: target total mass.

  Returns:
    The number of such sequences as an int: 1 for a mass of 0 (the empty
    sequence) and 0 for negative or non-integral masses.
  """
  acids = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]

  target = int(mass)
  if target != mass or target < 0:
    # Sums of integer masses can never hit a negative or fractional target.
    return 0

  # ways[t] = number of ordered mass sequences summing to exactly t.
  ways = [0] * (target + 1)
  ways[0] = 1
  for current in range(1, target + 1):
    # The last residue of a sequence summing to `current` has some mass m;
    # the remainder is any sequence summing to current - m.
    ways[current] = sum(ways[current - m] for m in acids if m <= current)
  return ways[target]


In [0]:
# Number of peptides with total mass 1024; shown as the cell's output value.
number_of_peptides(1024)

14712706211

#Compute the Score of a Cyclic Peptide Against a Spectrum

In [0]:
def cyclic_peptide_score(peptide, spectrum):
  """Score a peptide against a spectrum by counting shared masses.

  Each mass contributes the smaller of its multiplicity in ``spectrum`` and
  its multiplicity in the peptide's theoretical spectrum (as produced by
  ``generate_theoretical_spectrum``).

  Args:
    peptide: peptide string passed to ``generate_theoretical_spectrum``.
    spectrum: iterable of masses to score against.

  Returns:
    The total number of masses shared between the two spectra, counted with
    multiplicity.
  """
  theoretical = Counter(generate_theoretical_spectrum(peptide))
  observed = Counter(spectrum)

  score = 0
  for mass, occurrences in observed.items():
    # A Counter yields 0 for masses absent from the theoretical spectrum.
    score += min(occurrences, theoretical[mass])
  return score


In [0]:
# Input file layout: first line is the peptide, second line the spectrum masses.
with open('rosalind_ba4f.txt', 'r') as file:
  data = file.read().splitlines()

peptide = data[0]
spectrum = list(map(int, data[1].split()))
print(cyclic_peptide_score(peptide, spectrum))

647


#Generate the Convolution of a Spectrum

In [0]:
def convolution(spectrum):
  """Return the positive pairwise differences of a spectrum, grouped by value.

  Every ordered pair (x, y) from ``spectrum`` with x - y > 0 contributes one
  difference. Differences are then emitted most-frequent first, each repeated
  as many times as it occurs; values with equal frequency keep the order in
  which they were first produced.

  Args:
    spectrum: sequence of masses (iterated twice, so not a one-shot iterator).

  Returns:
    A list of the positive differences, ordered by decreasing multiplicity.
  """
  differences = Counter()
  for larger in spectrum:
    for smaller in spectrum:
      gap = larger - smaller
      if gap > 0:
        differences[gap] += 1

  # most_common() is a stable sort of the items by count, descending.
  return [gap for gap, times in differences.most_common() for _ in range(times)]

In [0]:
# The whole input file is one whitespace-separated list of spectrum masses.
with open('rosalind_ba4h.txt', 'r') as file:
  data = file.read()

spectrum = list(map(int, data.split()))
print(*convolution(spectrum))

101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87 87

#Trim a Peptide Leaderboard

In [0]:
def trim(peptides, spectrum, M):
  """Keep the top ``M`` peptides of a leaderboard by linear score, with ties.

  Each peptide is converted to masses via ``symbol_to_mass`` and scored by how
  many masses its linear spectrum shares with ``spectrum`` (counted with
  multiplicity). Peptides are ranked by decreasing score, and every peptide
  scoring at least as high as the M-th ranked one is returned.

  Args:
    peptides: iterable of peptide strings; duplicates are kept as separate
      leaderboard entries.
    spectrum: iterable of masses to score against.
    M: number of leaderboard places to keep (ties with place M are included).

  Returns:
    A list of peptides in decreasing score order; equal scores keep their
    input order. The whole ranked leaderboard is returned when it has at most
    ``M`` entries, and an empty list when ``M <= 0`` or there are no peptides.

  Raises:
    KeyError: if a peptide contains a symbol missing from ``symbol_to_mass``.
  """
  peptides = list(peptides)
  if M <= 0 or not peptides:
    return []

  # The spectrum's multiset is the same for every peptide; build it once.
  spectrum_counts = Counter(spectrum)

  def linear_spectrum(masses):
    """Sorted masses of all contiguous (non-wrapping) subpeptides, plus 0."""
    prefix = [0]
    for mass in masses:
      prefix.append(prefix[-1] + mass)
    n = len(masses)
    return sorted(
        [0] + [prefix[end] - prefix[start]
               for start in range(n)
               for end in range(start + 1, n + 1)]
    )

  def linear_score(masses):
    """Number of masses shared with the spectrum, counted with multiplicity."""
    theoretical = Counter(linear_spectrum(masses))
    return sum(min(count, theoretical[mass])
               for mass, count in spectrum_counts.items())

  # A list of pairs (not a dict keyed by peptide) so duplicates survive.
  scored = [
      (peptide, linear_score([symbol_to_mass[ch] for ch in peptide]))
      for peptide in peptides
  ]
  scored.sort(key=lambda pair: pair[1], reverse=True)  # stable: ties keep input order

  if M >= len(scored):
    return [peptide for peptide, _ in scored]

  # Scores are sorted descending, so this keeps a prefix of the ranking that
  # extends past place M for as long as the score ties with place M.
  cutoff = scored[M - 1][1]
  return [peptide for peptide, score in scored if score >= cutoff]

In [0]:
# Input file layout: line 1 = leaderboard peptides, line 2 = spectrum masses,
# line 3 = the number of places to keep.
with open('rosalind_ba4l.txt', 'r') as file:
  data = file.read().splitlines()

peptides = data[0].split()
spectrum = list(map(int, data[1].split()))
M = int(data[2])
print(*trim(peptides, spectrum, M))

CCKADLTNKTTEFRNQMTQTYAIREQPDFHTGQSEYCTQKPL KPAQKNEHCSLPGRIIGYASWEKDTVENNHNRIMSCPDPITV WNTGNEACTIMCYYILKCYIGSMTNQAYRGMLFRTRCSVIYW HNGLLQYHCAMFGKIYKNWPPMADNDATNIMQMRGELWRWYH YNDRRNDARLWSCKSICISKPPGNMHCYTCIQEIFLSDHDLY
