In [1]:
import numpy as np
from collections import Counter

In [2]:
mass_table = {
    'G' : 57,
    'A' : 71,
    'S' : 87,
    'P' : 97,
    'V' : 99,
    'T' : 101,
    'C' : 103,
    'I' : 113,
    'L' : 113,
    'N' : 114,
    'D' : 115,
    'K' : 128,
    'Q' : 128,
    'E' : 129,
    'M' : 131,
    'H' : 137,
    'F' : 147,
    'R' : 156,
    'Y' : 163,
    'W' : 186
}


def spectrum(peptid):
    result = []
    for window in range(1, len(peptid) + 1):
        for left_bound in range(0, len(peptid) - window + 1):
            sub = peptid[left_bound:left_bound + window]
            sum = 0
            for ch in sub:
                sum += mass_table[ch]
            result.append([str(sub), sum])
    final_result = sorted(result, key=lambda x: x[1])
    return final_result

In [3]:
gramicidin = 'VGALAVVVWWWL'
final_result = spectrum(gramicidin)
for it in final_result:
    print(it)

['G', 57]
['A', 71]
['A', 71]
['V', 99]
['V', 99]
['V', 99]
['V', 99]
['L', 113]
['L', 113]
['GA', 128]
['VG', 156]
['AV', 170]
['AL', 184]
['LA', 184]
['W', 186]
['W', 186]
['W', 186]
['VV', 198]
['VV', 198]
['VGA', 227]
['GAL', 241]
['ALA', 255]
['AVV', 269]
['LAV', 283]
['VW', 285]
['VVV', 297]
['WL', 299]
['GALA', 312]
['VGAL', 340]
['ALAV', 354]
['AVVV', 368]
['WW', 372]
['WW', 372]
['LAVV', 382]
['VVW', 384]
['VGALA', 411]
['GALAV', 411]
['ALAVV', 453]
['VWW', 471]
['LAVVV', 481]
['VVVW', 483]
['WWL', 485]
['VGALAV', 510]
['GALAVV', 510]
['ALAVVV', 552]
['AVVVW', 554]
['WWW', 558]
['VVWW', 570]
['VGALAVV', 609]
['GALAVVV', 609]
['VWWW', 657]
['LAVVVW', 667]
['VVVWW', 669]
['WWWL', 671]
['VGALAVVV', 708]
['ALAVVVW', 738]
['AVVVWW', 740]
['VVWWW', 756]
['VWWWL', 770]
['GALAVVVW', 795]
['LAVVVWW', 853]
['VVVWWW', 855]
['VVWWWL', 869]
['VGALAVVVW', 894]
['ALAVVVWW', 924]
['AVVVWWW', 926]
['VVVWWWL', 968]
['GALAVVVWW', 981]
['LAVVVWWW', 1039]
['AVVVWWWL', 1039]
['VGALAVVVWW', 1080]
['

In [4]:
def cyclospectrum(peptide):
    return np.array(spectrum(peptide))[:,1].astype(int)

In [5]:
def spectrum_for_numbers(peptid):
    result = []
    for window in range(1, len(peptid) + 1):
        for left_bound in range(0, len(peptid) - window + 1):
            sub = peptid[left_bound:left_bound + window]
            sum_ = sum(sub)
            result.append(sum_)
    #result.append(sum(peptid))
    final_result = sorted(result)
    return final_result

In [6]:
peptide = 'WFNQYVK'
s = spectrum_for_numbers([mass_table[c] for c in peptide])
s[:-1]

[99,
 114,
 128,
 128,
 147,
 163,
 186,
 227,
 242,
 261,
 262,
 291,
 333,
 389,
 390,
 390,
 405,
 447,
 504,
 518,
 552,
 575,
 632,
 651,
 738,
 779,
 837]

In [7]:
c = Counter(s)
c

Counter({128: 2,
         390: 2,
         99: 1,
         114: 1,
         147: 1,
         163: 1,
         186: 1,
         227: 1,
         242: 1,
         261: 1,
         262: 1,
         291: 1,
         333: 1,
         389: 1,
         405: 1,
         447: 1,
         504: 1,
         518: 1,
         552: 1,
         575: 1,
         632: 1,
         651: 1,
         738: 1,
         779: 1,
         837: 1,
         965: 1})

In [8]:
c = cyclospectrum(gramicidin)
p = spectrum_for_numbers([mass_table[c] for c in gramicidin])
print(c)
print(p)

[  57   71   71   99   99   99   99  113  113  128  156  170  184  184
  186  186  186  198  198  227  241  255  269  283  285  297  299  312
  340  354  368  372  372  382  384  411  411  453  471  481  483  485
  510  510  552  554  558  570  609  609  657  667  669  671  708  738
  740  756  770  795  853  855  869  894  924  926  968  981 1039 1039
 1080 1110 1152 1167 1223 1266 1280 1379]
[57, 71, 71, 99, 99, 99, 99, 113, 113, 128, 156, 170, 184, 184, 186, 186, 186, 198, 198, 227, 241, 255, 269, 283, 285, 297, 299, 312, 340, 354, 368, 372, 372, 382, 384, 411, 411, 453, 471, 481, 483, 485, 510, 510, 552, 554, 558, 570, 609, 609, 657, 667, 669, 671, 708, 738, 740, 756, 770, 795, 853, 855, 869, 894, 924, 926, 968, 981, 1039, 1039, 1080, 1110, 1152, 1167, 1223, 1266, 1280, 1379]


In [9]:
seq = ''
for key, values in final_result:
    seq += str(values) + ' '
seq

'57 71 71 99 99 99 99 113 113 128 156 170 184 184 186 186 186 198 198 227 241 255 269 283 285 297 299 312 340 354 368 372 372 382 384 411 411 453 471 481 483 485 510 510 552 554 558 570 609 609 657 667 669 671 708 738 740 756 770 795 853 855 869 894 924 926 968 981 1039 1039 1080 1110 1152 1167 1223 1266 1280 1379 '

Сколько субпептидов имеет линейный пептид длины n?

In [10]:
n = 12
factorial = 1
#factorial
for i in range(1, n+1):
    factorial *= i
factorial += 1 #for empty subsequence
factorial

479001601

In [11]:
def expand(peptides):
    result = []

    for peptide in peptides:
#        result.append(peptide)
        #print(peptide)
        for mass in list(mass_table.values()):
            tmp = []
            for m in peptide:
                tmp.append(m)
            tmp.append(mass)
            #print(tmp)
            result.append(tmp)

    return result

In [12]:
def mass(peptide):
    return sum(peptide)

In [13]:
def parent_mass(spectr):
    return spectr[-1]

In [14]:
def remove_duplicates(nested_list):
    unique_list = []  # Пустой список
    for element in nested_list:
        if element not in unique_list:  # Проверяем каждый элемент
            unique_list.append(element)  # Добавляем, если элемент уникален
    return unique_list

In [40]:
def CyclopeptideSequencing(spectr):
    final_peptides = []
    candidates = [final_peptides]
    while candidates and not final_peptides:
        candidates = expand(candidates)
        candidates = remove_duplicates(candidates)
        for peptide in candidates:
            if peptide:
                if peptide == [186, 147, 114, 128]:
                    stop_point = 0
                peptide_spectrum = spectrum_for_numbers(peptide)
                if mass(peptide) == parent_mass(spectr):
                    if peptide_spectrum == spectr and peptide not in final_peptides:
                        final_peptides.append(peptide.copy())
                    peptide.clear() 
                else:
                    #проверим, согласуется ли пептид со спектром
                    counter_peptide = Counter(peptide_spectrum)
                    counter_spectrum = Counter(spectr)
                    for item in counter_peptide.keys():
                        if item not in counter_spectrum.keys() or counter_peptide[item] > counter_spectrum[item]:
                            peptide.clear()
                            break
                    
                    
        max_len = max([len(p) for p in candidates])
        candidates = [p for p in candidates if len(p) == max_len and p]
        #print(candidates)
        
        
    return final_peptides


In [32]:
tmp = spectrum_for_numbers([186, 147, 114, 128])
tmp

[114, 128, 147, 186, 242, 261, 333, 389, 447, 575]

In [37]:
peptide = 'WFNQYVK'
spec = spectrum(peptide)
spec

[['V', 99],
 ['N', 114],
 ['Q', 128],
 ['K', 128],
 ['F', 147],
 ['Y', 163],
 ['W', 186],
 ['VK', 227],
 ['NQ', 242],
 ['FN', 261],
 ['YV', 262],
 ['QY', 291],
 ['WF', 333],
 ['FNQ', 389],
 ['QYV', 390],
 ['YVK', 390],
 ['NQY', 405],
 ['WFN', 447],
 ['NQYV', 504],
 ['QYVK', 518],
 ['FNQY', 552],
 ['WFNQ', 575],
 ['NQYVK', 632],
 ['FNQYV', 651],
 ['WFNQY', 738],
 ['FNQYVK', 779],
 ['WFNQYV', 837],
 ['WFNQYVK', 965]]

In [16]:
candidates = [[]]
candidates = expand(candidates)
candidates

[[57],
 [71],
 [87],
 [97],
 [99],
 [101],
 [103],
 [113],
 [113],
 [114],
 [115],
 [128],
 [128],
 [129],
 [131],
 [137],
 [147],
 [156],
 [163],
 [186]]

In [41]:
peptide = 'WFNQYVK'
result = CyclopeptideSequencing(spectrum_for_numbers([mass_table[p] for p in peptide]))
result

[[128, 99, 163, 128, 114, 147, 186], [186, 147, 114, 128, 163, 99, 128]]

In [42]:
mass_pept = [mass_table[p] for p in peptide]
mass_pept

[186, 147, 114, 128, 163, 99, 128]

In [43]:
peptide_table = dict(zip(mass_table.values(), mass_table.keys()))
peptide_table

{57: 'G',
 71: 'A',
 87: 'S',
 97: 'P',
 99: 'V',
 101: 'T',
 103: 'C',
 113: 'L',
 114: 'N',
 115: 'D',
 128: 'Q',
 129: 'E',
 131: 'M',
 137: 'H',
 147: 'F',
 156: 'R',
 163: 'Y',
 186: 'W'}

In [44]:
result_peptides = [[peptide_table[m] for m in pept] for pept in result]
result_peptides

[['Q', 'V', 'Y', 'Q', 'N', 'F', 'W'], ['W', 'F', 'N', 'Q', 'Y', 'V', 'Q']]