In [None]:
from collections import Counter

# Integer mass for each one-letter residue code.
# Insertion order is significant: code that iterates this dict sees the
# residues in exactly this sequence. Note that I/L share mass 113 and
# K/Q share mass 128.
AMINO_ACIDS = {
    "G": 57,
    "A": 71,
    "S": 87,
    "P": 97,
    "V": 99,
    "T": 101,
    "C": 103,
    "I": 113,
    "L": 113,
    "N": 114,
    "D": 115,
    "K": 128,
    "Q": 128,
    "E": 129,
    "M": 131,
    "H": 137,
    "F": 147,
    "R": 156,
    "Y": 163,
    "W": 186,
}

# MASS OF PEPTIDE
def peptide_mass(peptide):
    """Return the total integer mass of ``peptide``.

    Every character of ``peptide`` is looked up in ``AMINO_ACIDS`` and the
    masses are added together. An empty peptide has mass 0; a character
    that is not a key of ``AMINO_ACIDS`` raises ``KeyError``.
    """
    total = 0
    for residue in peptide:
        total += AMINO_ACIDS[residue]
    return total

# LINEAR SPECTRUM
def linear_spectrum(peptide):
    """Return the sorted linear spectrum of a peptide given as residue letters.

    The result contains 0 plus the mass of every contiguous fragment
    ``peptide[start:end]`` (non-wrapping), sorted ascending. Residue masses
    come from ``AMINO_ACIDS``.
    """
    # cumulative[k] is the mass of the first k residues.
    cumulative = [0]
    running = 0
    for residue in peptide:
        running += AMINO_ACIDS[residue]
        cumulative.append(running)

    length = len(peptide)
    masses = [0] + [
        cumulative[end] - cumulative[start]
        for start in range(length)
        for end in range(start + 1, length + 1)
    ]
    masses.sort()
    return masses

# CYCLIC SPECTRUM
def cyclic_spectrum(peptide):
    """Return the sorted cyclic spectrum of a peptide given as residue letters.

    Contains 0, the mass of every contiguous fragment, and, for fragments
    that touch neither end of the string, the mass of the complementary
    wrap-around fragment. Residue masses come from ``AMINO_ACIDS``.
    """
    # cumulative[k] is the mass of the first k residues.
    cumulative = [0]
    for residue in peptide:
        cumulative.append(cumulative[-1] + AMINO_ACIDS[residue])

    whole = cumulative[-1]
    length = len(peptide)
    masses = [0]

    for start in range(length):
        for end in range(start + 1, length + 1):
            fragment = cumulative[end] - cumulative[start]
            masses.append(fragment)
            # An interior fragment's complement wraps around the ring; add it
            # here so each wrap-around fragment is counted exactly once.
            if start != 0 and end != length:
                masses.append(whole - fragment)

    masses.sort()
    return masses

#  CONSISTENCY CHECK
def is_consistent(peptide, spectrum_counter):
    """Tell whether ``peptide``'s linear spectrum fits inside the target spectrum.

    ``spectrum_counter`` maps each mass to how many times it occurs in the
    target spectrum. The peptide is consistent when no mass occurs more often
    in its linear spectrum than in the target.
    """
    fragment_counts = Counter(linear_spectrum(peptide))
    return all(
        needed <= spectrum_counter[mass]
        for mass, needed in fragment_counts.items()
    )

#  EXPAND
def expand(peptides, allowed_letters):
    """Return every peptide extended by one letter from ``allowed_letters``.

    Output order is peptide-major: all extensions of the first peptide come
    first, each in the order of ``allowed_letters``.
    """
    return [
        peptide + letter
        for peptide in peptides
        for letter in allowed_letters
    ]

#  MAIN BRANCH & BOUND
def branch_bound_kmers(spectrum):
    """Branch-and-bound search for peptides whose cyclic spectrum is ``spectrum``.

    Starting from the empty peptide, candidates are grown one residue at a
    time using only residues whose mass occurs in ``spectrum``. A candidate
    lighter than the parent mass (the largest mass in ``spectrum``) is kept
    for the next round only if its linear spectrum is consistent with
    ``spectrum``. A candidate that reaches the parent mass is reported when
    its cyclic spectrum equals ``spectrum`` exactly; heavier candidates are
    dropped.

    Progress (allowed residues, consistent k-mers, and final hits) is printed
    as the search runs.

    Parameters:
        spectrum: iterable of integer masses (consumed once).

    Returns:
        List of peptide strings, in discovery order. An empty spectrum
        yields an empty list.
    """
    spectrum_counter = Counter(spectrum)
    if not spectrum_counter:
        # No masses at all: there is no parent mass, so nothing can match.
        # (max() on an empty input would raise ValueError.)
        return []

    # The Counter's keys are the distinct masses, so use it for both the
    # parent mass and O(1) membership tests instead of rescanning the input.
    parent_mass = max(spectrum_counter)

    # Step 1: allowed amino acids from spectrum
    allowed_letters = [
        aa for aa, mass in AMINO_ACIDS.items() if mass in spectrum_counter
    ]

    print("Allowed 1-mers:", allowed_letters)

    peptides = [""]
    k = 1
    final = []

    # Every expansion adds a positive mass, so candidates eventually exceed
    # parent_mass and the candidate list empties.
    while peptides:
        peptides = expand(peptides, allowed_letters)
        next_round = []

        print(f"\nConsistent {k}-mers:")

        for pep in peptides:
            m = peptide_mass(pep)

            if m == parent_mass:
                # Full-mass candidate: accept only on an exact cyclic match.
                if Counter(cyclic_spectrum(pep)) == spectrum_counter:
                    print("FINAL:", pep)
                    final.append(pep)

            elif m < parent_mass:
                # Partial candidate: keep it only while still consistent.
                if is_consistent(pep, spectrum_counter):
                    print(pep)
                    next_round.append(pep)

        peptides = next_round
        k += 1

    return final



# Input spectrum for the branch-and-bound search (22 masses, largest is 497).
spectrum = [
    0,
    97, 97, 99, 101, 103,
    196, 198, 198, 200, 202,
    295, 297, 299, 299, 301,
    394, 396, 398, 400, 400,
    497,
]

result = branch_bound_kmers(spectrum)

print("\nFinal Cyclic Peptides:", result)


Allowed 1-mers: ['P', 'V', 'T', 'C']

Consistent 1-mers:
P
V
T
C

Consistent 2-mers:
PV
PT
PC
VP
VT
VC
TP
TV
CP
CV

Consistent 3-mers:
PVT
PVC
PTP
PTV
PCV
VPT
VPC
VTP
VCP
TPV
TPC
TVP
CPV
CPT
CVP

Consistent 4-mers:
PVCP
PTPV
PTPC
PCVP
VPTP
VCPT
TPVC
TPCV
CPTP
CVPT

Consistent 5-mers:
FINAL: PVCPT
FINAL: PTPVC
FINAL: PTPCV
FINAL: PCVPT
FINAL: VPTPC
FINAL: VCPTP
FINAL: TPVCP
FINAL: TPCVP
FINAL: CPTPV
FINAL: CVPTP

Final Cyclic Peptides: ['PVCPT', 'PTPVC', 'PTPCV', 'PCVPT', 'VPTPC', 'VCPTP', 'TPVCP', 'TPCVP', 'CPTPV', 'CVPTP']


In [None]:
!pip install biopython




In [None]:
# Colab-only step: opens the browser file picker so the FASTA file read in
# the next cell is present in the working directory.
# NOTE(review): `uploaded` is not used by any code visible in this notebook.
from google.colab import files
uploaded = files.upload()


Saving rcsb_pdb_4Q21.fasta to rcsb_pdb_4Q21.fasta


In [None]:
from Bio import SeqIO
from collections import Counter

# READ FASTA
# Load the uploaded FASTA file and keep its sequence as a plain string.
# NOTE(review): SeqIO.read presumably requires the file to hold exactly one
# record — confirm against the Biopython docs.
record = SeqIO.read("rcsb_pdb_4Q21.fasta", "fasta")
sequence = str(record.seq)

print("Full Protein Length:", len(sequence))

# Only the first 6 residues are used as the peptide for the leaderboard search.
peptide_string = sequence[:6]
print("Selected Peptide:", peptide_string)

# MASS TABLE
# Integer mass for each one-letter residue code (same values as the
# AMINO_ACIDS table defined earlier in the notebook). Insertion order is
# kept as-is because later code iterates this dict.
mass_table = {
    "G": 57,  "A": 71,  "S": 87,  "P": 97,
    "V": 99,  "T": 101, "C": 103, "I": 113,
    "L": 113, "N": 114, "D": 115, "K": 128,
    "Q": 128, "E": 129, "M": 131, "H": 137,
    "F": 147, "R": 156, "Y": 163, "W": 186,
}

# Translate the selected peptide from residue letters to integer masses;
# a letter missing from mass_table raises KeyError.
original_masses = [
    mass_table[residue]
    for residue in peptide_string
]
print("Original Mass List:", original_masses)

# SPECTRUM FUNCTIONS
def cyclic_spectrum(peptide):
    """Return the sorted cyclic spectrum of a peptide given as a list of masses.

    Contains 0, the mass of every contiguous fragment, and, for fragments
    that touch neither end of the list, the mass of the complementary
    wrap-around fragment.

    Note: this redefines the letter-based ``cyclic_spectrum`` from the first
    cell; once this cell has run, the name refers to this mass-list version.
    """
    # cumulative[k] is the mass of the first k residues.
    running = 0
    cumulative = [running]
    for mass in peptide:
        running += mass
        cumulative.append(running)

    whole = cumulative[-1]
    size = len(peptide)
    fragments = [0]

    for start in range(size):
        for stop in range(start + 1, size + 1):
            piece = cumulative[stop] - cumulative[start]
            fragments.append(piece)
            # Interior fragment: also record the wrap-around complement.
            if 0 < start and stop < size:
                fragments.append(whole - piece)

    fragments.sort()
    return fragments


def linear_spectrum(peptide):
    """Return the sorted linear spectrum of a peptide given as a list of masses.

    Contains 0 plus the mass of every contiguous (non-wrapping) fragment.

    Note: this redefines the letter-based ``linear_spectrum`` from the first
    cell; once this cell has run, the name refers to this mass-list version.
    """
    # cumulative[k] is the mass of the first k residues.
    running = 0
    cumulative = [running]
    for mass in peptide:
        running += mass
        cumulative.append(running)

    size = len(peptide)
    fragments = [0] + [
        cumulative[stop] - cumulative[start]
        for start in range(size)
        for stop in range(start + 1, size + 1)
    ]
    fragments.sort()
    return fragments

# EXPERIMENTAL SPECTRUM
# The "experimental" spectrum is the cyclic spectrum of the selected peptide;
# its largest entry is the parent mass.
experimental_spectrum = cyclic_spectrum(original_masses)
parent_mass = max(experimental_spectrum)

print("\nExperimental Spectrum:", experimental_spectrum, sep="\n")

# SCORE FUNCTION
def score(peptide, spectrum, cyclic=False):
    """Count the masses shared between a peptide's spectrum and ``spectrum``.

    The peptide (a list of masses) is turned into its cyclic spectrum when
    ``cyclic`` is true, otherwise into its linear spectrum. Shared masses are
    counted with multiplicity: each mass contributes the smaller of its two
    occurrence counts.
    """
    build_spectrum = cyclic_spectrum if cyclic else linear_spectrum
    peptide_counts = Counter(build_spectrum(peptide))
    target_counts = Counter(spectrum)

    # Counter intersection keeps min(count_a, count_b) for every common mass.
    return sum((peptide_counts & target_counts).values())

#  EXPAND
# Distinct residue masses (duplicates such as I/L = 113 and K/Q = 128 collapse
# to one entry); expand() appends each of these to every candidate.
# NOTE(review): the list order comes from set iteration, which the language
# does not specify; candidate order downstream follows it — consider
# sorted(...) if a documented order is wanted.
unique_masses=list(set(mass_table.values()))

def expand(peptides):
    """Return every peptide (list of masses) extended by one mass.

    Each input peptide yields one new list per entry of ``unique_masses``;
    the input lists themselves are not modified.

    Note: this redefines the two-argument, letter-based ``expand`` from the
    first cell; once this cell has run, the name refers to this version.
    """
    return [
        candidate + [mass]
        for candidate in peptides
        for mass in unique_masses
    ]

# TRIM
def trim(board, spectrum, N):
    """Keep the ``N`` best-scoring peptides of ``board``, including ties.

    Peptides are ranked by their linear score against ``spectrum``, highest
    first; peptides with equal scores keep their relative order from
    ``board``. Every peptide whose score is at least that of the N-th ranked
    peptide is retained, so the result may hold more than ``N`` entries.

    Parameters:
        board: list of peptides (each a list of masses).
        spectrum: target spectrum passed through to ``score``.
        N: number of ranks to keep. ``N <= 0`` keeps nothing.

    Returns:
        A new list of the retained peptides, best first.
    """
    if N <= 0:
        # Without this guard scored[N-1] would index from the end of the
        # list (N == 0 picks the lowest score and keeps the whole board).
        return []

    scored = [(p, score(p, spectrum, False)) for p in board]
    scored.sort(key=lambda x: x[1], reverse=True)

    if len(scored) <= N:
        return [p for p, _ in scored]

    # Score of the N-th ranked peptide; everything tied with it survives.
    cutoff = scored[N - 1][1]
    return [p for p, s in scored if s >= cutoff]

# LEADERBOARD
def leaderboard_cyclopeptide(spectrum, N, parent_mass):
    """Leaderboard search for the mass-list peptide that best explains ``spectrum``.

    Each round every peptide on the board is extended by one mass, peptides
    heavier than ``parent_mass`` are discarded, and the rest are trimmed to
    the top ``N`` (with ties) by linear score. Whenever a peptide weighs
    exactly ``parent_mass`` its cyclic score is compared with the best seen
    so far; only a strictly higher score replaces the current leader.

    Returns:
        The best peptide found as a list of masses, or ``[]`` if no peptide
        of mass ``parent_mass`` achieved a positive cyclic score.
    """
    board = [[]]
    best_peptide = []
    best_score = 0

    while board:
        survivors = []

        for candidate in expand(board):
            mass = sum(candidate)

            # Too heavy: drop the candidate for good.
            if mass > parent_mass:
                continue

            if mass == parent_mass:
                candidate_score = score(candidate, spectrum, True)
                if candidate_score > best_score:
                    best_peptide = candidate
                    best_score = candidate_score

            survivors.append(candidate)

        board = trim(survivors, spectrum, N)

    return best_peptide

# RUN
# Leaderboard size: how many ranks survive each trim.
N = 10
result = leaderboard_cyclopeptide(experimental_spectrum, N, parent_mass)

print("\nBest Peptide (Mass Format):")
print(result)

#  MASS -> LETTER
# Several residues can share one mass, so each mass maps to a list of letters.
reverse_mass = {}
for aa, m in mass_table.items():
    if m not in reverse_mass:
        reverse_mass[m] = []
    reverse_mass[m].append(aa)

print("\nBest Peptide (Letter Format):")

# Spell out every letter string compatible with the mass sequence by
# extending all partial strings with each letter allowed at that position.
strings = ['']
for m in result:
    strings = [s + aa for s in strings for aa in reverse_mass[m]]

print(strings)


Full Protein Length: 189
Selected Peptide: MTEYKL
Original Mass List: [131, 101, 129, 163, 128, 113]

Experimental Spectrum:
[0, 101, 113, 128, 129, 131, 163, 230, 232, 241, 244, 291, 292, 345, 361, 372, 393, 404, 420, 473, 474, 521, 524, 533, 535, 602, 634, 636, 637, 652, 664, 765]

Best Peptide (Mass Format):
[128, 163, 129, 101, 131, 113]

Best Peptide (Letter Format):
['KYETMI', 'KYETML', 'QYETMI', 'QYETML']
