In [5]:
# Residue masses keyed by one-letter amino-acid code.
# Units are presumably daltons (Da) and the values look like average residue
# masses -- TODO confirm the source table.
amino_acid_mass = {
    "A": 71.08,   # Ala - alanine
    "C": 103.14,  # Cys - cysteine
    "D": 115.09,  # Asp - aspartic acid
    "E": 129.11,  # Glu - glutamic acid
    "F": 147.13,  # Phe - phenylalanine
    "G": 57.05,   # Gly - glycine
    "H": 137.14,  # His - histidine
    "I": 113.16,  # Ile - isoleucine
    "K": 128.17,  # Lys - lysine
    "L": 113.16,  # Leu - leucine
    "M": 131.20,  # Met - methionine
    "N": 114.11,  # Asn - asparagine
    "P": 97.12,   # Pro - proline
    "Q": 128.13,  # Gln - glutamine
    "R": 156.19,  # Arg - arginine
    "S": 87.08,   # Ser - serine
    "T": 101.10,  # Thr - threonine
    "V": 99.14,   # Val - valine
    "W": 186.21,  # Trp - tryptophan
    "Y": 163.17,  # Tyr - tyrosine
}

# Residues selectable by calculate_mass: character i of its bit-string input
# refers to entry i of this list.
input_amino_acids = ["A", "M", "G", "Y"]

def calculate_mass(binary_input, amino_acids=None, mass_table=None):
    """Sum the masses of the residues selected by a bit string.

    Character ``i`` of ``binary_input`` selects ``amino_acids[i]``: a ``'1'``
    adds that residue's mass, any other character contributes nothing.

    Args:
        binary_input: String (or other iterable of one-character strings)
            such as ``'0101'``.
        amino_acids: Sequence of residue codes addressed by bit position.
            Defaults to the module-level ``input_amino_acids``.
        mass_table: Mapping from residue code to mass. Defaults to the
            module-level ``amino_acid_mass``.

    Returns:
        The total mass as a float; ``0.0`` when no bit is set.

    Raises:
        IndexError: If a ``'1'`` sits at a position past the end of
            ``amino_acids``.
        KeyError: If a selected residue is missing from ``mass_table``.
    """
    # Fall back to the notebook globals only when the caller passes nothing,
    # so existing one-argument calls behave exactly as before.
    if amino_acids is None:
        amino_acids = input_amino_acids
    if mass_table is None:
        mass_table = amino_acid_mass

    total_mass = 0.0
    for index, bit in enumerate(binary_input):
        if bit == '1':  # only set bits select a residue
            total_mass += mass_table[amino_acids[index]]
    return total_mass

# Total mass of every subset of input_amino_acids, in bitmask order
# (0...0 first, 1...1 last). The number of masks and the zero-padded width
# are derived from the list length, so editing input_amino_acids does not
# leave a stale hard-coded 16 / 4 behind.
n_residues = len(input_amino_acids)
output = [
    calculate_mass(f"{mask:0{n_residues}b}")
    for mask in range(2 ** n_residues)
]

# Print the sorted unique masses after rounding each to a whole number
sorted_masses = sorted({int(round(mass)) for mass in output})
print("Sorted unique integer masses:", sorted_masses)


Sorted unique integer masses: [0, 57, 71, 128, 131, 163, 188, 202, 220, 234, 259, 291, 294, 351, 365, 422]


In [28]:
import math

In [4]:
# Whole-number residue masses keyed by one-letter amino-acid code.
# I/L and K/Q share a value, so they cannot be told apart by mass alone.
amino_acid_mass = {
    "A": 71,   # Ala - alanine
    "C": 103,  # Cys - cysteine
    "D": 115,  # Asp - aspartic acid
    "E": 129,  # Glu - glutamic acid
    "F": 147,  # Phe - phenylalanine
    "G": 57,   # Gly - glycine
    "H": 137,  # His - histidine
    "I": 113,  # Ile - isoleucine
    "K": 128,  # Lys - lysine
    "L": 113,  # Leu - leucine
    "M": 131,  # Met - methionine
    "N": 114,  # Asn - asparagine
    "P": 97,   # Pro - proline
    "Q": 128,  # Gln - glutamine
    "R": 156,  # Arg - arginine
    "S": 87,   # Ser - serine
    "T": 101,  # Thr - threonine
    "V": 99,   # Val - valine
    "W": 186,  # Trp - tryptophan
    "Y": 163,  # Tyr - tyrosine
}

# Target masses that find_combinations checks each prefix mass against.
# NOTE: this is a set literal, so the values written more than once below
# (71, 113, 128, 227, ...) collapse to a single member each. That is harmless
# for the search, which only uses membership tests and max(), but the
# multiplicities are not preserved.
# Presumably a mass spectrum of the unknown peptide -- TODO confirm the source.
target_masses = {0, 71, 71, 99, 101, 103, 113, 113, 114, 128, 128, 131, 147, 163, 170, 172, 184, 199, 215, 227, 227, 231, 244, 259, 260, 266, 271, 286, 298,298, 310, 312, 328, 330, 330, 372, 385, 391, 394, 399, 399, 399, 401,413, 423, 426, 443, 443, 470, 493, 498, 502, 513, 519, 526, 527, 541,554, 556, 557, 564, 569, 590, 598, 616, 626, 640, 654, 657, 658, 665,670, 682, 697, 697, 703, 711, 729, 729, 753, 753, 771, 779, 785, 785,800, 812, 817, 824, 825, 828, 842, 856, 866, 884, 892, 913, 918, 925,926, 928, 941, 955, 956, 963, 969, 980, 984, 989, 1012, 1039, 1039,1056, 1059, 1069, 1081, 1083, 1083, 1083, 1088, 1091, 1097, 1110,1152, 1152, 1154, 1170, 1172, 1184, 1184, 1196, 1211, 1216, 1222,1223, 1238, 1251, 1255, 1255, 1267, 1283, 1298, 1310, 1312, 1319,1335, 1351, 1354, 1354, 1368, 1369, 1369, 1379, 1381, 1383, 1411,1411, 1482}


# Visited-state cache read and written only by find_combinations.
# It must be empty when a search starts: stale entries make the search
# return early from states it has not actually explored in this run.
memo = {}

# Search results, updated in place by find_combinations via `global`.
# best_score doubles as an exclusive upper bound: a branch is abandoned as soon
# as its score is >= best_score, and best_score is only ever overwritten with a
# strictly smaller value. Starting at 1 therefore means only a sequence with
# score 0 (no prefix mass missing from target_masses) can be recorded; if none
# exists, best_score stays 1 and best_seq stays "".
best_score = 1
best_seq = ""

# Optimized recursive function to find combinations
def find_combinations(current_seq, current_mass, index, amino_acids, amino_acid_mass, score,
                      target_length=13, max_mass=None):
    """Depth-first search for the fixed-length sequence that best matches target_masses.

    Each prefix mass of a candidate (the empty prefix and the full sequence
    included) that is absent from the module-level ``target_masses`` adds one
    to the candidate's score. A candidate is complete when it has exactly
    ``target_length`` residues and its total mass equals ``max_mass``. The
    lowest-scoring complete candidate is left in the module-level
    ``best_score`` / ``best_seq``; the function itself returns ``None``.

    Before each search the caller must reset the module-level ``memo`` to an
    empty dict, ``best_seq`` to ``""`` and ``best_score`` to an exclusive upper
    bound on acceptable scores (only strictly smaller scores are recorded).

    Args:
        current_seq: Residues chosen so far, as a string.
        current_mass: Sum of the masses of ``current_seq``.
        index: Number of residues in ``current_seq``.
        amino_acids: Iterable of residue codes to try, in search order.
        amino_acid_mass: Mapping from residue code to mass.
        score: Misses accumulated by the prefixes before this one.
        target_length: Required number of residues (default 13).
        max_mass: Required total mass. Defaults to ``max(target_masses)``,
            resolved once at the top-level call and passed down.

    Raises:
        ValueError: If ``max_mass`` is omitted and ``target_masses`` is empty.
        KeyError: If a residue in ``amino_acids`` is missing from
            ``amino_acid_mass``.
    """
    global best_score, best_seq, memo

    # Resolve the required total once; recursive calls receive it as an
    # argument instead of re-scanning target_masses on every node.
    if max_mass is None:
        max_mass = max(target_masses)

    # Scores never decrease along a branch, so a branch that already ties the
    # best known score cannot yield a strictly better sequence.
    if score >= best_score:
        return

    # Skip states that were already expanded. The key must include the depth:
    # because the final length is fixed, reaching the same mass and score with
    # a different number of residues is a different subproblem. Keying on
    # (mass, score) alone can discard the only path to the optimum.
    state = (index, current_mass, score)
    if state in memo:
        return
    memo[state] = True

    # A prefix mass that is not a target mass counts as one miss.
    if current_mass not in target_masses:
        score += 1
        if score >= best_score:
            return

    # Depth limit reached: record the candidate if it is complete, then stop.
    if index >= target_length:
        if index == target_length and current_mass == max_mass:
            best_score = score
            best_seq = current_seq
        return

    # Already heavier than the required total (assumes residue masses are
    # non-negative, so the mass cannot come back down).
    if current_mass > max_mass:
        return

    # Extend the sequence by one residue and recurse.
    for aa in amino_acids:
        find_combinations(current_seq + aa, current_mass + amino_acid_mass[aa], index + 1,
                          amino_acids, amino_acid_mass, score, target_length, max_mass)

# Candidate residues ordered lightest first; equal masses keep the table's
# insertion order because sorted() is stable.
amino_acids = sorted(amino_acid_mass, key=amino_acid_mass.get)

# Launch the search from the empty sequence: no residues, zero mass, zero misses
find_combinations("", 0, 0, amino_acids, amino_acid_mass, 0)

# Report what the search left in the module-level result variables
print(f"Best score: {best_score}", f"Best sequence: {best_seq}", sep="\n")


Best score: 0
Best sequence: AGASVIKWTRCFY


In [1]:
# Whole-number residue masses keyed by one-letter amino-acid code.
# I/L and K/Q share a value, so they cannot be told apart by mass alone.
amino_acid_mass = {
    "A": 71,   # Ala - alanine
    "C": 103,  # Cys - cysteine
    "D": 115,  # Asp - aspartic acid
    "E": 129,  # Glu - glutamic acid
    "F": 147,  # Phe - phenylalanine
    "G": 57,   # Gly - glycine
    "H": 137,  # His - histidine
    "I": 113,  # Ile - isoleucine
    "K": 128,  # Lys - lysine
    "L": 113,  # Leu - leucine
    "M": 131,  # Met - methionine
    "N": 114,  # Asn - asparagine
    "P": 97,   # Pro - proline
    "Q": 128,  # Gln - glutamine
    "R": 156,  # Arg - arginine
    "S": 87,   # Ser - serine
    "T": 101,  # Thr - threonine
    "V": 99,   # Val - valine
    "W": 186,  # Trp - tryptophan
    "Y": 163,  # Tyr - tyrosine
}

# Target masses that find_combinations checks each prefix mass against.
# NOTE: this is a set literal, so the values written more than once below
# (71, 113, 128, 227, ...) collapse to a single member each. That is harmless
# for the search, which only uses membership tests and max(), but the
# multiplicities are not preserved.
# Presumably a mass spectrum of the unknown peptide -- TODO confirm the source.
target_masses = {0, 71, 71, 99, 101, 103, 113, 113, 114, 128, 128, 131, 147, 163, 170, 172, 184, 199, 215, 227, 227, 231, 244, 259, 260, 266, 271, 286, 298,298, 310, 312, 328, 330, 330, 372, 385, 391, 394, 399, 399, 399, 401,413, 423, 426, 443, 443, 470, 493, 498, 502, 513, 519, 526, 527, 541,554, 556, 557, 564, 569, 590, 598, 616, 626, 640, 654, 657, 658, 665,670, 682, 697, 697, 703, 711, 729, 729, 753, 753, 771, 779, 785, 785,800, 812, 817, 824, 825, 828, 842, 856, 866, 884, 892, 913, 918, 925,926, 928, 941, 955, 956, 963, 969, 980, 984, 989, 1012, 1039, 1039,1056, 1059, 1069, 1081, 1083, 1083, 1083, 1088, 1091, 1097, 1110,1152, 1152, 1154, 1170, 1172, 1184, 1184, 1196, 1211, 1216, 1222,1223, 1238, 1251, 1255, 1255, 1267, 1283, 1298, 1310, 1312, 1319,1335, 1351, 1354, 1354, 1368, 1369, 1369, 1379, 1381, 1383, 1411,1411, 1482}


# Visited-state cache read and written only by find_combinations.
# It must be empty when a search starts: stale entries make the search
# return early from states it has not actually explored in this run.
memo = {}

# Search results, updated in place by find_combinations via `global`.
# best_score doubles as an exclusive upper bound: a branch is abandoned as soon
# as its score is >= best_score, and best_score is only ever overwritten with a
# strictly smaller value. Starting at 13 means a sequence is recorded only if
# its score is 12 or lower; if none qualifies, best_score stays 13 and
# best_seq stays "".
best_score = 13
best_seq = ""

# Optimized recursive function to find combinations
def find_combinations(current_seq, current_mass, index, amino_acids, amino_acid_mass, score,
                      target_length=13, max_mass=None):
    """Depth-first search for the fixed-length sequence that best matches target_masses.

    Each prefix mass of a candidate (the empty prefix and the full sequence
    included) that is absent from the module-level ``target_masses`` adds one
    to the candidate's score. A candidate is complete when it has exactly
    ``target_length`` residues and its total mass equals ``max_mass``. The
    lowest-scoring complete candidate is left in the module-level
    ``best_score`` / ``best_seq``; the function itself returns ``None``.

    Before each search the caller must reset the module-level ``memo`` to an
    empty dict, ``best_seq`` to ``""`` and ``best_score`` to an exclusive upper
    bound on acceptable scores (only strictly smaller scores are recorded).

    Args:
        current_seq: Residues chosen so far, as a string.
        current_mass: Sum of the masses of ``current_seq``.
        index: Number of residues in ``current_seq``.
        amino_acids: Iterable of residue codes to try, in search order.
        amino_acid_mass: Mapping from residue code to mass.
        score: Misses accumulated by the prefixes before this one.
        target_length: Required number of residues (default 13).
        max_mass: Required total mass. Defaults to ``max(target_masses)``,
            resolved once at the top-level call and passed down.

    Raises:
        ValueError: If ``max_mass`` is omitted and ``target_masses`` is empty.
        KeyError: If a residue in ``amino_acids`` is missing from
            ``amino_acid_mass``.
    """
    global best_score, best_seq, memo

    # Resolve the required total once; recursive calls receive it as an
    # argument instead of re-scanning target_masses on every node.
    if max_mass is None:
        max_mass = max(target_masses)

    # Scores never decrease along a branch, so a branch that already ties the
    # best known score cannot yield a strictly better sequence.
    if score >= best_score:
        return

    # Skip states that were already expanded. The key must include the depth:
    # because the final length is fixed, reaching the same mass and score with
    # a different number of residues is a different subproblem. Keying on
    # (mass, score) alone can discard the only path to the optimum.
    state = (index, current_mass, score)
    if state in memo:
        return
    memo[state] = True

    # A prefix mass that is not a target mass counts as one miss.
    if current_mass not in target_masses:
        score += 1
        if score >= best_score:
            return

    # Depth limit reached: record the candidate if it is complete, then stop.
    if index >= target_length:
        if index == target_length and current_mass == max_mass:
            best_score = score
            best_seq = current_seq
        return

    # Already heavier than the required total (assumes residue masses are
    # non-negative, so the mass cannot come back down).
    if current_mass > max_mass:
        return

    # Extend the sequence by one residue and recurse.
    for aa in amino_acids:
        find_combinations(current_seq + aa, current_mass + amino_acid_mass[aa], index + 1,
                          amino_acids, amino_acid_mass, score, target_length, max_mass)

# Candidate residues ordered lightest first; equal masses keep the table's
# insertion order because sorted() is stable.
amino_acids = sorted(amino_acid_mass, key=amino_acid_mass.get)

# Launch the search from the empty sequence: no residues, zero mass, zero misses
find_combinations("", 0, 0, amino_acids, amino_acid_mass, 0)

# Report what the search left in the module-level result variables
print(f"Best score: {best_score}", f"Best sequence: {best_seq}", sep="\n")


Best score: 0
Best sequence: AGASVIKWTRCFY
