In [1]:
def create_scoring_matrix():
  amino_acids = "CSTPAGNDEQHRKMILVFYW"
  scores = [
        [9, -1, -1, -3, 0, -3, -3, -3, -4, -3, -3, -3, -3, -1, -1, -1, -1, -2, -2, -2],  # C done
        [-1, 4, 1, -1, 1, 0, -1, 1, 0, 0, 0, -1, -1, 0, -2, -2, -2, -2, -2, -3],         # S done
        [-1, 1, 5, -1, 0, -2, 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, 0, -2, -2, -2],     # T done
        [-3, -1, -1, 7, -1, -2, -2, -1, -1, -1, -2, -2, -1, -2, -3, -3, -2, -4, -3, -4], # P done
        [0, 1, 0, -1, 4, 0, -2, -2, -1, -1, -2, -1, -1, -1, -1, -1, 0, -2, -2, -3],      # A done
        [-3, 0, -2, -2, 0, 6, 0, -1, -2, -2, -2, -2, -2, -3, -4, -4, -3, -3, -3, -2],    # G done
        [-3, -1, 0, -2, -2, 0, 6, 1, 0, 0, 1, 0, 0, -2, -3, -3, -3, -3, -2, -4],         # N done
        [-3, 1, -1, -1, -2, -1, 1, 6, 2, 0, -1, -2, -1, -3, -3, -4, -3, -3, -3, -4],     # D done
        [-4, 0, -1, -1, -1, -2, 0, 2, 5, 2, 0, 0, 1, -2, -3, -3, -2, -3, -2, -3],        # E done
        [-3, 0, -1, -1, -1, -2, 0, 0, 2, 5, 0, 1, 1, 0, -3, -2, -2, -3, -1, -2],         # Q done
        [-3, 0, -2, -2, -2, -2, 1, -1, 0, 0, 8, 0, -1, -2, -3, -3, -3, -1, 2, -2],       # H done
        [-3, -1, -1, -2, -1, -2, 0, -2, 0, 1, 0, 5, 2, -1, -3, -2, -3, -3, -2, -3],      # R done
        [-3, -1, -1, -1, -1, -2, 0, -1, 1, 1, -1, 2, 5, -1, -3, -2, -2, -3, -2, -3],     # K done
        [-1, 0, -1, -2, -1, -3, -2, -3, -2, 0, -2, -1, -1, 5, 1, 2, 1, 0, -1, -1],       # M done
        [-1, -2, -1, -3, -1, -4, -3, -3, -3, -3, -3, -3, -3, 1, 4, 2, 3, 0, -1, -3],     # I done
        [-1, -2, -1, -3, -1, -4, -3, -4, -3, -2, -3, -2, -2, 2, 2, 4, 1, 0, -1, -2],     # L done
        [-1, -2, 0, -2, 0, -3, -3, -3, -2, -2, -3, -3, -2, 1, 3, 1, 4, -1, -1, -3],      # V done
        [-2, -2, -2, -4, -2, -3, -3, -3, -3, -3, -1, -3, -3, 0, 0, 0, -1, 6, 3, 1],      # F done
        [-2, -2, -2, -3, -2, -3, -2, -3, -2, -1, 2, -2, -2, -1, -1, -1, -1, 3, 7, 2],    # Y done
        [-2, -3, -2, -4, -3, -2, -4, -4, -3, -2, -2, -3, -3, -1, -3, -2, -3, 1, 2, 11]   # W done
    ]

  matrix = {}
  for i, aa1 in enumerate(amino_acids):
      matrix[aa1] = {}
      for j, aa2 in enumerate(amino_acids):
          matrix[aa1][aa2] = scores[i][j]
  return matrix

In [2]:
def calculate_active_pattern_score(active_pattern):
  matrix = create_scoring_matrix()
  print(matrix)
  score = 0
  for i in active_pattern:
    if i in matrix:
      print(f"The point got is {matrix[i][i]}")
      score += matrix[i][i]
    else:
      print(f"{i} is not found in the matrix")
  return score

In [3]:
def calculate_conservation_score(substring, pattern):
  if len(substring) != len(pattern):
    raise ValueError("Strings must have equal length")
  
  matrix = create_scoring_matrix()
  total_score = 0
  
  for s, p in zip(substring, pattern):
    if s in matrix and p in matrix:
      total_score += matrix[s][p]
    else:
      if s not in matrix:
        print(f"{s} from substring is not found in the matrix")
      if p not in matrix:
        print(f"{p} from pattern is not found in the matrix")
  
  return total_score

In [None]:
def is_conserved(substring, pattern, pattern_self_score, threshold=0.80):
  conservation_score = calculate_conservation_score(substring, pattern)
  ratio = conservation_score / pattern_self_score
  return ratio >= threshold

In [None]:
M1 = "DSIAPVNSSALPIYDSMSRNAKQFLEINGGSH"
M2 = "DTGIEASHPEFEGRAQMVKTYYYSSRDGNGH"

In [15]:
M1_score = calculate_active_pattern_score(M1)
print()
M2_score = calculate_active_pattern_score(M2)

{'C': {'C': 9, 'S': -1, 'T': -1, 'P': -3, 'A': 0, 'G': -3, 'N': -3, 'D': -3, 'E': -4, 'Q': -3, 'H': -3, 'R': -3, 'K': -3, 'M': -1, 'I': -1, 'L': -1, 'V': -1, 'F': -2, 'Y': -2, 'W': -2}, 'S': {'C': -1, 'S': 4, 'T': 1, 'P': -1, 'A': 1, 'G': 0, 'N': -1, 'D': 1, 'E': 0, 'Q': 0, 'H': 0, 'R': -1, 'K': -1, 'M': 0, 'I': -2, 'L': -2, 'V': -2, 'F': -2, 'Y': -2, 'W': -3}, 'T': {'C': -1, 'S': 1, 'T': 5, 'P': -1, 'A': 0, 'G': -2, 'N': 0, 'D': -1, 'E': -1, 'Q': -1, 'H': -2, 'R': -1, 'K': -1, 'M': -1, 'I': -1, 'L': -1, 'V': 0, 'F': -2, 'Y': -2, 'W': -2}, 'P': {'C': -3, 'S': -1, 'T': -1, 'P': 7, 'A': -1, 'G': -2, 'N': -2, 'D': -1, 'E': -1, 'Q': -1, 'H': -2, 'R': -2, 'K': -1, 'M': -2, 'I': -3, 'L': -3, 'V': -2, 'F': -4, 'Y': -3, 'W': -4}, 'A': {'C': 0, 'S': 1, 'T': 0, 'P': -1, 'A': 4, 'G': 0, 'N': -2, 'D': -2, 'E': -1, 'Q': -1, 'H': -2, 'R': -1, 'K': -1, 'M': -1, 'I': -1, 'L': -1, 'V': 0, 'F': -2, 'Y': -2, 'W': -3}, 'G': {'C': -3, 'S': 0, 'T': -2, 'P': -2, 'A': 0, 'G': 6, 'N': 0, 'D': -1, 'E': -2, 'Q':

In [26]:
print(M1_score)
print(M2_score)

162
170


In [10]:
stm = "MHRTWKRMLVTVAATIALTAPLGATAAHAADNPYERGPNPTLAALQASRGPYAVSTTSVSRLSAVGFGGGTIYYPTSTADGTFGAIAISPGFTAYWSSISWLGPRLASHGFVVIGIETLTTADQPDSRGDQLLAALDYLTSRSSVRSRIDSSRLAVAGHSMGGGGSLEAASDRPSLQAAVPLAPWNLDKSWTELRVPTLIVGGEADTIAPVASHSIPFYTSIPSSAEKSYLELNGASHFFPQSVNTPTAVQAVAWLKRFVDDDTRYSQFICPGPSSLSISDYRSSCPV"

In [37]:
highest_S1 = 0
highest_S2 = 0
best_substring_S1 = ""
best_substring_S2 = ""

length_M1 = len(M1)
length_M2 = len(M2)

for i in range(len(stm) - length_M1 + 1):
    substring = stm[i:i + length_M1]
    score_S1 = calculate_conservation_score(substring, M1)
    
    if score_S1 > highest_S1:
        highest_S1 = score_S1
        best_substring_S1 = substring

for i in range(len(stm) - length_M2 + 1):
    substring = stm[i:i + length_M2]
    score_S2 = calculate_conservation_score(substring, M2)
    
    if score_S2 > highest_S2:
        highest_S2 = score_S2
        best_substring_S2 = substring

print(f"Highest S1 score: {highest_S1} with substring: {best_substring_S1}")
print(f"Highest S2 score: {highest_S2} with substring: {best_substring_S2}")
print(f"Length of best_substring_S1: {len(best_substring_S1)}")
print(f"Length of best_substring_S2: {len(best_substring_S2)}")

Highest S1 score: 47 with substring: DTIAPVASHSIPFYTSIPSSAEKSYLELNGAS
Highest S2 score: 23 with substring: ETLTTADQPDSRGDQLLAALDYLTSRSSVRS
Length of best_substring_S1: 32
Length of best_substring_S2: 31


In [38]:
def compute_mutation_scores(best_substring, reference, reference_score):
    matrix = create_scoring_matrix()
    mutation_scores = []
    
    for i, char in enumerate(best_substring):
        reference_char = reference[i]
        current_score = matrix[char][reference_char]
        reference_char_score = matrix[reference_char][reference_char]
        print(f"Current char: {char}, Reference char: {reference_char}")
        print(f"Current score: {current_score}, Reference char score: {reference_char_score}")
        new_score = 0 - current_score + reference_char_score
        mutation_scores.append(new_score)
    
    return mutation_scores

mutation_scores_S1 = compute_mutation_scores(best_substring_S1, M1, M1_score)
mutation_scores_S2 = compute_mutation_scores(best_substring_S2, M2, M2_score)

print("Mutation scores for best_substring_S1:", mutation_scores_S1)
print("Sum of mutation scores for best_substring_S1:", sum(mutation_scores_S1))

print("Mutation scores for best_substring_S2:", mutation_scores_S2)
print("Sum of mutation scores for best_substring_S2:", sum(mutation_scores_S2))

Current char: D, Reference char: D
Current score: 6, Reference char score: 6
Current char: T, Reference char: S
Current score: 1, Reference char score: 4
Current char: I, Reference char: I
Current score: 4, Reference char score: 4
Current char: A, Reference char: A
Current score: 4, Reference char score: 4
Current char: P, Reference char: P
Current score: 7, Reference char score: 7
Current char: V, Reference char: V
Current score: 4, Reference char score: 4
Current char: A, Reference char: N
Current score: -2, Reference char score: 6
Current char: S, Reference char: S
Current score: 4, Reference char score: 4
Current char: H, Reference char: S
Current score: 0, Reference char score: 4
Current char: S, Reference char: A
Current score: 1, Reference char score: 4
Current char: I, Reference char: L
Current score: 2, Reference char score: 4
Current char: P, Reference char: P
Current score: 7, Reference char score: 7
Current char: F, Reference char: I
Current score: 0, Reference char score: 

In [None]:
def mutate_to_threshold(best_substring, reference, reference_score, target_score):
    matrix = create_scoring_matrix()
    mutated_string = list(best_substring)
    mutation_scores = compute_mutation_scores(best_substring, reference, reference_score)
    
    # Sort positions by mutation score in descending order
    sorted_positions = sorted(range(len(mutation_scores)), key=lambda i: mutation_scores[i], reverse=True)
    
    current_score = calculate_conservation_score("".join(mutated_string), reference)
    mutation_count = 0
    
    for pos in sorted_positions:
        if current_score >= target_score:
            break
        mutated_string[pos] = reference[pos]
        current_score = calculate_conservation_score("".join(mutated_string), reference)
        mutation_count += 1
    
    return "".join(mutated_string), current_score, mutation_count
target_S1_score = 0.80 * M1_score
target_S2_score = 0.80 * M2_score
mutated_S1, final_S1_score, mutation_count_S1 = mutate_to_threshold(best_substring_S1, M1, M1_score, target_S1_score)
mutated_S2, final_S2_score, mutation_count_S2 = mutate_to_threshold(best_substring_S2, M2, M2_score, target_S2_score)

print(f"Unmutated S1: {best_substring_S1}, Score: {highest_S1}")
print(f"Unmutated S2: {best_substring_S2}, Score: {highest_S2}")
print(f"Mutated S1: {mutated_S1}, Score: {final_S1_score}, Mutations: {mutation_count_S1}")
print(f"Mutated S2: {mutated_S2}, Score: {final_S2_score}, Mutations: {mutation_count_S2}")

#DTIAPVASHSIPFYTSIPSSAEKSYLELNGAS
#DTIAPVNSHSIPFYDSISRNAEKFLEINGGAH

Current char: D, Reference char: D
Current score: 6, Reference char score: 6
Current char: T, Reference char: S
Current score: 1, Reference char score: 4
Current char: I, Reference char: I
Current score: 4, Reference char score: 4
Current char: A, Reference char: A
Current score: 4, Reference char score: 4
Current char: P, Reference char: P
Current score: 7, Reference char score: 7
Current char: V, Reference char: V
Current score: 4, Reference char score: 4
Current char: A, Reference char: N
Current score: -2, Reference char score: 6
Current char: S, Reference char: S
Current score: 4, Reference char score: 4
Current char: H, Reference char: S
Current score: 0, Reference char score: 4
Current char: S, Reference char: A
Current score: 1, Reference char score: 4
Current char: I, Reference char: L
Current score: 2, Reference char score: 4
Current char: P, Reference char: P
Current score: 7, Reference char score: 7
Current char: F, Reference char: I
Current score: 0, Reference char score: 