# Needleman-Wunsch taikymas ir mutacijų sąrašo gavimas

In [None]:
# reikiamo paketo instaliavimas
!pip install biopython -q

In [None]:
import os
import pandas as pd
import os, shutil

In [None]:
def nw_alignment(cut_natural_seq: str, full_mutated_seq: str):
  """Globally align the natural sequence fragment against the mutated sequence.

  Args:
    cut_natural_seq: Natural sequence (already trimmed by the caller).
    full_mutated_seq: Full mutated sequence.

  Returns:
    The first alignment reported by Biopython's ``PairwiseAligner``.
  """
  # Imported here because the notebook's import cell does not bring
  # PairwiseAligner into scope; without this the call fails with NameError.
  from Bio.Align import PairwiseAligner

  aligner = PairwiseAligner()
  # Needleman-Wunsch is a global alignment; state the mode explicitly
  # instead of relying on the library default.
  aligner.mode = 'global'
  aligner.gap_score = -1
  alignments = aligner.align(cut_natural_seq, full_mutated_seq)
  return alignments[0]


def get_mut_positions(aligned_natural_seq: str, aligned_mutated_seq: str, cut_position: int, include_deletions: bool = False) -> list[tuple[str, int, str]]:
  """Collect substitutions between two aligned sequences.

  Positions are reported in the numbering of the natural sequence: the first
  natural residue gets number ``cut_position`` and every following natural
  residue increments it by one. Gap columns in the natural sequence
  (insertions in the mutated sequence) are skipped and do not consume a
  residue number.

  Args:
    aligned_natural_seq: Aligned natural sequence; '-' marks a gap.
    aligned_mutated_seq: Aligned mutated sequence, indexed column by column
      in parallel with ``aligned_natural_seq``.
    cut_position: Number assigned to the first natural residue.
    include_deletions: When False, columns where the mutated sequence has a
      gap are reported on stdout and left out of the result; when True they
      are returned with '-' as the mutated residue.

  Returns:
    A list of ``(natural_aa, position, mutated_aa)`` tuples.
  """
  mutation_list = []
  # Number of the most recently seen natural residue; counting residues
  # rather than alignment columns keeps positions correct when the natural
  # sequence contains gaps.
  residue_number = cut_position - 1

  for column, natural_aa in enumerate(aligned_natural_seq):
    if natural_aa == '-':  # insertion relative to the natural sequence: ignored
      continue
    residue_number += 1
    mutated_aa = aligned_mutated_seq[column]
    if mutated_aa == '-' and not include_deletions:
      print(f'Ingoruojama delecija {residue_number} pozicijoje')
      continue
    if natural_aa != mutated_aa:
      mutation_list.append((natural_aa, residue_number, mutated_aa))
  return mutation_list


def save_mutation_lists(mutation_list: list[tuple[str, int, str]], natural_seq_name: str, mutated_seq_name: str,
                        mutation_list_types: list[str], output_dir: str) -> None:
  """Write the mutation list to one text file per requested program.

  Each file is named ``{natural_seq_name}_{mutated_seq_name}_{program}.txt``
  and placed in ``output_dir``.

  Args:
    mutation_list: ``(natural_aa, position, mutated_aa)`` tuples.
    natural_seq_name: Natural sequence name, used in the file name.
    mutated_seq_name: Mutated sequence name, used in the file name.
    mutation_list_types: Program names; supported values are 'Maestro',
      'DDGEmb', 'DynaMut' and 'DynaMut2'.
    output_dir: Existing directory to write the files into.

  Raises:
    ValueError: If any requested program name is not supported. The check
      runs before anything is written, so no partial output is produced and
      an unknown name is rejected even when ``mutation_list`` is empty.
  """
  # program -> (per-mutation template, separator between mutations)
  # NOTE(review): the Maestro template writes the mutated residue both after
  # the dot and inside the braces; confirm against MAESTRO's mutation syntax.
  # NOTE(review): the DynaMut2 template hard-codes the chain as 'A'.
  formats = {
      'Maestro': ('{nat}{pos}.{mut}{{{mut}}}', '\n'),
      'DDGEmb': ('{nat}{pos}{mut}', ','),
      'DynaMut': ('{nat}{pos}{mut}', '\n'),
      'DynaMut2': ('A {nat}{pos}{mut}', ';'),
  }

  requested_types = list(mutation_list_types)
  for mutation_list_type in requested_types:
    if mutation_list_type not in formats:
      raise ValueError(f'Nežinoma programa: {mutation_list_type}')

  for mutation_list_type in requested_types:
    template, separator = formats[mutation_list_type]
    mutation_list_file_name = os.path.join(output_dir, f'{natural_seq_name}_{mutated_seq_name}_{mutation_list_type}.txt')
    mutation_list_formatted = [
        template.format(nat=natural_aa, pos=pos, mut=mutated_aa)
        for (natural_aa, pos, mutated_aa) in mutation_list
    ]
    with open(mutation_list_file_name, 'w', encoding='utf-8') as f:
      f.write(separator.join(mutation_list_formatted))

In [None]:
def run_all_for_seq(natural_seq_name: str, mutated_seq_name: str,
                    full_natural_seq: str, full_mutated_seq: str,
                    cut_position: int, end_cut_position: int,
                    programs: list[str], output_dir: str):
  """Align one natural/mutated sequence pair and save its mutation lists.

  The natural sequence is trimmed to ``[cut_position, end_cut_position]``,
  aligned against the full mutated sequence, and the substitutions found are
  written to ``output_dir`` in the format of every program in ``programs``.

  Args:
    natural_seq_name: Natural sequence name (used in output file names).
    mutated_seq_name: Mutated sequence name (used in output file names).
    full_natural_seq: Full natural sequence.
    full_mutated_seq: Full mutated sequence.
    cut_position: 1-based position of the first natural residue to keep.
    end_cut_position: 1-based position of the last natural residue to keep
      (inclusive). Negative values count from the end, so -1 keeps
      everything through the last residue.
    programs: Program names passed on to ``save_mutation_lists``.
    output_dir: Directory the mutation list files are written to.

  Raises:
    ValueError: If ``cut_position`` is smaller than 1.
  """
  if cut_position < 1:
    raise ValueError(f'cut_position turi būti >= 1 (gauta {cut_position})')

  # Using a negative end directly as a slice bound would drop residues that
  # should be kept (-1 would cut off the last residue), so convert it to an
  # inclusive position counted from the end.
  if end_cut_position < 0:
    slice_end = max(len(full_natural_seq) + end_cut_position + 1, 0)
  else:
    slice_end = end_cut_position
  cut_natural_seq = full_natural_seq[cut_position - 1:slice_end]
  best_alignment = nw_alignment(cut_natural_seq, full_mutated_seq)

  # Row 0 / row 1 hold, per alignment column, the index into the natural /
  # mutated sequence, with -1 marking a gap.
  alignment_indices = best_alignment.indices
  natural_residues = []
  mutated_residues = []
  for natural_index, mutated_index in zip(alignment_indices[0], alignment_indices[1]):
    if natural_index == -1:
      # Gap in the natural sequence (insertion): drop the whole column so
      # both strings stay in the natural sequence's coordinates.
      continue
    natural_residues.append(cut_natural_seq[natural_index])
    mutated_residues.append(full_mutated_seq[mutated_index] if mutated_index != -1 else '-')
  # Both strings are built column by column, so they always have equal length.
  aligned_natural_seq = ''.join(natural_residues)
  aligned_mutated_seq = ''.join(mutated_residues)

  mutation_list = get_mut_positions(aligned_natural_seq, aligned_mutated_seq, cut_position, include_deletions=False)
  print(f'{natural_seq_name}->{mutated_seq_name}: rasta {len(mutation_list)} mutacijų')
  save_mutation_lists(mutation_list, natural_seq_name, mutated_seq_name, programs, output_dir)

In [None]:
info_table_file = 'nsekuduomenyscolabui1.csv'  # name of the input CSV file
# The table must have exactly 6 columns, in this order:
# 1. natural sequence name
# 2. mutated sequence name
# 3. natural sequence
# 4. mutated sequence
# 5. cut position      (1 everywhere unless stated otherwise)
# 6. end cut position  (-1 everywhere)

output_dir = 'mut_lists'
os.makedirs(output_dir, exist_ok=True)
programs = ['DynaMut2']


info_table = pd.read_csv(info_table_file)
# Raise explicitly instead of asserting: the table is user input, and
# assertions are skipped when Python runs with -O.
if len(info_table.columns) != 6:
  raise ValueError("Lentelė netinkamo formato!")
info_table.columns = ['name', 'mut_name', 'natural', 'mutated', 'cut_pos', 'end_cut_pos']

print(f'Lentelėje rasta {len(info_table)} sekų, pirmos 5:')
print(info_table.head(5))

# Process every natural/mutated pair listed in the table.
for index, row in info_table.iterrows():
  run_all_for_seq(
      natural_seq_name=row['name'],
      mutated_seq_name=row['mut_name'],
      full_natural_seq=row['natural'],
      full_mutated_seq=row['mutated'],
      cut_position=int(row['cut_pos']),
      end_cut_position=int(row['end_cut_pos']),
      programs=programs,
      output_dir=output_dir
  )

# Bundle all results into the archive "mut_lists.zip"
shutil.make_archive(output_dir, 'zip', output_dir)

Lentelėje rasta 121 sekų, pirmos 5:
         name    mut_name                                            natural  \
0  A0A024S7H6           0  MVKAVAVLRGDSNVKGTVVFEQASESSATVITYSLSGNDPNALRGF...   
1  A0A024THB6  A0A6G0X6Z6  MAKAVVTLYGDDATVYGTLVLSQSNEDAKTIVAGSLKGLSAGKHAL...   
2  A0A031LV15        5400  MQSKVSAFKVSMLSALLSLGFVGCAATQNTSASKESNTKIQTIPVN...   
3  A0A061Q2Z6        1358  MNKTTWLAALTLLSSPAFAETVSVEMIDLGSGQSTGTVMISDSDYG...   
4  A0A084TNJ5        2833  MKKISFLTLALAFTLAACKKEKKQETPASADVQEQVEEVKEVKEEI...   

                                             mutated  cut_pos  end_cut_pos  
0  MVKAVSVLRGDSKVSGTVHFEQASENPPTTVTYEITGNDPNAKRGF...        1           -1  
1  MAKAVVTLYGNEGQVFGSLVLSQANEDAKTIIAGSLKGLSAGKHAI...        1           -1  
2  MHEVSAQGVDKKIGTVEFRDSAQGLVISLDLESLPPGYHGFHIHEK...       38           -1  
3  MVEWKQLDSGKTVGYVVVSQSDYGVVFTPHLNGLPAGMHGFHIHTN...       14           -1  
4  MFYLESKSGANATGMAIFKEEDGEVSMMAVFEGLTPGTHAIHLHEK...       45           -1  
Ingoruojama delecija 

'/content/mut_lists.zip'