# Python course 2021 - Exercises F

## Part1 - DNA, RNA and peptide sequences



---
1.1) Write a function to get the reverse complement (upper case letters) of a DNA sequence given in upper case letters!


In [12]:
dna_alphabet = 'ACGT'

def rev_comp(sequence):
  rev_comp = ""
  for base in sequence:
    if base not in dna_alphabet:
      print("This is not a DNA sequence!")
      exit(1)
    if base == 'A':
      rev_comp += 'T'
    elif base == 'C':
      rev_comp += 'G'
    elif base == 'G':
      rev_comp += 'C'
    elif base == 'T':
      rev_comp += 'A'
  return rev_comp[::-1]

print(rev_comp("GTTATACA"))

TGTATAAC




---
1.2) Write a function to convert a DNA sequence into a RNA sequence!


In [6]:
rna_alphabet = 'ACGU'
def convert_dna_to_rna(sequence):
  rna_sequence = ""
  for base in sequence:
    if not (base in dna_alphabet):
      print("This is not a DNA sequence!")
      exit(1)
    if base == 'T':
      rna_sequence += 'U'
    else:
      rna_sequence += base 
  return rna_sequence

print(convert_dna_to_rna("GGCAACGATGGGCTTTC"))

GGCAACGAUGGGCUUUC




---
1.3) Write a function to translate a DNA sequence into amino acids (first frame only)!
* Tip: [wiki - codon tables](https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables)


In [18]:
amino_alphabet = 'ARNDCQEGHILKMFPSTWYVUO'
triplet_table = {
    'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
    'AAU': 'N', 'AAC': 'N',
    'GAU': 'D', 'GAC': 'D',
    'AAU': 'B', 'AAC': 'B', 'GAU': 'B', 'GAC': 'B',
    'UGU': 'C', 'UGC': 'C',
    'CAA': 'Q', 'CAG': 'Q',
    'GAA': 'G', 'GAG': 'G',
    'CAA': 'Z', 'CAG': 'Z', 'GAA': 'Z', 'GAG': 'Z',
    'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    'CAU': 'H', 'CAC': 'H',
    'AUU': 'I', 'AUC': 'I', 'AUA': 'I',
    'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L', 'UUA': 'L', 'UUG': 'L',
    'AAA': 'K', 'AAG': 'K',
    'AUG': 'M',
    'UUU': 'F', 'UUC': 'F',
    'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S', 'AGU': 'S', 'AGC': 'S',
    'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'UGG': 'W',
    'UAU': 'Y', 'UAC': 'Y',
    'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V'
}
start = 'AUG'
stop = ('UAA', 'UGA', 'UAG')

def convert_dna_to_amino(sequence):
  rna_sequence = convert_dna_to_rna(sequence)
  amino_sequence = ""
  if rna_sequence.find('AUG') == -1:    # s.find(x) returns -1 if x could not be found in s
    print("No start codon found!")
    exit(1)
  for i in range(rna_sequence.find('AUG'), len(rna_sequence)-2):
    if rna_sequence[i:i+3] in stop:
      break
    triplet = triplet_table[rna_sequence[i:i+3]]
    amino_sequence += triplet
    print(rna_sequence[i:i+3], triplet)
  return amino_sequence

print("Resulting amino sequence:", convert_dna_to_amino('ACCCGTTTTGGAGACTATGGGGCCGTGAAA'))
  

AUG M
UGG W
GGG G
GGG G
GGC G
GCC A
CCG P
CGU R
GUG V
Resulting amino sequence: MWGGGAPRV




---
1.4) Write a function to translate DNA sequences in all 6 frames into peptide sequences! The longest peptide sequence per DNA sequence should be returned!


In [25]:
def convert_dna_to_peptides_all_frames(sequence):
  rev_complement = rev_comp(sequence)
  fwd_strand = convert_dna_to_rna(sequence)
  rev_strand = convert_dna_to_rna(rev_complement)
  frame1 = ""
  frame2 = ""
  frame3 = ""

  frame4 = ""
  frame5 = ""
  frame6 = ""
  for i in range(len(sequence)-2):
    if fwd_strand[i:i+3] in stop or rev_strand[i:i+3] in stop:
      break

    frame1 += triplet_table[fwd_strand[i:i+3]]
    frame4 += triplet_table[rev_strand[i:i+3]]

    if i > len(sequence)-3:
      continue
    if fwd_strand[+1:i+4] in stop or rev_strand[i+1:i+4] in stop:
      break
    frame2 += triplet_table[fwd_strand[i+1:i+4]]
    frame5 += triplet_table[rev_strand[i+1:i+4]]

    if i > len(sequence)-4:
      continue
    if fwd_strand[i+2:i+5] in stop or rev_strand[i+2:i+5] in stop:
      break
    frame3 += triplet_table[fwd_strand[i+2:i+5]]
    frame6 += triplet_table[rev_strand[i+2:i+5]]
  return frame1, frame2, frame3, frame4, frame5, frame6

input = 'ACCCGTTTTGGAGACTATGGGGCCGTGAAA'
print(input)
print("Translation for all six frames:", convert_dna_to_peptides_all_frames(input))

ACCCGTTTTGGAGACTATGGGGCCGTGAAA
Translation for all six frames: ('TPPRVFFLWGZR', 'PPRVFFLWGZRB', 'PRVFFLWGZRB', 'FFSHTRGAPPPH', 'FSHTRGAPPPHI', 'SHTRGAPPPHI')
