#### BA4A: Translate an RNA String into an Amino Acid String

In [None]:


def generateKmerwithoutOveralpping(genome):
  """Split ``genome`` into consecutive, non-overlapping 3-mers (codons).

  Only complete codons are returned: if the length of ``genome`` is not a
  multiple of 3, the one or two leftover characters at the end are dropped.

  Args:
    genome: sequence to split (any sliceable sequence, typically a string).

  Returns:
    A list of length-3 slices taken at offsets 0, 3, 6, ...
  """
  codon_len = 3
  # The last valid start is len(genome) - 3; starting any later would yield
  # a truncated fragment instead of a full codon.
  last_start = len(genome) - codon_len
  return [genome[i:i + codon_len] for i in range(0, last_start + 1, codon_len)]

def TranslateRNAintoAminoAcid(genome):
  """Translate an RNA string into its amino-acid string.

  The input is cut into codons by ``generateKmerwithoutOveralpping`` and
  each codon is looked up in the module-level ``aminoCodes`` table; the
  looked-up values are concatenated in order. Translation does not halt
  early: a codon whose table entry is the empty string simply contributes
  nothing to the result.

  Raises:
    KeyError: if a codon is not a key of ``aminoCodes``.
  """
  codons = generateKmerwithoutOveralpping(genome)
  return "".join(aminoCodes[codon] for codon in codons)


# main function
# RNA codon -> one-letter amino-acid code, listed in alphabetical codon order
# (one row per two-base prefix). The three codons UAA, UAG and UGA map to the
# empty string.
aminoCodes = {
  "AAA": "K", "AAC": "N", "AAG": "K", "AAU": "N",
  "ACA": "T", "ACC": "T", "ACG": "T", "ACU": "T",
  "AGA": "R", "AGC": "S", "AGG": "R", "AGU": "S",
  "AUA": "I", "AUC": "I", "AUG": "M", "AUU": "I",
  "CAA": "Q", "CAC": "H", "CAG": "Q", "CAU": "H",
  "CCA": "P", "CCC": "P", "CCG": "P", "CCU": "P",
  "CGA": "R", "CGC": "R", "CGG": "R", "CGU": "R",
  "CUA": "L", "CUC": "L", "CUG": "L", "CUU": "L",
  "GAA": "E", "GAC": "D", "GAG": "E", "GAU": "D",
  "GCA": "A", "GCC": "A", "GCG": "A", "GCU": "A",
  "GGA": "G", "GGC": "G", "GGG": "G", "GGU": "G",
  "GUA": "V", "GUC": "V", "GUG": "V", "GUU": "V",
  "UAA": "", "UAC": "Y", "UAG": "", "UAU": "Y",
  "UCA": "S", "UCC": "S", "UCG": "S", "UCU": "S",
  "UGA": "", "UGC": "C", "UGG": "W", "UGU": "C",
  "UUA": "L", "UUC": "F", "UUG": "L", "UUU": "F",
}
# Sample RNA input for BA4A; its final codon (UGA) maps to '' in aminoCodes.
genome = "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA"
# Bare expression: the translated string is not printed or stored here, so it
# is only shown when an interactive session echoes the value.
TranslateRNAintoAminoAcid(genome)


'MAMAPRTEINSTRING'

#### BA4B: Find Substrings of a Genome Encoding a Given Amino Acid String

In [None]:
def convertRnatoDna(dnaString):
  """Return ``dnaString`` with every 'T' replaced by 'U'.

  Despite the function's name, the substitution performed is T -> U; all
  other characters (including lowercase 't') are left untouched.
  """
  t_to_u = str.maketrans("T", "U")
  return dnaString.translate(t_to_u)

def ReverseCompDna(dna):
  """Return the reverse complement of a DNA string.

  A<->T and C<->G are swapped and the result is reversed. Any character
  other than 'A', 'T', 'C' or 'G' is silently omitted from the output.
  """
  pairing = {"A": "T", "T": "A", "C": "G", "G": "C"}
  # Walking the input backwards and complementing each base gives the same
  # string as complementing first and reversing afterwards.
  return "".join(pairing.get(base, "") for base in reversed(dna))

def isEncodedbyDNA(dnaString, aminoString):
  """Find the substrings of ``dnaString`` that encode ``aminoString``.

  Every window of ``3 * len(aminoString)`` characters is examined. The
  window is recorded when its own translation equals ``aminoString`` and,
  independently, when the translation of its reverse complement does; a
  window matching on both strands is therefore recorded twice.

  Returns:
    The matching windows (always the forward-strand text), in order of
    their start position.
  """
  window = 3 * len(aminoString)
  matches = []
  for start in range(len(dnaString) - window + 1):
    segment = dnaString[start:start + window]
    # Forward strand first, then the reverse complement.
    for strand in (segment, ReverseCompDna(segment)):
      if TranslateRNAintoAminoAcid(convertRnatoDna(strand)) == aminoString:
        matches.append(segment)
  return matches

# main function
# Sample BA4B input: the DNA text to scan and the peptide to look for.
dnaString = "ATGGCCATGGCCCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA"
aminoString = "MA"

# Collect the encoding substrings and print them one per line.
result = isEncodedbyDNA(dnaString, aminoString)
for r in result:
  print(r)



ATGGCC
GGCCAT
ATGGCC


#### BA4C: Generate the Theoretical Spectrum of a Cyclic Peptide

In [None]:
# Integer mass for each one-letter amino-acid code, in ascending mass order.
# I/L and K/Q share the same integer mass.
mass = {
  "G": 57, "A": 71, "S": 87, "P": 97, "V": 99,
  "T": 101, "C": 103, "I": 113, "L": 113, "N": 114,
  "D": 115, "K": 128, "Q": 128, "E": 129, "M": 131,
  "H": 137, "F": 147, "R": 156, "Y": 163, "W": 186,
}

def CalculateMass(peptide):
  """Return the total mass of ``peptide``.

  Each element of ``peptide`` is looked up in the module-level ``mass``
  table and the values are summed; an empty peptide has mass 0.

  Raises:
    KeyError: if an element of ``peptide`` is not a key of ``mass``.
  """
  return sum(mass[residue] for residue in peptide)

def GenerateTheoraticalSpectrum(peptide):
  """Return the sorted theoretical spectrum of a cyclic peptide.

  The spectrum contains 0, the mass of every cyclic sub-peptide of length
  1 .. len(peptide) - 1 (one per start position, wrapping around the end),
  and the mass of the whole peptide, in ascending order.
  """
  size = len(peptide)
  # Slicing the peptide appended to itself yields the wrap-around
  # sub-peptides without a separate branch for the cyclic case.
  doubled = peptide + peptide
  spectrum = [0]
  for width in range(1, size):
    for start in range(size):
      spectrum.append(CalculateMass(doubled[start:start + width]))
  spectrum.append(CalculateMass(peptide))
  return sorted(spectrum)


# main
# Sample BA4C input peptide.
peptide = "LEQN"
ans = GenerateTheoraticalSpectrum(peptide)
# Print the masses on a single line, separated by spaces.
for i in ans:
  print(i, end=" ")

0 113 114 128 129 227 242 242 257 355 356 370 371 484 

#### BA4H: Generate the Convolution of a Spectrum

In [1]:

def GenerateConvolutionSpectrum(spectrum):
  output = [] # subtraction vaue
  for i in spectrum: # row val
    for j in spectrum: # col val
      if (i-j) > 0:
        output.append(i-j) 
  
  # counting frequency
  freq = {}
  for item in output:
    if item in freq:
      freq[item] += 1
    else:
      freq[item] = 1
  
  # s
  sorted_list = [k for k, j in sorted(freq.items(), key=lambda item: item[1], reverse=True)] #sorts by count and returns key
  
  # multiplying with frequency 
  ans = []
  for item in sorted_list:
      ans += [item] * freq[item]
  return ans

# main function
# Read the input spectrum from the dataset file at a hard-coded path.
# NOTE(review): every line overwrites `spectrum`, so only the last line of
# the file is kept, and `spectrum` is never assigned if the file has no
# lines. Presumably the file holds a single line of integers; confirm.
with open('/content/drive/MyDrive/Bioinformatics Lab/Rosalind Dataset/Lab Task 6/rosalind_ba4h.txt', 'r') as f:
  for line in f:  # each line is a string; split it on whitespace
    numbers_str = line.split()
    # convert the whitespace-separated tokens to ints
    spectrum = [int(x) for x in numbers_str] 

result = GenerateConvolutionSpectrum(spectrum)
# Print the convolution on a single line, separated by spaces.
for i in result:
  print(i, end=" ")

137 137 186 186 49 323 