## Protein Translation Problem

In [31]:
def rna_to_protein(rna):
    """
    Translate an RNA string into an amino-acid string, codon by codon.

    The codon table is read from 'RNA_codon_table_1.txt' on every call; each
    line is split on single spaces and its first two fields are used as
    (codon, amino acid). Trailing characters that do not fill a complete
    codon are ignored. Every ':' in the translated text is removed before
    returning (presumably the table's marker for stop codons - confirm
    against the table file).

    Raises KeyError when a codon of `rna` is missing from the table.
    """
    with open('RNA_codon_table_1.txt') as table_file:
        rows = (raw.strip().split(' ') for raw in table_file)
        codon_to_aa = {fields[0]: fields[1] for fields in rows}
    # Step through complete, non-overlapping triplets only.
    residues = [codon_to_aa[rna[pos:pos + 3]] for pos in range(0, len(rna) - 2, 3)]
    return ''.join(residues).replace(':', '')

In [66]:
# Try the translator on a sample RNA string.
rna_to_protein('AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA')

'MAMAPRTEINSTRING'

In [69]:
# Translate the RNA stored in the dataset file and save the resulting protein.
with open('datasets/dataset_96_4.txt', 'r') as f, open('answers/answer_96_4.txt', 'w') as g:
    # Strip surrounding whitespace: a trailing newline would otherwise be
    # glued onto leftover bases and looked up as a (non-existent) codon.
    rna = f.read().strip()
    g.write(rna_to_protein(rna))

## Peptide Encoding Problem

In [46]:
# Scratch check of a DNA reverse complement: each base is first swapped for the
# lowercase letter of its partner (so later replaces cannot touch it again),
# then the string is reversed and upper-cased.
'GGCCAT'.replace('T', 'a').replace('A', 't').replace('G', 'c').replace('C', 'g')[::-1].upper()

'ATGGCC'

In [57]:
def encoding(string, protein):
    """
    Find the substrings of a DNA string that encode a given peptide.

    `string` is converted to RNA (T -> U). Every window of
    3 * len(protein) characters is translated with rna_to_protein, first on
    the converted string itself and then on its reverse complement. Matching
    windows are returned as DNA (U -> T); a match found on the reverse
    complement is reported as the corresponding substring of the original
    strand.

    Returns a list of substrings: forward-strand matches first, followed by
    reverse-complement matches, each group in scan order.
    """
    complement = str.maketrans('UAGC', 'AUCG')

    def revcomp(seq):
        # Complement every base, then read the strand backwards.
        return seq.translate(complement)[::-1].upper()

    forward = string.replace('T', 'U')
    backward = revcomp(forward)
    window = 3 * len(protein)

    hits = [
        forward[start:start + window].replace('U', 'T')
        for start in range(len(forward) - window + 1)
        if rna_to_protein(forward[start:start + window]) == protein
    ]
    # Map reverse-strand matches back onto the original strand before reporting.
    hits.extend(
        revcomp(backward[start:start + window]).replace('U', 'T')
        for start in range(len(backward) - window + 1)
        if rna_to_protein(backward[start:start + window]) == protein
    )
    return hits

In [59]:
# Sample input: a DNA string and the peptide whose encoding substrings are wanted.
string = 'ATGGCCATGGCCCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA'
protein = 'MA'
encoding(string, protein)

['ATGGCC', 'ATGGCC', 'GGCCAT']

In [61]:
# Read the DNA string (first line) and the peptide (second line) from the
# dataset, then write every encoding substring on its own line.
# The output name previously read 'anwer_96_7.txt'; it now follows the
# 'answer_<id>.txt' pattern used for the other answer file.
with open('datasets/dataset_96_7.txt') as f, open('answers/answer_96_7.txt', 'w') as g:
    string = f.readline().strip()
    protein = f.readline().strip()
    g.write('\n'.join(encoding(string, protein)))

## Generating the Theoretical Spectrum of a Peptide

In [2]:
def mass_dict():
    """
    Return: dict of amino acid masses

    Reads 'integer_mass_table.txt'; each line is split on single spaces, the
    first field becomes the key and the second field, converted to int,
    becomes the value.
    """
    with open('integer_mass_table.txt') as table_file:
        rows = (raw.strip().split(' ') for raw in table_file)
        return {fields[0]: int(fields[1]) for fields in rows}

In [3]:
def LinearSpectrum(peptide, mass_dict):
    """
    Generating spectrum of linear peptide

    peptide:   sequence of residue symbols (e.g. a string such as 'NQEL').
    mass_dict: mapping from residue symbol to its mass.

    Return: sorted list containing 0 and the mass of every contiguous
    sub-peptide of `peptide` (the whole peptide included).

    Raises KeyError if a residue of `peptide` is missing from `mass_dict`.
    """
    # prefix_mass[k] is the total mass of the first k residues. Build it from
    # the `peptide` argument itself, not from any notebook-level variable.
    prefix_mass = [0]
    for residue in peptide:
        prefix_mass.append(prefix_mass[-1] + mass_dict[residue])
    linear_spectrum = [0]
    for i in range(len(peptide)):
        for j in range(i + 1, len(peptide) + 1):
            # Mass of peptide[i:j] as a difference of two prefix masses.
            linear_spectrum.append(prefix_mass[j] - prefix_mass[i])
    return sorted(linear_spectrum)

In [4]:
pept = 'NQEL'
# Unpacking the spectrum lets print() put the single spaces between the masses.
print(*LinearSpectrum(pept, mass_dict()))

0 113 114 128 129 242 242 257 370 371 484


In [5]:
def CyclicSpectrum(peptide, mass_dict):
    """
    Generating spectrum of cyclic peptide

    peptide:   sequence of residue symbols (e.g. a string such as 'MTAI').
    mass_dict: mapping from residue symbol to its mass.

    Return: sorted list containing 0, the mass of every contiguous
    sub-peptide of `peptide`, and the mass of every sub-peptide that wraps
    around from the end of `peptide` to its start.

    Raises KeyError if a residue of `peptide` is missing from `mass_dict`.
    """
    # prefix_mass[k] is the total mass of the first k residues. Build it from
    # the `peptide` argument itself, not from any notebook-level variable.
    prefix_mass = [0]
    for residue in peptide:
        prefix_mass.append(prefix_mass[-1] + mass_dict[residue])
    peptide_mass = prefix_mass[-1]
    cyclic_spectrum = [0]
    for i in range(len(peptide)):
        for j in range(i + 1, len(peptide) + 1):
            fragment_mass = prefix_mass[j] - prefix_mass[i]
            cyclic_spectrum.append(fragment_mass)
            # A fragment touching neither end leaves a remainder that joins
            # the tail to the head; that remainder is a wrap-around sub-peptide.
            if i > 0 and j < len(peptide):
                cyclic_spectrum.append(peptide_mass - fragment_mass)
    return sorted(cyclic_spectrum)

In [6]:
pept = 'MTAI'
# Unpacking the spectrum lets print() put the single spaces between the masses.
print(*CyclicSpectrum(pept, mass_dict()))

0 71 101 113 131 184 202 214 232 285 303 315 345 416


In [45]:
# Compare the space-joined cyclic spectrum of a candidate peptide with a target spectrum string.
pept = 'TAIM'
strr = ' '.join(str(mass) for mass in CyclicSpectrum(pept, mass_dict()))
spec = '0 71 101 113 131 184 202 214 232 285 303 315 345 416'
strr == spec

False

In [51]:
# Check whether every mass in the candidate's linear spectrum occurs in the target spectrum.
pept = 'CET'
spec = {int(token) for token in '0 71 99 101 103 128 129 199 200 204 227 230 231 298 303 328 330 332 333'.split()}
set(LinearSpectrum(pept, mass_dict())) <= spec

False

In [23]:
# Scratch experiment: splitting on whitespace and wrapping in set() yields a
# set of *string* tokens; note that this rebinds the notebook-level `spec`.
spec = set('1 2 3'.split())


{'1', '2', '3'}

In [35]:
# Another quick translation check on a short RNA string.
rna_to_protein('CCAAGUACAGAGAUUAAC')

'PSTEIN'