# Pacotes usados

In [1]:
!pip install BioPython

Collecting BioPython
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: BioPython
Successfully installed BioPython-1.83


In [2]:
import numpy as np

from Bio.Seq import Seq
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

# Dados usados

In [3]:
# Location of the input FASTA file.
# NOTE(review): absolute path under /content/drive — presumably a Google Drive
# mount in Colab. No mount step is visible in this section, so confirm the
# drive is mounted before this cell runs.
data_path = "/content/drive/MyDrive/AB/exercicio_4/data/IN Dengue 21 segmentos.fasta"

In [4]:
# Parse the FASTA file and materialise every record into a list, so the
# records can be indexed and iterated more than once by later cells.
dna_strings = list(SeqIO.parse(data_path, "fasta"))

In [5]:
# Preview only the first few records instead of dumping the whole list.
dna_strings[:5]

[SeqRecord(seq=Seq('AGTTGTTAGTCTACGTGGACCGACAAGAACAGTTTCGAATCGGAAGCTTGCTTA...ACG'), id='AF226685.2', name='AF226685.2', description='AF226685.2 Dengue virus type 1 strain Den1BR/90, complete genome segment 1', dbxrefs=[]),
 SeqRecord(seq=Seq('AGGGGGAGAGCCACACATGATAGTTAGTAAGCAGGAAAGAGGAAAGTCACTCTT...GAC'), id='AF226685.2', name='AF226685.2', description='AF226685.2 Dengue virus type 1 strain Den1BR/90, complete genome segment 2', dbxrefs=[]),
 SeqRecord(seq=Seq('AAACGTTCCGTGGCACTGGCCCCACATGTGGGACTTGGTCTAGAAACAAGAACC...TGT'), id='AF226685.2', name='AF226685.2', description='AF226685.2 Dengue virus type 1 strain Den1BR/90, complete genome segment 3', dbxrefs=[]),
 SeqRecord(seq=Seq('CGACGAACGTTTGTGGACAGAGGCTGGGGTAATGGCTGCGGACTATTTGGAAAA...TTA'), id='AF226685.2', name='AF226685.2', description='AF226685.2 Dengue virus type 1 strain Den1BR/90, complete genome segment 4', dbxrefs=[]),
 SeqRecord(seq=Seq('CATTCAAGACAGCTCATGCAAAGAAACAGGAAGTAGTCGTACTGGGATCACAGG...ATG'), id='AF226685.2', name='A

# Função para conversão de DNA em RNA

In [6]:
def transform_dna_rna(dna_sequences_array):
  """Transcribe DNA records into RNA records by replacing thymine with uracil.

  Args:
    dna_sequences_array: iterable of SeqRecord objects holding DNA sequences.

  Returns:
    A list with one new SeqRecord per input record, in the same order. Each
    new record keeps the id and name of its source record, and its description
    is the source description followed by ", converted to RNA".
  """
  rna_strings_sequences = []

  for dna_record in dna_sequences_array:
    # Plain string replacement handles upper-case and lower-case (soft-masked)
    # thymine alike and avoids a round-trip through a NumPy character array.
    rna_text = str(dna_record.seq).replace("T", "U").replace("t", "u")

    rna_strings_sequences.append(
        SeqRecord(
            Seq(rna_text),
            id = dna_record.id,
            name = dna_record.name,
            description = f"{dna_record.description}, converted to RNA"
        )
    )

  return rna_strings_sequences

# Cria-se o dicionário para tradução de RNA em proteínas. Adota-se X como símbolo das trincas de nucleotídeos de parada (códons de parada)

In [7]:
# RNA codon -> one-letter amino acid table; "X" marks the three stop codons
# (UAA, UAG, UGA).
# Codons are generated grouped by second base, then first base, then third
# base, each running over U, C, A, G. The amino-acid string below lists the
# translations in exactly that order, one 16-letter row per second base.
dict_rna_protein = dict(zip(
    (
        first_base + second_base + third_base
        for second_base in "UCAG"
        for first_base in "UCAG"
        for third_base in "UCAG"
    ),
    "FFLLLLLLIIIMVVVV"   # second base U
    "SSSSPPPPTTTTAAAA"   # second base C
    "YYXXHHQQNNKKDDEE"   # second base A
    "CCXWRRRRSSRRGGGG",  # second base G
))

# Função para conversão RNA para proteína

In [8]:
def transform_rna_protein(rna_sequences_array, frame = 1):
  """Translate RNA records into protein records for one of six reading frames.

  Frames 1, 2 and 3 read each sequence from left to right starting at offset
  0, 1 and 2. Frames 4, 5 and 6 read the reversed sequence (last nucleotide
  first) starting at offset 0, 1 and 2. Codons are looked up in the
  module-level dict_rna_protein table; trailing nucleotides that do not fill
  a whole codon are ignored.

  NOTE(review): frames 4-6 reverse the strand but do not complement it. The
  conventional six-frame translation uses the reverse complement; the
  existing behaviour is kept here, so confirm which one is intended.

  Args:
    rna_sequences_array: iterable of SeqRecord objects holding RNA sequences,
      typically the output of transform_dna_rna.
    frame: reading frame number, from 1 to 6.

  Returns:
    A list with one protein SeqRecord per input record, in the same order.
    When frame is not supported, a message is printed and None is returned.

  Raises:
    KeyError: if a codon is not present in dict_rna_protein.
  """
  supported_frames = (1, 2, 3, 4, 5, 6)
  if frame not in supported_frames:
    # Kept as-is for existing callers: report the problem and return None.
    return print("frame number not supported")

  frame_index = supported_frames.index(frame)
  offset = frame_index % 3           # 0, 1 or 2 nucleotides skipped
  read_reversed = frame_index >= 3   # frames 4-6 read from the far end

  # Suffix appended by transform_dna_rna; stripped only when actually present
  # so that descriptions from other sources are not truncated.
  rna_suffix = ", converted to RNA"

  protein_strings_sequences = []

  for rna_record in rna_sequences_array:
    # Work on an upper-case plain string: codon keys are upper-case str, and
    # lower-case (soft-masked) nucleotides denote the same bases.
    rna_text = str(rna_record.seq).upper()
    if read_reversed:
      rna_text = rna_text[::-1]

    # Every codon start such that a full triplet fits. Using the same bound for
    # both directions fixes the dropped final codon in the reversed frames.
    protein_text = "".join(
        dict_rna_protein[rna_text[start:start + 3]]
        for start in range(offset, len(rna_text) - 2, 3)
    )

    base_description = rna_record.description
    if base_description.endswith(rna_suffix):
      base_description = base_description[:-len(rna_suffix)]

    protein_strings_sequences.append(
        SeqRecord(
            Seq(protein_text),
            id = rna_record.id,
            name = rna_record.name,
            description = f"{base_description}, converted to protein, frame {frame}"
        )
    )

  return protein_strings_sequences

# Extraem-se as informações

In [9]:
# Transcribe every DNA record to RNA once; all translations reuse this list.
rna_records = transform_dna_rna(dna_strings)

# Translate the RNA records in each of the six reading frames, in frame order.
protein_all_6_frames = [
    transform_rna_protein(rna_records, frame = frame_number)
    for frame_number in range(1, 7)
]

# Keep the individual per-frame names bound as well.
(
    protein_frame_1,
    protein_frame_2,
    protein_frame_3,
    protein_frame_4,
    protein_frame_5,
    protein_frame_6,
) = protein_all_6_frames

# Criam-se os arquivos .fasta

In [10]:
# Write the RNA records. The context manager closes the file even if the
# write raises, so no handle is leaked.
with open("/content/drive/MyDrive/AB/exercicio_4/data/dengue_sequencias_rna.fasta", "w") as fasta_file:
  write_file = SeqIO.write(rna_records, fasta_file, "fasta")


# Write one protein FASTA file per reading frame; file names are numbered 1-6
# while protein_all_6_frames is indexed 0-5.
for i in range(6):
  with open(f"/content/drive/MyDrive/AB/exercicio_4/data/dengue_sequencias_proteina_frame_{i + 1}.fasta", "w") as fasta_file:
    write_file = SeqIO.write(protein_all_6_frames[i], fasta_file, "fasta")