## Translating nucleotide to amino acid sequences
Code written by Eugene Gan, Ng Jing Ting and Tay Hui Yi 

#### File format: FASTA file format of individual nucleotide sequences, collated in a file

#### Required packages: Biopython

In [None]:
#Packages to import; Biopython must be installed beforehand (os and glob are part of the Python standard library)
from Bio.Seq import Seq
import os
import glob

### 1. Translating individual sequences in the notebook

In [None]:
#if you want to directly translate a sequence
def translate(dnaSeq):
    """Translate a nucleotide sequence into its amino acid sequence.

    Spaces, tabs and line breaks are removed first, so a sequence pasted
    over several lines is read as one continuous sequence instead of the
    whitespace ending up inside a codon.

    dnaSeq: the nucleotide sequence, as a string.
    Returns the result of Biopython's Seq.translate() (default settings),
    converted to a string.
    """
    strand = Seq("".join(dnaSeq.split()))
    return str(strand.translate())

In [None]:
#Insert the nucleotide sequence between the quotes; the call returns the translated amino acid string
translate("")

### 2. Looping the translation over all files in a folder

- all files should be in FASTA format
  

### 2.1. Change your working directory to the folder containing all your sequences

In [None]:
#Show the current working directory (relative file names used below are looked up here)
os.getcwd()

In [None]:
#Change to the desired directory: replace './' (the current directory, i.e. no change)
#with the path of the folder that holds your FASTA files
os.chdir('./')

In [None]:
#Check that the working directory is now the one you intended
os.getcwd()

### 2.2 Reading the files in the directory

In [None]:
#Collect the names of all files in the current directory as gt_data
#Sub-folders are left out because they cannot be opened as sequence files;
#the names are sorted so that every run processes the files in the same order
gt_data = sorted(f for f in glob.glob("*") if os.path.isfile(f))

In [None]:
#Display the collected file names to check that every expected file is listed
#(the files themselves have not been opened yet)
gt_data

### 3. Carrying out the loop

In [None]:
#Prior to translation, check the number of bases of every sequence in each file
#Format of output (each on its own line): file name, then for every ">" record
#the number of nucleotides and the remainder after dividing by 3 (0 = multiple of 3)
for openFileName in gt_data:
    print(openFileName)
    lengths = []                                  #one running base count per ">" record
    #"with" closes the file even if reading it fails
    with open(openFileName, "r") as file:
        for line in file:
            bases = line.strip()                  #drop the newline so it is not counted as a base
            if ">" in line:
                lengths.append(0)                 #a header starts a new record
            elif lengths and bases:
                lengths[-1] += len(bases)         #a sequence may be wrapped over several lines
    for numBases in lengths:
        print(numBases)
        print(numBases % 3)

In [None]:
#Carrying out translation
#Writing a new file, in FASTA format, for the translated sequences
#Note to user: you are able to change the file name of the translated sequences
for openFileName in gt_data:
    #Read every record of the file: one [header, nucleotides] pair per ">" line
    records = []
    with open(openFileName, "r") as file:
        for line in file:
            if ">" in line:
                records.append([line.rstrip("\n"), ""])
                continue
            bases = line.strip()                                #remove the newline so it is never read as part of a codon
            if records and bases:
                records[-1][1] += bases                         #a sequence may be wrapped over several lines
    if not records:
        #without a ">" header there is no record to translate
        print(openFileName + ": no FASTA header found, file skipped")
        continue
    filename = openFileName + "_translated"                 #filename can be modified by user
    print(filename)
    translated = [header + "\n" + str(Seq(dna).translate()) for header, dna in records]
    #One output file per input file, opened once so later records do not overwrite earlier ones
    with open(filename, 'w') as outfile:
        #Records are separated by a newline; none is written after the last sequence
        outfile.write("\n".join(translated))

### 4. Combining all translated files to a single file

- Note to user: change the working directory to the folder containing ONLY the translated sequences. This may be done manually. Alternatively, you may write your newly translated files into a separate folder to facilitate this process.

In [None]:
#User may need to change the working directory once again (refer to the code in 2.1 for clearer steps)
#Replace './' (the current directory, i.e. no change) with the path of the folder holding the translated files
os.chdir('./')

In [None]:
#Combining the files
#The files to combine are listed afresh from the current working directory, because
#gt_data still holds the names of the nucleotide files collected in 2.2
name = "translated"                                               #may be modified by user
translatedFiles = sorted(
    f for f in glob.glob("*")
    if os.path.isfile(f) and f != name                            #skip folders and a combined file left by an earlier run
)
#"with" closes every file even if reading or writing fails part-way
with open(name, 'w') as outfile:
    for openFileName in translatedFiles:
        with open(openFileName, "r") as file:
            outfile.writelines(file)
        outfile.write("\n")                                       #start the next file's content on a new line