In [9]:
def rest_of_ORF(dna):
    """ Takes a DNA sequence that is assumed to begin with a start
        codon and returns the sequence up to but not including the
        first in frame stop codon.  If there is no in frame stop codon,
        returns the whole string.

        Only codons aligned with the start of the string (offsets 0, 3,
        6, ...) are examined; a trailing partial codon of one or two
        bases can never match a stop codon and is kept in the result.

        dna: a DNA sequence
        returns: the open reading frame represented as a string
    >>> rest_of_ORF("ATGTGAA")
    'ATG'
    >>> rest_of_ORF("ATGAGATAGG")
    'ATGAGA'
    >>> rest_of_ORF("ATGCCC")
    'ATGCCC'
    >>> rest_of_ORF("")
    ''
    """
    stop_codons = ('TAG', 'TGA', 'TAA')
    # Step through the sequence one codon (3 bases) at a time so that only
    # in-frame triplets are compared against the stop codons.
    for index in range(0, len(dna), 3):
        if dna[index:index + 3] in stop_codons:
            # Everything before the stop codon is the ORF.
            return dna[:index]
    # No in-frame stop codon: the ORF runs to the end of the sequence.
    return dna

# Notebook sanity check: the in-frame TGA directly after ATG ends the ORF,
# so only the start codon itself is returned.
rest_of_ORF("ATGTGAATGA")

'ATG'

In [30]:
def find_all_ORFs_oneframe(dna):
    """ Finds all non-nested open reading frames in the given DNA
        sequence and returns them as a list.  This function should
        only find ORFs that are in the default frame of the sequence
        (i.e. they start on indices that are multiples of 3).
        By non-nested we mean that if an ORF occurs entirely within
        another ORF, it should not be included in the returned list of ORFs.

        Each returned ORF begins at the first start codon found after the
        previous ORF's stop codon.  When no further start codon exists in
        the frame, scanning stops and nothing else is added.

        dna: a DNA sequence
        returns: a list of non-nested ORFs
    >>> find_all_ORFs_oneframe("ATGCATGAATGTAGATAGATGTGCCC")
    ['ATGCATGAATGTAGA', 'ATGTGCCC']
    >>> find_all_ORFs_oneframe("ATGATGCATGAATGTAGATAGATGTGCCC")
    ['ATGATGCATGAATGTAGA', 'ATGTGCCC']
    >>> find_all_ORFs_oneframe("ATGTAGCCCCCCCCC")
    ['ATG']
    >>> find_all_ORFs_oneframe("CCCCCC")
    []
    """
    start_codon = 'ATG'
    codon_len = len(start_codon)
    orfs = []
    index = 0
    # Walk the frame codon by codon; a full codon must remain to match.
    while index + codon_len <= len(dna):
        if dna[index:index + codon_len] != start_codon:
            index += codon_len
            continue
        orf = rest_of_ORF(dna[index:])
        orfs.append(orf)
        # Resume just past the ORF and its stop codon.  Jumping over the
        # whole ORF is what keeps nested start codons from being reported,
        # and the multiple-of-3 step keeps the scan in frame.  If the ORF
        # ran to the end of the sequence this moves past the end and the
        # loop terminates.
        index += len(orf) + codon_len
    return orfs


# Notebook sanity check using the first docstring example; the cell output
# below lists the two ORFs found in the default frame.
find_all_ORFs_oneframe("ATGCATGAATGTAGATAGATGTGCCC")

['ATGCATGAATGTAGA', 'ATGTGCCC']

In [36]:
def find_all_ORFs(dna):
    """ Collects the non-nested open reading frames of a DNA sequence from
        each of its 3 possible reading frames into one list.  An ORF that
        lies entirely inside another ORF of the same frame is not reported.

        Results are ordered by frame: ORFs from the frame starting at
        offset 0 come first, then offset 1, then offset 2.

        dna: a DNA sequence
        returns: a list of non-nested ORFs

    >>> find_all_ORFs("ATGCATGAATGTAG")
    ['ATGCATGAATGTAG', 'ATGAATGTAG', 'ATG']
    >>> find_all_ORFs("TAGATGCATGAATGTAGTAG")
    ['ATGCATGAATGTAGTAG', 'ATGAATGTAGTAG', 'ATG']
    """
    # Dropping the first 0, 1 or 2 bases turns each reading frame into the
    # default frame, which is the only one the single-frame finder scans.
    return [
        orf
        for offset in range(3)
        for orf in find_all_ORFs_oneframe(dna[offset:])
    ]


['ATGCATGAATGTAGTAG', 'ATGAATGTAGTAG', 'ATG']