In [3]:
#   Motif Finder

def find_motifs(seq, motif):
    """Return the 0-based start indices of every occurrence of `motif` in `seq`.

    Occurrences may overlap (e.g. "AA" in "AAAA" is found at 0, 1 and 2).
    The comparison is exact, so it is case-sensitive.

    Args:
        seq: The sequence to search.
        motif: The sub-sequence to look for.

    Returns:
        A list of start indices in ascending order. The list is empty when
        the motif is empty, is longer than `seq`, or does not occur.
    """
    width = len(motif)
    # An empty motif would compare equal to the empty slice at every position
    # (including one past the end), which is not a meaningful match.
    if width == 0:
        return []
    return [
        start
        for start in range(len(seq) - width + 1)
        if seq[start:start + width] == motif
    ]


# find_motifs slides over the DNA sequence, compares each substring of the motif's length with the motif, and returns the 0-based starting indices where the motif (e.g. "ATG") occurs.
# For the example sequence "ATGCGTATGCTTAGCTAA", the motif "ATG" is found at [0, 6].


In [4]:
#  GC Content
def gc_content(seq):
    """Return the GC content of `seq` as a percentage rounded to 2 decimals.

    Only uppercase "G" and "C" are counted; every other character (including
    lowercase letters) contributes to the length but not to the GC total.

    Args:
        seq: The nucleotide sequence as a string.

    Returns:
        The percentage of characters that are "G" or "C", as a float in
        [0.0, 100.0]. An empty sequence yields 0.0 instead of dividing by zero.
    """
    total = len(seq)
    # Guard the division: an empty sequence has no bases, so report 0% GC.
    if total == 0:
        return 0.0
    gc_total = seq.count("G") + seq.count("C")
    return round((gc_total / total) * 100, 2)


# Mathematically correct and neatly formatted.
# GC% = ((4 G + 3 C)/18) × 100 = 38.89% → matches the output exactly.

In [5]:
# ORF Finder: an open reading frame (ORF) is a continuous stretch of nucleotides in DNA
# that has the potential to code for a protein, starting with a start codon and ending with a stop codon.
def find_orfs(seq):
    """Collect every open reading frame (ORF) found in `seq`.

    Every position holding the start codon "ATG" is tried, whatever its
    frame. From there the sequence is read codon by codon (steps of 3) until
    the first stop codon ("TAA", "TAG" or "TGA") in that frame.

    Args:
        seq: The nucleotide sequence as a string.

    Returns:
        A list of ORF substrings, each running from its "ATG" through its stop
        codon inclusive, ordered by start position. A start codon with no
        in-frame stop codon after it contributes nothing. ORFs that start
        inside another ORF are reported as separate entries.
    """
    stop_codons = ("TAA", "TAG", "TGA")
    found = []
    # One past the last index at which a full 3-letter codon can begin.
    codon_limit = len(seq) - 2

    for begin in range(codon_limit):
        if seq[begin:begin + 3] != "ATG":
            continue
        # First in-frame stop codon after the start codon, if there is one.
        stop_at = next(
            (
                pos
                for pos in range(begin + 3, codon_limit, 3)
                if seq[pos:pos + 3] in stop_codons
            ),
            None,
        )
        if stop_at is not None:
            found.append(seq[begin:stop_at + 3])

    return found


# Logically sound — it:

# Finds a start codon "ATG"

# Then reads in codon steps (+3)

# Stops when it hits a valid stop codon (TAA, TAG, or TGA)

# Extracts that full substring as one ORF

In [6]:
 
# Example DNA sequence used to demonstrate find_motifs, gc_content and find_orfs.
DNA = "ATGCGTATGCTTAGCTAA"

# Start indices of the start-codon motif.
motif_positions = find_motifs(DNA, "ATG")
print("Motif 'ATG' found at:", motif_positions)

# Percentage of G and C bases.
gc_percent = gc_content(DNA)
print("GC Content:", gc_percent, "%")

# Every ATG-to-stop-codon stretch.
coding_regions = find_orfs(DNA)
print("Coding Regions (ORFs):", coding_regions)

Motif 'ATG' found at: [0, 6]
GC Content: 38.89 %
Coding Regions (ORFs): ['ATGCGTATGCTTAGCTAA', 'ATGCTTAGCTAA']
