In [1]:

from Bio.Seq import Seq
from Bio.SeqUtils import gc_fraction

# Example DNA sequence that the three analysis steps below operate on.
sequence = Seq("ATGCGTATCGTAGCGTAGCTAAATGCCCGGGTAA")

# 1. Finding motifs
# Jump from one occurrence to the next with find(); resuming the search one
# base after each hit keeps overlapping occurrences. Positions are reported
# 1-based.
motif = "ATG"
positions = []
hit = sequence.find(motif)
while hit != -1:
    positions.append(hit + 1)
    hit = sequence.find(motif, hit + 1)
print("Motif positions:", positions)

# 2. Calculating GC content (using Biopython)
# The value is multiplied by 100 so it prints as a percentage.
GC_content = gc_fraction(sequence)
print("GC Content: {:.2f}%".format(GC_content * 100))

# 3. Identifying coding region (translate to protein)
# Starting right after the first ATG, walk the sequence codon by codon in
# that reading frame and cut the region at the first stop codon (inclusive).
# The region stays an empty string when there is no ATG or no in-frame stop.
start = sequence.find("ATG")
stop_codons = ["TAA", "TAG", "TGA"]
coding_region = ""
if start != -1:
    first_stop = next(
        (
            pos
            for pos in range(start + 3, len(sequence), 3)
            if sequence[pos:pos + 3] in stop_codons
        ),
        None,
    )
    if first_stop is not None:
        coding_region = sequence[start:first_stop + 3]
print("Coding Region:", coding_region)

# Translating coding region into protein sequence
protein = Seq(coding_region).translate()
print("Protein Sequence:", protein)

Motif positions: [1, 23]
GC Content: 50.00%
Coding Region: ATGCGTATCGTAGCGTAG
Protein Sequence: MRIVA*
