In [None]:
import re

def find_motifs(sequence, motif):
  """Finds all occurrences of a given motif in a DNA sequence.

  Args:
    sequence: A string representing the DNA sequence.
    motif: A string representing the motif to search for.

  Returns:
    A list of all start positions of the motif in the sequence.
  """

  matches = []
  for i in range(len(sequence) - len(motif) + 1):
    if sequence[i:i + len(motif)] == motif:
      matches.append(i)
  return matches

def calculate_gc_content(sequence):
  """Calculates the GC content of a DNA sequence.

  Args:
    sequence: A string representing the DNA sequence.

  Returns:
    The GC content of the sequence as a float.
  """

  gc_count = 0
  for base in sequence:
    if base == "g" or base == "c":
      gc_count += 1
  return gc_count / float(len(sequence))

def identify_coding_regions(sequence):
  """Identifies the coding regions of a DNA sequence.

  Args:
    sequence: A string representing the DNA sequence.

  Returns:
    A list of all start and end positions of the coding regions in the sequence.
  """

  coding_regions = []
  for i in range(0, len(sequence), 3):
    codon = sequence[i:i + 3]
    if codon not in ["tga", "taa", "tag"]:
      coding_regions.append((i, i + 3))
  return coding_regions

def main():
  """Analyzes a given DNA sequence and performs basic sequence manipulation."""

  # Get the DNA sequence from the user.
  sequence = input("Enter a DNA sequence: ")

  # Find the motifs in the sequence.
  motifs = find_motifs(sequence, "tat")

  # Calculate the GC content of the sequence.
  gc_content = calculate_gc_content(sequence)

  # Identify the coding regions of the sequence.
  coding_regions = identify_coding_regions(sequence)

  # Print the results to the console.
  print("Motifs found:", motifs)
  print("GC content:", gc_content)
  print("Coding regions:", coding_regions)

if __name__ == "__main__":
  main()


Enter a DNA sequence: attcaattccaaagcgcttcaatacttttaaatctcagctagtgagggactgaagcctcc aagtctgcattatgttctttgaaattaacgttctcctcgg
Motifs found: [71]
GC content: 0.39603960396039606
Coding regions: [(0, 3), (3, 6), (6, 9), (9, 12), (12, 15), (15, 18), (18, 21), (21, 24), (24, 27), (27, 30), (30, 33), (33, 36), (36, 39), (45, 48), (48, 51), (51, 54), (54, 57), (57, 60), (60, 63), (63, 66), (66, 69), (69, 72), (72, 75), (75, 78), (78, 81), (81, 84), (84, 87), (87, 90), (90, 93), (93, 96), (96, 99), (99, 102)]
