In [2]:
# DNA Class Implementation
class DNA:
    """
    A validated DNA sequence with basic composition statistics.

    Instance attributes:
        sequence: the nucleotide string, stored in uppercase.
        valid_nucleotides: the set of bases accepted by validation.
    """

    # Canonical base order. Count dictionaries are built in this order so that
    # printed results are stable; iterating a set gives an arbitrary order.
    NUCLEOTIDE_ORDER = ('A', 'T', 'G', 'C')

    def __init__(self, sequence):
        """
        Initialize the DNA object with a given sequence.

        The sequence is converted to uppercase and then validated.

        Args:
            sequence: string of nucleotides (any letter case). An empty
                string is accepted and yields a zero-length sequence.

        Raises:
            ValueError: if the sequence contains a base other than A, T, G, C.
        """
        self.sequence = sequence.upper()  # normalise case before validating
        self.valid_nucleotides = set(self.NUCLEOTIDE_ORDER)  # allowed bases
        self.validate_sequence()

    def validate_sequence(self):
        """
        Check that the sequence contains only allowed nucleotides.

        Raises:
            ValueError: naming the first base that is not in
                ``valid_nucleotides``.
        """
        for base in self.sequence:
            if base not in self.valid_nucleotides:
                raise ValueError(f"Invalid nucleotide found: '{base}'. "
                                 f"DNA sequence can only contain A, T, G, and C.")

    def get_length(self):
        """
        Return the number of nucleotides in the sequence.
        """
        return len(self.sequence)

    def count_nucleotides(self):
        """
        Return a dictionary with the count of each allowed nucleotide.

        Keys appear in the canonical order A, T, G, C, and bases that do not
        occur in the sequence are reported with a count of 0.
        Example: {'A': 3, 'T': 2, 'G': 4, 'C': 1}
        """
        # Canonical bases first; anything extra that was added to
        # valid_nucleotides afterwards follows in sorted order.
        ordered_bases = [base for base in self.NUCLEOTIDE_ORDER
                         if base in self.valid_nucleotides]
        ordered_bases += sorted(self.valid_nucleotides.difference(ordered_bases))

        count_dict = {base: 0 for base in ordered_bases}
        for base in self.sequence:
            count_dict[base] += 1
        return count_dict

    def get_statistics(self):
        """
        Return basic statistics of the DNA sequence.

        Returns:
            dict with keys:
            - 'Length': number of nucleotides
            - 'GC_Content(%)': percentage of G and C bases, rounded to two
              decimals (0.0 for an empty sequence)
            - 'Counts': the per-nucleotide counts from count_nucleotides()
        """
        length = self.get_length()
        counts = self.count_nucleotides()
        if length == 0:
            # An empty sequence has no bases; avoid dividing by zero.
            gc_content = 0.0
        else:
            gc_content = ((counts['G'] + counts['C']) / length) * 100
        return {
            'Length': length,
            'GC_Content(%)': round(gc_content, 2),
            'Counts': counts
        }

    def __str__(self):
        """
        Return a human-readable description of the sequence and its length.
        """
        return f"DNA Sequence: {self.sequence} (Length: {self.get_length()})"


In [4]:
# ---- TEST BLOCK ----
if __name__ == "__main__":
    # Case 1: a well-formed uppercase sequence; exercise every accessor.
    try:
        dna1 = DNA("ATGCGATCGA")
        print(str(dna1))
        print(f"Length: {dna1.get_length()}")
        print(f"Nucleotide Counts: {dna1.count_nucleotides()}")
        print(f"Statistics: {dna1.get_statistics()}")
        print("")
    except ValueError as err:
        print(f"Error: {err}")

    # Case 2: lowercase input, which the constructor upper-cases.
    try:
        dna2 = DNA("atgcgtaa")
        print(str(dna2))
        print(f"Nucleotide Counts: {dna2.count_nucleotides()}")
        print(f"Statistics: {dna2.get_statistics()}")
        print("")
    except ValueError as err:
        print(f"Error: {err}")

    # Case 3: a sequence containing non-ATGC characters; construction is
    # expected to raise ValueError, which is reported here.
    try:
        dna3 = DNA("ATGBXTA")
    except ValueError as err:
        print(f"Error: {err}")
        print("")

    # Case 4: the empty string; only the string form and length are shown.
    try:
        dna4 = DNA("")
        print(str(dna4))
        print(f"Length: {dna4.get_length()}")
    except ValueError as err:
        print(f"Error: {err}")
        print("")

DNA Sequence: ATGCGATCGA (Length: 10)
Length: 10
Nucleotide Counts: {'G': 3, 'C': 2, 'A': 3, 'T': 2}
Statistics: {'Length': 10, 'GC_Content(%)': 50.0, 'Counts': {'G': 3, 'C': 2, 'A': 3, 'T': 2}}

DNA Sequence: ATGCGTAA (Length: 8)
Nucleotide Counts: {'G': 2, 'C': 1, 'A': 3, 'T': 2}
Statistics: {'Length': 8, 'GC_Content(%)': 37.5, 'Counts': {'G': 2, 'C': 1, 'A': 3, 'T': 2}}

Error: Invalid nucleotide found: 'B'. DNA sequence can only contain A, T, G, and C.

DNA Sequence:  (Length: 0)
Length: 0
