In [2]:
from Bio import SeqIO


# ===================== IO =====================

def read_dna_sequence(filepath):
    """
    Load a FASTA file and hand back its first record.

    Every record in the file is parsed via ``SeqIO.parse(filepath, "fasta")``;
    only the first one is returned.

    Raises:
        ValueError: If parsing yields no records at all.
    """
    parsed = list(SeqIO.parse(filepath, "fasta"))
    if parsed:
        return parsed[0]
    raise ValueError("The file does not contain any DNA sequences.")


def display_sequence(record, line_length=60):
    """
    Print the record's sequence wrapped to fixed-width lines.

    A "DNA Sequence:" header is printed first, then the sequence text in
    consecutive slices of ``line_length`` characters (the last slice may be
    shorter), and finally the total length in bp.
    """
    bases = str(record.seq)
    total = len(bases)

    print("\nDNA Sequence:")
    # Slice the text at every multiple of line_length, in order.
    chunks = [
        bases[start:start + line_length]
        for start in range(0, total, line_length)
    ]
    for chunk in chunks:
        print(chunk)
    print(f"\nLength: {total} bp")


# ===================== BIO FUNCTIONS =====================

def get_complementary_strand(record):
    """
    Produce the complement of the record's sequence.

    Delegates to ``record.seq.complement()`` (not ``reverse_complement()``)
    and returns that result unchanged.
    """
    template = record.seq
    return template.complement()


def transcribe_to_rna(record):
    """
    Produce the RNA transcript of the record's DNA sequence (T becomes U).

    Delegates to ``record.seq.transcribe()`` and returns that result
    unchanged.
    """
    coding_strand = record.seq
    return coding_strand.transcribe()


def translate_to_protein(record):
    """
    Translates the record's DNA sequence to a protein sequence.

    Translation starts at the first base (reading frame 0) and stops at the
    first stop codon (``to_stop=True``). Bases left over after the last
    complete codon are dropped before translating.

    Args:
        record: Object whose ``seq`` attribute supports ``len()``, slicing
            and ``translate(to_stop=True)`` (e.g. a Biopython ``SeqRecord``).

    Returns:
        Whatever ``seq.translate(to_stop=True)`` returns for the trimmed
        sequence.
    """
    sequence = record.seq

    # A trailing partial codon cannot be translated. Biopython warns about it
    # ("Partial codon ... may become an error in future") and recommends
    # trimming explicitly, so do that here; the resulting protein is the same.
    leftover = len(sequence) % 3
    if leftover:
        sequence = sequence[:-leftover]

    return sequence.translate(to_stop=True)


def count_gc_at_bonds(record):
    """
    Tally the G/C bases and the A/T bases of the record's sequence.

    The sequence text is upper-cased first, so lower-case bases count too.
    The result is a dict with two keys: "G-C" (number of G plus C) and
    "A-T" (number of A plus T). These are base counts, not physical
    hydrogen bonds; any other symbol is ignored.
    """
    bases = str(record.seq).upper()

    def tally(letters):
        # Sum the occurrences of each listed letter in the sequence.
        return sum(bases.count(letter) for letter in letters)

    return {"G-C": tally("GC"), "A-T": tally("AT")}


# ===================== SPECIES SIMILARITY (MOCK) =====================

def get_top_species_similarity_mock():
    """
    Return a fixed, hard-coded list of (species name, similarity percent).

    This is placeholder data: nothing is computed from any sequence.
    """
    species_names = ("Homo sapiens", "Pan troglodytes", "Mus musculus")
    similarity_percents = (98.7, 96.4, 89.2)
    return list(zip(species_names, similarity_percents))


# ===================== MENU =====================

def print_menu():
    """
    Print the option menu, with one blank line before and after it.
    """
    menu_lines = (
        "========= BIOINFORMATICS MENU =========",
        "1. Read and display the DNA sequence",
        "2. Display the complementary strand",
        "3. Display the protein it encodes",
        "4. Display top 3 species by similarity (mock)",
        "5. Display the RNA sequence",
        "6. Count G-C and A-T base pairs",
        "0. Exit",
    )
    # Leading and trailing newline frame the menu with blank lines.
    print("\n" + "\n".join(menu_lines) + "\n")


def main():
    """
    Run the interactive menu loop.

    Asks for a FASTA path and loads its first record with
    ``read_dna_sequence``. If loading raises, the error is printed and the
    function returns without showing the menu. Otherwise the menu is shown
    repeatedly and the chosen action is run until the user enters "0".
    """
    filepath = input("Enter the path to the FASTA file: ").strip()

    try:
        record = read_dna_sequence(filepath)
    except Exception as e:
        print("Error:", e)
        return

    # Each action computes its result before printing its header, so a
    # failure in the computation leaves no dangling header on screen.
    def show_complement():
        strand = get_complementary_strand(record)
        print("\nComplementary Strand:")
        print(strand)

    def show_protein():
        peptide = translate_to_protein(record)
        print("\nProtein Sequence:")
        print(peptide)

    def show_similarity():
        print("\nTop 3 species by similarity (mock):")
        for species, percent in get_top_species_similarity_mock():
            print(f"{species}: {percent}%")

    def show_rna():
        transcript = transcribe_to_rna(record)
        print("\nRNA Sequence:")
        print(transcript)

    def show_counts():
        tallies = count_gc_at_bonds(record)
        print("\nBase pair counts:")
        print(f"G-C: {tallies['G-C']}")
        print(f"A-T: {tallies['A-T']}")

    # Menu key -> action; "0" (exit) is handled separately in the loop.
    actions = {
        "1": lambda: display_sequence(record),
        "2": show_complement,
        "3": show_protein,
        "4": show_similarity,
        "5": show_rna,
        "6": show_counts,
    }

    while True:
        print_menu()
        choice = input("Choose an option: ").strip()

        if choice == "0":
            print("Goodbye!")
            break

        action = actions.get(choice)
        if action is None:
            print("Invalid option. Try again.")
        else:
            action()


# Start the interactive menu only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



1. Read and display the DNA sequence
2. Display the complementary strand
3. Display the protein it encodes
4. Display top 3 species by similarity (mock)
5. Display the RNA sequence
6. Count G-C and A-T base pairs
0. Exit


Base pair counts:
G-C: 22
A-T: 17

1. Read and display the DNA sequence
2. Display the complementary strand
3. Display the protein it encodes
4. Display top 3 species by similarity (mock)
5. Display the RNA sequence
6. Count G-C and A-T base pairs
0. Exit

Goodbye!
