In [None]:
import os
from exercise_1 import parse_fasta, discard_ambiguous_seqs, nucleotide_frequencies, map_reads

def test_parse_fasta(tmp_path):
    """Check that parse_fasta returns headers and sequences in file order.

    Args:
        tmp_path: pytest's per-test temporary directory fixture.
    """
    # Spell the records out with explicit newlines. An indented triple-quoted
    # literal would put leading spaces in front of every line after the first
    # (including the ">seq2" header) and leave a whitespace-only last line,
    # which is not well-formed FASTA.
    fasta_content = ">seq1\nAGCTAGCTAG\n>seq2\nCGATCGATCG\n"
    fasta_path = tmp_path / "test.fasta"
    fasta_path.write_text(fasta_content)

    # parse_fasta is handed a plain string path rather than a Path object.
    headers, sequences = parse_fasta(str(fasta_path))

    # Headers are expected without the leading ">" marker, and both lists
    # are expected in the same order as the records in the file.
    assert headers == ["seq1", "seq2"]
    assert sequences == ["AGCTAGCTAG", "CGATCGATCG"]

def test_discard_ambiguous_seqs():
    """Check that the record whose sequence contains 'X' is filtered out.

    Three header/sequence pairs go in; the middle one ("seq2", "AGXT") is
    expected to be removed while the other two keep their relative order.
    """
    result = discard_ambiguous_seqs(
        ["seq1", "seq2", "seq3"],
        ["AGCT", "AGXT", "ACGT"],
    )

    # The function is expected to hand back exactly two parallel lists.
    kept_headers, kept_sequences = result

    assert kept_headers == ["seq1", "seq3"]
    assert kept_sequences == ["AGCT", "ACGT"]

def test_nucleotide_frequencies(capsys):
    """Check the per-nucleotide frequencies printed by nucleotide_frequencies.

    Args:
        capsys: pytest's fixture for capturing what is written to stdout.
    """
    # 16 nucleotides in total with A=5, G=4, C=4, T=3, i.e. fractions of
    # 5/16 = 0.3125, 4/16 = 0.25, 4/16 = 0.25 and 3/16 = 0.1875.
    # (The third sequence must be "AAGGATCC": with "AAGGTTCC" every base
    # would occur 4 times and all four frequencies would be 0.25.)
    sequences = ["AGCT", "AGCT", "AAGGATCC"]
    nucleotide_frequencies(sequences)

    captured = capsys.readouterr()
    # NOTE(review): these substrings assume each value is printed rounded to
    # two decimals in the form "<base> :  <value>" -- confirm against the
    # implementation in exercise_1.
    assert "A :  0.31" in captured.out
    assert "G :  0.25" in captured.out
    assert "C :  0.25" in captured.out
    assert "T :  0.19" in captured.out

def test_map_reads(tmp_path):
    """Check that map_reads reports every match of a query in each reference.

    Args:
        tmp_path: pytest's per-test temporary directory fixture.
    """
    # Build both FASTA files with explicit newlines. An indented triple-quoted
    # literal would prefix the sequence lines and the ">ref2" header with
    # spaces, which is not well-formed FASTA.
    query_content = ">query1\nAGCT\n"
    reference_content = ">ref1\nTTAGCTTTAGCT\n>ref2\nAGCTAGCTAGCT\n"

    query_path = tmp_path / "query.fasta"
    reference_path = tmp_path / "reference.fasta"
    query_path.write_text(query_content)
    reference_path.write_text(reference_content)

    # map_reads is handed plain string paths rather than Path objects.
    results = map_reads(str(query_path), str(reference_path))

    # The result is expected to be nested: query header -> reference header
    # -> list of match positions.
    assert "query1" in results
    assert "ref1" in results["query1"]
    assert "ref2" in results["query1"]

    # Positions are 1-based start offsets of "AGCT":
    #   TTAGCTTTAGCT -> starts at 3 and 9
    #   AGCTAGCTAGCT -> starts at 1, 5 and 9
    assert results["query1"]["ref1"] == [3, 9]
    assert results["query1"]["ref2"] == [1, 5, 9]