In [8]:
import sys
import os
from collections import Counter

def calculate_statistics(file_path):
    counts = Counter()
    total = 0
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip().upper()
            counts.update(line)
            total += len(line)
    valid_bases = 'ACGT'
    stats = {base: counts[base] for base in valid_bases}
    unknown_count = total - sum(stats.values())
    stats['Unknown'] = unknown_count
    stats['Total'] = total
    return stats

def print_statistics(stats, label):
    total = stats['Total']
    print(label)
    for base, count in stats.items():
        if base == 'Total':
            continue
        percentage = (count / total * 100) if total > 0 else 0
        print(f"{base}: {count} ({percentage:.1f}%)")
    print(f"Total: {total}\n")

def main(file_paths):
    all_counts = Counter()
    total_bases = 0
    for file_path in file_paths:
        stats = calculate_statistics(file_path)
        print_statistics(stats, file_path)
        for base, count in stats.items():
            if base != 'Total':
                all_counts[base] += count
        total_bases += stats['Total']
    if len(file_paths) > 1:
        all_counts['Total'] = total_bases
        print_statistics(all_counts, "All")


In [10]:
import pytest

def test_calculate_statistics():
    file_a_path = "a_seq.txt"
    file_b_path = "b_seq.txt"
    assert os.path.exists(file_a_path), f"File {file_a_path} is missing."
    assert os.path.exists(file_b_path), f"File {file_b_path} is missing."

    stats_a = calculate_statistics(file_a_path)
    stats_b = calculate_statistics(file_b_path)

    assert stats_a['Total'] == sum(stats_a[base] for base in ['A', 'C', 'G', 'T', 'Unknown'])
    assert stats_b['Total'] == sum(stats_b[base] for base in ['A', 'C', 'G', 'T', 'Unknown'])

    combined_stats = {base: stats_a.get(base, 0) + stats_b.get(base, 0) for base in ['A', 'C', 'G', 'T', 'Unknown']}
    combined_stats['Total'] = stats_a['Total'] + stats_b['Total']

    print("Results for a_seq.txt:")
    print_statistics(stats_a, file_a_path)
    print("Results for b_seq.txt:")
    print_statistics(stats_b, file_b_path)
    print("Combined Results:")
    print_statistics(combined_stats, "All")

if __name__ == "__main__":
    file_paths = ["a_seq.txt", "b_seq.txt"]
    main(file_paths)
    pytest.main(["-v"])


a_seq.txt
A: 2 (1.1%)
C: 57 (32.2%)
G: 33 (18.6%)
T: 7 (4.0%)
Unknown: 78 (44.1%)
Total: 177

b_seq.txt
A: 2 (1.1%)
C: 57 (32.2%)
G: 33 (18.6%)
T: 7 (4.0%)
Unknown: 78 (44.1%)
Total: 177

All
A: 4 (1.1%)
C: 114 (32.2%)
G: 66 (18.6%)
T: 14 (4.0%)
Unknown: 156 (44.1%)
Total: 354

platform win32 -- Python 3.12.3, pytest-7.4.4, pluggy-1.0.0 -- c:\Users\talsh\anaconda3\python.exe
cachedir: .pytest_cache
rootdir: c:\לימודים\wis-python_course_assignments\day5
plugins: anyio-4.2.0, typeguard-4.3.0
[1mcollecting ... [0mcollected 0 items

