In [8]:
# Installation
!pip install pytest ipytest

Collecting ipytest
  Downloading ipytest-0.14.2-py3-none-any.whl.metadata (17 kB)
Downloading ipytest-0.14.2-py3-none-any.whl (18 kB)
Installing collected packages: ipytest
Successfully installed ipytest-0.14.2


In [10]:
# Setting up
import ipytest
ipytest.autoconfig()

In [12]:
def reverse_list(l):
    """
    Return a copy of l with its elements in the opposite order.

    The argument itself is not modified; slicing builds a new sequence.
    """
    backwards = slice(None, None, -1)  # spelled-out form of the [::-1] shorthand
    return l[backwards]


def test_reverse_list():
    """reverse_list must flip a five-element list end to end."""
    forward = [1, 2, 3, 4, 5]
    backward = [5, 4, 3, 2, 1]
    assert reverse_list(forward) == backward

# ------------------------------------------------------------------------------

def reverse_string(s):
    """
    Return the characters of string s in reverse order.
    """
    flipped = s[::-1]  # a step of -1 walks the string from back to front
    return flipped


def test_reverse_string():
    """reverse_string must return the input spelled backwards."""
    word = "foobar"
    assert reverse_string(word) == "raboof"
    
# ------------------------------------------------------------------------------

def is_english_vowel(c):
    """
    Returns True if c is an english vowel
    and False otherwise.

    The vowels are a, e, i, o, u and y; upper and lower case are
    treated alike. Anything that is not exactly one character (the
    empty string, or a longer string such as "ae") is not a vowel.
    """
    # A bare `c.lower() in "aeiouy"` is a substring test, so "" and "ae"
    # would wrongly count as vowels; require a single character first.
    return len(c) == 1 and c.lower() in "aeiouy"


def test_is_english_vowel():
    """Every vowel is accepted in either case; other characters are rejected."""
    for vowel in "aeiouyAEIOUY":
        assert is_english_vowel(vowel)
    for other in "kz?":
        assert not is_english_vowel(other)

# ------------------------------------------------------------------------------
def count_num_vowels(s):
    """
    Count the vowels (a, e, i, o, u, y) in string s, ignoring case.
    """
    total = 0
    # Lowercase once up front so only the lowercase vowels need checking.
    for char in s.lower():
        if char in "aeiouy":
            total += 1
    return total


def test_count_num_vowels():
    """Vowel counts ignore case and work on multi-line text."""
    for sentence in ("hey ho let's go", "HEY ho let's GO"):
        assert count_num_vowels(sentence) == 5
    lyrics = """She told me her name was Billie Jean,
                   as she caused a scene
                   Then every head turned with eyes
                   that dreamed of being the one
                   Who will dance on the floor in the round"""
    assert count_num_vowels(lyrics) == 54

# ------------------------------------------------------------------------------

def histogram(l):
    """
    Render a list of integers as a text histogram: one line per
    entry, each line made of that many '#' characters.
    """
    bars = []
    for num in l:
        bars.append('#' * num)
    # Newlines go between the bars only, so there is no trailing newline.
    return '\n'.join(bars)


def test_histogram():
    """Three values produce three newline-separated bars."""
    expected = '##\n#####\n#'
    assert histogram([2, 5, 1]) == expected

# ------------------------------------------------------------------------------

def get_word_lengths(s):
    """
    Return the length of every whitespace-separated word
    in string s, in order of appearance.
    """
    words = s.split()
    return list(map(len, words))


def test_get_word_lengths():
    """Each word of a seven-word sentence is measured in order."""
    expected = [5, 8, 3, 7, 4, 3, 6]
    sentence = "Three tomatoes are walking down the street"
    assert get_word_lengths(sentence) == expected

# ------------------------------------------------------------------------------

def find_longest_word(s):
    """
    Returns the longest word in string s.
    In case there are several, return the first.

    Words are separated by whitespace. If s holds no words at all
    (empty or whitespace only), the empty string is returned.
    """
    words = s.split()
    # max() keeps the first of several equally long words; `default` covers
    # the no-words case, which would otherwise raise ValueError.
    return max(words, key=len, default="")


def test_find_longest_word():
    """The longest word wins; among equally long words the first one wins."""
    cases = [
        ("Three tomatoes are walking down the street", "tomatoes"),
        ("foo foo1 foo2 foo3", "foo1"),
    ]
    for text, longest in cases:
        assert find_longest_word(text) == longest

# ------------------------------------------------------------------------------

def validate_dna(s):
    """
    Return True if every character of the DNA string is one of
    a, c, t or g (in either case), and False otherwise.
    """
    allowed = "actgACTG"
    for char in s:
        if char not in allowed:
            return False  # the first invalid character settles it
    return True


def test_validate_dna():
    """A mixed-case sequence is valid; a stray 'x' anywhere makes it invalid."""
    valid = 'CCGGAAGAGCTTACTTAGccggaagagcttacttag'
    assert validate_dna(valid)
    # 'x' is not a DNA base, whether it leads the sequence or sits inside it
    for broken in ('xCCGGAAGAGCTTACTTAGccggaagagcttacttag',
                   'CCxGGAAGAGCTTACTTAGccggaagagcttacttag'):
        assert not validate_dna(broken)

# ------------------------------------------------------------------------------

def base_pair(c):
    """
    Return the complementary base of c as a lowercase letter
    (a <-> t, c <-> g). Input case is ignored. Anything that is
    not one of these four bases yields 'unknown'.
    """
    partner_of = {'a': 't', 't': 'a', 'c': 'g', 'g': 'c'}
    key = c.lower()
    if key in partner_of:
        return partner_of[key]
    return 'unknown'


def test_base_pair():
    """Known bases map to their lowercase partner; everything else is 'unknown'."""
    expected = [
        ('a', 't'), ('t', 'a'), ('c', 'g'), ('g', 'c'),
        ('A', 't'), ('T', 'a'), ('C', 'g'), ('G', 'c'),
        ('x', 'unknown'), ('foo', 'unknown'),
    ]
    for base, partner in expected:
        assert base_pair(base) == partner

# ------------------------------------------------------------------------------

def transcribe_dna_to_rna(s):
    """
    Return s in uppercase with every T replaced by U.
    Uppercasing happens first, so a lowercase t becomes U as well.
    """
    upper_case = s.upper()
    return upper_case.replace('T', 'U')


def test_transcribe_dna_to_rna():
    """A mixed-case DNA string comes back uppercase with U in place of T."""
    rna = 'CCGGAAGAGCUUACUUAGCCGGAAGAGCUUACUUAG'
    assert transcribe_dna_to_rna('CCGGAAGAGCTTACTTAGccggaagagcttacttag') == rna

# ------------------------------------------------------------------------------

def get_complement(s):
    """
    Return the uppercase DNA complement of s
    (A -> T, T -> A, C -> G, G -> C).
    Characters that are not one of the four bases are uppercased
    and otherwise passed through unchanged.
    """
    partner = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    pieces = []
    for base in s:
        upper = base.upper()
        pieces.append(partner.get(upper, upper))
    return ''.join(pieces)


def test_get_complement():
    """Upper- and lowercase input give the same uppercase complement."""
    expected = 'GGCCTTCTCGAATGAATC'
    for sequence in ('CCGGAAGAGCTTACTTAG', 'ccggaagagcttacttag'):
        assert get_complement(sequence) == expected

# ------------------------------------------------------------------------------

def get_reverse_complement(s):
    """
    Return the reverse complement of s in uppercase: each base is
    swapped for its partner (A <-> T, C <-> G) and the resulting
    string is then read back to front. Characters that are not one of
    the four bases are uppercased and otherwise kept.
    """
    partner = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

    # Step 1: complement every base, keeping the original order
    pieces = []
    for base in s:
        upper = base.upper()
        pieces.append(partner.get(upper, upper))
    complement = ''.join(pieces)

    # Step 2: reverse the complemented string
    return ''.join(reversed(complement))


def test_get_reverse_complement():
    """Upper- and lowercase input give the same uppercase reverse complement."""
    expected = 'CTAAGTAAGCTCTTCCGG'
    for sequence in ('CCGGAAGAGCTTACTTAG', 'ccggaagagcttacttag'):
        assert get_reverse_complement(sequence) == expected

# ------------------------------------------------------------------------------

def remove_substring(substring, string):
    """
    Return a copy of string with every occurrence of substring cut out.
    The removal is a single str.replace call with an empty replacement.
    """
    cleaned = string.replace(substring, '')
    return cleaned


def test_remove_substring():
    """Removal works at the start, middle and end, and can empty the string."""
    dna = 'CCGGAAGAGCTTACTTAG'
    assert remove_substring('GAA', dna) == 'CCGGAGCTTACTTAG'
    assert remove_substring('CCG', dna) == 'GAAGAGCTTACTTAG'
    assert remove_substring('TAG', dna) == 'CCGGAAGAGCTTACT'
    assert remove_substring('GAA', 'GAAGAAGAA') == ''

# ------------------------------------------------------------------------------

def get_position_indices(triplet, dna):
    """
    Return the positions at which triplet (a 3-mer) occurs in dna.

    The sequence is read in consecutive, non-overlapping groups of
    three characters starting at the first character. Positions count
    these groups from 0, so the group at characters 3..5 has position 1.
    A trailing group of fewer than three characters is ignored.
    """
    matches = []
    # enumerate() numbers the group starts 0, 1, 2, ... which is exactly
    # the group position (start // 3).
    for position, start in enumerate(range(0, len(dna) - 2, 3)):
        if dna[start:start + 3] == triplet:
            matches.append(position)
    return matches


def test_get_position_indices():
    """Matches are reported as group positions, for one hit and for several."""
    cases = [
        ('CCGGAAGAGCTTACTTAG', [1]),
        ('CCGGAAGAGCTTACTTAGGAAGAA', [1, 6, 7]),
    ]
    for dna, positions in cases:
        assert get_position_indices('GAA', dna) == positions

# ------------------------------------------------------------------------------
from collections import Counter
def get_3mer_usage_chart(s):
    """
    Count every 3-mer found by sliding a window of width three
    over s one character at a time.

    Returns a list of (3-mer, occurrences) tuples sorted
    alphabetically by 3-mer. Strings shorter than three characters
    give an empty list.
    """
    counts = Counter()
    for start in range(len(s) - 2):
        counts[s[start:start + 3]] += 1
    # 3-mers are unique keys, so sorting the pairs sorts by 3-mer name
    return sorted(counts.items())

def test_get_3mer_usage_chart():
    """All 15 distinct 3-mers of the sample are counted and sorted by name."""
    expected = [
        ('AAG', 2),
        ('ACT', 1),
        ('AGA', 2),
        ('AGC', 1),
        ('AGG', 1),
        ('CCG', 1),
        ('CGG', 1),
        ('CTT', 2),
        ('GAA', 3),
        ('GAG', 1),
        ('GCT', 1),
        ('GGA', 2),
        ('TAC', 1),
        ('TAG', 1),
        ('TTA', 2),
    ]
    assert get_3mer_usage_chart('CCGGAAGAGCTTACTTAGGAAGAA') == expected

# ------------------------------------------------------------------------------

def read_column(file_name, column_number):
    """
    Reads column column_number from file file_name
    and returns the values as floats in a list.

    Columns are separated by whitespace and numbered from 1.
    Lines that have fewer than column_number columns (blank lines
    included) are skipped.

    Raises ValueError if column_number is smaller than 1, or if a
    selected field cannot be converted to float.
    """
    if column_number < 1:
        # Without this guard, 0 would become index -1 and silently pick
        # the last column of every line (or fail on a blank line).
        raise ValueError("column_number must be >= 1, got %r" % (column_number,))

    index = column_number - 1  # 1-based column number -> 0-based list index
    values = []
    with open(file_name, 'r') as f:
        for line in f:
            parts = line.split()  # split on any run of whitespace
            if len(parts) > index:  # skip lines that lack this column
                values.append(float(parts[index]))
    return values


def test_read_column():
    """read_column must pull the second column out of a three-column table."""

    import os
    import tempfile

    table = """1   0.1  0.001
2   0.2  0.002
3   0.3  0.003
4   0.4  0.004
5   0.5  0.005
6   0.6  0.006"""

    # mkstemp returns an already-open descriptor plus the file name; close the
    # descriptor right away so the file can be reopened by name below
    handle, path = tempfile.mkstemp()
    os.close(handle)

    try:
        # Put the table on disk, then read column 2 back through read_column
        with open(path, 'w') as out:
            out.write(table)

        assert read_column(path, 2) == [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]

    finally:
        # Remove the temporary file whether or not the assertion held
        os.unlink(path)

# ------------------------------------------------------------------------------
import collections
def character_statistics(file_name):
    """
    Read the text in file_name, lowercase it, and count every
    character for which isalpha() is true.

    Returns a tuple (x, y) where x is the most frequent and y the
    least frequent of the counted characters. When several characters
    share a frequency, the one seen first in the text is chosen.
    Returns None if the file contains no alphabetic character.
    """
    with open(file_name, 'r') as handle:
        lowered = handle.read().lower()

    # Tally letters only; digits, punctuation and whitespace are ignored
    tally = collections.Counter()
    for ch in lowered:
        if ch.isalpha():
            tally[ch] += 1

    if len(tally) == 0:
        return None

    def frequency(ch):
        return tally[ch]

    # Only characters that occurred are in the tally, so the minimum is
    # always at least 1
    return max(tally, key=frequency), min(tally, key=frequency)


def test_character_statistics():
    """In the sample soliloquy 'e' is the most and 'q' the least frequent letter."""

    import os
    import tempfile

    soliloquy = """
To be, or not to be: that is the question:
Whether 'tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles,
And by opposing end them? To die: to sleep;
No more; and by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to, 'tis a consummation
Devoutly to be wish'd. To die, to sleep;
To sleep: perchance to dream: ay, there's the rub;
For in that sleep of death what dreams may come
When we have shuffled off this mortal coil,
Must give us pause: there's the respect
That makes calamity of so long life;
For who would bear the whips and scorns of time,
The oppressor's wrong, the proud man's contumely,
The pangs of despised love, the law's delay,
The insolence of office and the spurns
That patient merit of the unworthy takes,
When he himself might his quietus make
With a bare bodkin? who would fardels bear,
To grunt and sweat under a weary life,
But that the dread of something after death,
The undiscover'd country from whose bourn
No traveller returns, puzzzles the will
And makes us rather bear those ills we have
Than fly to others that we know not of?
Thus conscience does make cowards of us all;
And thus the native hue of resolution
Is sicklied o'er with the pale cast of thought,
And enterprises of great pith and moment
With this regard their currents turn awry,
And lose the name of action.--Soft you now!
The fair Ophelia! Nymph, in thy orisons
Be all my sins remember'd."""

    # mkstemp returns an already-open descriptor plus the file name; close the
    # descriptor right away so the file can be reopened by name below
    handle, path = tempfile.mkstemp()
    os.close(handle)

    try:
        # Put the text on disk for character_statistics to read
        with open(path, 'w') as out:
            out.write(soliloquy)

        # Unpack first, so a non-tuple result fails here rather than in the assert
        most_abundant, least_abundant = character_statistics(path)

        assert (most_abundant, least_abundant) == ('e', 'q')

    finally:
        # Remove the temporary file whether or not the assertion held
        os.unlink(path)

# ------------------------------------------------------------------------------

def pythagorean_triples(n):
    """
    Returns list of all unique pythagorean triples
    (a, b, c) where a < b < c <= n.

    The triples are ordered by c, then by b. The smallest triple is
    (3, 4, 5), so the list is empty for n < 5.
    """
    from math import isqrt  # local import: exact integer square root

    triples = []
    for c in range(1, n + 1):
        for b in range(1, c):
            # For a given (b, c) at most one a satisfies a*a + b*b == c*c,
            # so compute it directly instead of trying every a < b.
            a_squared = c * c - b * b
            a = isqrt(a_squared)
            if 0 < a < b and a * a == a_squared:
                triples.append((a, b, c))
    return triples


# ------------------------------------------------------------------------------

def test_pythagorean_triples():
    """pythagorean_triples lists every triple with c <= n, ordered by c."""
    # No triple exists below (3, 4, 5)
    assert pythagorean_triples(0) == []
    assert pythagorean_triples(4) == []
    # The bound n is inclusive
    assert pythagorean_triples(5) == [(3, 4, 5)]
    # Multiples such as (6, 8, 10) are listed alongside the primitive triples
    assert pythagorean_triples(20) == [
        (3, 4, 5), (6, 8, 10), (5, 12, 13),
        (9, 12, 15), (8, 15, 17), (12, 16, 20)]

In [17]:
# Run Pytest
!pytest -vv Downloads/TDD_exercises_2021.py

platform win32 -- Python 3.12.7, pytest-7.4.4, pluggy-1.0.0 -- C:\Users\USER\anaconda3\python.exe
cachedir: .pytest_cache
rootdir: C:\Users\USER
plugins: anyio-4.2.0
[1mcollecting ... [0mcollected 18 items

Downloads/TDD_exercises_2021.py::test_reverse_list [32mPASSED[0m[32m                [  5%][0m
Downloads/TDD_exercises_2021.py::test_reverse_string [32mPASSED[0m[32m              [ 11%][0m
Downloads/TDD_exercises_2021.py::test_is_english_vowel [32mPASSED[0m[32m            [ 16%][0m
Downloads/TDD_exercises_2021.py::test_count_num_vowels [32mPASSED[0m[32m            [ 22%][0m
Downloads/TDD_exercises_2021.py::test_histogram [32mPASSED[0m[32m                   [ 27%][0m
Downloads/TDD_exercises_2021.py::test_get_word_lengths [32mPASSED[0m[32m            [ 33%][0m
Downloads/TDD_exercises_2021.py::test_find_longest_word [32mPASSED[0m[32m           [ 38%][0m
Downloads/TDD_exercises_2021.py::test_validate_dna [32mPASSED[0m[32m                [ 44%][0m
Downloa