In [None]:
# test_convert_to_int.py

import pytest
from preprocessing_helpers import convert_to_int

def test_on_string_with_one_comma():
    test_argument = "2,081"
    expected = 2081
    actual = convert_to_int(test_argument)
    # Format the string with the actual return value
    message = "convert_to_int('2,081') should return the int 2081, but it actually returned {0}".format(actual)
    # Write the assert statement which prints message on failure
    assert actual == expected, message

If the return value is a float or an object containing float, you should use the pytest.approx() function in comparisons.<br><br>
assert 0.1 + 0.1 + 0.1 == pytest.approx(0.3)

In [None]:
# test_get_data_as_numpy_array.py

import numpy as np
import pytest
from as_numpy import get_data_as_numpy_array

def test_on_clean_file():
  expected = np.array([[2081.0, 314942.0],
                       [1059.0, 186606.0],
  					   [1148.0, 206186.0]])
                         
  actual = get_data_as_numpy_array("example_clean_data.txt", num_columns=2)
  message = "Expected return value: {0}, Actual return value: {1}".format(expected, actual)
  # Complete the assert statement
  assert actual == pytest.approx(expected), message

In [None]:
# test_split_into_training_and_testing_sets.py

def test_on_six_rows():
    example_argument = np.array([[2081.0, 314942.0], [1059.0, 186606.0],
                                 [1148.0, 206186.0], [1506.0, 248419.0],
                                 [1210.0, 214114.0], [1697.0, 277794.0]])
    # Fill in with training array's expected number of rows
    expected_training_array_num_rows = 4
    # Fill in with testing array's expected number of rows
    expected_testing_array_num_rows = 2
    actual = split_into_training_and_testing_sets(example_argument)
    # Write the assert statement checking training array's number of rows
    assert actual[0].shape[0] == expected_training_array_num_rows, f"The actual number of rows in the training array is not {expected_training_array_num_rows}"
    # Write the assert statement checking testing array's number of rows
    assert actual[1].shape[0] == expected_testing_array_num_rows, f"The actual number of rows in the testing array is not {expected_testing_array_num_rows}"

In [2]:
import row_to_list

def test_for_clean_row():
  assert row_to_list("2,081\t314,942\n") == ["2,081", "314,942"]

def test_for_missing_area():
  assert row_to_list("\t293,410\n") is None

In [None]:
def test_on_circular_data(self):
    theta = pi/4.0
    test_argument = np.array([[1.0, 0.0], 
                              [cos(theta), sin(theta)],[0.0, 1.0],
                              [cos(3 * theta), sin(3 * theta)],[-1.0, 0.0],
                              [cos(5 * theta), sin(5 * theta)],[0.0, -1.0],
                              [cos(7 * theta), sin(7 * theta)]])
    actual = model_test(test_argument, slope=0.0, intercept=0.0)
    assert actual == pytest.approx(0.0)

In [None]:
 with context_manager:
    # <--- Runs code on entering context
    print("This is part of the context")    # any code inside is the context
    # <--- Runs code on exiting context


 with pytest.raises(ValueError):
    # <--- Does nothing on entering the context
    print("This is part of the context")
    # <--- If context raised ValueError, the code IS CORRECT
    # <--- If the context did not raise ValueError, raise an exception.


 with pytest.raises(ValueError):
    raise ValueError    # context exits with ValueError
    # <--- pytest.raises(ValueError) silences it


 with pytest.raises(ValueError):
    pass    # context exits without raising a ValueError
    # <--- pytest.raises(ValueError) raises Failed


# Store the raised ValueError in the variable exc_info
with pytest.raises(ValueError) as exc_info:
    raise ValueError("Silence me!")


with pytest.raises(ValueError) as exc_info:
    raise ValueError("Silence me!")
# Check if the raised ValueError contains the correct message
assert exc_info.match("Silence me!")

In [None]:
 def test_valueerror_on_one_dimensional_argument():
    example_argument = np.array([2081, 314942, 1059, 186606, 1148, 206186])
    
    with pytest.raises(ValueError):
        split_into_training_and_testing_sets(example_argument)

In [None]:
 def test_valueerror_on_one_dimensional_argument():
    example_argument = np.array([2081, 314942, 1059, 186606, 1148, 206186])

    with pytest.raises(ValueError) as exception_info:    # store the exception
        split_into_training_and_testing_sets(example_argument)

    # Check if ValueError contains correct message
    assert exception_info.match("Argument data array must be two dimensional. ", "Got 1 dimensional array instead!")

In [None]:
import numpy as np
import pytest
from train import split_into_training_and_testing_sets

def test_on_one_row():
    test_argument = np.array([[1382.0, 390167.0]])

    # Store information about raised ValueError in exc_info
    with pytest.raises(ValueError) as exc_info:
      split_into_training_and_testing_sets(test_argument)
      
    expected_error_msg = "Argument data_array must have at least 2 rows, it actually has just 1"
    # Check if the raised ValueError contains the correct message
    assert exc_info.match(expected_error_msg)

In [None]:
import pytest
from preprocessing_helpers import row_to_list

def test_on_no_tab_no_missing_value():    # (0, 0) boundary value
    actual = row_to_list("123\n")
    assert actual is None, "Expected: None, Actual: {0}".format(actual)

    
def test_on_one_tab_with_missing_value():    # (1, 1) boundary value
    actual = row_to_list("\t4,567\n")
    assert actual is None, "Expected: None, Actual: {0}".format(actual)

In [None]:
import pytest
from preprocessing_helpers import row_to_list

def test_on_normal_argument_1():
    actual = row_to_list("123\t4,567\n")
    expected = ["123", "4,567"]
    assert actual == expected, "Expected: {0}, Actual: {1}".format(expected, actual)

In [None]:
'''
TDD
first write several test functions to test the proposed method in different ways
'''

def test_with_no_comma():
    actual = convert_to_int("756")
    assert actual == 756, "Expected: 756, Actual: {0}".format(actual)
    

def test_with_one_comma():
    actual = convert_to_int("2,081")
    assert actual == 2081, "Expected: 2081, Actual: {0}".format(actual)


def test_on_string_with_incorrectly_placed_comma():
    actual = convert_to_int("12,72,891")
    assert actual is None, "Expected: None, Actual: {0}".format(actual)
    

def test_on_float_valued_string():
    actual = convert_to_int("23,816.92")
    assert actual is None, "Expected: None, Actual: {0}".format(actual)

In [3]:
'''
then write the function that would pass all those tests
'''

def convert_to_int(integer_string_with_commas):
    comma_separated_parts = integer_string_with_commas.split(",")
    for i in range(len(comma_separated_parts)):
        if len(comma_separated_parts[i]) > 3:
            return None
        if i != 0 and len(comma_separated_parts[i]) != 3:
            return None
    integer_string_without_commas = "".join(comma_separated_parts)
    try:
        return int(integer_string_without_commas)
    except ValueError:
        return None

In [None]:
# test class is a container for a single unit's testing

from data.preprocessing_helpers import row_to_list, convert_to_int

class TestRowToList(object):  # Always put the argument object
                        
    def test_on_no_tab_no_missing_value(self):    # Always put the argument self
        pass

    def test_on_two_tabs_no_missing_value(self):  # Always put the argument self
        pass

# you can keep two test classes inside one file (similar to a python module)
class TestConvertToInt(object):

    def test_with_no_comma(self):
        pass

    def test_with_one_comma(self):
        pass
 

In [None]:
import pytest
import numpy as np
from models.train import split_into_training_and_testing_sets


class TestSplitIntoTrainingAndTestingSets(object):

    def test_on_one_row(self):
        test_argument = np.array([[1382.0, 390167.0]])

        with pytest.raises(ValueError) as exc_info:
            split_into_training_and_testing_sets(test_argument)
            
        expected_error_msg = "Argument data_array must have at least 2 rows, it actually has just 1"
        assert exc_info.match(expected_error_msg)

In [7]:
'''
How to run all tests in the test folder

a =>  cd into the test folder
b => run the command "pytest"
pytest will automatically find all the needed tests and run them
'''

'''
for a specific test class or test function

Node ID of a test class =>  <path to test module>::<test class name>
Node ID of a test function => <path to test module>::<test class name>::<unit test name>
'''
>> pytest data/test_preprocessing_helpers.py::TestRowToList
>> pytest data/test_preprocessing_helpers.py::TestRowToList::test_on_one_tab_with_missing_value

SyntaxError: invalid syntax (<ipython-input-7-d7abc0e1f0da>, line 15)

In [None]:
import numpy as np

def split_into_training_and_testing_sets(data_array):
    dim = data_array.ndim
    if dim != 2:
        raise ValueError("Argument data_array must be two dimensional. Got {0} dimensional array instead!".format(dim))

    num_rows = data_array.shape[0]
    if num_rows < 2:
        raise ValueError("Argument data_array must have at least 2 rows, it actually has just {0}".format(num_rows))

    num_training = int(0.75 * data_array.shape[0])
    permuted_indices = np.random.permutation(data_array.shape[0])

    return data_array[permuted_indices[:num_training], :], data_array[permuted_indices[num_training:], :]


>> pytest models/test_train.py::TestSplitIntoTrainingAndTestingSets

# how to run a specific test that was failing before
>> pytest models/test_train.py::TestSplitIntoTrainingAndTestingSets::test_on_six_rows

# the -k expression finds the closest named class/method available in current folder
>> pytest -k "SplitInto"

In [None]:
import pytest

class TestTrainModel(object):
    # this annotation will inform that this test is supposed to fail
    @pytest.mark.xfail
    def test_on_linear_data(self):
        pass

In [None]:
@pytest.mark.xfail(reason="Using TDD, model_test() has not yet been implemented")
class TestModelTest(object):
    def test_on_linear_data(self):
        pass
    
    def test_on_one_dimensional_array(self):
        pass

In [None]:
import sys

class TestGetDataAsNumpyArray(object):
    # Add a reason for skipping the test
    @pytest.mark.skipif(sys.version_info > (2, 7), reason="Works only on Python 2.7 or lower")
    def test_on_clean_file(self):
        pass

In [None]:
# showing the failure reseaon in test failed results
>>  pytest -r

# would only show the reason for skipped tests in the test result report
>> pytest -rs

Some functions in python need to process some file or need input that requires some pre-process<br><br>
to test these functions we need to : create the needed file (SetUp) -> do the testing -> delete the tested file (TearDown)<br><br>
the extra method for this testing is annotated with "@pytest.fixture" 


In [None]:
@pytest.fixture
def my_fixture():
    # Do setup here
    yield data    # Use yield instead of return
    # Do teardown here


def test_something(my_fixture):
    ...
    data = my_fixture
    ...

In [None]:
# real life example

@pytest.fixture
def raw_and_clean_data_file():
    #setup part
    raw_data_file_path = "raw.txt"
    clean_data_file_path = "clean.txt"
    with open(raw_data_file_path, "w") as f:
        f.write("1,801\t201,411\n"
                "1,767565,112\n"
                "2,002\t333,209\n"
                "1990\t782,911\n"
                "1,285\t389129\n")
    # part to give the needed file to the test function
    yield raw_data_file_path, clean_data_file_path
    # teardown part
    os.remove(raw_data_file_path)
    os.remove(clean_data_file_path)


def test_on_raw_data(raw_and_clean_data_file):
    # call the fixture method
    raw_path, clean_path = raw_and_clean_data_file
    # run the tested function
    preprocess(raw_path, clean_path)
    # do the test assertion
    with open(clean_data_file_path) as f:
        lines = f.readlines()
    first_line = lines[0]
    assert first_line == "1801\t201411\n"
    second_line = lines[1]
    assert second_line == "2002\t333209\n"

In [None]:
@pytest.fixture
def empty_file():
    # Assign the file path "empty.txt" to the variable
    file_path = "empty.txt"
    open(file_path, "w").close()
    # Yield the variable file_path
    yield file_path
    # Remove the file in the teardown
    os.remove(file_path)
    
    
def test_on_empty_file(self, empty_file):
    expected = np.empty((0, 2))
    actual = get_data_as_numpy_array(empty_file, 2)
    assert actual == pytest.approx(expected), "Expected: {0}, Actual: {1}".format(expected, actual)

In [None]:
'''
Mock Tests

when a function depends on other function to run properly, there might be a case when other method might have bugs
this will fail our test, while its NOT the fault of the function we want to test.
in such cases we make another function in testing to simulate the needed methods, to make sure they send us correct results
this is called Mocking
'''
# install the pytest-mock package
>> pip install pytest-mock
# import unittest.mock package from python standard library


# this is the mocked method
def convert_to_int_bug_free(comma_separated_integer_string):
    # Assign to the dictionary holding the correct return values
    return_values = {"1,801": 1801,
                     "201,411": 201411,
                     "2,002": 2002,
                     "333,209": 333209,
                     "1990": None,
                     "782,911": 782911,
                     "1,285": 1285,
                     "389129": None}
    # Return the correct result using the dictionary return_values
    return return_values[comma_separated_integer_string]


# Add the correct argument to use the mocking fixture (mocker) in this test
def test_on_raw_data(self, raw_and_clean_data_file, mocker):
    raw_path, clean_path = raw_and_clean_data_file
    # Replace the dependency with the bug-free mock
    convert_to_int_mock = mocker.patch("data.preprocessing_helpers.convert_to_int", side_effect=convert_to_int_bug_free)
    preprocess(raw_path, clean_path)
    # Check if preprocess() called the dependency correctly
    assert convert_to_int_mock.call_args_list == [call("1,801"), call("201,411"), call("2,002"), call("333,209"),
                                                  call("1990"),  call("782,911"), call("1,285"), call("389129")]
    # open the file and read it to make sure the results are correct
    with open(clean_path, "r") as f:
        lines = f.readlines()
    first_line = lines[0]
    assert first_line == "1801\\t201411\\n"
    second_line = lines[1]
    assert second_line == "2002\\t333209\\n" 