In [1]:
import ipytest
import pandas as pd
import pytest

# Configure ipytest for this notebook: turn on pytest's assertion rewriting
# (so a failing `assert` reports the values involved) and register ipytest's
# notebook magics.
ipytest.config(rewrite_asserts=True, magics=True)

# Set the notebook's filename.
# NOTE(review): ipytest presumably derives the module name shown in the test
# reports from this value -- confirm against the ipytest version in use.
__file__ = 'pytest_for_data_scientists.ipynb'

In [2]:
def column_difference(df, col1, col2):
    """Return the elementwise difference ``df[col1] - df[col2]``.

    The items in `col2` are subtracted from the items in `col1`. `df` is only
    read, never modified; the result is whatever pandas returns for the
    subtraction of the two selected columns.
    """
    minuend = df[col1]
    subtrahend = df[col2]
    return minuend - subtrahend

In [3]:
def test_column_difference():
    """column_difference should yield A - B for every row of a small frame."""
    frame = pd.DataFrame({"A": [1, 3], "B": [2, 4]})
    frame["A_minus_B"] = column_difference(frame, col1="A", col2="B")
    expected = pd.Series([-1, -1])
    assert all(frame["A_minus_B"] == expected)

In [4]:
# Run the tests defined so far: -s disables pytest's output capturing and
# -vvv raises the verbosity so each test is listed by name.
ipytest.run('-svvv')

platform darwin -- Python 3.7.4, pytest-4.6.2, py-1.8.0, pluggy-0.12.0 -- /usr/local/opt/python/bin/python3.7
cachedir: .pytest_cache
rootdir: /Users/danielsammons/software/ds-blog/pytest_for_data_scientists
collecting ... collected 1 item

pytest_for_data_scientists.py::test_column_difference <- <ipython-input-3-4bdfa87f1f37> PASSED



In [5]:
# A failing test: the expected values below do not match the real result
def test_column_difference():
    """Failing example used to show what pytest reports for a broken assert."""
    frame = pd.DataFrame({"A": [1, 3], "B": [2, 4]})
    frame["A_minus_B"] = column_difference(frame, col1="A", col2="B")
    wrong_expected = pd.Series([-2, -1])  # the actual differences are [-1, -1]
    assert all(frame["A_minus_B"] == wrong_expected)

In [6]:
# Re-run the suite (-s: no output capturing, -vvv: maximum verbosity); the
# redefined test_column_difference is expected to fail here.
ipytest.run('-svvv')

platform darwin -- Python 3.7.4, pytest-4.6.2, py-1.8.0, pluggy-0.12.0 -- /usr/local/opt/python/bin/python3.7
cachedir: .pytest_cache
rootdir: /Users/danielsammons/software/ds-blog/pytest_for_data_scientists
collecting ... collected 1 item

pytest_for_data_scientists.py::test_column_difference <- <ipython-input-5-686c9e9b1a37> FAILED

_______________________________________ test_column_difference _______________________________________

    def test_column_difference():
        test_df = pd.DataFrame([(1, 2), (3, 4)], columns=["A", "B"])
        test_df["A_minus_B"] = column_difference(test_df, col1="A", col2="B")
>       assert all(test_df["A_minus_B"] == pd.Series([-2, -1]))
E       assert False
E        +  where False = all(0   -1\n1   -1..., dtype: int64 == 0   -2\n1   -1\ndtype: int64
E           -0   -1\n
E           -1   -1\n
E           -Name: A_minus_B, dtype: int64
E           +0   -2\n
E           +1   -1\n
E           +dtype: int64
E           Full diff:
E           - 0   -

In [7]:
def test_column_difference():
    """Exercise column_difference on several scenarios, one assert per scenario."""
    # Common cases
    two_rows = pd.DataFrame({"A": [1, 3], "B": [2, 4]})
    two_rows["A_minus_B"] = column_difference(two_rows, col1="A", col2="B")
    expected = pd.Series([-1, -1])
    assert all(two_rows["A_minus_B"] == expected)

    three_rows = pd.DataFrame({"A": [5, 10, 0], "B": [3, 14, -8]})
    three_rows["A_minus_B"] = column_difference(three_rows, col1="A", col2="B")
    expected = pd.Series([2, -4, 8])
    assert all(three_rows["A_minus_B"] == expected)

    # Include a third column
    with_extra = pd.DataFrame({"A": [1, 3], "B": [2, 4], "C": [100, 200]})
    with_extra["A_minus_B"] = column_difference(with_extra, col1="A", col2="B")
    expected = pd.Series([-1, -1])
    assert all(with_extra["A_minus_B"] == expected)

    # Test column of zeros
    zero_b = pd.DataFrame({"A": [1, 3], "B": [0, 0]})
    zero_b["A_minus_B"] = column_difference(zero_b, col1="A", col2="B")
    expected = pd.Series([1, 3])
    assert all(zero_b["A_minus_B"] == expected)

    # Test empty dataframe (built exactly as before so dtypes/index are unchanged)
    no_rows = pd.DataFrame(columns=["A", "B"])
    no_rows["A_minus_B"] = column_difference(no_rows, col1="A", col2="B")
    expected = pd.Series([])
    assert all(no_rows["A_minus_B"] == expected)

In [8]:
# Re-run the suite with the expanded multi-scenario test
# (-s: no output capturing, -vvv: maximum verbosity).
ipytest.run('-svvv')

platform darwin -- Python 3.7.4, pytest-4.6.2, py-1.8.0, pluggy-0.12.0 -- /usr/local/opt/python/bin/python3.7
cachedir: .pytest_cache
rootdir: /Users/danielsammons/software/ds-blog/pytest_for_data_scientists
collecting ... collected 1 item

pytest_for_data_scientists.py::test_column_difference <- <ipython-input-7-34c172daba78> PASSED



In [9]:
# Each entry is one test case laid out as
# (rows used to build the DataFrame, column names, expected A - B values).
test_columns_difference_params = [
    # Test common cases
    ([(1, 2), (3, 4)], ["A", "B"], [-1, -1]),
    ([(5, 3), (10, 14), (0, -8)], ["A", "B"], [2, -4, 8]),
    # Include a third column
    ([(1, 2, 100), (3, 4, 200)], ["A", "B", "C"], [-1, -1]),
    # Test column of zeros
    ([(1, 0), (3, 0)], ["A", "B"], [1, 3]),
    # Test empty dataframe
    ([], ["A", "B"], []),
]


@pytest.mark.parametrize(
    "test_data, columns, expected_output", test_columns_difference_params
)
def test_column_difference_with_parametrize(test_data, columns, expected_output):
    """Check column_difference against one parametrized case.

    `test_data` and `columns` build the input frame; `expected_output` holds
    the values expected for column "A" minus column "B".
    """
    frame = pd.DataFrame(test_data, columns=columns)
    frame["A_minus_B"] = column_difference(frame, col1="A", col2="B")
    matches = frame["A_minus_B"] == pd.Series(expected_output)
    assert all(matches)

In [10]:
# Run everything currently defined: the multi-scenario test plus the
# parametrized test, which is collected as one item per parameter set
# (-s: no output capturing, -vvv: maximum verbosity).
ipytest.run('-svvv')

platform darwin -- Python 3.7.4, pytest-4.6.2, py-1.8.0, pluggy-0.12.0 -- /usr/local/opt/python/bin/python3.7
cachedir: .pytest_cache
rootdir: /Users/danielsammons/software/ds-blog/pytest_for_data_scientists
collecting ... collected 6 items

pytest_for_data_scientists.py::test_column_difference <- <ipython-input-7-34c172daba78> PASSED
pytest_for_data_scientists.py::test_column_difference_with_parametrize[test_data0-columns0-expected_output0] <- <ipython-input-9-9db7e9cc9f50> PASSED
pytest_for_data_scientists.py::test_column_difference_with_parametrize[test_data1-columns1-expected_output1] <- <ipython-input-9-9db7e9cc9f50> PASSED
pytest_for_data_scientists.py::test_column_difference_with_parametrize[test_data2-columns2-expected_output2] <- <ipython-input-9-9db7e9cc9f50> PASSED
pytest_for_data_scientists.py::test_column_difference_with_parametrize[test_data3-columns3-expected_output3] <- <ipython-input-9-9db7e9cc9f50> PASSED
pytest_for_data_scientists.py::test_column_difference_with_para