In [1]:
# pytest-like detailed asserts 
import ipytest
ipytest.config(rewrite_asserts=True, magics=True)

import math
from typing import List

import numpy as np
import pandas as pd

from hypothesis import given
from hypothesis.extra.pandas import data_frames, column
from hypothesis.strategies import floats, integers, one_of, just

from pandas.testing import assert_series_equal

  import pandas.util.testing as _pd_testing


In [2]:
# function to test

def percentile(data: pd.DataFrame, grouping_columns: List[str], value_column: str, q: float) -> pd.Series:
    """Compute the ``q``-quantile of ``value_column`` within each group.

    Args:
        data: Frame containing both the grouping columns and the value column.
        grouping_columns: Names of the columns to group by.
        value_column: Name of the column whose quantile is computed.
        q: Quantile forwarded to the grouped ``quantile`` call (e.g. 0.75).

    Returns:
        A Series with one quantile per group, indexed by the grouping keys.
        (The result is a Series, not a scalar float.)
    """
    return data.groupby(grouping_columns)[value_column].quantile(q)

In [3]:
# golden test
# use fixed values and manually precalculated result

def test_percentiles_golden():
    """Golden test: check ``percentile`` against hand-computed 75th percentiles."""
    data = pd.DataFrame([
        [1, "Dubai", 23],
        [2, "Dubai", 34.2],
        [3, "Dubai", 34.2],
        [4, "Dubai", 123],
        [6, "Dubai", 111],
        [7, "Dubai", 111],
        [8, "Abu-Dhabi", 12],
        [9, "Abu-Dhabi", 335],
        [10, "Abu-Dhabi", 34],
    ], columns=["trip_id", "area", "cost"])
    rez = percentile(data, ["area"], "cost", 0.75)
    # Hand-computed with linear interpolation between neighbouring values:
    #   Abu-Dhabi: sorted costs 12, 34, 335 -> 34 + 0.5 * (335 - 34) = 184.5
    #   Dubai:     sorted costs 23, 34.2, 34.2, 111, 111, 123 -> 111
    # Index.set_names() returns a new index instead of mutating in place, so
    # the names are set while building the Series. This lets the comparison
    # below verify the Series name and index name too.
    expected = pd.Series(
        {"Abu-Dhabi": 184.5, "Dubai": 111},
        name="cost",
    ).rename_axis("area")
    assert_series_equal(rez, expected)


test_percentiles_golden()

In [4]:
# Using the hypothesis library for property-based tests

def check_percentile_valid(values: pd.Series, percentile: float, q: float):
    """Assert that ``percentile`` is a plausible ``q``-quantile of ``values``.

    Args:
        values: Sample the quantile was computed from; must contain at least
            one non-missing entry.
        percentile: The quantile value under test.
        q: The requested quantile level (e.g. 0.75).

    Raises:
        AssertionError: If ``values`` has no non-missing entries, or if the
            check for the applicable case (see inline comments) fails.
    """
    n_present = values.count()
    assert n_present > 0

    if not values.is_unique:
        # With repeated values only a weak property is checked: the
        # percentile has to lie inside the observed range.
        assert percentile >= values.min()
        assert percentile <= values.max()
        return

    # All values distinct: the share of values strictly below the percentile
    # must match q to within one observation (1 / n).
    below = (values < percentile).sum()
    assert math.isclose(below / n_present, q, abs_tol=1 / n_present)
    
    
    
@given(
    data_frames(
        columns=[
            # Integer trip identifier, unique across the generated rows.
            column(
                name="trip_id",
                dtype=np.dtype(int),
                elements=integers(min_value=0, max_value=32000),
                unique=True,
            ),
            # Every row belongs to one of two fixed areas.
            column(
                name="area",
                elements=one_of(just("Dubai"), just("Abu-Dhabi")),
            ),
            # Finite 16-bit-wide floats; repeated costs are allowed here.
            # NOTE(review): the earlier comment described this as a
            # simplification to avoid cases like 1,1,1,1,1,1, which would need
            # unique=True -- confirm which behaviour is intended.
            column(
                name="cost",
                dtype=np.dtype(float),
                elements=floats(width=16, allow_infinity=False, allow_nan=False),
                unique=False,
            ),
        ]
    )
)
def test_percentiles(data):
    """Property test: the 0.75 quantile of every area passes ``check_percentile_valid``."""
    per_area_quantile = percentile(data, ["area"], "cost", 0.75)
    for area_name in data["area"].unique():
        in_area = data["area"] == area_name
        area_costs = data.loc[in_area, "cost"]
        check_percentile_valid(area_costs, per_area_quantile[area_name], 0.75)


test_percentiles()


In [5]:
# Most NumPy-based functions have mutual dimension compatibility constraints between arguments. 
# For instance, the np.dot function takes a (m,n) array and a (n,p) array. NumPy has developed the 
# notion of a function signature in its general universal (GU) function API. 
# For instance, the np.dot signature is ‘(m,n),(n,p)->(m,p)’.

# Hypothesis GU Func can take a function signature and define a strategy that generates test 
# cases compatible with the signature.

import numpy as np
from hypothesis import given
from hypothesis.strategies import floats

from hypothesis_gufunc.gufunc import gufunc_args

# Element strategy for both operands: bounded floats in [-10, 10].
easy_floats = floats(min_value=-10, max_value=10)


@given(gufunc_args(
    "(m,n),(n,p)->(m,p)",
    dtype=np.float64,  # np.float_ was an alias of float64 and was removed in NumPy 2.0
    elements=easy_floats))
def test_np_dot(args):
    """Property test: ``x @ y`` equals ``(y.T @ x.T).T`` for compatible shapes.

    ``args`` is the pair of arrays generated for the signature
    ``(m,n),(n,p)->(m,p)``.
    """
    x, y = args
    assert np.allclose(np.dot(x, y), np.dot(y.T, x.T).T)


test_np_dot()

In [None]:
# Done