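# Ibis MapD Backend - Aggregation Tests

Compares aggregations executed through the Ibis MapD backend with the same aggregations computed by pandas on the `functional_alltypes` table.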

In [1]:
import os

import ibis
import numpy as np
# Interactive mode is switched off; the cells below run expressions through
# explicit ``.execute()`` calls.
ibis.options.interactive = False

In [2]:
# Connection settings are read from the environment so that credentials do
# not have to be committed with the notebook. Each fallback is the value the
# notebook previously hard-coded, so an unconfigured environment connects
# exactly as before.
mapd_cli = ibis.mapd.connect(
    host=os.environ.get('IBIS_TEST_MAPD_HOST', 'localhost'),
    user=os.environ.get('IBIS_TEST_MAPD_USER', 'mapd'),
    password=os.environ.get('IBIS_TEST_MAPD_PASSWORD', 'HyperInteractive'),
    # Environment values are strings; the port is passed on as an int.
    port=int(os.environ.get('IBIS_TEST_MAPD_PORT', 9091)),
    database=os.environ.get('IBIS_TEST_DATA_DB', 'ibis_testing'),
)

In [3]:
def param(ibis_fn, pandas_fn, id, skip=False):
    """Bundle one test case as an ``(ibis_fn, pandas_fn, id)`` triple.

    Parameters
    ----------
    ibis_fn, pandas_fn
        The two callables that make up the case; returned unchanged.
    id
        Label for the case; returned unchanged.
    skip : bool, default False
        When truthy, a triple of ``None`` values is returned instead of the
        case, which marks it as disabled.

    Returns
    -------
    tuple
        ``(ibis_fn, pandas_fn, id)``, or ``(None, None, None)`` when skipped.
    """
    case = (ibis_fn, pandas_fn, id)
    disabled = (None, None, None)
    return disabled if skip else case

In [4]:
# Aggregation test cases. Each entry is built by ``param`` from:
#   * a function of ``(t, where)`` that the runner calls with the Ibis table,
#   * a function of ``(t, where)`` that the runner calls with the DataFrame,
#   * an ``id`` label printed next to the outcome.
# Entries created with ``skip=True`` come back from ``param`` as
# ``(None, None, None)``.
tests = (
    # NOTE(review): this is the only case that uses ``where``; every other
    # lambda below accepts the argument but ignores it.
    param(
        lambda t, where: t.bool_col.count(where=where),
        lambda t, where: len(t.bool_col[where].dropna()),
        id='bool_col_count'
    ),
    # Disabled via skip=True.
    param(
        lambda t, where: t.bool_col.any(),
        lambda t, where: t.bool_col.any(),
        id='bool_col_any', skip=True
    ),
    param(
        lambda t, where: t.bool_col.notany(),
        lambda t, where: ~t.bool_col.any(),
        id='bool_col_notany'
    ),
    # Unary minus on the Ibis side is paired with ``~`` on the pandas side.
    param(
        lambda t, where: -t.bool_col.any(),
        lambda t, where: ~t.bool_col.any(),
        id='bool_col_any_negate'
    ),
    param(
        lambda t, where: t.bool_col.all(),
        lambda t, where: t.bool_col.all(),
        id='bool_col_all'
    ),
    param(
        lambda t, where: t.bool_col.notall(),
        lambda t, where: ~t.bool_col.all(),
        id='bool_col_notall'
    ),
    # Unary minus on the Ibis side is paired with ``~`` on the pandas side.
    param(
        lambda t, where: -t.bool_col.all(),
        lambda t, where: ~t.bool_col.all(),
        id='bool_col_all_negate'
    ),
    param(
        lambda t, where: t.double_col.sum(),
        lambda t, where: t.double_col.sum(),
        id='double_col_sum',
    ),
    param(
        lambda t, where: t.double_col.mean(),
        lambda t, where: t.double_col.mean(),
        id='double_col_mean',
    ),
    param(
        lambda t, where: t.double_col.min(),
        lambda t, where: t.double_col.min(),
        id='double_col_min',
    ),
    param(
        lambda t, where: t.double_col.max(),
        lambda t, where: t.double_col.max(),
        id='double_col_max',
    ),
    # Disabled via skip=True.
    param(
        lambda t, where: t.double_col.approx_median(),
        lambda t, where: t.double_col.median(),
        id='double_col_approx_median', skip=True
    ),
    # Ibis how='sample' is paired with pandas ddof=1, how='pop' with ddof=0.
    param(
        lambda t, where: t.double_col.std(how='sample'),
        lambda t, where: t.double_col.std(ddof=1),
        id='double_col_std'
    ),
    param(
        lambda t, where: t.double_col.var(how='sample'),
        lambda t, where: t.double_col.var(ddof=1),
        id='double_col_var'
    ),
    param(
        lambda t, where: t.double_col.std(how='pop'),
        lambda t, where: t.double_col.std(ddof=0),
        id='double_col_std_pop'
    ),
    param(
        lambda t, where: t.double_col.var(how='pop'),
        lambda t, where: t.double_col.var(ddof=0),
        id='double_col_var_pop'
    ),
    param(
        lambda t, where: t.string_col.approx_nunique(),
        lambda t, where: t.string_col.nunique(),
        id='string_col_approx_nunique'
    ),
    # The expected value of this case is a str, not a number.
    param(
        lambda t, where: t.string_col.group_concat(','),
        lambda t, where: ','.join(t.string_col),
        id='string_col_group_concat'
    ),
    # NOTE(review): the two ids below say ``string_col`` but both lambdas
    # read ``double_col`` - confirm which one is intended.
    param(
        lambda t, where: t.double_col.arbitrary(how='first'),
        lambda t, where: t.double_col.iloc[0],
        id='string_col_arbitrary_first'
    ),
    param(
        lambda t, where: t.double_col.arbitrary(how='last'),
        lambda t, where: t.double_col.iloc[-1],
        id='string_col_arbitrary_last'
    )
)

In [5]:
# ``functional_alltypes`` looked up on the connection's database object; the
# runner passes this to the Ibis-side lambdas.
alltypes = mapd_cli.database().functional_alltypes
# Executed once up front; the runner passes the result to the pandas-side
# lambdas (presumably a pandas DataFrame - confirm against the backend).
df = alltypes.execute()

def ibis_cond(t):
    """Return ``t.string_col.isin(['1', '7'])`` for the Ibis-side table."""
    wanted = ['1', '7']
    return t.string_col.isin(wanted)


def pandas_cond(t):
    """Return ``t.string_col.isin(['1', '7'])`` for the pandas-side frame."""
    wanted = ['1', '7']
    return t.string_col.isin(wanted)

In [6]:
# Run every enabled case: execute the Ibis expression on the backend, compute
# the expected value from the DataFrame, and print one outcome per case.
for result_fn, expected_fn, id_test in tests:
    if result_fn is None:
        # ``param(..., skip=True)`` yields a triple of None values.
        continue
    print('{} ... '.format(id_test), end='')
    expr = result_fn(alltypes, ibis_cond(alltypes))
    try:
        result = expr.execute()
    except ibis.common.UnsupportedOperationError:
        # Reported under its own label instead of as a comparison failure.
        print('UnsupportedOperationError')
        continue

    expected = expected_fn(df, pandas_cond(df))
    try:
        if isinstance(expected, str):
            # assert_allclose only handles numeric data; a text expectation
            # would raise TypeError and abort the loop, so compare exactly.
            np.testing.assert_equal(result, expected)
        else:
            np.testing.assert_allclose(result, expected)
        print('OK!')
    except AssertionError as err:
        print('FAIL!')
        print(err)

bool_col_count ... OK!
bool_col_notany ... OK!
bool_col_any_negate ... OK!
bool_col_all ... OK!
bool_col_notall ... OK!
bool_col_all_negate ... OK!
double_col_sum ... OK!
double_col_mean ... OK!
double_col_min ... OK!
double_col_max ... OK!
double_col_std ... FAIL!

Not equal to tolerance rtol=1e-07, atol=0

(mismatch 100.0%)
 x: array(29.010441)
 y: array(29.012029)
double_col_var ... FAIL!

Not equal to tolerance rtol=1e-07, atol=0

(mismatch 100.0%)
 x: array(841.605713)
 y: array(841.697801)
double_col_std_pop ... FAIL!

Not equal to tolerance rtol=1e-07, atol=0

(mismatch 100.0%)
 x: array(29.008453)
 y: array(29.010041)
double_col_var_pop ... FAIL!

Not equal to tolerance rtol=1e-07, atol=0

(mismatch 100.0%)
 x: array(841.490417)
 y: array(841.5825)
string_col_approx_nunique ... UnsupportedOperationError
string_col_group_concat ... UnsupportedOperationError
string_col_arbitrary_first ... UnsupportedOperationError
string_col_arbitrary_last ... UnsupportedOperationError
