# Ibis MapD Backend - String Operation Tests

In [1]:
import os

import numpy as np
import pandas as pd

import ibis
from ibis.tests.backends import MapD
# Turn ibis' interactive option off; every expression in this notebook is run
# explicitly through ``.execute()``.
ibis.options.interactive = False

In [2]:
# Connection settings come from the environment so credentials do not have to
# live in the notebook. Each one falls back to the value that used to be
# hard-coded here, so a default local MapD setup keeps working unchanged.
mapd_cli = ibis.mapd.connect(
    host=os.environ.get('IBIS_TEST_MAPD_HOST', 'localhost'),
    user=os.environ.get('IBIS_TEST_MAPD_USER', 'mapd'),
    password=os.environ.get('IBIS_TEST_MAPD_PASSWORD', 'HyperInteractive'),
    # Environment values are strings; the port was previously passed as an int.
    port=int(os.environ.get('IBIS_TEST_MAPD_PORT', 9091)),
    database=os.environ.get('IBIS_TEST_DATA_DB', 'ibis_testing'),
)

In [3]:
def param(ibis_fn, pandas_fn, id, skip=False):
    """Bundle one test case as an ``(ibis_fn, pandas_fn, id)`` triple.

    Parameters
    ----------
    ibis_fn : callable
        Builds the expression under test from a table.
    pandas_fn : callable
        Builds the expected result from the executed table.
    id : str
        Label printed for the case.
    skip : bool, default False
        When true, a triple of ``None`` is returned instead so the case can
        be recognised and ignored by whoever iterates over the cases.

    Returns
    -------
    tuple
        ``(ibis_fn, pandas_fn, id)``, or ``(None, None, None)`` if ``skip``.
    """
    case = (ibis_fn, pandas_fn, id)
    return (None,) * len(case) if skip else case

In [4]:
# String-operation test cases.
#
# Each entry is built with ``param`` and holds:
#   1. a function that builds the ibis expression from a table,
#   2. a function that builds the expected result with pandas string methods,
#   3. the label printed for the case.
#
# Both functions are lambdas, so nothing is evaluated until a case is run.
tests = (
    param(
        lambda t: t.string_col.contains('6'),
        lambda t: t.string_col.str.contains('6'),
        id='contains',
    ),
    param(
        lambda t: t.string_col.like('6%'),
        lambda t: t.string_col.str.contains('6.*'),
        id='like',
    ),
    param(
        lambda t: t.string_col.like('6^%'),
        lambda t: t.string_col.str.contains('6%'),
        id='complex_like_escape',
    ),
    param(
        lambda t: t.string_col.like('6^%%'),
        lambda t: t.string_col.str.contains('6%.*'),
        id='complex_like_escape_match',
    ),
    param(
        lambda t: t.string_col.re_search(r'[[:digit:]]+'),
        lambda t: t.string_col.str.contains(r'\d+'),
        id='re_search',
    ),
    param(
        lambda t: t.string_col.re_extract(r'([[:digit:]]+)', 0),
        lambda t: t.string_col.str.extract(r'(\d+)', expand=False),
        id='re_extract',
    ),
    param(
        lambda t: t.string_col.re_replace(r'[[:digit:]]+', 'a'),
        lambda t: t.string_col.str.replace(r'\d+', 'a'),
        id='re_replace',
    ),
    param(
        lambda t: t.string_col.repeat(2),
        lambda t: t.string_col * 2,
        id='repeat',
    ),
    param(
        lambda t: t.string_col.translate(u'0', u'a'),
        # ``str.maketrans`` is the builtin; a bare ``maketrans`` is not
        # defined anywhere in this notebook and would raise NameError.
        lambda t: t.string_col.str.translate(str.maketrans(u'0', u'a')),
        id='translate',
    ),
    param(
        lambda t: t.string_col.find('a'),
        lambda t: t.string_col.str.find('a'),
        id='find',
    ),
    param(
        lambda t: t.string_col.lpad(10, 'a'),
        lambda t: t.string_col.str.pad(10, fillchar='a', side='left'),
        id='lpad',
    ),
    param(
        lambda t: t.string_col.rpad(10, 'a'),
        lambda t: t.string_col.str.pad(10, fillchar='a', side='right'),
        id='rpad',
    ),
    param(
        lambda t: t.string_col.find_in_set(['1']),
        lambda t: t.string_col.str.find('1'),
        id='find_in_set',
    ),
    param(
        lambda t: t.string_col.find_in_set(['a']),
        lambda t: t.string_col.str.find('a'),
        id='find_in_set_all_missing',
    ),
    param(
        lambda t: t.string_col.lower(),
        lambda t: t.string_col.str.lower(),
        id='lower',
    ),
    param(
        lambda t: t.string_col.upper(),
        lambda t: t.string_col.str.upper(),
        id='upper',
    ),
    param(
        lambda t: t.string_col.reverse(),
        lambda t: t.string_col.str[::-1],
        id='reverse',
    ),
    param(
        lambda t: t.string_col.ascii_str(),
        lambda t: t.string_col.map(ord).astype('int32'),
        id='ascii_str',
    ),
    param(
        lambda t: t.string_col.length(),
        lambda t: t.string_col.str.len().astype('int32'),
        id='length',
    ),
    param(
        lambda t: t.string_col.strip(),
        lambda t: t.string_col.str.strip(),
        id='strip',
    ),
    param(
        lambda t: t.string_col.lstrip(),
        lambda t: t.string_col.str.lstrip(),
        id='lstrip',
    ),
    param(
        lambda t: t.string_col.rstrip(),
        lambda t: t.string_col.str.rstrip(),
        id='rstrip',
    ),
    param(
        lambda t: t.string_col.capitalize(),
        lambda t: t.string_col.str.capitalize(),
        id='capitalize',
    ),
    param(
        lambda t: t.date_string_col.substr(2, 3),
        lambda t: t.date_string_col.str[2:5],
        id='substr',
    ),
    param(
        lambda t: t.date_string_col.left(2),
        lambda t: t.date_string_col.str[:2],
        id='left',
    ),
    param(
        lambda t: t.date_string_col.right(2),
        lambda t: t.date_string_col.str[-2:],
        id='right',
    ),
    param(
        lambda t: t.date_string_col.split('/'),
        lambda t: t.date_string_col.str.split('/'),
        id='split',
    ),
    param(
        lambda t: ibis.literal('-').join(['a', t.string_col, 'c']),
        lambda t: 'a-' + t.string_col + '-c',
        id='join',
    ),
)

In [5]:
# Table expression for ``functional_alltypes`` in the database returned by the
# connection; this is what the ibis side of every case receives.
testing_db = mapd_cli.database()
alltypes = testing_db.functional_alltypes

# Executed copy of the same table (presumably a pandas DataFrame); this is
# what the pandas side of every case receives.
df = alltypes.execute()

# Helper object used below for ``default_series_rename`` and
# ``assert_series_equal``.
backend = MapD(None)

In [6]:
# Cases created with ``skip=True`` come back from ``param`` as
# (None, None, None); filter them out before running anything.
runnable = (case for case in tests if case[0] is not None)

for result_fn, expected_fn, id_test in runnable:
    # Print the label first, without a newline, so the status lands on the
    # same line once the case has finished.
    print('[II] {} ... '.format(id_test), end='')
    try:
        expr = result_fn(alltypes)
        result = expr.execute()
        expected = backend.default_series_rename(expected_fn(df))
        backend.assert_series_equal(result, expected)
    except ibis.common.UnsupportedOperationError:
        # Reported as its own status rather than as a failure.
        print('UnsupportedOperationError')
    except Exception as exc:
        # Any other error (including a failed comparison) is reported and the
        # run moves on to the next case.
        print('Fail!')
        print(exc)
    else:
        print('OK!')


[II] contains ... OK!
[II] like ... OK!
[II] complex_like_escape ... OK!
[II] complex_like_escape_match ... OK!
[II] re_search ... OK!
[II] re_extract ... UnsupportedOperationError
[II] re_replace ... UnsupportedOperationError
[II] repeat ... UnsupportedOperationError
[II] translate ... UnsupportedOperationError
[II] find ... OK!
[II] lpad ... UnsupportedOperationError
[II] rpad ... UnsupportedOperationError
[II] find_in_set ... UnsupportedOperationError
[II] find_in_set_all_missing ... UnsupportedOperationError
[II] lower ... UnsupportedOperationError
[II] upper ... UnsupportedOperationError
[II] reverse ... UnsupportedOperationError
[II] ascii_str ... UnsupportedOperationError
[II] length ... Fail!
Exception: Cast from dictionary-encoded string to none-encoded would be slow: SELECT CHAR_LENGTH("string_col") AS tmp
FROM ibis_testing.functional_alltypes
LIMIT 10000
[II] strip ... UnsupportedOperationError
[II] lstrip ... UnsupportedOperationError
[II] rstrip ... UnsupportedOperationErr