# qplib test suite

Some of the tests intentionally cause colored info, warning or error logs to be shown. These logs are the expected behaviour of the functions under the test conditions and do not indicate whether a test passed or failed. The relevant messages are instead those starting with "passed test..." or "failed test...".  
If any test fails, the script stops and does not save a test_report.

# imports

In [1]:
import pandas as pd
import numpy as np
import copy
import os
import shutil
import datetime
import qplib as qp
from qplib import log

working_directory = os.getcwd()



# types

In [2]:
def compare(result, expected):
    """Check a single type-conversion result against its expected value.

    Prints a "passed test..." line when ``result == expected``.

    Raises:
        Exception: with a "failed test..." message when the values differ.
    """
    if not (result == expected):
        raise Exception(f'failed test for type conversion to: {expected}')
    print(f'passed test for type conversion to: {expected}')

## error raising

In [3]:
# Check that every type-conversion function raises ValueError when it is given
# unparsable input together with errors='raise'.
type_funcs = [
    qp.na,
    qp.nk,
    qp.num,
    qp.yn,
    qp.date,
    qp.datetime,
    ]

for func in type_funcs:
    # For plain functions this is the name that str(func) shows between
    # '<function ' and ' at'.
    func_name = func.__qualname__
    try:
        func('abc', errors='raise')
    except ValueError:
        print(f'passed test for raising correct error for: qp.{func_name}("abc")')
    else:
        # No exception at all is a failure too; without this branch the test
        # would pass silently. Any exception other than ValueError still
        # propagates unchanged and stops the script.
        raise Exception(f'failed test for raising correct error for: qp.{func_name}("abc")')

passed test for raising correct error for: qp.qp_na("abc")
passed test for raising correct error for: qp.qp_nk("abc")
passed test for raising correct error for: qp.qp_num("abc")
passed test for raising correct error for: qp.qp_yn("abc")
passed test for raising correct error for: qp.qp_date("abc")
passed test for raising correct error for: qp.qp_datetime("abc")


## num

In [4]:
#numbers

# Unparsable input: one check per error-handling mode.
compare(qp.num('abc', errors='coerce', na=None), None)
compare(qp.num('abc', errors='ignore'), 'abc')
compare(qp.num('abc', errors='test'), 'test')

# Parsable input, first as strings and then as real numbers.
# Each entry is (input, expected result); the order fixes the order of the
# "passed test..." lines.
num_cases = [
    ('1', 1),
    ('1.0', 1.0),
    ('1.1', 1.1),
    ('0', 0),
    ('0.0', 0.0),
    ('0.1', 0.1),
    ('-1', -1),
    ('-1.0', -1.0),
    ('-1.1', -1.1),
    (1, 1),
    (1.0, 1.0),
    (1.1, 1.1),
    (0, 0),
    (0.0, 0.0),
    (0.1, 0.1),
    (-1, -1),
    (-1.0, -1.0),
    (-1.1, -1.1),
]
for num_input, num_expected in num_cases:
    compare(qp.num(num_input), num_expected)


passed test for type conversion to: None
passed test for type conversion to: abc
passed test for type conversion to: test
passed test for type conversion to: 1
passed test for type conversion to: 1.0
passed test for type conversion to: 1.1
passed test for type conversion to: 0
passed test for type conversion to: 0.0
passed test for type conversion to: 0.1
passed test for type conversion to: -1
passed test for type conversion to: -1.0
passed test for type conversion to: -1.1
passed test for type conversion to: 1
passed test for type conversion to: 1.0
passed test for type conversion to: 1.1
passed test for type conversion to: 0
passed test for type conversion to: 0.0
passed test for type conversion to: 0.1
passed test for type conversion to: -1
passed test for type conversion to: -1.0
passed test for type conversion to: -1.1


## date

In [5]:
#date

# Unparsable input: one check per error-handling mode.
compare(qp.date('abc', errors='coerce', na=None), None)
compare(qp.date('abc', errors='ignore'), 'abc')
compare(qp.date('abc', errors='test'), 'test')

# Every spelling below is expected to be read as the 1st of January 2020.
first_of_january_texts = [
    '2020-01-01',
    '2020-01-01 00:00:00',
    '2020.01.01',
    '2020/01/01',
    '2020 01 01',
    '20200101',
    '2020 Jan 01',
    '2020 January 01',
    '2020 Jan 1',
    '2020 January 1',
    'Jan 01 2020',
    'January 01 2020',
    'Jan 1 2020',
    'January 1 2020',
    '01 Jan 2020',
    '01 January 2020',
    '1 Jan 2020',
    '1 January 2020',
    '01-01-2020',
    '01.01.2020',
    '01/01/2020',
    '01 01 2020',
]
for date_text in first_of_january_texts:
    compare(qp.date(date_text), datetime.date(2020, 1, 1))

# Day and month differ here, so these pin the expected field order:
# day first for the two-digit-year forms, year-month-day for the ISO-like forms.
second_of_january_texts = [
    '02-01-20',
    '02.01.20',
    '02/01/20',
    '02 01 20',
    '2020-01-02',
    '2020.01.02',
    '2020/01/02',
    '2020 01 02',
]
for date_text in second_of_january_texts:
    compare(qp.date(date_text), datetime.date(2020, 1, 2))

passed test for type conversion to: None
passed test for type conversion to: abc
passed test for type conversion to: test
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion 

## datetime

In [6]:
#datetime

# Unparsable input: one check per error-handling mode.
compare(qp.datetime('abc', errors='coerce', na=None), None)
compare(qp.datetime('abc', errors='ignore'), 'abc')
compare(qp.datetime('abc', errors='test'), 'test')

# ISO-style strings with and without a time part: (input, expected result).
iso_datetime_cases = [
    ('2020-01-01', datetime.datetime(2020, 1, 1)),
    ('2020-01-01 00:00:00', datetime.datetime(2020, 1, 1)),
    ('2020-01-01 00:00:01', datetime.datetime(2020, 1, 1, 0, 0, 1)),
    ('2020-01-01 00:01:00', datetime.datetime(2020, 1, 1, 0, 1, 0)),
    ('2020-01-01 01:00:00', datetime.datetime(2020, 1, 1, 1, 0, 0)),
    ('2020-01-01 01:01:01', datetime.datetime(2020, 1, 1, 1, 1, 1)),
]
for datetime_text, datetime_expected in iso_datetime_cases:
    compare(qp.datetime(datetime_text), datetime_expected)

# Date-only spellings expected to be read as midnight on the 1st of January 2020.
# NOTE(review): the qp.date cell also checks '01 01 2020'; there is no such
# case here - confirm whether that omission is intentional.
midnight_first_of_january_texts = [
    '2020.01.01',
    '2020/01/01',
    '2020 01 01',
    '20200101',
    '2020 Jan 01',
    '2020 January 01',
    '2020 Jan 1',
    '2020 January 1',
    'Jan 01 2020',
    'January 01 2020',
    'Jan 1 2020',
    'January 1 2020',
    '01 Jan 2020',
    '01 January 2020',
    '1 Jan 2020',
    '1 January 2020',
    '01-01-2020',
    '01.01.2020',
    '01/01/2020',
]
for datetime_text in midnight_first_of_january_texts:
    compare(qp.datetime(datetime_text), datetime.datetime(2020, 1, 1))

# Day and month differ here, so these pin the expected field order.
midnight_second_of_january_texts = [
    '02-01-20',
    '02.01.20',
    '02/01/20',
    '02 01 20',
    '2020-01-02',
    '2020.01.02',
    '2020/01/02',
    '2020 01 02',
]
for datetime_text in midnight_second_of_january_texts:
    compare(qp.datetime(datetime_text), datetime.datetime(2020, 1, 2))


passed test for type conversion to: None
passed test for type conversion to: abc
passed test for type conversion to: test
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:01
passed test for type conversion to: 2020-01-01 00:01:00
passed test for type conversion to: 2020-01-01 01:00:00
passed test for type conversion to: 2020-01-01 01:01:01
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 20

## na

In [7]:
#not available

# Input that is not an "NA" marker: one check per error-handling mode,
# followed by a check of the custom replacement value passed via na=.
compare(qp.na(1, errors='coerce'), None)
compare(qp.na(1, errors='ignore'), 1)
compare(qp.na(1, errors='test'), 'test')
compare(qp.na('na', na='test'), 'test')

# Spellings that are all expected to be converted to None.
na_spellings = [
    'na', 'nA', 'Na', 'NA',
    'n.a.', 'n.a', 'N.A', 'N.A.', 'n/a', 'N/A',
    'nan', 'NaN', 'NAN', 'Nan', 'nAn', 'nAN', 'naN',
    'none', 'None', 'NONE',
    'null', 'Null', 'NULL',
    'nil', 'Nil', 'NIL',
    'not available', 'Not Available', 'NOT AVAILABLE',
    'not a number', 'Not A Number', 'NOT A NUMBER',
]
for na_text in na_spellings:
    compare(qp.na(na_text), None)


passed test for type conversion to: None
passed test for type conversion to: 1
passed test for type conversion to: test
passed test for type conversion to: test
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for typ

## nk

In [8]:
#not known

# Input that is not a "not known" marker: one check per error-handling mode,
# followed by a check of the custom replacement value passed via nk=.
compare(qp.nk(1, errors='coerce'), None)
compare(qp.nk(1, errors='ignore'), 1)
compare(qp.nk(1, errors='test'), 'test')
compare(qp.nk('nk', nk='test'), 'test')

# Spellings that are all expected to be converted to 'unknown'.
nk_spellings = [
    'nk', 'nK', 'Nk', 'NK',
    'n.k.', 'n.k', 'N.K', 'N.K.', 'n/k', 'N/K',
    'not known', 'Not Known', 'NOT KNOWN',
    'not known.', 'Not Known.', 'NOT KNOWN.',
    'unknown', 'Unknown', 'UNKNOWN',
    'not specified', 'Not Specified', 'NOT SPECIFIED',
    'not specified.', 'Not Specified.', 'NOT SPECIFIED.',
]
for nk_text in nk_spellings:
    compare(qp.nk(nk_text), 'unknown')


passed test for type conversion to: None
passed test for type conversion to: 1
passed test for type conversion to: test
passed test for type conversion to: test
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
pas

## yn

In [9]:
#yes no

# Unparsable input: one check per error-handling mode.
compare(qp.yn('abc', errors='coerce'), None)
compare(qp.yn('abc', errors='ignore'), 'abc')
compare(qp.yn('abc', errors='test'), 'test')

# Each entry is (inputs, expected result); groups and inputs are checked in
# the order written, which fixes the order of the "passed test..." lines.
yn_groups = [
    (('yes', 'Yes', 'YES'), 'yes'),
    (('no', 'No', 'NO'), 'no'),
    (('y', 'Y'), 'yes'),
    (('n', 'N'), 'no'),
    (('1',), 'yes'),
    (('0',), 'no'),
    ((1,), 'yes'),
    ((0,), 'no'),
    (('true', 'True', 'TRUE'), 'yes'),
    (('false', 'False', 'FALSE'), 'no'),
    (('pos', 'Pos', 'POS'), 'yes'),
    (('neg', 'Neg', 'NEG'), 'no'),
    (('positive', 'Positive', 'POSITIVE'), 'yes'),
    (('negative', 'Negative', 'NEGATIVE'), 'no'),
]
for yn_inputs, yn_expected in yn_groups:
    for yn_input in yn_inputs:
        compare(qp.yn(yn_input), yn_expected)


passed test for type conversion to: None
passed test for type conversion to: abc
passed test for type conversion to: test
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: yes
passed t

# pd.Index.q()

In [10]:

def compare(result, expected_index):
    """Check the index returned by a ``.q()`` call against the expected index.

    On a match a "passed test..." line naming the recorded query
    (``result.qp._input``) is printed. On a mismatch the query, the result and
    the expected index are shown before the test is aborted.

    Raises:
        Exception: with a "failed test..." message when ``result`` does not
            equal ``expected_index``.
    """
    if result.equals(expected_index):
        print(f'passed test for arg: "{result.qp._input}"')
        return

    failure_message = f'failed test for arg: "{result.qp._input}"'
    print(failure_message)
    print('result:')
    display(result)
    print('expected:')
    display(expected_index)
    raise Exception(failure_message)



      
#number tests
# Index holding the integers 0, 1 and 2, built from the values of a small Series.
# Expected results below are selected from it by position.
nums = pd.Series({
    0: 0,
    1: 1,
    2: 2,
    })
nums = pd.Index(nums.values)


#return all
# no query and an empty query are both expected to return every element
compare(nums.q(verbosity=0), nums[[0,1,2]])
compare(nums.q('', verbosity=0), nums[[0,1,2]])

#type
# every element is an int: 'is num' and 'is int' are expected to select all of
# them, 'is float' and 'is na' none; a leading '!' is expected to invert the selection
compare(nums.q('is num', verbosity=0), nums[[0,1,2]])
compare(nums.q('!is num', verbosity=0), nums[[]])
compare(nums.q('is int', verbosity=0), nums[[0,1,2]])
compare(nums.q('!is int', verbosity=0), nums[[]])
compare(nums.q('is float', verbosity=0), nums[[]])
compare(nums.q('!is float', verbosity=0), nums[[0,1,2]])
compare(nums.q('is na', verbosity=0), nums[[]])
compare(nums.q('!is na', verbosity=0), nums[[0,1,2]])

#equality
# '=' is expected to match the int 1 for both '1' and '1.0';
# '==' is expected to be strict: '==1' matches it, '==1.0' matches nothing
compare(nums.q('=1', verbosity=0), nums[[1]])
compare(nums.q('!=1', verbosity=0), nums[[0,2]])
compare(nums.q('=1.0', verbosity=0), nums[[1]])
compare(nums.q('!=1.0', verbosity=0), nums[[0,2]])
compare(nums.q('==1', verbosity=0), nums[[1]])
compare(nums.q('!==1', verbosity=0), nums[[0,2]])
compare(nums.q('==1.0', verbosity=0), nums[[]])
compare(nums.q('!==1.0', verbosity=0), nums[[0,1,2]])

#comparison
# each operator is checked together with its '!' negation
compare(nums.q('<1', verbosity=0), nums[[0]])
compare(nums.q('!<1', verbosity=0), nums[[1,2]])
compare(nums.q('<=1', verbosity=0), nums[[0,1]])
compare(nums.q('!<=1', verbosity=0), nums[[2]])
compare(nums.q('>0', verbosity=0), nums[[1,2]])
compare(nums.q('!>0', verbosity=0), nums[[0]])
compare(nums.q('>=0', verbosity=0), nums[[0,1,2]])
compare(nums.q('!>=0', verbosity=0), nums[[]])






#string tests
# Index of short strings that differ in case, surrounding whitespace and length.
# Expected results below are selected from it by position.
strings = pd.Series({
    0: 'a',
    1: 'A',
    2: 'a ',
    3: ' a',
    4: 'a b',
    5: 'aa',
    6: 'b',
    7: 'B',
    8: 'c'
    })
strings = pd.Index(strings.values)

#type
compare(strings.q('is str', verbosity=0), strings[[0,1,2,3,4,5,6,7,8]])
compare(strings.q('!is str', verbosity=0), strings[[]])


#regex search
# '~' is expected to find the pattern anywhere in the value, case-sensitively ('A' is not selected)
compare(strings.q('~a', verbosity=0), strings[[0,2,3,4,5]])

#regex equality
# '~~' is expected to require the whole value to match: '.' selects the
# one-character entries, '..' the two-character entries
compare(strings.q('~~.', verbosity=0), strings[[0,1,6,7,8]])
compare(strings.q('!~~.', verbosity=0), strings[[2,3,4,5]])
compare(strings.q('~~..', verbosity=0), strings[[2,3,5]])
compare(strings.q('!~~..', verbosity=0), strings[[0,1,4,6,7,8]])


#equality
# '=' is expected to ignore case but not whitespace ('a ' and ' a' are not selected);
# '==' is expected to match exactly
compare(strings.q('=a', verbosity=0), strings[[0,1]])
compare(strings.q('!=a', verbosity=0), strings[[2,3,4,5,6,7,8]])
compare(strings.q('=A', verbosity=0), strings[[0,1]])
compare(strings.q('!=A', verbosity=0), strings[[2,3,4,5,6,7,8]])
compare(strings.q('==a', verbosity=0), strings[[0]])
compare(strings.q('!==a', verbosity=0), strings[[1,2,3,4,5,6,7,8]])


#contains
# '()' is expected to be a case-insensitive substring check, '(())' a case-sensitive one
compare(strings.q('()a', verbosity=0), strings[[0,1,2,3,4,5]])
compare(strings.q('!()a', verbosity=0), strings[[6,7,8]])
compare(strings.q('()b', verbosity=0), strings[[4,6,7]])
compare(strings.q('!()b', verbosity=0), strings[[0,1,2,3,5,8]])
compare(strings.q('()A', verbosity=0), strings[[0,1,2,3,4,5]])
compare(strings.q('!()A', verbosity=0), strings[[6,7,8]])
compare(strings.q('(())a', verbosity=0), strings[[0,2,3,4,5]])
compare(strings.q('!(())a', verbosity=0), strings[[1,6,7,8]])

#multiple conditions
# '&&' combines conditions with AND, '//' with OR; going by the expected results
# of the last check, conditions are applied left to right without operator precedence
compare(strings.q('()a && ()b', verbosity=0), strings[[4]])
compare(strings.q('()a && !()b', verbosity=0), strings[[0,1,2,3,5]])
compare(strings.q('!()a && ()b', verbosity=0), strings[[6,7]])
compare(strings.q('!()a && !()b', verbosity=0), strings[[8]])
compare(strings.q('()a && ()b // ()c', verbosity=0), strings[[4,8]])
compare(strings.q('()a // ()b // ()c', verbosity=0), strings[[0,1,2,3,4,5,6,7,8]])
compare(strings.q('()a // ()c && !()b', verbosity=0), strings[[0,1,2,3,5,8]])

#modify data
# The second argument of .q() is a modification applied to the selected values
# (here everything, because the query is empty). The integer index defined in
# the number tests above is used as input.
compare(nums.q('', 'to str', verbosity=0), pd.Index(['0', '1', '2']))
compare(nums.q('', 'to int', verbosity=0), nums[[0,1,2]])
compare(nums.q('', 'to float', verbosity=0), nums[[0,1,2]])
compare(nums.q('', 'to num', verbosity=0), nums[[0,1,2]])
# 2 is neither a bool nor a yes/no value, so None is expected for it
compare(nums.q('', 'to bool', verbosity=0), pd.Index([False, True, None]))
# plain integers are not expected to be read as dates, hence NaT for every element
compare(nums.q('', 'to date', verbosity=0), pd.Index([pd.NaT, pd.NaT, pd.NaT]))
compare(nums.q('', 'to datetime', verbosity=0), pd.Index([pd.NaT, pd.NaT, pd.NaT]))
compare(nums.q('', 'to yn', verbosity=0), pd.Index(['no', 'yes', None]))


passed test for arg: ".q()"
passed test for arg: ".q('',)"
passed test for arg: ".q('is num',)"
passed test for arg: ".q('!is num',)"
passed test for arg: ".q('is int',)"
passed test for arg: ".q('!is int',)"
passed test for arg: ".q('is float',)"
passed test for arg: ".q('!is float',)"
passed test for arg: ".q('is na',)"
passed test for arg: ".q('!is na',)"
passed test for arg: ".q('=1',)"
passed test for arg: ".q('!=1',)"
passed test for arg: ".q('=1.0',)"
passed test for arg: ".q('!=1.0',)"
passed test for arg: ".q('==1',)"
passed test for arg: ".q('!==1',)"
passed test for arg: ".q('==1.0',)"
passed test for arg: ".q('!==1.0',)"
passed test for arg: ".q('<1',)"
passed test for arg: ".q('!<1',)"
passed test for arg: ".q('<=1',)"
passed test for arg: ".q('!<=1',)"
passed test for arg: ".q('>0',)"
passed test for arg: ".q('!>0',)"
passed test for arg: ".q('>=0',)"
passed test for arg: ".q('!>=0',)"
passed test for arg: ".q('is str',)"
passed test for arg: ".q('!is str',)"
passed test 

  pd_object[indices] = pd_object[indices].map(str)
  pd_object[indices] = pd_object[indices].map(qp_bool)
['NaT', 'NaT', 'NaT']
Length: 3, dtype: datetime64[ns]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  pd_object[indices] = pd_object[indices].map(qp_date)
  idx = pd.Index(_apply_modification(se, index_expression, modification, verbosity, index=idx))
['NaT', 'NaT', 'NaT']
Length: 3, dtype: datetime64[ns]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  pd_object[indices] = pd_object[indices].map(qp_datetime)
  idx = pd.Index(_apply_modification(se, index_expression, modification, verbosity, index=idx))
  pd_object[indices] = pd_object[indices].map(qp_yn)


# pd.Series.q()

In [11]:

def compare(result, expected_series):
    """Check the series returned by a ``.q()`` call against the expected series.

    On a match a "passed test..." line naming the recorded query
    (``result.qp._input``) is printed. On a mismatch the query, the result and
    the expected series are shown before the test is aborted.

    The printed failure line uses the same "failed test for arg: ..." wording
    as the raised exception (it previously said "args").

    Raises:
        Exception: with a "failed test..." message when ``result`` does not
            equal ``expected_series``.
    """
    if not result.equals(expected_series):
        print(f'failed test for arg: "{result.qp._input}"')
        print('result:')
        display(result)
        print('expected:')
        display(expected_series)
        raise Exception(f'failed test for arg: "{result.qp._input}"')
    else:
        print(f'passed test for arg: "{result.qp._input}"')


    

    
#number tests
# Series mixing real numbers, a numeric string, missing values and
# non-numeric strings. Expected results below are selected from it by label.
nums = pd.Series({
    0: 1,
    1: 1.0,
    2: 1.1,
    3: '1',
    4: None,
    5: np.nan,
    6: 'NaN',
    7: 'unk',
    8: 'unknown',
    9: ''
    })

# purely numeric series used for the comparison operators
nums1 = pd.Series({
    0: -1,
    1: -0.5,
    2: 0,
    3: 0.5,
    4: 1,
    })


#return all
# no query and an empty query are both expected to return the series unchanged
compare(nums.q(), expected_series=nums)
compare(nums.q('', verbosity=0), expected_series=nums)

#type
# per the expected results, 'is num' also selects the numeric string '1',
# None, np.nan and '' but not the string 'NaN', which only 'is na' selects
compare(nums.q('is num', verbosity=0), nums[[0,1,2,3,4,5,9]])
compare(nums.q('is num // is na', verbosity=0), nums[[0,1,2,3,4,5,6,9]])
compare(nums.q('!is num', verbosity=0), nums[[6,7,8]])
compare(nums.q('is int', verbosity=0), nums[[0]])
compare(nums.q('!is int', verbosity=0), nums[[1,2,3,4,5,6,7,8,9]])
compare(nums.q('is float', verbosity=0), nums[[1,2,5]])
compare(nums.q('!is float', verbosity=0), nums[[0,3,4,6,7,8,9]])
compare(nums.q('is na', verbosity=0), nums[[4,5,6,9]])
compare(nums.q('!is na', verbosity=0), nums[[0,1,2,3,7,8]])

#equality
# '=' is expected to compare by numeric value (1, 1.0 and '1' all match);
# '==' is expected to be strict: '==1' selects 1 and '1', '==1.0' only 1.0
compare(nums.q('=1', verbosity=0), nums[[0,1,3]])
compare(nums.q('!=1', verbosity=0), nums[[2,4,5,6,7,8,9]])
compare(nums.q('=1.0', verbosity=0), nums[[0,1,3]])
compare(nums.q('!=1.0', verbosity=0), nums[[2,4,5,6,7,8,9]])
compare(nums.q('==1', verbosity=0), nums[[0,3]])
compare(nums.q('!==1', verbosity=0), nums[[1,2,4,5,6,7,8,9]])
compare(nums.q('==1.0', verbosity=0), nums[[1]])
compare(nums.q('!==1.0', verbosity=0), nums[[0,2,3,4,5,6,7,8,9]])
compare(nums.q('==1.1', verbosity=0), nums[[2]])
compare(nums.q('!==1.1', verbosity=0), nums[[0,1,3,4,5,6,7,8,9]])


#comparison
# each operator is checked together with its '!' negation
compare(nums1.q('<0', verbosity=0), nums1[[0,1]])
compare(nums1.q('!<0', verbosity=0), nums1[[2,3,4]])
compare(nums1.q('<=0', verbosity=0), nums1[[0,1,2]])
compare(nums1.q('!<=0', verbosity=0), nums1[[3,4]])
compare(nums1.q('>0', verbosity=0), nums1[[3,4]])
compare(nums1.q('!>0', verbosity=0), nums1[[0,1,2]])
compare(nums1.q('>=0', verbosity=0), nums1[[2,3,4]])
compare(nums1.q('!>=0', verbosity=0), nums1[[0,1]])


#modify data
# The second argument of .q() is a modification applied to the selected values
# (here everything, because the query is empty). The mixed number series
# defined in the number tests above is used as input.
compare(nums.q('', 'to str', verbosity=0), nums.astype(str))
# values that cannot be converted are expected to become NaN; 1.1 is expected to become the int 1
compare(nums.q('', 'to int', verbosity=0), pd.Series([1,1,1,1,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan], dtype='object'))
compare(nums.q('', 'to float', verbosity=0), pd.Series([1.0,1.0,1.1,1.0,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan], dtype='object'))
compare(nums.q('', 'to num', verbosity=0), pd.Series([1.0,1.0,1.1,1.0,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan], dtype='object'))
# only values equal to 1 are expected to be read as True / 'yes'; everything else becomes None
compare(nums.q('', 'to bool', verbosity=0), pd.Series([True,True,None,True,None,None,None,None,None,None], dtype='object'))
# none of the values is expected to be read as a date, hence NaT everywhere
compare(nums.q('', 'to date', verbosity=0), pd.Series([pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT], dtype='object'))
compare(nums.q('', 'to datetime', verbosity=0), pd.Series([pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT,pd.NaT], dtype='object'))
compare(nums.q('', 'to yn', verbosity=0), pd.Series(['yes','yes',None,'yes',None,None,None,None,None,None], dtype='object'))




#string tests
# Series of short strings plus a number, missing values and booleans.
# Expected results below are selected from it by label.
strings = pd.Series({
    0: 'a',
    1: 'A',
    2: 'a ',
    3: ' a',
    4: 'a b',
    5: 'a',
    6: 'b',
    7: 'B',
    8: 0,
    9: None,
    10: np.nan,
    11: True,
    12: False,
    })


#type
compare(strings.q('is str', verbosity=0), strings[[0,1,2,3,4,5,6,7]])
compare(strings.q('!is str', verbosity=0), strings[[8,9,10,11,12]])

#regex search
# per the expected results, non-string values are matched via their text form:
# np.nan and False are selected by '~a', and 0 by the digit pattern
compare(strings.q('~a', verbosity=0), strings[[0,2,3,4,5,10,12]])
compare(strings.q(r'~\d', verbosity=0), strings[[8]])
#regex equality
# '~~' is expected to require the whole value to match the pattern
compare(strings.q('~~.', verbosity=0), strings[[0,1,5,6,7,8]])
compare(strings.q('!~~.', verbosity=0), strings[[2,3,4,9,10,11,12]])
compare(strings.q('~~..', verbosity=0), strings[[2,3]])        
compare(strings.q('!~~..', verbosity=0), strings[[0,1,4,5,6,7,8,9,10,11,12]])

#equality
# '=' is expected to ignore case but not whitespace; '==' is expected to match exactly
compare(strings.q('=a', verbosity=0), strings[[0,1,5]])
compare(strings.q('!=a', verbosity=0), strings[[2,3,4,6,7,8,9,10,11,12]])
compare(strings.q('=A', verbosity=0), strings[[0,1,5]])
compare(strings.q('!=A', verbosity=0), strings[[2,3,4,6,7,8,9,10,11,12]])
compare(strings.q('==a', verbosity=0), strings[[0,5]])
compare(strings.q('!==a', verbosity=0), strings[[1,2,3,4,6,7,8,9,10,11,12]])
#contains
# '()' is expected to be a case-insensitive substring check, '(())' a case-sensitive one;
# np.nan and False are selected because their text form contains an 'a'
compare(strings.q('()a', verbosity=0), strings[[0,1,2,3,4,5,10,12]])
compare(strings.q('!()a', verbosity=0), strings[[6,7,8,9,11]])
compare(strings.q('()b', verbosity=0), strings[[4,6,7]])
compare(strings.q('!()b', verbosity=0), strings[[0,1,2,3,5,8,9,10,11,12]])
compare(strings.q('()A', verbosity=0), strings[[0,1,2,3,4,5,10,12]])
compare(strings.q('!()A', verbosity=0), strings[[6,7,8,9,11]])
compare(strings.q('(())a', verbosity=0), strings[[0,2,3,4,5,10,12]])
compare(strings.q('!(())a', verbosity=0), strings[[1,6,7,8,9,11]])

#multiple conditions
# '&&' combines conditions with AND, '//' with OR; going by the expected results
# (e.g. '()n // ()f // ()t && ()a'), conditions are applied left to right
# without operator precedence
compare(strings.q('()a && ()b', verbosity=0), strings[[4]])
compare(strings.q('()a && !()b', verbosity=0), strings[[0,1,2,3,5,10,12]])
compare(strings.q('!()a && ()b', verbosity=0), strings[[6,7]])
compare(strings.q('!()a && !()b', verbosity=0), strings[[8,9,11]])
compare(strings.q('()a && ()b // ()f', verbosity=0), strings[[4,12]])
compare(strings.q('()n // ()f // ()t', verbosity=0), strings[[9,10,11,12]])
compare(strings.q('()n // ()f // ()t && ()a', verbosity=0), strings[[10,12]])
compare(strings.q('()n // ()f // ()t && ()a && ()b', verbosity=0), strings[[]])
compare(strings.q('()n // ()f // ()t && !()a', verbosity=0), strings[[9,11]])
compare(strings.q('!is na && ()a', verbosity=0), strings[[0,1,2,3,4,5,12]])



#mixed type tests
# Series mixing strings, numbers, missing values and booleans.
# Expected results below are selected from it by label.
mixxed = pd.Series({
    0: 'a',
    1: 'B',
    2: 'abc',
    3: '1',
    4: 0,
    5: 0.5,
    6: 1,
    7: -1,
    8: '',
    9: None,
    10: np.nan,
    11: True,
    12: False,
    })


# the empty string is expected to count as na, alongside None and np.nan
compare(mixxed.q('is na', verbosity=0), mixxed[[8,9,10]])
compare(mixxed.q('>0', verbosity=0), mixxed[[3,5,6,11]])  #note that True is read as 1 for numerical comparison
compare(mixxed.q('<0', verbosity=0), mixxed[[7]])
# negated comparisons are expected to also select the values that cannot be compared numerically
compare(mixxed.q('!>0', verbosity=0), mixxed[[0,1,2,4,7,8,9,10,12]])
compare(mixxed.q('!<0', verbosity=0), mixxed[[0,1,2,3,4,5,6,8,9,10,11,12]])
compare(mixxed.q('!>0 && !<0', verbosity=0), mixxed[[0,1,2,4,8,9,10,12]])

compare(mixxed.q('>-1 && <1', verbosity=0), mixxed[[4,5,12]])  #False is read as 0
compare(mixxed.q('<0 // >0.5', verbosity=0), mixxed[[3,6,7,11]])
compare(mixxed.q('>0 && !is str', verbosity=0), mixxed[[5,6,11]])  #True is read as 1; the numeric string '1' is excluded by '!is str'



passed test for arg: ".q()"
passed test for arg: ".q('',)"
passed test for arg: ".q('is num',)"
passed test for arg: ".q('is num // is na',)"
passed test for arg: ".q('!is num',)"
passed test for arg: ".q('is int',)"
passed test for arg: ".q('!is int',)"
passed test for arg: ".q('is float',)"
passed test for arg: ".q('!is float',)"
passed test for arg: ".q('is na',)"
passed test for arg: ".q('!is na',)"
passed test for arg: ".q('=1',)"
passed test for arg: ".q('!=1',)"
passed test for arg: ".q('=1.0',)"
passed test for arg: ".q('!=1.0',)"
passed test for arg: ".q('==1',)"
passed test for arg: ".q('!==1',)"
passed test for arg: ".q('==1.0',)"
passed test for arg: ".q('!==1.0',)"
passed test for arg: ".q('==1.1',)"
passed test for arg: ".q('!==1.1',)"
passed test for arg: ".q('<0',)"
passed test for arg: ".q('!<0',)"
passed test for arg: ".q('<=0',)"
passed test for arg: ".q('!<=0',)"
passed test for arg: ".q('>0',)"
passed test for arg: ".q('!>0',)"
passed test for arg: ".q('>=0',)"
pas

# pd.DataFrame.q()

In [12]:
def compare(result, expected_df, text=None):
    """Check a DataFrame produced by a ``.q()`` call against the expected frame.

    Args:
        result: the DataFrame under test.
        expected_df: the DataFrame it must equal.
        text: optional description used in the passed/failed message instead
            of the recorded query (``result.qp._input``).

    On a match a "passed test..." line is printed; the recorded query is only
    read when no ``text`` is given. On a mismatch the recorded query, the
    result and the expected frame are shown before the test is aborted.

    Raises:
        Exception: with a "failed test..." message when the frames differ.
    """
    if result.equals(expected_df):
        if text is not None:
            print(f'passed test for: {text}')
        else:
            print(f'passed test for arg: "{result.qp._input}"')
        return

    # The failure report always names the recorded query, even if the raised
    # message uses the description instead.
    query_message = f'failed test for arg: "{result.qp._input}"'
    print(query_message)

    print('result:')
    display(result)
    print('expected:')
    display(expected_df)
    raise Exception(query_message if text is None else f'failed test for: {text}')


def get_df_simple():
    """Return a new 3-row DataFrame with the integer columns 'a' and 'b'."""
    rows = [
        (-1, 1),
        (0, 2),
        (1, 3),
    ]
    return pd.DataFrame(rows, columns=['a', 'b'])

def get_df_simple_tagged():
    """Return the simple test frame with an extra '#' row and '#' column.

    The additional row and column contain only empty strings.
    """
    row_labels = ['#', 0, 1, 2]
    columns = {
        '#': ['', '', '', ''],
        'a': ['', -1, 0, 1],
        'b': ['', 1, 2, 3],
    }
    return pd.DataFrame(columns, index=row_labels)


def get_df():
    """Return a new DataFrame of deliberately messy example patient records.

    The 11 rows mix types, spellings, units and missing-value markers within
    most columns.
    """
    records = {
        'ID': [10001, 10002, 10003, 20001, 20002, 20003, 30001, 30002, 30003, 30004, 30005],
        'name': ['John Doe', 'Jane Smith', 'Alice Johnson', 'Bob Brown', 'eva white', 'Frank miller', 'Grace TAYLOR', 'Harry Clark', 'IVY GREEN', 'JAck Williams', 'john Doe'],
        'date of birth': ['1995-01-02', '1990/09/14', '1985.08.23', '19800406', '05-11-2007', '06-30-1983', '28-05-1975', '1960Mar08', '1955-Jan-09', '1950 Sep 10', '1945 October 11'],
        'age': [-25, '30', np.nan, None, '40.0', 'forty-five', 'nan', 'unk', '', 'unknown', 35],
        'gender': ['M', 'F', 'Female', 'Male', 'Other', 'm', 'ff', 'NaN', None, 'Mal', 'female'],
        'height': [170, '175.5cm', None, '280', 'NaN', '185', '1', '6ft 1in', -10, '', 200],
        'weight': [70.2, '68', '72.5lb', 'na', '', '75kg', None, '80.3', '130lbs', '82', -65],
        'bp systole': ['20', 130, 'NaN', '140', '135mmhg', '125', 'NAN', '122', '', 130, '45'],
        'bp diastole': [80, '85', 'nan', '90mmHg', np.nan, '75', 'NaN', None, '95', '0', 'NaN'],
        'cholesterol': ['Normal', 'Highe', 'NaN', 'GOOD', 'n.a.', 'High', 'Normal', 'n/a', 'high', '', 'Normal'],
        'diabetes': ['No', 'yes', 'N/A', 'No', 'Y', 'Yes', 'NO', None, 'NaN', 'n', 'Yes'],
        'dose': ['10kg', 'NaN', '15 mg once a day', '20mg', '20 Mg', '25g', 'NaN', None, '30 MG', '35', '40ml'],
    }
    return pd.DataFrame(records)

def get_df_tagged():
    """Return get_df() embedded in a frame that has an extra '#' row and '#' column.

    The frame is created filled with empty strings; everything except the
    first row and first column is then overwritten with the get_df() data.
    """
    data = get_df()
    row_labels = ['#'] + list(data.index)
    col_labels = ['#'] + list(data.columns)
    tagged = pd.DataFrame('', index=row_labels, columns=col_labels)
    # position 0 on both axes is the '#' row/column, so the data starts at 1
    tagged.iloc[1:, 1:] = data.loc[:, :]
    return tagged

## inplace

In [13]:

df = get_df()

# a query with inplace=False is expected to leave the frame it is called on unchanged
df1 = copy.deepcopy(df)
df1.q(inplace=False, verbosity=0)
compare(df1, df, 'not adding metadata when inplace=False')

# with inplace=True the calling frame is expected to gain the '#' metadata
# row and column, i.e. to look like get_df_tagged()
df2 = get_df_tagged()
df1.q(inplace=True, verbosity=0)
compare(df1, df2, 'adding metadata when inplace=True')

# modification with inplace=False: only the returned frame is checked
result = df.q('', '', 'x=1', inplace=False, verbosity=0)
df2.iloc[1:,1:] = 1
compare(result, df2)

# modification with inplace=True: the returned frame and df itself are both checked
result = df.q('', '', 'x=2', inplace=True, verbosity=0)
df2.iloc[1:,1:] = 2
compare(result, df2)
compare(df, df2)


passed test for: not adding metadata when inplace=False
passed test for: adding metadata when inplace=True
passed test for arg: ".q(
	'', '', 'x=1',
	max_cols=200,
	max_rows=20,
	inplace=False,
	verbosity=0,
	)"
passed test for arg: ".q(
	'', '', 'x=2',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'', '', 'x=2',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"


## filtering

In [14]:

df = get_df()


#col equality

# NOTE(review): this first query passes inplace=True; the expected frames
# below index df with '#', so df is assumed to carry the metadata row and
# column from here on. Keep this statement first.
result = df.q('date of birth', inplace=True, verbosity=0)
compare(result, df.loc[:, ['#', 'date of birth']])

# a bare column name and the '=' prefix are expected to select the same column
result = df.q('=date of birth', verbosity=0)
compare(result, df.loc[:, ['#', 'date of birth']])

# '//' between column conditions: both columns are expected in the result,
# with or without the '=' prefix on either side
result = df.q('date of birth // age', verbosity=0)
compare(result, df.loc[:, ['#', 'date of birth', 'age']])

result = df.q('date of birth // =age', verbosity=0)
compare(result, df.loc[:, ['#', 'date of birth', 'age']])

result = df.q('=date of birth // age', verbosity=0)
compare(result, df.loc[:, ['#', 'date of birth', 'age']])

result = df.q('=date of birth // =age', verbosity=0)
compare(result, df.loc[:, ['#', 'date of birth', 'age']])

# '!=' is expected to select every column except the named one
result = df.q('!=date of birth', verbosity=0)
compare(result, df.loc[:, ['#', 'ID', 'name', 'age', 'gender', 'height', 'weight', 'bp systole', 'bp diastole', 'cholesterol', 'diabetes', 'dose']])




#col and row condition chaining
# relies on df already carrying the '#' metadata row/column (see the
# inplace=True query at the top of this cell)

# an empty column condition is expected to select everything
result = df.q('', verbosity=0)
compare(result, df.loc[:, :])

result = df.q('=ID', verbosity=0)
compare(result, df.loc[:, ['#', 'ID']])

# an additional empty row condition is expected not to change the selection
result = df.q('=ID', '', verbosity=0)
compare(result, df.loc[:, ['#', 'ID']])

# '=' with nothing after it: only the '#' column is expected
result = df.q('=', '', verbosity=0)
compare(result, df.loc[:, ['#']])

# a second column condition after a full (col, row, modification) triple
result = df.q(
    '=ID', '', '',
    'ID', verbosity=0)
compare(result, df.loc[:, ['#', 'ID']])

# without a connector the later column condition is expected to replace the earlier one
result = df.q(
    '=ID', '', '',
    '=age', verbosity=0)
compare(result, df.loc[:, ['#', 'age']])

# with a leading '//' both columns are expected in the result
result = df.q(
    '=ID', '', '',
    '//=age', verbosity=0)
compare(result, df.loc[:, ['#', 'ID', 'age']])



#col contains
# '()' selects columns whose name contains the text
result = df.q('()bp', verbosity=0)
compare(result, df.loc[:, ['#', 'bp systole', 'bp diastole']])

# the expected frame lists names with upper- and lowercase 'i', so '()' is
# expected to match case-insensitively
result = df.q('()I', verbosity=0)
compare(result, df.loc[:, ['#', 'ID', 'date of birth', 'height', 'weight', 'bp diastole', 'diabetes']])

# '(())' is expected to match only the uppercase 'I' (just 'ID')
result = df.q('(())I', verbosity=0)
compare(result, df.loc[:, ['#', 'ID']])




#col regex equality
# '~~' followed by a regex; '.' is expected to match only the one-character
# name '#', '..' additionally the two-character name 'ID'
result = df.q('~~.', verbosity=0)
compare(result, df.loc[:, ['#']])

result = df.q('~~..', verbosity=0)
compare(result, df.loc[:, ['#', 'ID']])



#col multiple conditions
# '//' between conditions: columns matching either side are expected
result = df.q('()bp // =diabetes', verbosity=0)
compare(result, df.loc[:, ['#', 'bp systole', 'bp diastole', 'diabetes']])

# the expected column order is the same for both condition orders below
result = df.q('()bp // =diabetes // =cholesterol', verbosity=0)
compare(result, df.loc[:, ['#', 'bp systole', 'bp diastole', 'cholesterol', 'diabetes']])

result = df.q('()bp // =cholesterol // =diabetes', verbosity=0)
compare(result, df.loc[:, ['#', 'bp systole', 'bp diastole', 'cholesterol', 'diabetes']])

# '&&' between conditions: only columns matching both sides are expected
result = df.q('()bp && ()systole', verbosity=0)
compare(result, df.loc[:, ['#', 'bp systole']])

# '!' negates the condition that follows it
result = df.q('()bp && !()systole', verbosity=0)
compare(result, df.loc[:, ['#', 'bp diastole']])

result = df.q('()i && ~~..', verbosity=0)
compare(result, df.loc[:, ['#', 'ID']])




#row types
# the second argument is the row condition; every expected frame keeps the
# '#' metadata row in addition to the matching data rows
result = df.q('=name', 'is str', verbosity=0)
compare(result, df.loc[:, ['#', 'name']])

result = df.q('=name', '!is str', verbosity=0)
compare(result, df.loc[['#'], ['#', 'name']])

result = df.q('=name', 'is num', verbosity=0)
compare(result, df.loc[['#'], ['#', 'name']])

result = df.q('=name', '!is num', verbosity=0)
compare(result, df.loc[['#', 0,1,2,3,4,5,6,7,8,9,10], ['#', 'name']])

result = df.q('=name', 'is na', verbosity=0)
compare(result, df.loc[['#'], ['#', 'name']])

result = df.q('=name', '!is na', verbosity=0)
compare(result, df.loc[['#', 0,1,2,3,4,5,6,7,8,9,10], ['#', 'name']])

# expected 'is na' rows in 'age': np.nan, None, 'nan' and ''
result = df.q('=age', 'is na', verbosity=0)
compare(result, df.loc[['#',2,3,6,8], ['#', 'age']])

# expected 'is na' rows in 'cholesterol': 'NaN', 'n.a.', 'n/a' and ''
result = df.q('=cholesterol', 'is na', verbosity=0)
compare(result, df.loc[['#',2,4,7,9], ['#', 'cholesterol']])

# the expected rows for 'is num' include missing values (rows 4 and 6);
# the next test removes them with '&& !is na'
result = df.q('=weight', 'is num', verbosity=0)
compare(result, df.loc[['#',0,1,4,6,7,9,10], ['#', 'weight']])

result = df.q('=weight', 'is num && !is na', verbosity=0)
compare(result, df.loc[['#', 0,1,7,9,10], ['#', 'weight']])

# only row 7 ('1960Mar08') is expected to be rejected as a date
result = df.q('=date of birth', '!is date', verbosity=0)
compare(result, df.loc[['#', 7], ['#', 'date of birth']])

result = df.q('=diabetes', 'is yn', verbosity=0)
compare(result, df.loc[['#', 0,1,3,4,5,6,9,10], ['#', 'diabetes']])

result = df.q('=diabetes', 'is na // is yn', verbosity=0)
compare(result, df.loc[:, ['#', 'diabetes']])

# NOTE(review): exact duplicate of the 'is yn' test two statements above
result = df.q('=diabetes', 'is yn', verbosity=0)
compare(result, df.loc[['#', 0,1,3,4,5,6,9,10], ['#', 'diabetes']])

result = df.q('=diabetes', 'is yes', verbosity=0)
compare(result, df.loc[['#', 1,4,5,10], ['#', 'diabetes']])

result = df.q('=diabetes', 'is no', verbosity=0)
compare(result, df.loc[['#', 0,3,6,9], ['#', 'diabetes']])



#row regex equality
# '~~' in a row condition; the numeric IDs are matched against a text
# pattern here, so they are presumably compared as strings
result = df.q('=ID', '~~1....', verbosity=0)
compare(result, df.loc[['#', 0,1,2], ['#', 'ID']])

result = df.q('=ID', '!~~3....', verbosity=0)
compare(result, df.loc[['#',0,1,2,3,4,5], ['#', 'ID']])

#two words with first letter capitalized and separated by a space
result = df.q('=name', '~~\\b[A-Z][a-z]*\\s[A-Z][a-z]*\\b', verbosity=0)
compare(result, df.loc[['#', 0,1,2,3,7], ['#', 'name']])

#all lowercase
result = df.q('=name', '~~^[^A-Z]*$', verbosity=0)
compare(result, df.loc[['#',4], ['#', 'name']])

#containing letters and numbers
result = df.q('=dose', '~~^(?=.*[a-zA-Z])(?=.*[0-9]).*$', verbosity=0)
compare(result, df.loc[['#', 0,2,3,4,5,8,10], ['#', 'dose']])


#row regex search
# single '~': the pattern only needs to occur somewhere in the value
# (row 4, '135mmhg', is the only expected match for 'm')
result = df.q('=bp systole', '~m', verbosity=0)
compare(result, df.loc[['#', 4], ['#', 'bp systole']])

# values containing at least one non-digit character
result = df.q('=bp systole', r'~\D', verbosity=0)
compare(result, df.loc[['#', 2,4,6], ['#', 'bp systole']])

# values containing at least one digit
result = df.q('=bp systole', r'~\d', verbosity=0)
compare(result, df.loc[['#', 0,1,3,4,5,7,9,10], ['#', 'bp systole']])



#custom python expression evaluation
# '?' row condition: a python expression written in terms of x; rows 0 and
# 10 hold the only int values in 'age'
result = df.q('age', '? isinstance(x, int)', verbosity=0)
compare(result, df.loc[['#', 0, 10], ['#', 'age']])

# 'col?' row condition: the expression is written in terms of col and may
# reference df. inplace=False keeps the 'to num' conversion out of the shared df.
result = df.q(
    'age // height', '', 'to num',
    'age', 'col? col < df["height"]',
    inplace=False, verbosity=0,
    )
compare(result, df.loc[['#', 0, 10], ['#', 'age']])

# convert to numbers, pick the maximum age, then convert back to str so the
# result can be compared with the original row 4 of df
result = df.q(
    'age // height', '', 'to num',
    'age', 'col? col == df["age"].max()', '',
    'age', '', 'to str',
    inplace=False, verbosity=0,
    )
compare(result, df.loc[['#', 4], ['#', 'age']])

passed test for arg: ".q(
	'date of birth',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=date of birth',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'date of birth // age',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'date of birth // =age',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=date of birth // age',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=date of birth // =age',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'!=date of birth',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=ID',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=ID', 

  return pd.to_datetime(x, dayfirst=True).date()


## complex filtering

In [15]:

# Queries made of several (column, row, modification) triples. This cell
# reuses df from the filtering cell, which already has the '#' metadata
# row and column.

# no connector on the second triple: the expected frame contains only the
# 'diabetes' column and the 'is yes' rows
result = df.q(
    '=ID', '~~1....', '',
    '=diabetes', 'is yes',
    verbosity=0)
compare(result, df.loc[['#', 1, 4, 5, 10], ['#', 'diabetes']])


# '//' on the column condition keeps both columns, '&&' on the row
# condition keeps only rows matching both row conditions
result = df.q(
    '=ID', '~~1....', '',
    '//=diabetes', '&&is yes',
    verbosity=0)
compare(result, df.loc[['#', 1], ['#', 'ID', 'diabetes']])


result = df.q(
    '=ID', '~~1.... // ~~2....', '',
    '//=gender', '&&=m // =male',
    verbosity=0)
compare(result, df.loc[['#', 0,3,5], ['#', 'ID', 'gender']])


# same as above with a space after '&&'; the same result is expected
result = df.q(
    '=ID', '~~1.... // ~~2....', '',
    '//=gender', '&& =m // =male',
    verbosity=0)
compare(result, df.loc[['#', 0,3,5], ['#', 'ID', 'gender']])


# leading '//' on the row condition: rows matching either triple are expected
result = df.q(
    '=ID', '~~1.... // ~~2....', '',
    '//=gender', '// =m // =male',
    verbosity=0)
compare(result, df.loc[['#', 0,1,2,3,4,5], ['#', 'ID', 'gender']])


result = df.q(
    '=ID', '~~1.... // ~~2....', '',
    '//=gender', '// =f // =female',
    verbosity=0)
compare(result, df.loc[['#', 0,1,2,3,4,5,10], ['#', 'ID', 'gender']])


result = df.q(
    '=age', '>30', '',
    '//=gender', '&&=f // =female',
    verbosity=0)
compare(result, df.loc[['#', 10], ['#', 'age', 'gender']])


# NOTE(review): exact duplicate of the previous test; possibly the
# '&& =f' (with space) variant was intended, as in the gender tests above
result = df.q(
    '=age', '>30', '',
    '//=gender', '&&=f // =female',
    verbosity=0)
compare(result, df.loc[['#', 10], ['#', 'age', 'gender']])


result = df.q(
    '=age', '>30', '',
    '//=age', '//<18',
    verbosity=0)
compare(result, df.loc[['#', 0,4,10], ['#', 'age']])


# three chained triples: 40 < weight < 70 and diabetes is yes
result = df.q(
    '=weight', '<70', '',
    '//=weight', '&&>40', '',
    '//=diabetes', '&&is yes',
    verbosity=0)
compare(result, df.loc[['#', 1], ['#', 'weight', 'diabetes']])

# same weight range with 'is no': no data rows are expected, only the '#' row
result = df.q(
    '=weight', '<70', '',
    '//=weight', '&&>40', '',
    '//=diabetes', '&&is no',
    verbosity=0)
compare(result, df.loc[['#', ], ['#', 'weight', 'diabetes']])


#wip
# df.q(
#     'id','()1','col#= x + "tag1" && row#= x + "contains 1  "',
#     'name','? x.lower()[0]=="j" ','col#= x + "tag2" && row#= x + "starts with j  "',
#     '#()2', '#()contains 1 && #!() starts with j', verbosity=3)

passed test for arg: ".q(
	'=ID', '~~1....', '',
	'=diabetes', 'is yes',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=ID', '~~1....', '',
	'//=diabetes', '&&is yes',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=ID', '~~1.... // ~~2....', '',
	'//=gender', '&&=m // =male',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=ID', '~~1.... // ~~2....', '',
	'//=gender', '&& =m // =male',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=ID', '~~1.... // ~~2....', '',
	'//=gender', '// =m // =male',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=ID', '~~1.... // ~~2....', '',
	'//=gender', '// =f // =female',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=age', '>30', '',
	'//=gender', '&&=f // =female',
	max_cols=200,
	max_rows=20,
	

## data modification

In [16]:
#set columns
# Each test starts from a fresh get_df() and builds the expected frame from
# get_df_tagged(). 'col=' assigns a whole column from an expression.

df = get_df()
df1 = get_df_tagged()
result = df.q('id', '', 'col= df["name"]', verbosity=0)
df1['ID'] = df1['name']
compare(result, df1.loc[:, ['#', 'ID']])

# trailing '>> is any', '>> is any': the whole frame is expected back after the modification
df = get_df()
df1 = get_df_tagged()
result = df.q('id', '', 'col = df["name"]', '>> is any', '>> is any', verbosity=0)
df1['ID'] = df1['name']
compare(result, df1.loc[:, :])

df = get_df()
df1 = get_df_tagged()
result = df.q('id // age', '', 'col= df["name"]', verbosity=0)
df1['ID'] = df1['name']
df1['age'] = df1['name']
compare(result, df1.loc[:, ['#', 'ID', 'age']])

# empty column condition: every data column is expected to be overwritten
df = get_df()
df1 = get_df_tagged()
result = df.q('', '', 'col= df["name"]', verbosity=0)
for col in df1.columns:
    if col != '#':
        df1[col] = df1['name']
compare(result, df1)




#set rows
# 'row=' assigns whole rows from an expression

df = get_df()
df1 = get_df_tagged()
result = df.q('id', '=10001','row = df.loc[1]', verbosity=0)
df1.loc[0] = df1.loc[1]
compare(result, df1.loc[['#', 0], ['#', 'ID']])

df = get_df()
df1 = get_df_tagged()
result = df.q('id', '=10001', 'row =df.loc[1]', '>> is any', '>> is any', verbosity=0)
df1.loc[0] = df1.loc[1]
compare(result, df1.loc[:, :])

# every row whose ID contains '1' is expected to be replaced by row 0
df = get_df()
df1 = get_df_tagged()
result = df.q('id', '()1', 'row= df.loc[0]', verbosity=0)
for row in df1.index:
    if '1' in str(df1.loc[row, 'ID']):
        df1.loc[row] = df1.loc[0]
compare(result, df1.loc[['#', 0, 1, 2, 3, 6], ['#', 'ID']])

# empty conditions: every data row is expected to be replaced by row 0
df = get_df()
df1 = get_df_tagged()
result = df.q('', '', 'row= df.loc[0]', verbosity=0)
for row in df1.index:
    if row != '#':
        df1.loc[row] = df1.loc[0]
compare(result, df1)




#set values
# 'x=' assigns single values from an expression written in terms of x

df = get_df()
df1 = get_df_tagged()
result = df.q('name', '', 'x= x.lower()', '>> is any', '>> is any', verbosity=0)
df1['name'] = df1['name'].str.lower()
compare(result, df1.loc[:, :])

# only the names that are not already lowercase are modified; the
# resulting frame is expected to be the same as above
df = get_df()
df1 = get_df_tagged()
result = df.q('name', '!? x == x.lower()', 'x= x.lower()', '>> is any', '>> is any', verbosity=0)
df1['name'] = df1['name'].str.lower()
compare(result, df1.loc[:, :])

passed test for arg: ".q(
	'id', '', 'col= df["name"]',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'id', '', 'col = df["name"]',
	'>> is any', '>> is any',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'id // age', '', 'col= df["name"]',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'', '', 'col= df["name"]',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'id', '=10001', 'row = df.loc[1]',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'id', '=10001', 'row =df.loc[1]',
	'>> is any', '>> is any',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'id', '()1', 'row= df.loc[0]',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'', '', 'row= df.loc[0]',
	max_cols=200,
	max_rows=20,
	inplace=True,
	ver

## metadata modification

In [17]:

#syntax
# the four tests below differ only in whitespace around 'row', '#' and '='
# and share the same expected frame
df = get_df_simple()
df1 = get_df_simple_tagged()
df.q('=a', '>0', 'row#= x + ">0"', verbosity=0)
df1['#'] = ['', '', '', '>0']
compare(df, df1)

df = get_df_simple()
df1 = get_df_simple_tagged()
df.q('=a', '>0', 'row #= x + ">0"', verbosity=0)
df1['#'] = ['', '', '', '>0']
compare(df, df1)

df = get_df_simple()
df1 = get_df_simple_tagged()
df.q('=a', '>0', 'row# = x + ">0"', verbosity=0)
df1['#'] = ['', '', '', '>0']
compare(df, df1)

df = get_df_simple()
df1 = get_df_simple_tagged()
df.q('=a', '>0', 'row # = x + ">0"', verbosity=0)
df1['#'] = ['', '', '', '>0']
compare(df, df1)



#continuous
# from here on df and df1 are NOT reset between tests: each query modifies
# df in place and the expected frame df1 accumulates the same changes
df = get_df_simple()
df1 = get_df_simple_tagged()


df.q('=a', '>0', 'row#= x + ">0"', verbosity=0)
df1['#'] = ['', '', '', '>0']
compare(df, df1)


# running the same query again appends to the existing row metadata
df.q('=a', '>0', 'row#= x + ">0"', verbosity=0)
df1['#'] = ['', '', '', '>0>0']
compare(df, df1)


df.q('=a', '==0', 'row#= x + "0"', verbosity=0)
df1['#'] = ['', '', '0', '>0>0']
compare(df, df1)


df.q('=a', '==0', 'row#= x.replace("0", "")', verbosity=0)
df1['#'] = ['', '', '', '>0>0']
compare(df, df1)


df.q('=a', '>0', 'row#= x.replace("0", "")', verbosity=0)
df1['#'] = ['', '', '', '>>']
compare(df, df1)


# empty row condition: the metadata of every row is reset
df.q('=a', '', 'row#=""', verbosity=0)
df1['#'] = ['', '', '', '']
compare(df, df1)


# '>> =b' on the second column condition: per the expected frames, the
# modification is applied to column b in the rows chosen via column a
df.q(
    '=a', '>0', '',
    '>> =b', '', 'x=x+1',
    verbosity=0,
    )
df1['b'] = ['', 1, 2, 4]
compare(df, df1)

df.q(
    '=a', '>0', '',
    '>> =b', '&&', 'x=x-2',
    verbosity=0,
    )
df1['b'] = ['', 1, 2, 2]
compare(df, df1)

df.q('=b', '', 'x=x*2', verbosity=0)
df1['b'] = ['', 2, 4, 4]
compare(df, df1)

df.q(
    '=a', '=0', '',
    '>> =b', '', 'x=x/2',
    verbosity=0,
    )
df1['b'] = ['', 2, 2, 4]
compare(df, df1)



# column metadata ('col#='): the value is written to the '#' row of the
# selected column; again the tests build on each other
df = get_df_simple()
df1 = get_df_simple_tagged()

df.q('=a', '', 'col#= "col a"', verbosity=0)
df1.loc['#'] = ['', 'col a', '']
compare(df, df1)

df.q('=a', '', 'col# = x+"1"', verbosity=0)
df1.loc['#'] = ['', 'col a1', '']
compare(df, df1)

df.q('=a', '', 'col # = x.replace("1", "2")', verbosity=0)
df1.loc['#'] = ['', 'col a2', '']
compare(df, df1)

# assigning x to itself is expected to leave the metadata unchanged
df.q('=a', '', 'col # = x', verbosity=0)
df1.loc['#'] = ['', 'col a2', '']
compare(df, df1)

df.q('=a', '', 'col # = ""', verbosity=0)
df1.loc['#'] = ['', '', '']
compare(df, df1)

# a non-string value is expected to be stored as is
df.q('=a', '', 'col # = 1', verbosity=0)
df1.loc['#'] = ['', 1, '']
compare(df, df1)


passed test for arg: ".q(
	'=a', '>0', 'row#= x + ">0"',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=a', '>0', 'row #= x + ">0"',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=a', '>0', 'row# = x + ">0"',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=a', '>0', 'row # = x + ">0"',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=a', '>0', 'row#= x + ">0"',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=a', '>0', 'row#= x + ">0"',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=a', '==0', 'row#= x + "0"',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=a', '==0', 'row#= x.replace("0", "")',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=a', 

## type conversion

In [18]:


# Each test converts one column of a fresh get_df() with a 'to <type>'
# modification and compares it with a hand-written expected column. The
# first entry of every expected list ('') belongs to the '#' metadata row.

df1 = get_df()
df2 = get_df_tagged()
result = df1.q('=age', '', 'to int', verbosity=0)
df2['age'] = ['', -25, 30, np.nan, np.nan, 40, np.nan, np.nan, np.nan, np.nan, np.nan, 35]
compare(result, df2.loc[:,['#', 'age']])

df1 = get_df()
df2 = get_df_tagged()
result = df1.q('=age', '', 'to float', verbosity=0)
df2['age'] = ['', -25.0, 30.0, np.nan, np.nan, 40.0, np.nan, np.nan, np.nan, np.nan, np.nan, 35.0]
compare(result, df2.loc[:,['#', 'age']])

df1 = get_df()
df2 = get_df_tagged()
result = df1.q('=age', '', 'to num', verbosity=0)
df2['age'] = ['', -25, 30, np.nan, np.nan, 40, np.nan, np.nan, np.nan, np.nan, np.nan, 35]
compare(result, df2.loc[:,['#', 'age']])

df1 = get_df()
df2 = get_df_tagged()
result = df1.q('=age', '', 'to str', verbosity=0)
df2['age'] = ['', '-25', '30', 'nan', 'None', '40.0', 'forty-five', 'nan', 'unk', '', 'unknown', '35']
compare(result, df2.loc[:,['#', 'age']])

# the expected dates mix dayfirst=True and dayfirst=False depending on the
# input format; '1960Mar08' is expected to be unparseable (pd.NaT)
df1 = get_df()
df2 = get_df_tagged()
result = df1.q('=date of birth', '', 'to date', verbosity=0)
df2['date of birth'] = [
    '',
    pd.to_datetime('1995-01-02', dayfirst=False).date(),
    pd.to_datetime('1990/09/14', dayfirst=False).date(),
    pd.to_datetime('1985.08.23', dayfirst=False).date(),
    pd.to_datetime('19800406', dayfirst=False).date(),
    pd.to_datetime('05-11-2007', dayfirst=True).date(),
    pd.to_datetime('06-30-1983', dayfirst=False).date(),
    pd.to_datetime('28-05-1975', dayfirst=True).date(),
    pd.NaT,
    pd.to_datetime('1955-Jan-09', dayfirst=False).date(),
    pd.to_datetime('1950 Sep 10', dayfirst=False).date(),
    pd.to_datetime('1945 October 11', dayfirst=False).date(),
    ]
compare(result, df2.loc[:,['#', 'date of birth']])

# 'to na': missing-value spellings become None, everything else is kept
df1 = get_df()
df2 = get_df_tagged()
result = df1.q('=age', '', 'to na', verbosity=0)
df2['age'] = ['', -25, '30', None, None, '40.0', 'forty-five', None, 'unk', None, 'unknown', 35]
compare(result, df2.loc[:,['#', 'age']])

# 'to nk': 'unk' becomes 'unknown', everything else is kept
df1 = get_df()
df2 = get_df_tagged()
result = df1.q('=age', '', 'to nk', verbosity=0)
df2['age'] = ['', -25, '30', np.nan, None, '40.0', 'forty-five', 'nan', 'unknown', '', 'unknown', 35]
compare(result, df2.loc[:,['#', 'age']])

# 'to yn': yes/no spellings become 'yes'/'no', other values become None
df1 = get_df()
df2 = get_df_tagged()
result = df1.q('=diabetes', '', 'to yn', verbosity=0)
df2['diabetes'] = ['', 'no', 'yes', None, 'no', 'yes', 'yes', 'no', None, None, 'no', 'yes']
compare(result, df2.loc[:,['#', 'diabetes']])

passed test for arg: ".q(
	'=age', '', 'to int',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=age', '', 'to float',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=age', '', 'to num',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=age', '', 'to str',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=date of birth', '', 'to date',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=age', '', 'to na',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=age', '', 'to nk',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"
passed test for arg: ".q(
	'=diabetes', '', 'to yn',
	max_cols=200,
	max_rows=20,
	inplace=True,
	verbosity=0,
	)"


  return pd.to_datetime(x, dayfirst=True).date()


# qp.diff()

In [19]:
def compare(df_new, df_old, expected, mode):
    """Run qp.diff(df_new, df_old, mode) and check its .data against expected.

    Prints a pass message when the frames are equal. Otherwise displays both
    inputs, the expected frame and the actual result, then raises Exception.
    """
    result = qp.diff(df_new, df_old, mode, verbosity=0).data
    if not result.equals(expected):
        # show everything needed to debug the mismatch before failing
        labelled_frames = (
            ('new:', df_new),
            ('old:', df_old),
            ('expected:', expected),
            ('result:', result),
        )
        for label, frame in labelled_frames:
            print(label)
            display(frame)
        raise Exception(f'failed test for mode: "{mode}"')
    print(f'passed test for mode: "{mode}"')


# sample frames provided by qplib (their content is shown by the following cell)
df_new, df_old = qp.get_dfs()


# expected result for mode 'new': rows and columns of df_new, with change
# summaries in the '#' row and '#' column
result_new = pd.DataFrame(columns=['#', 'd', 'b', 'a'], index=['#', 'y','x2','z'])

result_new.loc['#', '#'] = ''
result_new.loc['#', 'd'] = 'added col'
result_new.loc['#', 'b'] = '<br>vals added: 1'
result_new.loc['#', 'a'] = '<br>vals removed: 1<br>vals changed: 1'

result_new.loc['y', '#'] = '<br>vals changed: 1'
result_new.loc['y', 'd'] = 2.0
result_new.loc['y', 'b'] = 2.0
result_new.loc['y', 'a'] = 0.0

result_new.loc['x2', '#'] = 'added row'
result_new.loc['x2', 'd'] = 1.0
result_new.loc['x2', 'b'] = 1.0
result_new.loc['x2', 'a'] = 1.0

result_new.loc['z', '#'] = '<br>vals added: 1<br>vals removed: 1'
result_new.loc['z', 'd'] = 3.0
result_new.loc['z', 'b'] = 3.0
result_new.loc['z', 'a'] = np.nan



# expected result for mode 'old': rows and columns of df_old
result_old = pd.DataFrame(columns=['#', 'a', 'b', 'c'], index=['#', 'x','y','z'])

result_old.loc['#', '#'] = ''
result_old.loc['#', 'a'] = '<br>vals removed: 1<br>vals changed: 1'
result_old.loc['#', 'b'] = '<br>vals added: 1'
result_old.loc['#', 'c'] = 'removed col'

result_old.loc['x', '#'] ='removed row'
result_old.loc['x', 'a'] = 1.0
result_old.loc['x', 'b'] = 1.0
result_old.loc['x', 'c'] = 1.0

result_old.loc['y', '#'] = '<br>vals changed: 1'
result_old.loc['y', 'a'] = 2.0
result_old.loc['y', 'b'] = 2.0
result_old.loc['y', 'c'] = 2.0

result_old.loc['z', '#'] = '<br>vals added: 1<br>vals removed: 1'
result_old.loc['z', 'a'] = 3.0
result_old.loc['z', 'b'] = None
result_old.loc['z', 'c'] = 3.0



# expected result for mode 'mix': rows and columns of both frames
result_mix = pd.DataFrame(columns=['#', 'd', 'b', 'a', 'c'], index=['#', 'y', 'x2', 'z', 'x'])

result_mix.loc['#', '#'] = ''
result_mix.loc['#', 'd'] = 'added col'
result_mix.loc['#', 'b'] = '<br>vals added: 1'
result_mix.loc['#', 'a'] = '<br>vals removed: 1<br>vals changed: 1'
result_mix.loc['#', 'c'] = 'removed col'

result_mix.loc['y', '#'] = '<br>vals changed: 1'
result_mix.loc['y', 'd'] = 2.0
result_mix.loc['y', 'b'] = 2.0
result_mix.loc['y', 'a'] = 0.0
result_mix.loc['y', 'c'] = 2.0

result_mix.loc['x2', '#'] = 'added row'
result_mix.loc['x2', 'd'] = 1.0
result_mix.loc['x2', 'b'] = 1.0
result_mix.loc['x2', 'a'] = 1.0
result_mix.loc['x2', 'c'] = np.nan

result_mix.loc['z', '#'] = '<br>vals added: 1<br>vals removed: 1'
result_mix.loc['z', 'd'] = 3.0
result_mix.loc['z', 'b'] = 3.0
result_mix.loc['z', 'a'] = np.nan
result_mix.loc['z', 'c'] = 3.0

result_mix.loc['x', '#'] ='removed row'
result_mix.loc['x', 'd'] = None
result_mix.loc['x', 'b'] = 1.0
result_mix.loc['x', 'a'] = 1.0
result_mix.loc['x', 'c'] = 1.0




# expected result for mode 'new+': like 'new', plus one '#<col>' column per
# data column that holds the old value of each changed cell
result_new_plus = pd.DataFrame(columns=['#', 'd', '#d', 'b', '#b', 'a', '#a'], index=['#', 'y','x2','z'])

result_new_plus.loc['#', '#'] = ''
result_new_plus.loc['#', 'd'] = 'added col'
result_new_plus.loc['#', '#d'] = ''
result_new_plus.loc['#', 'b'] = '<br>vals added: 1'
result_new_plus.loc['#', '#b'] = ''
result_new_plus.loc['#', 'a'] = '<br>vals removed: 1<br>vals changed: 1'
result_new_plus.loc['#', '#a'] = ''


result_new_plus.loc['y', '#'] = '<br>vals changed: 1'
result_new_plus.loc['y', 'd'] = 2.0
result_new_plus.loc['y', '#d'] = ''
result_new_plus.loc['y', 'b'] = 2.0
result_new_plus.loc['y', '#b'] = ''
result_new_plus.loc['y', 'a'] = 0.0
result_new_plus.loc['y', '#a'] = '<br>old: 2.0'

result_new_plus.loc['x2', '#'] = 'added row'
result_new_plus.loc['x2', 'd'] = 1.0
result_new_plus.loc['x2', '#d'] = ''
result_new_plus.loc['x2', 'b'] = 1.0
result_new_plus.loc['x2', '#b'] = ''
result_new_plus.loc['x2', 'a'] = 1.0
result_new_plus.loc['x2', '#a'] = ''

result_new_plus.loc['z', '#'] = '<br>vals added: 1<br>vals removed: 1'
result_new_plus.loc['z', 'd'] = 3.0
result_new_plus.loc['z', '#d'] = ''
result_new_plus.loc['z', 'b'] = 3.0
result_new_plus.loc['z', '#b'] = '<br>old: None'
result_new_plus.loc['z', 'a'] = np.nan
result_new_plus.loc['z', '#a'] = '<br>old: 3.0'


# uncomment to inspect the actual diffs when a comparison fails
# display(qp.diff(df_new, df_old, 'new', verbose=True))
# display(qp.diff(df_new, df_old, 'old', verbose=True))
# display(qp.diff(df_new, df_old, 'mix', verbose=True))
# display(qp.diff(df_new, df_old, 'new+', verbose=True))

compare(df_new, df_old, result_new, 'new')
compare(df_new, df_old, result_old, 'old')
compare(df_new, df_old, result_mix, 'mix')
compare(df_new, df_old, result_new_plus, 'new+')




passed test for mode: "new"
passed test for mode: "old"
passed test for mode: "mix"
passed test for mode: "new+"


In [20]:
# show both input frames, then the 'mix' diff as the cell result for visual inspection
display(df_old)
display(df_new)

qp.diff(df_new, df_old, 'mix')

Unnamed: 0,a,b,c
x,1.0,1.0,1.0
y,2.0,2.0,2.0
z,3.0,,3.0


Unnamed: 0,d,b,a
y,2.0,2.0,0.0
x2,1.0,1.0,1.0
z,3.0,3.0,


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': np.int64(1),
 'vals removed': np.int64(1),
 'vals changed': np.int64(1)}

Unnamed: 0,#,d,b,a,c
#,,added col,vals added: 1,vals removed: 1 vals changed: 1,removed col
y,vals changed: 1,2.000000,2.000000,0.000000,2.000000
x2,added row,1.000000,1.000000,1.000000,
z,vals added: 1 vals removed: 1,3.000000,3.000000,,3.000000
x,removed row,,1.000000,1.000000,1.000000


# df.format()

In [21]:
def compare(df, result, expected):
    """Check that result equals expected.

    On success a pass message is printed and df is displayed. On mismatch
    result and expected are displayed and an Exception is raised.
    """
    # NOTE(review): the message text says "finding all errors in df" although
    # the calls in this cell check df.format(); kept unchanged here.
    if not result.equals(expected):
        for label, frame in (('result:', result), ('expected:', expected)):
            print(label)
            display(frame)
        raise Exception(f'failed test for finding all errors in df')
    print(f'passed test for finding all errors in df')
    display(df)



# df.format() on empty frames: the result is expected to contain a '#'
# column and a '#' row filled with empty strings

df = pd.DataFrame(columns=['a', 'b', 'c'])
result = df.format()
expected = pd.DataFrame('', columns=['#', 'a', 'b', 'c'], index=['#'])
compare(df, result, expected)


# frame that already has a '#' row and column (its cells start out empty, not '')
df = pd.DataFrame(columns=['#', 'a', 'b', 'c'], index=['#'])
result = df.format()
expected = pd.DataFrame('', columns=['#', 'a', 'b', 'c'], index=['#'])
compare(df, result, expected)


# leading and trailing whitespace in column names is expected to be stripped
df = pd.DataFrame(columns=[' a', 'b ', ' c ', 'a b c '])
result = df.format()
expected = pd.DataFrame('', columns=['#', 'a', 'b', 'c', 'a b c'], index=['#'])
compare(df, result, expected)


# '&&', '//' and '>>' are used as connectors in the query strings above; in
# column names they are expected to be broken up by a space
df = pd.DataFrame(columns=['&&a', 'b&&', '&&c&&', 'a&&b&&c&&'])
result = df.format()
expected = pd.DataFrame('', columns=['#', '& &a', 'b& &', '& &c& &', 'a& &b& &c& &'], index=['#'])
compare(df, result, expected)


df = pd.DataFrame(columns=['//a', 'b//', '//c//', 'a//b//c//'])
result = df.format()
expected = pd.DataFrame('', columns=['#', '/ /a', 'b/ /', '/ /c/ /', 'a/ /b/ /c/ /'], index=['#'])
compare(df, result, expected)


df = pd.DataFrame(columns=['>>a', 'b>>', '>>c>>', 'a>>b>>c>>'])
result = df.format()
expected = pd.DataFrame('', columns=['#', '> >a', 'b> >', '> >c> >', 'a> >b> >c> >'], index=['#'])
compare(df, result, expected)

created dataframe qp.util.logs for tracking log entries
use qp.log(message, level, source) or qp.log(message) to add log entries
logs are saved in qp.util.logs


0,1,2,3,4,5
0,info,"striping column headers of leading and trailing whitespace, replacing ""//"" with ""/ /"", ""&&"" with ""& &"" and "">>"" with ""> >""",qp.df.format(),,2024-07-07 13:21:37.417078


0,1,2,3,4,5
1,info,adding metadata row and column,qp.df.format(),,2024-07-07 13:21:37.421078


passed test for finding all errors in df


Unnamed: 0,a,b,c


0,1,2,3,4,5
2,info,"striping column headers of leading and trailing whitespace, replacing ""//"" with ""/ /"", ""&&"" with ""& &"" and "">>"" with ""> >""",qp.df.format(),,2024-07-07 13:21:37.429081


0,1,2,3,4,5
3,info,adding metadata row and column,qp.df.format(),,2024-07-07 13:21:37.432585


passed test for finding all errors in df


Unnamed: 0,#,a,b,c
#,,,,


0,1,2,3,4,5
4,info,"striping column headers of leading and trailing whitespace, replacing ""//"" with ""/ /"", ""&&"" with ""& &"" and "">>"" with ""> >""",qp.df.format(),,2024-07-07 13:21:37.439587


0,1,2,3,4,5
5,info,adding metadata row and column,qp.df.format(),,2024-07-07 13:21:37.442588


passed test for finding all errors in df


Unnamed: 0,a,b,c,a b c


0,1,2,3,4,5
6,info,"striping column headers of leading and trailing whitespace, replacing ""//"" with ""/ /"", ""&&"" with ""& &"" and "">>"" with ""> >""",qp.df.format(),,2024-07-07 13:21:37.449590


0,1,2,3,4,5
7,info,adding metadata row and column,qp.df.format(),,2024-07-07 13:21:37.453592


passed test for finding all errors in df


Unnamed: 0,&&a,b&&,&&c&&,a&&b&&c&&


0,1,2,3,4,5
8,info,"striping column headers of leading and trailing whitespace, replacing ""//"" with ""/ /"", ""&&"" with ""& &"" and "">>"" with ""> >""",qp.df.format(),,2024-07-07 13:21:37.460593


0,1,2,3,4,5
9,info,adding metadata row and column,qp.df.format(),,2024-07-07 13:21:37.464082


passed test for finding all errors in df


Unnamed: 0,//a,b//,//c//,a//b//c//


0,1,2,3,4,5
10,info,"striping column headers of leading and trailing whitespace, replacing ""//"" with ""/ /"", ""&&"" with ""& &"" and "">>"" with ""> >""",qp.df.format(),,2024-07-07 13:21:37.471083


0,1,2,3,4,5
11,info,adding metadata row and column,qp.df.format(),,2024-07-07 13:21:37.474084


passed test for finding all errors in df


Unnamed: 0,>>a,b>>,>>c>>,a>>b>>c>>


# df.save() and df.load()

In [22]:
#remove old test files
# Date stamps used in the names of the test and archive files.
# A single timestamp is taken and reused so that all stamps refer to the
# same moment; separate now() calls could straddle midnight and produce
# file names that do not belong together.
_now = datetime.datetime.now()
today1 = _now.strftime('%Y_%m_%d')
today2 = _now.strftime('%d_%m_%Y')
date0 = qp.date('2000_01_01').strftime('%Y_%m_%d')
# stamps 400, 40, 8 and 1 day(s) in the past
date1 = (_now - datetime.timedelta(days=400)).date().strftime('%Y_%m_%d')
date2 = (_now - datetime.timedelta(days=40)).date().strftime('%Y_%m_%d')
date3 = (_now - datetime.timedelta(days=8)).date().strftime('%Y_%m_%d')
date4 = (_now - datetime.timedelta(days=1)).date().strftime('%Y_%m_%d')

def clean():
    """Delete files and the test/archive folder left over from earlier test runs.

    Paths that do not exist are skipped. Reads the module-level date stamps
    today1, today2 and date0..date4.
    """
    leftovers = (
        # files saved to the working directory
        'df.xlsx',
        'df1.xlsx',

        # files saved to the test folder
        'test/df1.xlsx',
        'test/df1b.xlsx',
        'test/df2.xlsx',

        # archived copies
        f'archive/df1_{today1}.xlsx',
        f'archive/df1_{today2}.xlsx',
        f'archive/df1b_{today1}.xlsx',

        f'archive/df_{today1}.xlsx',
        f'archive/df_{today2}.xlsx',
        f'archive/df_{date0}.xlsx',
        f'archive/df_{date1}.xlsx',
        f'archive/df_{date2}.xlsx',
        f'archive/df_{date3}.xlsx',
        f'archive/df_{date4}.xlsx',

        # archived copies inside the test folder
        f'test/archive/df1_{today1}.xlsx',
        f'test/archive/df1b_{today1}.xlsx',
        f'test/archive/df2_{today1}.xlsx',
    )

    # filter() is lazy, so each path is checked right before it is removed
    for path in filter(os.path.isfile, leftovers):
        os.remove(path)

    archive_dir = 'test/archive'
    if os.path.isdir(archive_dir):
        shutil.rmtree(archive_dir)
clean()



#four single-cell dataframes, identical except for the stored value
df1, df2, df3, df4 = (pd.DataFrame({'a':[value]}) for value in (1, 2, 3, 4))


#default: saving without a path is expected to create "df.xlsx"
df1.save()
if not qp.isfile('df.xlsx'):
    raise Exception('failed test for default saving behaviour')
print('passed test for default saving behaviour')

df1a = qp.load('df.xlsx')
if not df1a.equals(df1):
    raise Exception('failed test for loading from default file')
print('passed test for loading from default file')




#specific file: explicit file name in the working directory
df1.save('df1.xlsx')
if not qp.isfile('df1.xlsx'):
    raise Exception('failed test for saving to a specific file')
print('passed test for saving to a specific file')

df1b = qp.load('df1.xlsx')
if not df1b.equals(df1):
    raise Exception('failed test for loading from a specific file')
print('passed test for loading from a specific file')




#file in folder: explicit file name inside the "test" folder
df1.save('test/df1.xlsx')
if not qp.isfile('test/df1.xlsx'):
    raise Exception('failed test for saving to a specific file in folder')
print('passed test for saving to a specific file in folder')

df1c = qp.load('test/df1.xlsx')
if not df1c.equals(df1):
    raise Exception('failed test for loading from a specific file in folder')
print('passed test for loading from a specific file in folder')




#specific sheet: round trip through a named sheet of an existing file
df1.save('df1.xlsx', sheet='data2')
df1d = qp.load('df1.xlsx', sheet='data2')
if not df1d.equals(df1):
    raise Exception('failed test for saving and loading to and from a specific sheet')
print('passed test for saving and loading to and from a specific sheet')




#overwriting sheets: the file holds 1 before df2 is saved and 2 afterwards
df1old = qp.load('df1.xlsx')
df2.save('df1.xlsx')
df1new = qp.load('df1.xlsx')
if not (df1old.loc[0, 'a'] == 1 and df1new.loc[0, 'a'] == 2):
    raise Exception('failed test for overwriting sheets')
print('passed test for overwriting sheets')
df1.save('df1.xlsx')  #restore previous state



#archiving
#the earlier saves to "df1.xlsx" already archived a copy under today's date.
#that copy is removed first, otherwise the check below would pass even
#if saving without a file extension did not archive anything.
today = datetime.datetime.now().strftime('%Y_%m_%d')
if os.path.isfile(f'archive/df1_{today}.xlsx'):
    os.remove(f'archive/df1_{today}.xlsx')
df1.save('df1')
if qp.isfile(f'archive/df1_{today}.xlsx'):
    print('passed test for archiving file')
else:
    raise Exception('failed test for archiving file')


#archiving nested folder
#the archive folder next to the saved file has to exist before saving
os.mkdir('test/archive')
df1.save('test/df1')
if qp.isfile(f'test/archive/df1_{today}.xlsx'):
    print('passed test for archiving in nested folder')
else:
    raise Exception('failed test for archiving in nested folder')





#archiving with different date format
#the archive copy is expected to carry the date in the format passed as datefmt
today = datetime.datetime.now().strftime('%d_%m_%Y')
df1.save('test/df1.xlsx', sheet='data1', datefmt='%d_%m_%Y')
if qp.isfile(f'test/archive/df1_{today}.xlsx'):
    print('passed test for archiving with different date format')
else:
    raise Exception('failed test for archiving with different date format')



#most recent file
#each archive file stores its own date string in cell "a",
#so the loaded value shows which file was picked
if os.path.isfile(f'archive/df_{today2}.xlsx'):
    os.remove(f'archive/df_{today2}.xlsx')
pd.DataFrame({'a':[today1]}).to_excel(f'archive/df_{today1}.xlsx', index=False)
pd.DataFrame({'a':[date1]}).to_excel(f'archive/df_{date1}.xlsx', index=False)

if qp.load('archive/df', sheet='Sheet1', index=False).loc[0, 'a'] != today1:
    raise Exception('failed test for loading most recent file')
print('passed test for loading most recent file')



#most recent file version 2
#same expectation, but with before='now' passed explicitly
if qp.load('archive/df', sheet='Sheet1', before='now', index=False).loc[0, 'a'] != today1:
    raise Exception('failed test for loading most recent file explicitly')
print('passed test for loading most recent file explicitly')



#most recent file with different date format
#today's file is replaced by one that is named with the day first date format
if os.path.isfile(f'archive/df_{today1}.xlsx'):
    os.remove(f'archive/df_{today1}.xlsx')
pd.DataFrame({'a':[today2]}).to_excel(f'archive/df_{today2}.xlsx', index=False)
pd.DataFrame({'a':[date1]}).to_excel(f'archive/df_{date1}.xlsx', index=False)

if qp.load('archive/df', sheet='Sheet1', index=False).loc[0, 'a'] != today2:
    raise Exception('failed test for loading most recent file with different date format')
print('passed test for loading most recent file with different date format')



#file before specific date
#the file dated 2000_01_01 is the only one older than the given date
pd.DataFrame({'a':[date0]}).to_excel(f'archive/df_{date0}.xlsx', index=False)
if qp.load('archive/df', sheet='Sheet1', before='2000_01_02', index=False).loc[0, 'a'] != date0:
    raise Exception('failed test for loading most recent file from before specific date')
print('passed test for loading most recent file from before specific date')


#file before this year
#date1 lies 400 days in the past
if qp.load('archive/df', sheet='Sheet1', before='this year', index=False).loc[0, 'a'] != date1:
    raise Exception('failed test for loading most recent file from before this year')
print('passed test for loading most recent file from before this year')



#file before this month
#date2 lies 40 days in the past
pd.DataFrame({'a':[date2]}).to_excel(f'archive/df_{date2}.xlsx', index=False)
if qp.load('archive/df', sheet='Sheet1', before='this month', index=False).loc[0, 'a'] != date2:
    raise Exception('failed test for loading most recent file from before this month')
print('passed test for loading most recent file from before this month')



#file before this week
#date3 lies 8 days in the past
pd.DataFrame({'a':[date3]}).to_excel(f'archive/df_{date3}.xlsx', index=False)
if qp.load('archive/df', sheet='Sheet1', before='this week', index=False).loc[0, 'a'] != date3:
    raise Exception('failed test for loading most recent file from before this week')
print('passed test for loading most recent file from before this week')



#file before this day
#date4 is yesterday
pd.DataFrame({'a':[date4]}).to_excel(f'archive/df_{date4}.xlsx', index=False)
if qp.load('archive/df', sheet='Sheet1', before='this day', index=False).loc[0, 'a'] != date4:
    raise Exception('failed test for loading most recent file from before this day')
print('passed test for loading most recent file from before this day')



#file before today
#before='today' is expected to pick yesterday's file (date4).
#the messages name "today", so that this test can be told apart from
#the "before this day" test in the output.
if qp.load('archive/df', sheet='Sheet1', before='today', index=False).loc[0, 'a'] == date4:
    print('passed test for loading most recent file from before today')
else:
    raise Exception('failed test for loading most recent file from before today')


# df1.save('test/df3.xlsx', sheet='sheet2', archive='source', datefmt='%Y_%m_%d')
# df1.save('test/df3.xlsx', sheet='sheet2', archive='destination', datefmt='%Y_%m_%d')
# df1.save('df.xlsx', sheet='sheet2', archive='both', datefmt='%Y_%m_%d')

0,1,2,3,4,5
12,info,"saving df to ""df.xlsx"" in sheet ""data1""",df.save(),,2024-07-07 13:21:37.505982


0,1,2,3,4,5
13,info,"archiving df to ""c:\Users\Legion16248\Desktop\data\mv\qplib/archive/df_2024_07_07.xlsx"" in sheet ""data1""",df.save(),,2024-07-07 13:21:37.822563


passed test for default saving behaviour
passed test for loading from default file


0,1,2,3,4,5
14,info,"saving df to ""df1.xlsx"" in sheet ""data1""",df.save(),,2024-07-07 13:21:37.848569


0,1,2,3,4,5
15,info,"archiving df to ""c:\Users\Legion16248\Desktop\data\mv\qplib/archive/df1_2024_07_07.xlsx"" in sheet ""data1""",df.save(),,2024-07-07 13:21:37.860572


passed test for saving to a specific file
passed test for loading from a specific file


0,1,2,3,4,5
16,info,"saving df to ""test/df1.xlsx"" in sheet ""data1""",df.save(),,2024-07-07 13:21:37.877080


0,1,2,3,4,5
17,Warning,"did not find archive folder ""test/archive""",df.save(),,2024-07-07 13:21:37.887082


passed test for saving to a specific file in folder
passed test for loading from a specific file in folder


0,1,2,3,4,5
18,Warning,"file ""df1.xlsx"" already exists. data in sheet ""data2"" will be overwritten",df.save(),,2024-07-07 13:21:37.896085


0,1,2,3,4,5
19,Warning,"archive file ""c:\Users\Legion16248\Desktop\data\mv\qplib/archive/df1_2024_07_07.xlsx"" already exists. data in sheet ""data2"" will be overwritten",df.save(),,2024-07-07 13:21:37.911088


passed test for saving and loading to and from a specific sheet


0,1,2,3,4,5
20,Warning,"file ""df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),,2024-07-07 13:21:37.941600


0,1,2,3,4,5
21,Warning,"archive file ""c:\Users\Legion16248\Desktop\data\mv\qplib/archive/df1_2024_07_07.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),,2024-07-07 13:21:37.961604


passed test for overwriting sheets


0,1,2,3,4,5
22,Warning,"file ""df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),,2024-07-07 13:21:37.988610


0,1,2,3,4,5
23,Warning,"archive file ""c:\Users\Legion16248\Desktop\data\mv\qplib/archive/df1_2024_07_07.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),,2024-07-07 13:21:38.002613


0,1,2,3,4,5
24,Warning,"file ""df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),,2024-07-07 13:21:38.018617


0,1,2,3,4,5
25,Warning,"archive file ""c:\Users\Legion16248\Desktop\data\mv\qplib/archive/df1_2024_07_07.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),,2024-07-07 13:21:38.039622


passed test for archiving file


0,1,2,3,4,5
26,Warning,"file ""test/df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),,2024-07-07 13:21:38.060626


0,1,2,3,4,5
27,info,"archiving df to ""test/archive/df1_2024_07_07.xlsx"" in sheet ""data1""",df.save(),,2024-07-07 13:21:38.073136


passed test for archiving in nested folder


0,1,2,3,4,5
28,Warning,"file ""test/df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),,2024-07-07 13:21:38.084138


0,1,2,3,4,5
29,info,"archiving df to ""test/archive/df1_07_07_2024.xlsx"" in sheet ""data1""",df.save(),,2024-07-07 13:21:38.102142


passed test for archiving with different date format


0,1,2,3,4,5
30,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.138150


0,1,2,3,4,5
31,info,"loading ""archive/df_2024_07_07.xlsx""",df.load(),,2024-07-07 13:21:38.144151


passed test for loading most recent file


0,1,2,3,4,5
32,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.158154


0,1,2,3,4,5
33,info,"loading ""archive/df_2024_07_07.xlsx""",df.load(),,2024-07-07 13:21:38.164156


passed test for loading most recent file explicitly


0,1,2,3,4,5
34,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.193162


0,1,2,3,4,5
35,info,"loading ""archive/df_07_07_2024.xlsx""",df.load(),,2024-07-07 13:21:38.200164


passed test for loading most recent file with different date format


0,1,2,3,4,5
36,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.229170


0,1,2,3,4,5
37,info,"loading ""archive/df_2000_01_01.xlsx""",df.load(),,2024-07-07 13:21:38.235172


passed test for loading most recent file from before specific date


0,1,2,3,4,5
38,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.250175


0,1,2,3,4,5
39,info,"loading ""archive/df_2023_06_03.xlsx""",df.load(),,2024-07-07 13:21:38.255176


passed test for loading most recent file from before this year


0,1,2,3,4,5
40,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.282069


0,1,2,3,4,5
41,info,"loading ""archive/df_2024_05_28.xlsx""",df.load(),,2024-07-07 13:21:38.290072


passed test for loading most recent file from before this month


0,1,2,3,4,5
42,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.316077


0,1,2,3,4,5
43,info,"loading ""archive/df_2024_06_29.xlsx""",df.load(),,2024-07-07 13:21:38.323079


passed test for loading most recent file from before this week


0,1,2,3,4,5
44,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.349589


0,1,2,3,4,5
45,info,"loading ""archive/df_2024_07_06.xlsx""",df.load(),,2024-07-07 13:21:38.356590


passed test for loading most recent file from before this day


0,1,2,3,4,5
46,info,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),,2024-07-07 13:21:38.371097


0,1,2,3,4,5
47,info,"loading ""archive/df_2024_07_06.xlsx""",df.load(),,2024-07-07 13:21:38.378099


passed test for loading most recent file from before this day


# "bashlike" wrappers

In [23]:

#start from a clean slate in case an earlier run left the directory behind
if os.path.isdir('test/dir1'):
    shutil.rmtree('test/dir1')


try:
    if qp.isdir('test/dir1') is False:
        print('passed test for non existing directory')
    else:
        raise Exception('failed test for non existing directory')


    qp.mkdir('test/dir1')
    if qp.isdir('test/dir1') is True:
        print('passed test for creating and checking for existing directory')
    else:
        raise Exception('failed test for creating and checking for existing directory')


    #os.path.basename() splits on the separator of the current platform,
    #splitting on a backslash only worked on windows
    qp.cd('test/dir1')
    if os.path.basename(qp.pwd()) == 'dir1':
        print('passed test for changing directory and finding path to current directory')
    else:
        raise Exception('failed test for changing directory and finding path to current directory')


    qp.mkdir('dir2')
    qp.cd('dir2')
    result1 = os.path.basename(qp.pwd())

    qp.cd('..')
    result2 = os.path.basename(qp.pwd())

    if result1 == 'dir2' and result2 == 'dir1':
        print('passed test for going back and forth in directory structure')
    else:
        raise Exception('failed test for going back and forth in directory structure')

finally:
    #always return to the original working directory and remove the test
    #directories, otherwise a failed check would leave the notebook inside
    #"test/dir1" and break the relative paths used by the other cells
    os.chdir(working_directory)
    if os.path.isdir('test/dir1'):
        shutil.rmtree('test/dir1')

passed test for non existing directory


0,1,2,3,4,5
48,info,"created directory ""test/dir1""","qp.mkdir(""test/dir1"")",,2024-07-07 13:21:38.397990


passed test for creating and checking for existing directory


0,1,2,3,4,5
49,info,moved from c:\Users\Legion16248\Desktop\data\mv\qplib to c:\Users\Legion16248\Desktop\data\mv\qplib\test\dir1,"qp.cd(""test/dir1"")",,2024-07-07 13:21:38.401991


passed test for changing directory and finding path to current directory


0,1,2,3,4,5
50,info,"created directory ""dir2""","qp.mkdir(""dir2"")",,2024-07-07 13:21:38.404991


0,1,2,3,4,5
51,info,moved from c:\Users\Legion16248\Desktop\data\mv\qplib\test\dir1 to c:\Users\Legion16248\Desktop\data\mv\qplib\test\dir1\dir2,"qp.cd(""dir2"")",,2024-07-07 13:21:38.407992


0,1,2,3,4,5
52,info,moved from c:\Users\Legion16248\Desktop\data\mv\qplib\test\dir1\dir2 to c:\Users\Legion16248\Desktop\data\mv\qplib\test\dir1,"qp.cd(""c:\Users\Legion16248\Desktop\data\mv\qplib\test\dir1"")",,2024-07-07 13:21:38.410993


passed test for going back and forth in directory structure


# save results

In [24]:
#remove the files written by the df.save() and df.load() tests and the "test/archive" folder
clean()

# # create html version of this file for documentation
# !jupyter nbconvert --to html testing.ipynb

# now = datetime.datetime.now().strftime('%Y_%m_%d')
# shutil.move('testing.html', f'test/test_report_{now}.html')


# temp

In [25]:
import pandas as pd
import numpy as np
import copy
import os
import sys
import shutil
import datetime
import qplib as qp
from qplib import log

# if 'cards' not in globals():
#     cards = pd.read_csv('data/cards.csv')
    # cards1 = cards.q('=toughness', '>1 && <4', modify='int(x)+10', inplace=False)

#NOTE(review): qp.get_df() presumably returns a sample dataframe shipped with qplib, confirm.
#according to the logs below, format() cleans up the column headers and adds a metadata row and column.
df = qp.get_df().format()

# cards = cards.format()


0,1,2,3,4,5
53,info,"striping column headers of leading and trailing whitespace, replacing ""//"" with ""/ /"", ""&&"" with ""& &"" and "">>"" with ""> >""",qp.df.format(),,2024-07-07 13:21:38.442866


0,1,2,3,4,5
54,info,adding metadata row and column,qp.df.format(),,2024-07-07 13:21:38.447867


In [32]:
#deliberately messy sample data: apart from "ID" the columns contain inconsistent
#capitalisation, formats, units or markers for missing values
df = pd.DataFrame(data={
    #integer identifiers
    'ID': [
        10001, 10002, 10003, 20001, 20002, 20003,
        30001, 30002, 30003, 30004, 30005,
        ],
    #inconsistent capitalisation
    'name': [
        'John Doe', 'Jane Smith', 'Alice Johnson', 'Bob Brown', 'eva white', 'Frank miller',
        'Grace TAYLOR', 'Harry Clark', 'IVY GREEN', 'JAck Williams', 'john Doe',
        ],
    #many different date formats
    'date of birth': [
        '1995-01-02', '1990/09/14', '1985.08.23', '19800406', '05-11-2007', '06-30-1983',
        '28-05-1975', '1960Mar08', '1955-Jan-09', '1950 Sep 10', '1945 October 11',
        ],
    #numbers, numeric strings, number words and several spellings of "missing"
    'age': [
        -25, '30', np.nan, None, '40.0', 'forty-five',
        'nan', 'unk', '', 'unknown', 35,
        ],
    #abbreviations, full words and typos
    'gender': [
        'M', 'F', 'Female', 'Male', 'Other', 'm',
        'ff', 'NaN', None, 'Mal', 'female',
        ],
    #with and without units, different units, implausible values
    'height': [
        170, '175.5cm', None, '280', 'NaN', '185',
        '1', '6ft 1in', -10, '', 200,
        ],
    'weight': [
        70.2, '68', '72.5lb', 'na', '', '75kg',
        None, '80.3', '130lbs', '82', -65,
        ],
    'bp systole': [
        '20', 130, 'NaN', '140', '135mmhg', '125',
        'NAN', '122', '', 130, '45',
        ],
    'bp diastole': [
        80, '85', 'nan', '90mmHg', np.nan, '75',
        'NaN', None, '95', '0', 'NaN',
        ],
    #categories with typos and inconsistent capitalisation
    'cholesterol': [
        'Normal', 'Highe', 'NaN', 'GOOD', 'n.a.', 'High',
        'Normal', 'n/a', 'high', '', 'Normal',
        ],
    #several spellings of yes and no
    'diabetes': [
        'No', 'yes', 'N/A', 'No', 'Y', 'Yes',
        'NO', None, 'NaN', 'n', 'Yes',
        ],
    #different units and free text
    'dose': [
        '10kg', 'NaN', '15 mg once a day', '20mg', '20 Mg', '25g',
        'NaN', None, '30 MG', '35', '40ml',
        ],
    })


#NOTE(review): qi() presumably opens qplib's interactive query interface, the recorded output below is a widget layout. confirm
df.qi()



VBox(children=(Tab(children=(VBox(children=(GridBox(children=(Label(value='filter columns'), Label(value='filt…

HBox(children=(Output(),))