# qplib test suite

Some of the tests are expected to produce colored info, warning or error logs. These logs are the intended behaviour of the tested functions under the test conditions and do not indicate whether a test passed or failed. The relevant messages are instead those starting with "passed test..." or "failed test...".  
If any test fails, the script stops and does not save a test_report.

# imports

In [45]:
import pandas as pd
import numpy as np
import copy
import os
import shutil
import datetime
import qplib as qp
from qplib import log

working_directory = os.getcwd()



# types

In [46]:
def compare(result, expected):
    """
    Check one type conversion result against its expected value.

    Prints a "passed test..." line when result == expected holds,
    otherwise raises an Exception carrying the "failed test..." message.
    """
    if not result == expected:
        raise Exception(f'failed test for type conversion to: {expected}')
    print(f'passed test for type conversion to: {expected}')

## error raising

In [47]:
#checking if correct error is raised
type_funcs = [
    qp.na,
    qp.nk,
    qp.num,
    qp.yn,
    qp.date,
    qp.datetime,
    ]

for func in type_funcs:
    #a function's repr is "<function QUALNAME at 0x...>", so __qualname__ is the
    #same name the messages used to extract from str(func)
    func_name = func.__qualname__
    try:
        func('abc', errors='raise')
    except ValueError:
        print(f'passed test for raising correct error for: qp.{func_name}("abc")')
    else:
        #a conversion that silently accepts "abc" must fail the test;
        #any exception other than ValueError simply propagates
        raise Exception(f'failed test for raising correct error for: qp.{func_name}("abc")')

passed test for raising correct error for: qp.qp_na("abc")
passed test for raising correct error for: qp.qp_nk("abc")
passed test for raising correct error for: qp.qp_num("abc")
passed test for raising correct error for: qp.qp_yn("abc")
passed test for raising correct error for: qp.qp_date("abc")
passed test for raising correct error for: qp.qp_datetime("abc")


## num

In [48]:
#numbers

#failed conversions: the tests expect na for errors='coerce', the unchanged
#input for errors='ignore' and the given string for any other value of errors
compare(qp.num('abc', errors='coerce', na=None), None)
compare(qp.num('abc', errors='ignore'), 'abc')
compare(qp.num('abc', errors='test'), 'test')

#numbers given as strings
num_cases_str = [
    ('1', 1), ('1.0', 1.0), ('1.1', 1.1),
    ('0', 0), ('0.0', 0.0), ('0.1', 0.1),
    ('-1', -1), ('-1.0', -1.0), ('-1.1', -1.1),
    ]
for num_text, num_expected in num_cases_str:
    compare(qp.num(num_text), num_expected)

#values which already are numbers are expected to come back unchanged
num_cases_raw = [
    1, 1.0, 1.1,
    0, 0.0, 0.1,
    -1, -1.0, -1.1,
    ]
for num_value in num_cases_raw:
    compare(qp.num(num_value), num_value)


passed test for type conversion to: None
passed test for type conversion to: abc
passed test for type conversion to: test
passed test for type conversion to: 1
passed test for type conversion to: 1.0
passed test for type conversion to: 1.1
passed test for type conversion to: 0
passed test for type conversion to: 0.0
passed test for type conversion to: 0.1
passed test for type conversion to: -1
passed test for type conversion to: -1.0
passed test for type conversion to: -1.1
passed test for type conversion to: 1
passed test for type conversion to: 1.0
passed test for type conversion to: 1.1
passed test for type conversion to: 0
passed test for type conversion to: 0.0
passed test for type conversion to: 0.1
passed test for type conversion to: -1
passed test for type conversion to: -1.0
passed test for type conversion to: -1.1


## date

In [49]:
#date

#failed conversions
compare(qp.date('abc', errors='coerce', na=None), None)
compare(qp.date('abc', errors='ignore'), 'abc')
compare(qp.date('abc', errors='test'), 'test')

#all spellings which are expected to be read as the first of january 2020
date_texts_jan_1 = [
    '2020-01-01',
    '2020-01-01 00:00:00',

    '2020.01.01',
    '2020/01/01',
    '2020 01 01',
    '20200101',

    '2020 Jan 01',
    '2020 January 01',
    '2020 Jan 1',
    '2020 January 1',

    'Jan 01 2020',
    'January 01 2020',
    'Jan 1 2020',
    'January 1 2020',

    '01 Jan 2020',
    '01 January 2020',
    '1 Jan 2020',
    '1 January 2020',

    '01-01-2020',
    '01.01.2020',
    '01/01/2020',
    '01 01 2020',
    ]
for date_text in date_texts_jan_1:
    compare(qp.date(date_text), datetime.date(2020, 1, 1))

#second of january 2020: two digit years are expected to be read day first
date_texts_jan_2 = [
    '02-01-20',
    '02.01.20',
    '02/01/20',
    '02 01 20',

    '2020-01-02',
    '2020.01.02',
    '2020/01/02',
    '2020 01 02',
    ]
for date_text in date_texts_jan_2:
    compare(qp.date(date_text), datetime.date(2020, 1, 2))

passed test for type conversion to: None
passed test for type conversion to: abc
passed test for type conversion to: test
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion to: 2020-01-01
passed test for type conversion 

## datetime

In [50]:
#datetime

#failed conversions
compare(qp.datetime('abc', errors='coerce', na=None), None)
compare(qp.datetime('abc', errors='ignore'), 'abc')
compare(qp.datetime('abc', errors='test'), 'test')

#iso format with and without time of day
datetime_cases_iso = [
    ('2020-01-01', datetime.datetime(2020, 1, 1)),
    ('2020-01-01 00:00:00', datetime.datetime(2020, 1, 1)),
    ('2020-01-01 00:00:01', datetime.datetime(2020, 1, 1, 0, 0, 1)),
    ('2020-01-01 00:01:00', datetime.datetime(2020, 1, 1, 0, 1, 0)),
    ('2020-01-01 01:00:00', datetime.datetime(2020, 1, 1, 1, 0, 0)),
    ('2020-01-01 01:01:01', datetime.datetime(2020, 1, 1, 1, 1, 1)),
    ]
for datetime_text, datetime_expected in datetime_cases_iso:
    compare(qp.datetime(datetime_text), datetime_expected)

#all spellings which are expected to be read as midnight, first of january 2020
datetime_texts_jan_1 = [
    '2020.01.01',
    '2020/01/01',
    '2020 01 01',
    '20200101',

    '2020 Jan 01',
    '2020 January 01',
    '2020 Jan 1',
    '2020 January 1',

    'Jan 01 2020',
    'January 01 2020',
    'Jan 1 2020',
    'January 1 2020',

    '01 Jan 2020',
    '01 January 2020',
    '1 Jan 2020',
    '1 January 2020',

    '01-01-2020',
    '01.01.2020',
    '01/01/2020',
    ]
for datetime_text in datetime_texts_jan_1:
    compare(qp.datetime(datetime_text), datetime.datetime(2020, 1, 1))

#midnight, second of january 2020: two digit years are expected to be read day first
datetime_texts_jan_2 = [
    '02-01-20',
    '02.01.20',
    '02/01/20',
    '02 01 20',

    '2020-01-02',
    '2020.01.02',
    '2020/01/02',
    '2020 01 02',
    ]
for datetime_text in datetime_texts_jan_2:
    compare(qp.datetime(datetime_text), datetime.datetime(2020, 1, 2))


passed test for type conversion to: None
passed test for type conversion to: abc
passed test for type conversion to: test
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:01
passed test for type conversion to: 2020-01-01 00:01:00
passed test for type conversion to: 2020-01-01 01:00:00
passed test for type conversion to: 2020-01-01 01:01:01
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 2020-01-01 00:00:00
passed test for type conversion to: 20

## na

In [51]:
#not available

#values which are not recognized as na, plus a custom na replacement
compare(qp.na(1, errors='coerce'), None)
compare(qp.na(1, errors='ignore'), 1)
compare(qp.na(1, errors='test'), 'test')
compare(qp.na('na', na='test'), 'test')

#all spellings which are expected to be converted to None
na_texts = [
    'na', 'nA', 'Na', 'NA',

    'n.a.', 'n.a', 'N.A', 'N.A.', 'n/a', 'N/A',

    'nan', 'NaN', 'NAN', 'Nan', 'nAn', 'nAN', 'naN',

    'none', 'None', 'NONE',

    'null', 'Null', 'NULL',

    'nil', 'Nil', 'NIL',

    'not available', 'Not Available', 'NOT AVAILABLE',

    'not a number', 'Not A Number', 'NOT A NUMBER',
    ]
for na_text in na_texts:
    compare(qp.na(na_text), None)


passed test for type conversion to: None
passed test for type conversion to: 1
passed test for type conversion to: test
passed test for type conversion to: test
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for type conversion to: None
passed test for typ

## nk

In [52]:
#not known

#values which are not recognized as nk, plus a custom nk replacement
compare(qp.nk(1, errors='coerce'), None)
compare(qp.nk(1, errors='ignore'), 1)
compare(qp.nk(1, errors='test'), 'test')
compare(qp.nk('nk', nk='test'), 'test')

#all spellings which are expected to be converted to 'unknown'
nk_texts = [
    'nk', 'nK', 'Nk', 'NK',

    'n.k.', 'n.k', 'N.K', 'N.K.', 'n/k', 'N/K',

    'not known', 'Not Known', 'NOT KNOWN',

    'not known.', 'Not Known.', 'NOT KNOWN.',

    'unknown', 'Unknown', 'UNKNOWN',

    'not specified', 'Not Specified', 'NOT SPECIFIED',

    'not specified.', 'Not Specified.', 'NOT SPECIFIED.',
    ]
for nk_text in nk_texts:
    compare(qp.nk(nk_text), 'unknown')


passed test for type conversion to: None
passed test for type conversion to: 1
passed test for type conversion to: test
passed test for type conversion to: test
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
passed test for type conversion to: unknown
pas

## yn

In [53]:
#yes no

#failed conversions
compare(qp.yn('abc', errors='coerce'), None)
compare(qp.yn('abc', errors='ignore'), 'abc')
compare(qp.yn('abc', errors='test'), 'test')

#groups of inputs with the answer they are expected to be converted to,
#listed in the order in which they are checked
yn_cases = [
    (['yes', 'Yes', 'YES'], 'yes'),
    (['no', 'No', 'NO'], 'no'),

    (['y', 'Y'], 'yes'),
    (['n', 'N'], 'no'),

    (['1'], 'yes'),
    (['0'], 'no'),

    ([1], 'yes'),
    ([0], 'no'),

    (['true', 'True', 'TRUE'], 'yes'),
    (['false', 'False', 'FALSE'], 'no'),

    (['pos', 'Pos', 'POS'], 'yes'),
    (['neg', 'Neg', 'NEG'], 'no'),

    (['positive', 'Positive', 'POSITIVE'], 'yes'),
    (['negative', 'Negative', 'NEGATIVE'], 'no'),
    ]
for yn_inputs, yn_answer in yn_cases:
    for yn_input in yn_inputs:
        compare(qp.yn(yn_input), yn_answer)


passed test for type conversion to: None
passed test for type conversion to: abc
passed test for type conversion to: test
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: yes
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: no
passed test for type conversion to: yes
passed test for type conversion to: yes
passed t

## qp.dict

In [54]:
#wip

#one entry per container type, each of them holding the numbers 1 to 5
dict1 = qp.dict({
    'a': [1, 2, 3, 4, 5],
    'b': (1, 2, 3, 4, 5),
    'c': {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5},
    'd': {1, 2, 3, 4, 5},
    'e': np.array([1, 2, 3, 4, 5]),
    'f': pd.Series([1, 2, 3, 4, 5]),
    })

#the flattened values are expected to be the numbers 1 to 5, once per entry
result = dict1.values_flat()
expected = [1, 2, 3, 4, 5] * 6
if not result == expected:
    raise Exception('failed test for dict.values_flat()')
print('passed test for dict.values_flat()')


#inverting is expected to swap keys and values
dict2 = qp.dict({'a': '1', 'b': '2', 'c': '3'})
result = dict2.invert()
expected = {'1': 'a', '2': 'b', '3': 'c'}
if not result == expected:
    raise Exception('failed test for dict.invert()')
print('passed test for dict.invert()')


passed test for dict.values_flat()
passed test for dict.invert()


# pd.DataFrame.q()

In [55]:
def compare(result, expected_df, text=None):
    """
    Check the result of a query against the expected dataframe.

    result is compared with expected_df via result.equals(). On success a
    "passed test..." line is printed. On failure both dataframes are
    displayed and an Exception with a "failed test..." message is raised.
    The messages name the query code stored in result.qp.code, unless a
    custom text is given.
    """
    if result.equals(expected_df):
        if text is not None:
            print(f'passed test for: {text}')
        else:
            print(f'passed test for arg: "{result.qp.code}"')
        return

    print(f'failed test for arg: "{result.qp.code}"')

    #show both dataframes for comparison before stopping
    for caption, shown_df in (('result:', result), ('expected:', expected_df)):
        print(caption)
        display(shown_df)

    if text is not None:
        raise Exception(f'failed test for: {text}')
    raise Exception(f'failed test for arg: "{result.qp.code}"')


def get_df_simple():
    """Return a fresh 3x2 dataframe of integers with the columns 'a' and 'b'."""
    columns = {
        'a': [-1, 0, 1],
        'b': [1, 2, 3],
        }
    return pd.DataFrame(columns)

def get_df_simple_tagged():
    """
    Return a fresh copy of the simple test dataframe with a leading 'meta'
    column of empty strings and an explicitly set index of 0, 1, 2.
    """
    columns = {
        'meta': ['', '', ''],
        'a': [-1, 0, 1],
        'b': [1, 2, 3],
        }
    tagged = pd.DataFrame(columns)
    tagged.index = [0, 1, 2]
    return tagged


def get_df():
    """
    Return a fresh copy of the main test dataframe: 11 rows of patient-like
    records in 12 columns.

    The values are deliberately messy (mixed types, several date formats,
    units inside of values, different spellings of missing values).
    """
    records = {
        'ID': [10001, 10002, 10003, 20001, 20002, 20003, 30001, 30002, 30003, 30004, 30005],
        'name': ['John Doe', 'Jane Smith', 'Alice Johnson', 'Bob Brown', 'eva white', 'Frank miller', 'Grace TAYLOR', 'Harry Clark', 'IVY GREEN', 'JAck Williams', 'john Doe'],
        'date of birth': ['1995-01-02', '1990/09/14', '1985.08.23', '19800406', '05-11-2007', '06-30-1983', '28-05-1975', '1960Mar08', '1955-Jan-09', '1950 Sep 10', '1945 October 11'],
        'age': [-25, '30', np.nan, None, '40.0', 'forty-five', 'nan', 'unk', '', 'unknown', 35],
        'gender': ['M', 'F', 'Female', 'Male', 'Other', 'm', 'ff', 'NaN', None, 'Mal', 'female'],
        'height': [170, '175.5cm', None, '280', 'NaN', '185', '1', '6ft 1in', -10, '', 200],
        'weight': [70.2, '68', '72.5lb', 'na', '', '75kg', None, '80.3', '130lbs', '82', -65],
        'bp systole': ['20', 130, 'NaN', '140', '135mmhg', '125', 'NAN', '122', '', 130, '45'],
        'bp diastole': [80, '85', 'nan', '90mmHg', np.nan, '75', 'NaN', None, '95', '0', 'NaN'],
        'cholesterol': ['Normal', 'Highe', 'NaN', 'GOOD', 'n.a.', 'High', 'Normal', 'n/a', 'high', '', 'Normal'],
        'diabetes': ['No', 'yes', 'N/A', 'No', 'Y', 'Yes', 'NO', None, 'NaN', 'n', 'Yes'],
        'dose': ['10kg', 'NaN', '15 mg once a day', '20mg', '20 Mg', '25g', 'NaN', None, '30 MG', '35', '40ml'],
        }
    return pd.DataFrame(records)

def get_df_tagged():
    """
    Return the main test dataframe with an additional leading 'meta' column.

    A dataframe filled with empty strings is created first, using the index
    of get_df() and its columns preceded by 'meta'. The data from get_df()
    is then written into all columns after 'meta' by position.
    """
    source = get_df()
    tagged_columns = ['meta', *source.columns]
    tagged = pd.DataFrame('', index=source.index, columns=tagged_columns)
    tagged.iloc[:, 1:] = source.loc[:, :]
    return tagged

## filtering

In [56]:

df = get_df()


#col equality

result = df.q('date of birth', inplace=True, verbosity=0)
compare(result, df.loc[:, ['date of birth']])

result = df.q('=date of birth', verbosity=0)
compare(result, df.loc[:, ['date of birth']])

result = df.q('date of birth / age', verbosity=0)
compare(result, df.loc[:, ['date of birth', 'age']])

result = df.q('date of birth / =age', verbosity=0)
compare(result, df.loc[:, ['date of birth', 'age']])

result = df.q('=date of birth / age', verbosity=0)
compare(result, df.loc[:, ['date of birth', 'age']])

result = df.q('=date of birth / =age', verbosity=0)
compare(result, df.loc[:, ['date of birth', 'age']])

result = df.q('!=date of birth', verbosity=0)
compare(result, df.loc[:, ['ID', 'name', 'age', 'gender', 'height', 'weight', 'bp systole', 'bp diastole', 'cholesterol', 'diabetes', 'dose']])




#multi line col equality
result = df.q('', verbosity=0)
compare(result, df.loc[:, :])

result = df.q('=ID', verbosity=0)
compare(result, df.loc[:, ['ID']])

result = df.q('=ID', '', verbosity=0)
compare(result, df.loc[:, ['ID']])

result = df.q('=', verbosity=0)
compare(result, df.loc[:, []])

result = df.q(
    r"""
    =ID
    ID
    """,
    verbosity=0)
compare(result, df.loc[:, ['ID']])

result = df.q(
    r"""
    ID
    age
    """,
    verbosity=0)
compare(result, df.loc[:, ['age']])

result = df.q(
    r"""
    ID
    / age
    """,
    verbosity=0)
compare(result, df.loc[:, ['ID', 'age']])



#col contains
result = df.q('?bp', verbosity=0)
compare(result, df.loc[:, ['bp systole', 'bp diastole']])

result = df.q('?I', verbosity=0)
compare(result, df.loc[:, ['ID', 'date of birth', 'height', 'weight', 'bp diastole', 'diabetes']])

result = df.q('??I', verbosity=0)
compare(result, df.loc[:, ['ID']])




#col regex equality
result = df.q('r=.', verbosity=0)
compare(result, df.loc[:, []])

result = df.q('r=..', verbosity=0)
compare(result, df.loc[:, ['ID']])



#col multiple conditions
result = df.q('?bp / =diabetes', verbosity=0)
compare(result, df.loc[:, ['bp systole', 'bp diastole', 'diabetes']])

result = df.q('?bp / =diabetes / =cholesterol', verbosity=0)
compare(result, df.loc[:, ['bp systole', 'bp diastole', 'cholesterol', 'diabetes']])

result = df.q('?bp / =cholesterol / =diabetes', verbosity=0)
compare(result, df.loc[:, ['bp systole', 'bp diastole', 'cholesterol', 'diabetes']])

result = df.q('?bp & ?systole', verbosity=0)
compare(result, df.loc[:, ['bp systole']])

result = df.q('?bp & !?systole', verbosity=0)
compare(result, df.loc[:, ['bp diastole']])

result = df.q('?i & r=..', verbosity=0)
compare(result, df.loc[:, ['ID']])




#row types
result = df.q('name ´r is str', verbosity=0)
compare(result, df.loc[:, ['name']])

result = df.q('name ´r !is str', verbosity=0)
compare(result, df.loc[[], ['name']])

result = df.q('name ´r is num', verbosity=0)
compare(result, df.loc[[], ['name']])

result = df.q('name ´r !is num', verbosity=0)
compare(result, df.loc[[0,1,2,3,4,5,6,7,8,9,10], ['name']])

result = df.q('name ´r is na', verbosity=0)
compare(result, df.loc[[], ['name']])

result = df.q('name ´r !is na', verbosity=0)
compare(result, df.loc[[0,1,2,3,4,5,6,7,8,9,10], ['name']])

result = df.q('age ´r is na', verbosity=0)
compare(result, df.loc[[2,3,6,8], ['age']])

result = df.q('cholesterol ´r is na', verbosity=0)
compare(result, df.loc[[2,4,7,9], ['cholesterol']])

result = df.q('weight ´r is num', verbosity=0)
compare(result, df.loc[[0,1,4,6,7,9,10], ['weight']])

result = df.q('weight ´r is num & !is na', verbosity=0)
compare(result, df.loc[[0,1,7,9,10], ['weight']])

result = df.q('date of birth ´r !is date', verbosity=0)
compare(result, df.loc[[7], ['date of birth']])

result = df.q('diabetes ´r is yn', verbosity=0)
compare(result, df.loc[[0,1,3,4,5,6,9,10], ['diabetes']])

result = df.q('diabetes ´r is na / is yn', verbosity=0)
compare(result, df.loc[:, ['diabetes']])

result = df.q('diabetes ´r is yn', verbosity=0)
compare(result, df.loc[[0,1,3,4,5,6,9,10], ['diabetes']])

result = df.q('diabetes ´r is yes', verbosity=0)
compare(result, df.loc[[1,4,5,10], ['diabetes']])

result = df.q('diabetes ´r is no', verbosity=0)
compare(result, df.loc[[0,3,6,9], ['diabetes']])



#row regex equality
result = df.q('ID ´r r=1....', verbosity=0)
compare(result, df.loc[[0,1,2], ['ID']])

result = df.q('ID ´r !r=3....', verbosity=0)
compare(result, df.loc[[0,1,2,3,4,5], ['ID']])

#two words with first letter capitalized and separated by a space
result = df.q('name ´r r=\\b[A-Z][a-z]*\\s[A-Z][a-z]*\\b', verbosity=0)
compare(result, df.loc[[0,1,2,3,7], ['name']])

#all lowercase
result = df.q('name ´r r=^[^A-Z]*$', verbosity=0)
compare(result, df.loc[[4], ['name']])

#containing letters and numbers
result = df.q('dose ´r r=^(?=.*[a-zA-Z])(?=.*[0-9]).*$', verbosity=0)
compare(result, df.loc[[0,2,3,4,5,8,10], ['dose']])


#row regex search
#queries containing backslashes are written as raw strings, since "\d" in a
#normal string literal is an invalid escape sequence and triggers a warning
result = df.q('bp systole ´r r?m', verbosity=0)
compare(result, df.loc[[4], ['bp systole']])

result = df.q(r'bp systole ´r r?\D', verbosity=0)
compare(result, df.loc[[2,4,6], ['bp systole']])

result = df.q(r'bp systole ´r r?\d', verbosity=0)
compare(result, df.loc[[0,1,3,4,5,7,9,10], ['bp systole']])



#filter by multiple columns
result = df.q('id / name  ´r ?j', verbosity=0)
compare(result, df.loc[[0, 1, 2, 9, 10], ['ID', 'name']])

result = df.q('id / name  ´r ?j / ?n', verbosity=0)
compare(result, df.loc[[0, 1, 2, 3, 5, 8, 9, 10], ['ID', 'name']])

result = df.q('id / name  ´r ?j & ?n', verbosity=0)
compare(result, df.loc[[0, 1, 2, 10], ['ID', 'name']])

result = df.q('height / weight  ´r is num', verbosity=0)
compare(result, df.loc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], ['height', 'weight']])

result = df.q('height / weight  ´r any is num', verbosity=0)
compare(result, df.loc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], ['height', 'weight']])

result = df.q('height / weight  ´r anyis num', verbosity=0)
compare(result, df.loc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], ['height', 'weight']])

result = df.q('height / weight  ´r all is num', verbosity=0)
compare(result, df.loc[[0, 6, 9, 10], ['height', 'weight']])


result = df.q(
    r"""
    height / weight ´m to num
    height / weight  ´r all is num
    """,
    inplace=True, verbosity=0)
compare(result, df.loc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], ['height', 'weight']])
df = get_df()

result = df.q(
    r"""
    height / weight
        ´m to num
        ´r all is num
    """,
    inplace=True, verbosity=0)
compare(result, df.loc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], ['height', 'weight']])
df = get_df()



#uniqueness
result = df.q(
    r"""
    diabetes ´r is unique
    is any
    """,
    diff=None,
    inplace=False,
    verbosity=3,
    )
compare(result, df.loc[[1,2,4,6,7,8,9], :])

result = df.q(
    r"""
    diabetes ´r is first
    is any
    """,
    diff=None,
    inplace=False,
    verbosity=3,
    )
compare(result, df.loc[[0,1,2,4,5,6,7,8,9], :])

result = df.q(
    r"""
    diabetes ´r is last
    is any
    """,
    diff=None,
    inplace=False,
    verbosity=3,
    )
compare(result, df.loc[[1,2,3,4,6,7,8,9,10], :])


#custom python expression evaluation
result = df.q('age ´r ~ isinstance(x, int)', verbosity=0)
compare(result, df.loc[[0, 10], ['age']])

result = df.q(
    r"""
    age / height ´m to num
    age ´r col~ col < df["height"]
    """,
    inplace=False, verbosity=0,
    )
compare(result, df.loc[[0, 10], ['age']])

result = df.q(
    r"""
    age / height ´m to num
    age ´r col~ col == df["age"].max()
    age ´m to str
    """,
    inplace=False, verbosity=0,
    )
compare(result, df.loc[[4], ['age']])




  result = df.q('bp systole ´r r?\d', verbosity=0)


0,1,2,3,4
2568,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:45.609524


passed test for arg: "date of birth"
passed test for arg: "=date of birth"
passed test for arg: "date of birth / age"
passed test for arg: "date of birth / =age"
passed test for arg: "=date of birth / age"
passed test for arg: "=date of birth / =age"
passed test for arg: "!=date of birth"
passed test for arg: ""
passed test for arg: "=ID"
passed test for arg: "=ID"


0,1,2,3,4
2625,"no columns fulfill the condition in ""´c =""",df.q(),Warning,2024-08-06 17:30:46.250402


passed test for arg: "="
passed test for arg: "
    =ID
    ID
    "
passed test for arg: "
    ID
    age
    "
passed test for arg: "
    ID
    / age
    "
passed test for arg: "?bp"
passed test for arg: "?I"
passed test for arg: "??I"


0,1,2,3,4
2666,"no columns fulfill the condition in ""´c r=.""",df.q(),Warning,2024-08-06 17:30:46.520538


passed test for arg: "r=."
passed test for arg: "r=.."
passed test for arg: "?bp / =diabetes"
passed test for arg: "?bp / =diabetes / =cholesterol"
passed test for arg: "?bp / =cholesterol / =diabetes"
passed test for arg: "?bp & ?systole"
passed test for arg: "?bp & !?systole"
passed test for arg: "?i & r=.."
passed test for arg: "name ´r is str"
passed test for arg: "name ´r !is str"
passed test for arg: "name ´r is num"
passed test for arg: "name ´r !is num"
passed test for arg: "name ´r is na"
passed test for arg: "name ´r !is na"
passed test for arg: "age ´r is na"
passed test for arg: "cholesterol ´r is na"
passed test for arg: "weight ´r is num"
passed test for arg: "weight ´r is num & !is na"


  return pd.to_datetime(x, dayfirst=True).date()


passed test for arg: "date of birth ´r !is date"
passed test for arg: "diabetes ´r is yn"
passed test for arg: "diabetes ´r is na / is yn"
passed test for arg: "diabetes ´r is yn"
passed test for arg: "diabetes ´r is yes"
passed test for arg: "diabetes ´r is no"
passed test for arg: "ID ´r r=1...."
passed test for arg: "ID ´r !r=3...."
passed test for arg: "name ´r r=\b[A-Z][a-z]*\s[A-Z][a-z]*\b"
passed test for arg: "name ´r r=^[^A-Z]*$"
passed test for arg: "dose ´r r=^(?=.*[a-zA-Z])(?=.*[0-9]).*$"
passed test for arg: "bp systole ´r r?m"
passed test for arg: "bp systole ´r r?\D"
passed test for arg: "bp systole ´r r?\d"
passed test for arg: "id / name  ´r ?j"
passed test for arg: "id / name  ´r ?j / ?n"
passed test for arg: "id / name  ´r ?j & ?n"
passed test for arg: "height / weight  ´r is num"
passed test for arg: "height / weight  ´r any is num"
passed test for arg: "height / weight  ´r anyis num"
passed test for arg: "height / weight  ´r all is num"
passed test for arg: "
    h

0,1,2,3,4
3078,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:48.370434


passed test for arg: "
    height / weight
        ´m to num
        ´r all is num
    "


0,1,2,3,4
3095,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:48.429693


passed test for arg: "
    diabetes ´r is unique
    is any
    "
passed test for arg: "
    diabetes ´r is first
    is any
    "
passed test for arg: "
    diabetes ´r is last
    is any
    "
passed test for arg: "age ´r ~ isinstance(x, int)"
passed test for arg: "
    age / height ´m to num
    age ´r col~ col < df["height"]
    "
passed test for arg: "
    age / height ´m to num
    age ´r col~ col == df["age"].max()
    age ´m to str
    "


## complex filtering

In [57]:
df = get_df()

result = df.q(
    r"""
    ID  ´r r=1....
    diabetes ´r is yes
    """,
    verbosity=0)
compare(result, df.loc[[1, 4, 5, 10], ['diabetes']])


result = df.q(
    r"""
    ID  ´r r=1....
    diabetes ´r & is yes
    ID / diabetes
    """,
    verbosity=0)
compare(result, df.loc[[1], ['ID', 'diabetes']])


result = df.q(
    r"""
    diabetes ´r is yes
    ID  ´r & r=1....
    / diabetes
    """,
    verbosity=0)
compare(result, df.loc[[1], ['ID', 'diabetes']])


result = df.q(
    r"""
    diabetes ´r is yes
    / ID  ´r & r=1....
    """,
    verbosity=0)
compare(result, df.loc[[1], ['ID', 'diabetes']])


result = df.q(
    r"""
    ID  ´r r=1.... / r=2....  ´n @1
    gender ´r =m / =male & @1
    ID / gender
    """,
    verbosity=0)
compare(result, df.loc[[0,3,5], ['ID', 'gender']])


result = df.q(
    r"""
    ID  ´r r=1.... / r=2....  ´n @ 1
    gender ´r =m / =male &@1
    """,
    verbosity=0)
compare(result, df.loc[[0,3,5], ['gender']])


result = df.q(
    r"""
    ID  ´r r=1.... / r=2....  ´n @1
    gender ´r =m / =m / =male  / @ 1
    """,
    verbosity=0)
compare(result, df.loc[[0,1,2,3,4,5], ['gender']])


result = df.q(
    r"""
    ID  ´r r=1.... / r=2....  ´n @1
    gender ´r =f / =f / =female  / @ 1
    ID
    """,
    verbosity=0)
compare(result, df.loc[[0,1,2,3,4,5,10], ['ID']])


result = df.q(
    r"""
    gender ´r =f / =female
    age ´r & >30
    """,
    verbosity=0)
compare(result, df.loc[[10], ['age']])


result = df.q(
    r"""
    gender ´r =f / =female  ´n @ a
    age ´r >30  & @ a 
    """,
    verbosity=0)
compare(result, df.loc[[10], ['age']])


result = df.q(
    r"""
    age ´r >30
    age ´r / <18
    """,
    verbosity=0)
compare(result, df.loc[[0,4,10], ['age']])


result = df.q(
    r"""
    age ´r >30  ´n @ a
    age ´r <18 /@a
    """,
    verbosity=0)
compare(result, df.loc[[0,4,10], ['age']])


result = df.q(
    r"""
    age ´r >30 / <18
    """,
    verbosity=0)
compare(result, df.loc[[0,4,10], ['age']])


result = df.q(
    r"""
    weight ´r <70 & >40  ´n @ between 40 and 70
    diabetes ´r is yes & @between 40 and 70   
    """,
    verbosity=0)
compare(result, df.loc[[1], ['diabetes']])

result = df.q(
    r"""
    weight ´r <70  ´n @ <70
    &weight ´r >40  ´n@>40
    diabetes ´r is no & @<70 & @ >40
    weight / diabetes
    """,
    verbosity=0)
compare(result, df.loc[[], ['weight', 'diabetes']])





0,1,2,3,4
3191,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:48.795495


passed test for arg: "
    ID  ´r r=1....
    diabetes ´r is yes
    "
passed test for arg: "
    ID  ´r r=1....
    diabetes ´r & is yes
    ID / diabetes
    "
passed test for arg: "
    diabetes ´r is yes
    ID  ´r & r=1....
    / diabetes
    "
passed test for arg: "
    diabetes ´r is yes
    / ID  ´r & r=1....
    "
passed test for arg: "
    ID  ´r r=1.... / r=2....  ´n @1
    gender ´r =m / =male & @1
    ID / gender
    "
passed test for arg: "
    ID  ´r r=1.... / r=2....  ´n @ 1
    gender ´r =m / =male &@1
    "
passed test for arg: "
    ID  ´r r=1.... / r=2....  ´n @1
    gender ´r =m / =m / =male  / @ 1
    "
passed test for arg: "
    ID  ´r r=1.... / r=2....  ´n @1
    gender ´r =f / =f / =female  / @ 1
    ID
    "
passed test for arg: "
    gender ´r =f / =female
    age ´r & >30
    "
passed test for arg: "
    gender ´r =f / =female  ´n @ a
    age ´r >30  & @ a 
    "
passed test for arg: "
    age ´r >30
    age ´r / <18
    "
passed test for arg: "
    age ´r >

## data modification

In [58]:
#set columns
#each case runs a query containing "´m col~ <expr>" and compares the returned frame with
#an expectation built on a second, untouched copy (df1) using plain pandas.
#per the expectations below, "col~ df["name"]" overwrites every selected column with the "name" column.

#single column: the selector "id" is expected to resolve to the column "ID",
#and only that column is expected in the result
df = get_df()
df1 = get_df()
result = df.q('id ´m col~ df["name"]', verbosity=0)
df1['ID'] = df1['name']
compare(result, df1.loc[:, ['ID']])


#same modification followed by an "is any ´r is any" line:
#the whole frame (all rows, all columns) is expected back, with "ID" already overwritten
df = get_df()
df1 = get_df()
result = df.q(
    r"""
    id ´m col~ df["name"]
    is any ´r is any
    """,
    verbosity=0)
df1['ID'] = df1['name']
compare(result, df1.loc[:, :])


#two columns selected with "/": both are expected to be overwritten and returned
df = get_df()
df1 = get_df()
result = df.q('id / age ´m col~ df["name"]' ,verbosity=0)
df1['ID'] = df1['name']
df1['age'] = df1['name']
compare(result, df1.loc[:, ['ID', 'age']])

#no column selector at all: every column is expected to be overwritten
df = get_df()
df1 = get_df()
result = df.q('´m col~ df["name"]', verbosity=0)
for col in df1.columns:
    df1[col] = df1['name']
compare(result, df1)



#set values
#"´m ~ <expr>" is expected to rewrite each selected value, with x standing for the current value;
#the expectation is again built on the untouched copy df1 with plain pandas.

#lowercase every name, then return the full frame via "is any ´r is any"
df = get_df()
df1 = get_df()
result = df.q(
    r"""
    name ´m ~ x.lower()
    is any ´r is any
    """,
    verbosity=0)
df1['name'] = df1['name'].str.lower()
compare(result, df1.loc[:, :])

#same modification behind a row condition ("!x? x == x.lower()", presumably "value is not
#already lowercase" - confirm against the query syntax docs); the expected frame is identical
#because already-lowercase names would not change anyway
df = get_df()
df1 = get_df()
result = df.q(
    r"""
    name  ´r !x? x == x.lower()  ´m ~ x.lower()
    is any ´r is any
    """,
    verbosity=0)
df1['name'] = df1['name'].str.lower()
compare(result, df1.loc[:, :])


#two modifications chained with "&": convert to str, then lowercase
df = get_df()
df1 = get_df()
result = df.q(
    r"""
    gender ´m to str & ~ x.lower()
    is any
    """,
    verbosity=0)
df1['gender'] = df1['gender'].astype(str).str.lower()
compare(result, df1.loc[:, :])


#same two modifications chained with "/": the expected result is identical to the "&" case
df = get_df()
df1 = get_df()
result = df.q(
    r"""
    gender ´m to str / ~ x.lower()
    is any
    """,
    verbosity=0)
df1['gender'] = df1['gender'].astype(str).str.lower()
compare(result, df1.loc[:, :])





#in-place modifications: df is changed by every query (inplace=True) and is NOT reset between
#the steps below, so each expectation builds on the state left by the previous step.
#df1 only serves as the expected frame; its "b" column is overwritten by hand before each comparison.
df = get_df_simple_tagged()
df1 = get_df_simple_tagged()

#row condition on "a" followed by a modification of "b":
#the condition is expected to limit which rows of "b" are changed
df.q(
    r"""
    a ´r >0
    b ´m ~ x+1
    """,
    inplace=True, verbosity=0,
    )
df1['b'] = [ 1, 2, 4]
compare(df, df1)


#same condition again: only the last row changes (4 -> 2)
df.q(
    r"""
    a ´r >0
    b ´m ~ x - 2
    """,
    inplace=True, verbosity=0,
    )
df1['b'] = [ 1, 2, 2]
compare(df, df1)


#no row condition: every value of "b" is doubled
df.q('b  ´m ~x*2', inplace=True, verbosity=0)
df1['b'] = [ 2, 4, 4]
compare(df, df1)


#condition a == 0: only the middle row changes (4 -> 2).
#the backtick in "x`/2" presumably escapes "/" so it is read as division rather than
#as the query's "/" connector - confirm against the query syntax docs
df.q(
    r"""
    a ´r =0
    b ´m ~ x`/2
    """,
    inplace=True, verbosity=0,
    )
df1['b'] = [ 2, 2, 4]
compare(df, df1)


0,1,2,3,4
3636,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.495839


passed test for arg: "id ´m col~ df["name"]"


0,1,2,3,4
3644,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.539612


passed test for arg: "
    id ´m col~ df["name"]
    is any ´r is any
    "


0,1,2,3,4
3661,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.580150


passed test for arg: "id / age ´m col~ df["name"]"


0,1,2,3,4
3673,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.616248


passed test for arg: "´m col~ df["name"]"


0,1,2,3,4
3677,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.641569


passed test for arg: "
    name ´m ~ x.lower()
    is any ´r is any
    "


0,1,2,3,4
3694,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.683126


passed test for arg: "
    name  ´r !x? x == x.lower()  ´m ~ x.lower()
    is any ´r is any
    "


0,1,2,3,4
3716,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.736958


passed test for arg: "
    gender ´m to str & ~ x.lower()
    is any
    "


0,1,2,3,4
3731,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.774568


passed test for arg: "
    gender ´m to str / ~ x.lower()
    is any
    "


0,1,2,3,4
3746,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.811087


passed test for arg: "
    a ´r >0
    b ´m ~ x+1
    "
passed test for arg: "
    a ´r >0
    b ´m ~ x - 2
    "
passed test for arg: "b  ´m ~x*2"
passed test for arg: "
    a ´r =0
    b ´m ~ x`/2
    "


## metadata modification

In [59]:

#syntax
#four spellings of the same query (with/without "=" before the column name, varying whitespace,
#"+= >0" vs. the explicit expression x + ">0"); each starts from a fresh frame and is expected
#to leave ">0" in the "meta" column of the last row only
df = get_df_simple_tagged()
df1 = get_df_simple_tagged()
df.q('a ´r >0  ´n meta   ´m += >0', inplace=True, verbosity=0)
df1['meta'] = ['', '', '>0']
compare(df, df1)

df = get_df_simple_tagged()
df1 = get_df_simple_tagged()
df.q('=a   ´r >0   ´n meta   ´m ~ x + ">0"', inplace=True, verbosity=0)
df1['meta'] = [ '', '', '>0']
compare(df, df1)

df = get_df_simple_tagged()
df1 = get_df_simple_tagged()
df.q('=a   ´r >0   ´n  meta ´m +=>0', inplace=True, verbosity=0)
df1['meta'] = [ '', '', '>0']
compare(df, df1)

df = get_df_simple_tagged()
df1 = get_df_simple_tagged()
df.q('=a´r >0     ´n meta   ´m += >0', inplace=True, verbosity=0)
df1['meta'] = [ '', '', '>0']
compare(df, df1)



#continuous
#from here on df is NOT reset: every query modifies it in place and the expected "meta"
#column accumulates the effect of all previous steps
df = get_df_simple_tagged()
df1 = get_df_simple_tagged()


#first tag on the last row
df.q('=a  ´r >0   ´n meta   ´m += >0', inplace=True, verbosity=0)
df1['meta'] = [ '', '', '>0']
compare(df, df1)


#same query again: the tag is appended a second time rather than replaced
df.q('=a   ´r >0  ´n meta   ´m += >0', inplace=True, verbosity=0)
df1['meta'] = [ '', '', '>0>0']
compare(df, df1)


#tag the row where a == 0 (the middle row) with "0"
df.q('=a   ´r ==0    ´n meta   ´m += 0', inplace=True, verbosity=0)
df1['meta'] = [ '', '0', '>0>0']
compare(df, df1)


#strip "0" from the meta value of the a == 0 row only; the last row keeps its tags
df.q('a   ´r ==0    ´n meta  ´m ~ x.replace("0", "")', inplace=True, verbosity=0)
df1['meta'] = [ '', '', '>0>0']
compare(df, df1)


#strip "0" from the meta value of the a > 0 row: ">0>0" becomes ">>"
df.q('=a   ´r >0    ´n meta  ´m ~ x.replace("0", "")', inplace=True, verbosity=0)
df1['meta'] = [ '', '', '>>']
compare(df, df1)


#"´m =" with nothing after it and no row condition: meta is expected to be emptied for all rows
df.q('=a     ´n meta   ´m =', inplace=True, verbosity=0)
df1['meta'] = [ '', '', '']
compare(df, df1)





0,1,2,3,4
3802,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:50.977571


passed test for arg: "a ´r >0  ´n meta   ´m += >0"


0,1,2,3,4
3818,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.041059


passed test for arg: "=a   ´r >0   ´n meta   ´m ~ x + ">0""


0,1,2,3,4
3834,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.085589


passed test for arg: "=a   ´r >0   ´n  meta ´m +=>0"


0,1,2,3,4
3850,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.124902


passed test for arg: "=a´r >0     ´n meta   ´m += >0"


0,1,2,3,4
3866,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.176489


passed test for arg: "=a  ´r >0   ´n meta   ´m += >0"
passed test for arg: "=a   ´r >0  ´n meta   ´m += >0"
passed test for arg: "=a   ´r ==0    ´n meta   ´m += 0"
passed test for arg: "a   ´r ==0    ´n meta  ´m ~ x.replace("0", "")"
passed test for arg: "=a   ´r >0    ´n meta  ´m ~ x.replace("0", "")"
passed test for arg: "=a     ´n meta   ´m ="


## type conversion

In [60]:

#each case converts one column with a "´m to <type>" modification and compares the returned
#column with a hand-written expectation. df1 is the frame that is queried, df2 holds the expectation.
#the expected column is always cast to object dtype before comparing
#(presumably because the query result keeps object dtype - confirm against compare()).

#to int: values that cannot be converted are expected to become NaN
df1 = get_df()
df2 = get_df()
result = df1.q('age ´m to int', verbosity=0)
df2['age'] = [-25, 30, np.nan, np.nan, 40, np.nan, np.nan, np.nan, np.nan, np.nan, 35]
df2['age'] = df2['age'].astype('object')
compare(result, df2.loc[:,['age']])

#to float
df1 = get_df()
df2 = get_df()
result = df1.q('=age  ´m to float', verbosity=0)
df2['age'] = [-25.0, 30.0, np.nan, np.nan, 40.0, np.nan, np.nan, np.nan, np.nan, np.nan, 35.0]
df2['age'] = df2['age'].astype('object')
compare(result, df2.loc[:,['age']])

#to num
df1 = get_df()
df2 = get_df()
result = df1.q('=age   ´m to num', verbosity=0)
df2['age'] = [-25, 30, np.nan, np.nan, 40, np.nan, np.nan, np.nan, np.nan, np.nan, 35]
df2['age'] = df2['age'].astype('object')
compare(result, df2.loc[:,['age']])

#to str: every value, including missing ones, is expected as its string representation
df1 = get_df()
df2 = get_df()
result = df1.q('=age   ´m to str', verbosity=0)
df2['age'] = ['-25', '30', 'nan', 'None', '40.0', 'forty-five', 'nan', 'unk', '', 'unknown', '35']
df2['age'] = df2['age'].astype('object')
compare(result, df2.loc[:,['age']])

#to date: the expectation parses each raw value with the day/month order that applies to it;
#the one unparseable entry is expected as NaT
df1 = get_df()
df2 = get_df()
result = df1.q('date of birth   ´m to date', verbosity=0)
df2['date of birth'] = [
    pd.to_datetime('1995-01-02', dayfirst=False).date(),
    pd.to_datetime('1990/09/14', dayfirst=False).date(),
    pd.to_datetime('1985.08.23', dayfirst=False).date(),
    pd.to_datetime('19800406', dayfirst=False).date(),
    pd.to_datetime('05-11-2007', dayfirst=True).date(),
    pd.to_datetime('06-30-1983', dayfirst=False).date(),
    pd.to_datetime('28-05-1975', dayfirst=True).date(),
    pd.NaT,
    pd.to_datetime('1955-Jan-09', dayfirst=False).date(),
    pd.to_datetime('1950 Sep 10', dayfirst=False).date(),
    pd.to_datetime('1945 October 11', dayfirst=False).date(),
    ]
#cast the column under test (previously the unrelated "age" column was cast here)
df2['date of birth'] = df2['date of birth'].astype('object')
compare(result, df2.loc[:,['date of birth']])

#to na: values recognised as "not available" are expected as None, everything else stays untouched
df1 = get_df()
df2 = get_df()
result = df1.q('=age   ´m to na', verbosity=0)
df2['age'] = [-25, '30', None, None, '40.0', 'forty-five', None, 'unk', None, 'unknown', 35]
df2['age'] = df2['age'].astype('object')
compare(result, df2.loc[:,['age']])

#to nk: values recognised as "not known" are expected as 'unknown', everything else stays untouched
df1 = get_df()
df2 = get_df()
result = df1.q('=age   ´m to nk', verbosity=0)
df2['age'] = [-25, '30', np.nan, None, '40.0', 'forty-five', 'nan', 'unknown', '', 'unknown', 35]
df2['age'] = df2['age'].astype('object')
compare(result, df2.loc[:,['age']])

#to yn: values are expected as 'yes' / 'no', or None where neither applies
df1 = get_df()
df2 = get_df()
result = df1.q('=diabetes   ´m to yn', verbosity=0)
df2['diabetes'] = ['no', 'yes', None, 'no', 'yes', 'yes', 'no', None, None, 'no', 'yes']
#cast the column under test (previously the unrelated "age" column was cast here)
df2['diabetes'] = df2['diabetes'].astype('object')
compare(result, df2.loc[:,['diabetes']])

0,1,2,3,4
3952,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.484762


passed test for arg: "age ´m to int"


0,1,2,3,4
3960,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.534710


passed test for arg: "=age  ´m to float"


0,1,2,3,4
3968,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.566866


passed test for arg: "=age   ´m to num"


0,1,2,3,4
3976,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.603877


passed test for arg: "=age   ´m to str"


0,1,2,3,4
3984,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.631403


passed test for arg: "date of birth   ´m to date"


  return pd.to_datetime(x, dayfirst=True).date()


0,1,2,3,4
3992,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.685722


passed test for arg: "=age   ´m to na"


0,1,2,3,4
4000,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.715418


passed test for arg: "=age   ´m to nk"


0,1,2,3,4
4008,df was checked. no problems found,qp.pd_util._check_df,info,2024-08-06 17:30:51.750513


passed test for arg: "=diabetes   ´m to yn"


## adding columns


In [61]:
#wip

# qp.diff()

In [62]:
def compare(df_new, df_old, expected, mode):
    """
    Run qp.diff(df_new, df_old, mode) and check its .data against expected.

    Prints a "passed test" line on success. On a mismatch all four frames
    (new, old, expected, actual result) are displayed before an Exception is raised.
    """
    actual = qp.diff(df_new, df_old, mode, verbosity=0).data

    if actual.equals(expected):
        print(f'passed test for mode: "{mode}"')
        return

    #show everything needed to debug the mismatch, then fail
    for label, frame in (
        ('new:', df_new),
        ('old:', df_old),
        ('expected:', expected),
        ('result:', actual),
        ):
        print(label)
        display(frame)
    raise Exception(f'failed test for mode: "{mode}"')


#sample pair of frames that the diff tests below compare
df_new, df_old = qp.get_dfs()


#expected frames are created empty and filled cell by cell via .loc.
#NOTE(review): compare() uses DataFrame.equals, so dtypes matter - building the frames
#differently (e.g. from a dict) may change dtypes and break the comparison.

#expected result for mode "new": columns d, b, a and rows y, x2, z;
#"meta" describes what changed in each row
result_new = pd.DataFrame(columns=['meta', 'd', 'b', 'a'], index=['y','x2','z'])


result_new.loc['y', 'meta'] = '<br>vals changed: 1'
result_new.loc['y', 'd'] = 2.0
result_new.loc['y', 'b'] = 2.0
result_new.loc['y', 'a'] = 0.0

result_new.loc['x2', 'meta'] = 'added row'
result_new.loc['x2', 'd'] = 1.0
result_new.loc['x2', 'b'] = 1.0
result_new.loc['x2', 'a'] = 1.0

result_new.loc['z', 'meta'] = '<br>vals added: 1<br>vals removed: 1'
result_new.loc['z', 'd'] = 3.0
result_new.loc['z', 'b'] = 3.0
result_new.loc['z', 'a'] = np.nan



#expected result for mode "old": columns a, b, c and rows x, y, z
result_old = pd.DataFrame(columns=['meta', 'a', 'b', 'c'], index=['x','y','z'])

result_old.loc['x', 'meta'] ='removed row'
result_old.loc['x', 'a'] = 1.0
result_old.loc['x', 'b'] = 1.0
result_old.loc['x', 'c'] = 1.0

result_old.loc['y', 'meta'] = '<br>vals changed: 1'
result_old.loc['y', 'a'] = 2.0
result_old.loc['y', 'b'] = 2.0
result_old.loc['y', 'c'] = 2.0

result_old.loc['z', 'meta'] = '<br>vals added: 1<br>vals removed: 1'
result_old.loc['z', 'a'] = 3.0
#NOTE(review): None here, np.nan for missing values elsewhere - the recorded run passes
#with this mix; confirm it is intentional
result_old.loc['z', 'b'] = None
result_old.loc['z', 'c'] = 3.0



#expected result for mode "mix": union of the columns (d, b, a, c) and rows (y, x2, z, x) of both frames
result_mix = pd.DataFrame(columns=['meta', 'd', 'b', 'a', 'c'], index=['y', 'x2', 'z', 'x'])

result_mix.loc['y', 'meta'] = '<br>vals changed: 1'
result_mix.loc['y', 'd'] = 2.0
result_mix.loc['y', 'b'] = 2.0
result_mix.loc['y', 'a'] = 0.0
result_mix.loc['y', 'c'] = 2.0

result_mix.loc['x2', 'meta'] = 'added row'
result_mix.loc['x2', 'd'] = 1.0
result_mix.loc['x2', 'b'] = 1.0
result_mix.loc['x2', 'a'] = 1.0
result_mix.loc['x2', 'c'] = np.nan

result_mix.loc['z', 'meta'] = '<br>vals added: 1<br>vals removed: 1'
result_mix.loc['z', 'd'] = 3.0
result_mix.loc['z', 'b'] = 3.0
result_mix.loc['z', 'a'] = np.nan
result_mix.loc['z', 'c'] = 3.0

result_mix.loc['x', 'meta'] ='removed row'
result_mix.loc['x', 'd'] = None
result_mix.loc['x', 'b'] = 1.0
result_mix.loc['x', 'a'] = 1.0
result_mix.loc['x', 'c'] = 1.0




#expected result for mode "new+": like "new", but every column is followed by an "old: <col>"
#column; per the values below it holds the previous value where one differs and '' otherwise
result_new_plus = pd.DataFrame(columns=['meta', 'd', 'old: d', 'b', 'old: b', 'a', 'old: a'], index=['y','x2','z'])


result_new_plus.loc['y', 'meta'] = '<br>vals changed: 1'
result_new_plus.loc['y', 'd'] = 2.0
result_new_plus.loc['y', 'old: d'] = ''
result_new_plus.loc['y', 'b'] = 2.0
result_new_plus.loc['y', 'old: b'] = ''
result_new_plus.loc['y', 'a'] = 0.0
result_new_plus.loc['y', 'old: a'] = 2.0

result_new_plus.loc['x2', 'meta'] = 'added row'
result_new_plus.loc['x2', 'd'] = 1.0
result_new_plus.loc['x2', 'old: d'] = ''
result_new_plus.loc['x2', 'b'] = 1.0
result_new_plus.loc['x2', 'old: b'] = ''
result_new_plus.loc['x2', 'a'] = 1.0
result_new_plus.loc['x2', 'old: a'] = ''

result_new_plus.loc['z', 'meta'] = '<br>vals added: 1<br>vals removed: 1'
result_new_plus.loc['z', 'd'] = 3.0
result_new_plus.loc['z', 'old: d'] = ''
result_new_plus.loc['z', 'b'] = 3.0
result_new_plus.loc['z', 'old: b'] = None
result_new_plus.loc['z', 'a'] = np.nan
result_new_plus.loc['z', 'old: a'] = 3.0


#render each diff once for visual inspection in the notebook (not part of the assertions)
display(qp.diff(df_new, df_old, 'new', verbosity=4))
display(qp.diff(df_new, df_old, 'old', verbosity=4))
display(qp.diff(df_new, df_old, 'mix', verbosity=4))
display(qp.diff(df_new, df_old, 'new+', verbosity=4))

#the actual tests: one comparison per diff mode
compare(df_new, df_old, result_new, 'new')
compare(df_new, df_old, result_old, 'old')
compare(df_new, df_old, result_mix, 'mix')
compare(df_new, df_old, result_new_plus, 'new+')



0,1,2,3,4
4016,"adding column ""meta"" at position 0",df.format(),info,2024-08-06 17:30:51.921255


0,1,2,3,4
4017,"adding column ""meta"" at position 0",df.format(),info,2024-08-06 17:30:51.938827


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': 1,
 'vals removed': 1,
 'vals changed': 1}

Unnamed: 0,meta,d,b,a
y,vals changed: 1,2.0,2.0,0.0
x2,added row,1.0,1.0,1.0
z,vals added: 1 vals removed: 1,3.0,3.0,


0,1,2,3,4
4018,"adding column ""meta"" at position 0",df.format(),info,2024-08-06 17:30:52.008911


0,1,2,3,4
4019,"adding column ""meta"" at position 0",df.format(),info,2024-08-06 17:30:52.019970


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': 1,
 'vals removed': 1,
 'vals changed': 1}

Unnamed: 0,meta,a,b,c
x,removed row,1.0,1.0,1.0
y,vals changed: 1,2.0,2.0,2.0
z,vals added: 1 vals removed: 1,3.0,,3.0


0,1,2,3,4
4020,"adding column ""meta"" at position 0",df.format(),info,2024-08-06 17:30:52.144300


0,1,2,3,4
4021,"adding column ""meta"" at position 0",df.format(),info,2024-08-06 17:30:52.155201


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': 1,
 'vals removed': 1,
 'vals changed': 1}

Unnamed: 0,meta,d,b,a,c
y,vals changed: 1,2.0,2.0,0.0,2.0
x2,added row,1.0,1.0,1.0,
z,vals added: 1 vals removed: 1,3.0,3.0,,3.0
x,removed row,,1.0,1.0,1.0


0,1,2,3,4
4022,"adding column ""meta"" at position 0",df.format(),info,2024-08-06 17:30:52.238604


0,1,2,3,4
4023,"adding column ""meta"" at position 0",df.format(),info,2024-08-06 17:30:52.249055


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': 1,
 'vals removed': 1,
 'vals changed': 1}

Unnamed: 0,meta,d,old: d,b,old: b,a,old: a
y,vals changed: 1,2.0,,2.0,,0.0,2.0
x2,added row,1.0,,1.0,,1.0,
z,vals added: 1 vals removed: 1,3.0,,3.0,,,3.0


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': 1,
 'vals removed': 1,
 'vals changed': 1}

passed test for mode: "new"


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': 1,
 'vals removed': 1,
 'vals changed': 1}

passed test for mode: "old"


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': 1,
 'vals removed': 1,
 'vals changed': 1}

passed test for mode: "mix"


{'cols added': 1,
 'cols removed': 1,
 'rows added': 1,
 'rows removed': 1,
 'vals added': 1,
 'vals removed': 1,
 'vals changed': 1}

passed test for mode: "new+"


# df.format()

In [63]:
def compare(df, result, expected):
    """
    Check that result equals expected.

    On a mismatch both frames are displayed and an Exception is raised;
    otherwise a "passed test" line is printed and the input frame df is displayed.
    """
    if not result.equals(expected):
        print('result:')
        display(result)
        print('expected:')
        display(expected)
        raise Exception('failed test for finding all errors in df')

    print('passed test for finding all errors in df')
    display(df)



#each case pairs an empty input frame with the frame df.format() is expected to return:
#  1. no "meta" column yet            -> "meta" is expected as first column
#  2. "meta" column already present   -> columns stay as they are
#  3. column names padded with spaces -> names are expected stripped
format_cases = [
    (
        pd.DataFrame(columns=['a', 'b', 'c']),
        pd.DataFrame('', columns=['meta', 'a', 'b', 'c'], index=[]),
        ),
    (
        pd.DataFrame(columns=['meta', 'a', 'b', 'c'], index=[]),
        pd.DataFrame('', columns=['meta', 'a', 'b', 'c'], index=[]),
        ),
    (
        pd.DataFrame(columns=[' a', 'b ', ' c ', 'a b c ']),
        pd.DataFrame('', columns=['meta', 'a', 'b', 'c', 'a b c'], index=[]),
        ),
    ]

for df, expected in format_cases:
    result = df.format(verbosity=0)
    compare(df, result, expected)



passed test for finding all errors in df


Unnamed: 0,a,b,c


passed test for finding all errors in df


Unnamed: 0,meta,a,b,c


passed test for finding all errors in df


Unnamed: 0,a,b,c,a b c


# df.save() and df.load()

In [64]:
#remove old test files
#date stamps used in the names of the archive files created by the tests below.
#"now" is taken once so that all stamps refer to the same instant, even if this cell
#happens to run across midnight.
_now = datetime.datetime.now()
today1 = _now.strftime('%Y_%m_%d')  #default date format
today2 = _now.strftime('%d_%m_%Y')  #alternative date format
date0 = qp.date('2000_01_01').strftime('%Y_%m_%d')  #fixed date far in the past
date1 = (_now - datetime.timedelta(days=400)).date().strftime('%Y_%m_%d')  #always before this year
date2 = (_now - datetime.timedelta(days=40)).date().strftime('%Y_%m_%d')  #always before this month
date3 = (_now - datetime.timedelta(days=8)).date().strftime('%Y_%m_%d')  #more than 7 days back
date4 = (_now - datetime.timedelta(days=1)).date().strftime('%Y_%m_%d')  #yesterday

def clean():
    """
    Delete the files that earlier runs of the save/load tests may have left behind,
    then remove the folder 'test/archive' including its content.
    Files that do not exist are skipped.
    """
    leftovers = (
        ['df.xlsx', 'df1.xlsx']
        + ['test/df1.xlsx', 'test/df1b.xlsx', 'test/df2.xlsx']
        + [f'archive/df1_{today1}.xlsx', f'archive/df1_{today2}.xlsx', f'archive/df1b_{today1}.xlsx']
        + [f'archive/df_{stamp}.xlsx' for stamp in (today1, today2, date0, date1, date2, date3, date4)]
        + [f'test/archive/{stem}_{today1}.xlsx' for stem in ('df1', 'df1b', 'df2')]
        )

    #filter() is lazy, so each path is checked right before it is removed
    for path in filter(os.path.isfile, leftovers):
        os.remove(path)

    if os.path.isdir('test/archive'):
        shutil.rmtree('test/archive')
clean()



#minimal one-cell frames; the cell value identifies which frame ended up in a file.
#df3 and df4 are not used in the visible part of this cell.
df1 = pd.DataFrame({'a':[1]})
df2 = pd.DataFrame({'a':[2]})
df3 = pd.DataFrame({'a':[3]})
df4 = pd.DataFrame({'a':[4]})


#default
#save() without arguments is expected to write to "df.xlsx" in the working directory
df1.save()
if qp.isfile('df.xlsx'):
    print('passed test for default saving behaviour')
else:
    raise Exception(f'failed test for default saving behaviour')

#round trip: loading the file is expected to return an equal frame
df1a = qp.load('df.xlsx')
if df1a.equals(df1):
    print('passed test for loading from default file')
else:
    raise Exception(f'failed test for loading from default file')




#specific file
df1.save('df1.xlsx')
if qp.isfile('df1.xlsx'):
    print('passed test for saving to a specific file')
else:
    raise Exception(f'failed test for saving to a specific file')

df1b = qp.load('df1.xlsx')
if df1b.equals(df1):
    print('passed test for loading from a specific file')
else:
    raise Exception(f'failed test for loading from a specific file')




#file in folder
#"test/archive" was removed by clean(), so this save is expected to log a warning about the
#missing archive folder (see the recorded output) but still write the file itself
df1.save('test/df1.xlsx')
if qp.isfile('test/df1.xlsx'):
    print('passed test for saving to a specific file in folder')
else:
    raise Exception(f'failed test for saving to a specific file in folder')

df1c = qp.load('test/df1.xlsx')
if df1c.equals(df1):
    print('passed test for loading from a specific file in folder')
else:
    raise Exception(f'failed test for loading from a specific file in folder')




#specific sheet
#writes a second sheet into the already existing "df1.xlsx" and reads it back
df1.save('df1.xlsx', sheet='data2')
df1d = qp.load('df1.xlsx', sheet='data2')
if df1d.equals(df1):
    print('passed test for saving and loading to and from a specific sheet')
else:
    raise Exception(f'failed test for saving and loading to and from a specific sheet')




#overwriting sheets
#load, overwrite with df2, load again: the cell value is expected to change from 1 to 2
df1old = qp.load('df1.xlsx')
df2.save('df1.xlsx')
df1new = qp.load('df1.xlsx')
if df1old.loc[0, 'a'] == 1 and df1new.loc[0, 'a'] == 2:
    print('passed test for overwriting sheets')
else:
    raise Exception(f'failed test for overwriting sheets')
df1.save('df1.xlsx')  #restore previous state



#archiving
#saving (here with a name without file extension) is expected to also leave a copy stamped
#with today's date in the "archive" folder
today = datetime.datetime.now().strftime('%Y_%m_%d')
df1.save('df1')
if qp.isfile(f'archive/df1_{today}.xlsx'):
    print('passed test for archiving file')
else:
    raise Exception(f'failed test for archiving file')


#archiving nested folder
#the archive folder is created by hand first; without it save() only warns (see "file in folder" above)
os.mkdir('test/archive')
df1.save('test/df1')
if qp.isfile(f'test/archive/df1_{today}.xlsx'):
    print('passed test for archiving in nested folder')
else:
    raise Exception(f'failed test for archiving nested folder')





#archiving with different date format
#"today" is rebuilt with the same format string that is passed to save() as datefmt
today = datetime.datetime.now().strftime('%d_%m_%Y')
df1.save('test/df1.xlsx', sheet='data1', datefmt='%d_%m_%Y')
if qp.isfile(f'test/archive/df1_{today}.xlsx'):
    print('passed test for archiving with different date format')
else:
    raise Exception(f'failed test for archiving with different date format')



#most recent file
#the archive files for these tests are written with pandas directly; each holds its own date
#stamp in cell [0, 'a'], so the loaded value shows which file qp.load() picked.
#sheet='Sheet1' is the default sheet name used by DataFrame.to_excel.
#the dd_mm_YYYY file from today is removed first so that only the YYYY_mm_dd one can match
if os.path.isfile(f'archive/df_{today2}.xlsx'):
    os.remove(f'archive/df_{today2}.xlsx')
pd.DataFrame({'a':[today1]}).to_excel(f'archive/df_{today1}.xlsx', index=False)
pd.DataFrame({'a':[date1]}).to_excel(f'archive/df_{date1}.xlsx', index=False)

#loading by name without date stamp is expected to pick the newest of the stamped files
if qp.load('archive/df', sheet='Sheet1', index=False).loc[0, 'a'] == today1:
    print('passed test for loading most recent file')
else:
    raise Exception(f'failed test for loading most recent file')



#most recent file version 2
#before='now' is expected to give the same result as not passing "before" at all
if qp.load('archive/df', sheet='Sheet1', before='now', index=False).loc[0, 'a'] == today1:
    print('passed test for loading most recent file explicitly')
else:
    raise Exception(f'failed test for loading most recent file explicitly')



#most recent file with different date format
#swap today's file for one stamped as dd_mm_YYYY; it is still expected to be recognised as the newest
if os.path.isfile(f'archive/df_{today1}.xlsx'):
    os.remove(f'archive/df_{today1}.xlsx')
pd.DataFrame({'a':[today2]}).to_excel(f'archive/df_{today2}.xlsx', index=False)
pd.DataFrame({'a':[date1]}).to_excel(f'archive/df_{date1}.xlsx', index=False)

if qp.load('archive/df', sheet='Sheet1', index=False).loc[0, 'a'] == today2:
    print('passed test for loading most recent file with different date format')
else:
    raise Exception(f'failed test for loading most recent file with different date format')



#file before specific date
#only the file stamped 2000_01_01 (date0) lies before 2000_01_02
pd.DataFrame({'a':[date0]}).to_excel(f'archive/df_{date0}.xlsx', index=False)
if qp.load('archive/df', sheet='Sheet1', before='2000_01_02', index=False).loc[0, 'a'] == date0:
    print('passed test for loading most recent file from before specific date')
else:
    raise Exception(f'failed test for loading most recent file from before specific date')


#file before this year
#date1 is 400 days back, so it always falls into an earlier year and is newer than date0
if qp.load('archive/df', sheet='Sheet1', before='this year', index=False).loc[0, 'a'] == date1:
    print('passed test for loading most recent file from before this year')
else:
    raise Exception(f'failed test for loading most recent file from before this year')



#file before this month
#date2 is 40 days back, so it always falls into an earlier month
pd.DataFrame({'a':[date2]}).to_excel(f'archive/df_{date2}.xlsx', index=False)
if qp.load('archive/df', sheet='Sheet1', before='this month', index=False).loc[0, 'a'] == date2:
    print('passed test for loading most recent file from before this month')
else:
    raise Exception(f'failed test for loading most recent file from before this month')



#file before this week
#date3 is 8 days back, i.e. more than a full week
pd.DataFrame({'a':[date3]}).to_excel(f'archive/df_{date3}.xlsx', index=False)
if qp.load('archive/df', sheet='Sheet1', before='this week', index=False).loc[0, 'a'] == date3:
    print('passed test for loading most recent file from before this week')
else:
    raise Exception(f'failed test for loading most recent file from before this week')



#file before this day
#date4 is yesterday
pd.DataFrame({'a':[date4]}).to_excel(f'archive/df_{date4}.xlsx', index=False)
if qp.load('archive/df', sheet='Sheet1', before='this day', index=False).loc[0, 'a'] == date4:
    print('passed test for loading most recent file from before this day')
else:
    raise Exception(f'failed test for loading most recent file from before this day')



#file before today
#before='today' is expected to pick the same file as before='this day': the one stamped
#with yesterday's date (date4). the messages name this test explicitly so its result can be
#told apart from the "before this day" test in the output.
if qp.load('archive/df', sheet='Sheet1', before='today', index=False).loc[0, 'a'] == date4:
    print('passed test for loading most recent file from before today')
else:
    raise Exception(f'failed test for loading most recent file from before today')


# df1.save('test/df3.xlsx', sheet='sheet2', archive='source', datefmt='%Y_%m_%d')
# df1.save('test/df3.xlsx', sheet='sheet2', archive='destination', datefmt='%Y_%m_%d')
# df1.save('df.xlsx', sheet='sheet2', archive='both', datefmt='%Y_%m_%d')

0,1,2,3,4
4024,"saving df to ""df.xlsx"" in sheet ""data1""",df.save(),info,2024-08-06 17:30:52.862680


0,1,2,3,4
4025,"archiving df to ""c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev/archive/df_2024_08_06.xlsx"" in sheet ""data1""",df.save(),info,2024-08-06 17:30:52.981308


passed test for default saving behaviour
passed test for loading from default file


0,1,2,3,4
4026,"saving df to ""df1.xlsx"" in sheet ""data1""",df.save(),info,2024-08-06 17:30:53.127145


0,1,2,3,4
4027,"archiving df to ""c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev/archive/df1_2024_08_06.xlsx"" in sheet ""data1""",df.save(),info,2024-08-06 17:30:53.209617


passed test for saving to a specific file
passed test for loading from a specific file


0,1,2,3,4
4028,"saving df to ""test/df1.xlsx"" in sheet ""data1""",df.save(),info,2024-08-06 17:30:53.294905


0,1,2,3,4
4029,"did not find archive folder ""test/archive""",df.save(),Warning,2024-08-06 17:30:53.346506


passed test for saving to a specific file in folder
passed test for loading from a specific file in folder


0,1,2,3,4
4030,"file ""df1.xlsx"" already exists. data in sheet ""data2"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.382684


0,1,2,3,4
4031,"archive file ""c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev/archive/df1_2024_08_06.xlsx"" already exists. data in sheet ""data2"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.465499


passed test for saving and loading to and from a specific sheet


0,1,2,3,4
4032,"file ""df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.588615


0,1,2,3,4
4033,"archive file ""c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev/archive/df1_2024_08_06.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.633974


passed test for overwriting sheets


0,1,2,3,4
4034,"file ""df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.721486


0,1,2,3,4
4035,"archive file ""c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev/archive/df1_2024_08_06.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.775047


0,1,2,3,4
4036,"file ""df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.828933


0,1,2,3,4
4037,"archive file ""c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev/archive/df1_2024_08_06.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.886964


passed test for archiving file


0,1,2,3,4
4038,"file ""test/df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),Warning,2024-08-06 17:30:53.953321


0,1,2,3,4
4039,"archiving df to ""test/archive/df1_2024_08_06.xlsx"" in sheet ""data1""",df.save(),info,2024-08-06 17:30:54.003424


passed test for archiving in nested folder


0,1,2,3,4
4040,"file ""test/df1.xlsx"" already exists. data in sheet ""data1"" will be overwritten",df.save(),Warning,2024-08-06 17:30:54.056530


0,1,2,3,4
4041,"archiving df to ""test/archive/df1_06_08_2024.xlsx"" in sheet ""data1""",df.save(),info,2024-08-06 17:30:54.096407


passed test for archiving with different date format


0,1,2,3,4
4042,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:54.359251


0,1,2,3,4
4043,"loading ""archive/df_2024_08_06.xlsx""",df.load(),info,2024-08-06 17:30:54.372934


passed test for loading most recent file


0,1,2,3,4
4044,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:54.398229


0,1,2,3,4
4045,"loading ""archive/df_2024_08_06.xlsx""",df.load(),info,2024-08-06 17:30:54.412051


passed test for loading most recent file explicitly


0,1,2,3,4
4046,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:54.655696


0,1,2,3,4
4047,"loading ""archive/df_06_08_2024.xlsx""",df.load(),info,2024-08-06 17:30:54.671202


passed test for loading most recent file with different date format


0,1,2,3,4
4048,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:54.797284


0,1,2,3,4
4049,"loading ""archive/df_2000_01_01.xlsx""",df.load(),info,2024-08-06 17:30:54.808682


passed test for loading most recent file from before specific date


0,1,2,3,4
4050,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:54.825906


0,1,2,3,4
4051,"loading ""archive/df_2023_07_03.xlsx""",df.load(),info,2024-08-06 17:30:54.838604


passed test for loading most recent file from before this year


0,1,2,3,4
4052,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:55.089154


0,1,2,3,4
4053,"loading ""archive/df_2024_06_27.xlsx""",df.load(),info,2024-08-06 17:30:55.105643


passed test for loading most recent file from before this month


0,1,2,3,4
4054,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:55.223413


0,1,2,3,4
4055,"loading ""archive/df_2024_07_29.xlsx""",df.load(),info,2024-08-06 17:30:55.245062


passed test for loading most recent file from before this week


0,1,2,3,4
4056,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:55.392771


0,1,2,3,4
4057,"loading ""archive/df_2024_08_05.xlsx""",df.load(),info,2024-08-06 17:30:55.410539


passed test for loading most recent file from before this day


0,1,2,3,4
4058,"no archive folder found. looking for most recent file in ""archive"" instead",df.load(),info,2024-08-06 17:30:55.435415


0,1,2,3,4
4059,"loading ""archive/df_2024_08_05.xlsx""",df.load(),info,2024-08-06 17:30:55.454010


passed test for loading most recent file from before this day


# log()

In [65]:
# work in progress: tests for log() have not been written yet

# "bashlike" wrappers

In [66]:

# Tests for the "bashlike" wrappers qp.isdir, qp.mkdir, qp.cd and qp.pwd.
# All work happens inside the scratch directory test/dir1, which is removed
# again at the end. As in the rest of this notebook, a failing test raises
# and stops the run.

# start from a clean slate in case an earlier run left the directory behind
if os.path.isdir('test/dir1'):
    shutil.rmtree('test/dir1')


def _current_dir_name():
    """Return the last component of qp.pwd() using the platform's path rules."""
    # os.path handles both "/" and "\\" where applicable, unlike split('\\'),
    # which only works for Windows style paths.
    return os.path.basename(os.path.normpath(qp.pwd()))


try:
    # the comparisons against True/False are deliberately strict:
    # qp.isdir is expected to return a real boolean
    if qp.isdir('test/dir1') is False:
        print('passed test for non existing directory')
    else:
        raise Exception('failed test for non existing directory')


    qp.mkdir('test/dir1')
    if qp.isdir('test/dir1') is True:
        print('passed test for creating and checking for existing directory')
    else:
        raise Exception('failed test for creating and checking for existing directory')


    qp.cd('test/dir1')
    if _current_dir_name() == 'dir1':
        print('passed test for changing directory and finding path to current directory')
    else:
        raise Exception('failed test for changing directory and finding path to current directory')


    # go one level down and back up again, recording where we end up each time
    qp.mkdir('dir2')
    qp.cd('dir2')
    result1 = _current_dir_name()

    qp.cd('..')
    result2 = _current_dir_name()

    if result1 == 'dir2' and result2 == 'dir1':
        print('passed test for going back and forth in directory structure')
    else:
        raise Exception('failed test for going back and forth in directory structure')

finally:
    # always restore the original working directory and remove the scratch
    # directory, even when a test failed, so later cells are not affected
    os.chdir(working_directory)
    if os.path.isdir('test/dir1'):
        shutil.rmtree('test/dir1')

passed test for non existing directory


0,1,2,3,4
4060,"created directory ""test/dir1""","qp.mkdir(""test/dir1"")",info,2024-08-06 17:30:55.559909


passed test for creating and checking for existing directory


0,1,2,3,4
4061,moved from c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev to c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev\test\dir1,"qp.cd(""test/dir1"")",info,2024-08-06 17:30:55.573763


passed test for changing directory and finding path to current directory


0,1,2,3,4
4062,"created directory ""dir2""","qp.mkdir(""dir2"")",info,2024-08-06 17:30:55.587926


0,1,2,3,4
4063,moved from c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev\test\dir1 to c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev\test\dir1\dir2,"qp.cd(""dir2"")",info,2024-08-06 17:30:55.598095


0,1,2,3,4
4064,moved from c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev\test\dir1\dir2 to c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev\test\dir1,"qp.cd(""c:\Users\MartinVölkl-GouyaIns\OneDrive - Gouya Insights\Desktop\qplib_dev\test\dir1"")",info,2024-08-06 17:30:55.609058


passed test for going back and forth in directory structure


# save results

In [67]:
# NOTE(review): clean() is defined outside this section; presumably it removes
# leftover test artifacts before the report is generated — confirm against its definition
clean()

In [68]:

# create html version of this file for documentation
# ("!" is an IPython shell escape, so this cell only runs inside IPython/Jupyter;
# per the cell output, nbconvert writes the result to testing.html)
!jupyter nbconvert --to html testing.ipynb

# date stamp in the form YYYY_MM_DD, used in the report file name
now = datetime.datetime.now().strftime('%Y_%m_%d')
# move the generated html into the test folder under its dated name
shutil.move('testing.html', f'test/test_report_{now}.html')


[NbConvertApp] Converting notebook testing.ipynb to html
[NbConvertApp] Writing 693939 bytes to testing.html


'test/test_report_2024_08_06.html'

# temp

In [69]:
import pandas as pd
import numpy as np
import copy
import os
import sys
import shutil
import datetime
import qplib as qp
from qplib import log

# if 'cards' not in globals():
#     cards = pd.read_csv('data/cards.csv')
# cards.qi()


# df = qp.get_df().format()
# df.qi()
