# imports

In [None]:
import pandas as pd
import numpy as np
import copy
import os
import sys
import shutil
import datetime
import pickle
import qplib as qp
from qplib import log, na, nk, num



# log

In [None]:
import pandas as pd
import numpy as np
import copy
import os
import sys
import shutil
import datetime
import qplib as qp
from qplib import log
pd.set_option('display.max_columns', None)




#demo: call log once per level name. each message text starts with the level name.
#NOTE(review): presumably log() parses that prefix to determine the level - confirm in qplib.util
log('trace: this is a trace message')
log('debug: this is a debug message')
log('info: this is an info message')
log('warning: this is a warning message')
log('error: this is an error message')

#log is called without any message here.
#NOTE(review): presumably this returns/shows the collected log entries - confirm in qplib.util
log()

# diff

In [None]:

import pandas as pd
import numpy as np
import copy
import os
import datetime
import qplib as qp

from IPython.display import display
from ipywidgets import interact, widgets
from pandas.api.extensions import register_dataframe_accessor

from qplib.pandas import _format_df, get_dfs
from qplib.util import log, GREEN, RED, ORANGE, GREEN_LIGHT, RED_LIGHT, ORANGE_LIGHT
from qplib.types import _date, _na, qpDict


df_new, df_old = get_dfs()


#show both input dataframes before comparing them
for label, frame in (('df_new:', df_new), ('df_old:', df_old)):
    print(label)
    display(frame)

#show the result of qp.diff once for each mode used in this demo
for mode in ('new', 'new+', 'old', 'mix'):
    print(f'mode={mode}:')
    display(qp.diff(df_new, df_old, mode=mode))


# tests

In [None]:

# #run tests in folder "tests" using pytest and create a test report
# !pytest tests --html=tests/test_report.html


In [None]:
import pandas as pd
import numpy as np
import copy
import os
import sys
import shutil
import datetime
import qplib as qp
from qplib import log
pd.set_option('display.max_columns', None)


def get_df_simple():
    """
    Return a small dataframe with the integer columns 'a' and 'b'
    and three rows (default index 0, 1, 2).
    """
    columns = {}
    columns['a'] = [-1, 0, 1]
    columns['b'] = [1, 2, 3]
    return pd.DataFrame(columns)

def get_df_simple_tagged():
    """
    Return the simple dataframe ('a', 'b') with a leading 'meta' column
    of empty strings and the non-sorted index 3, 1, 2.
    """
    data = {'meta': ['', '', ''], 'a': [-1, 0, 1], 'b': [1, 2, 3]}
    #the index is handed over directly instead of being replaced afterwards
    return pd.DataFrame(data, index=pd.Index([3, 1, 2]))


def get_df():
    """
    Return a sample dataframe with 11 rows of deliberately messy data
    (mixed types, inconsistent spelling, several notations for missing values).
    """
    columns = {}

    #identifiers and free text
    columns['ID'] = [10001, 10002, 10003, 20001, 20002, 20003, 30001, 30002, 30003, 30004, 30005]
    columns['name'] = ['John Doe', 'Jane Smith', 'Alice Johnson', 'Bob Brown', 'eva white', 'Frank miller', 'Grace TAYLOR', 'Harry Clark', 'IVY GREEN', 'JAck Williams', 'john Doe']
    columns['date of birth'] = ['1995-01-02', '1990/09/14', '1985.08.23', '19800406', '05-11-2007', '06-30-1983', '28-05-1975', '1960Mar08', '1955-Jan-09', '1950 Sep 10', '1945 October 11']

    #demographics
    columns['age'] = [-25, '30', np.nan, None, '40.0', 'forty-five', 'nan', 'unk', '', 'unknown', 35]
    columns['gender'] = ['M', 'F', 'Female', 'Male', 'Other', 'm', 'ff', 'NaN', None, 'Mal', 'female']

    #measurements
    columns['height'] = [170, '175.5cm', None, '280', 'NaN', '185', '1', '6ft 1in', -10, '', 200]
    columns['weight'] = [70.2, '68', '72.5lb', 'na', '', '75kg', None, '80.3', '130lbs', '82', -65]
    columns['bp systole'] = ['20', 130, 'NaN', '140', '135mmhg', '125', 'NAN', '122', '', 130, '45']
    columns['bp diastole'] = [80, '85', 'nan', '90mmHg', np.nan, '75', 'NaN', None, '95', '0', 'NaN']

    #categorical values and dosage
    columns['cholesterol'] = ['Normal', 'Highe', 'NaN', 'GOOD', 'n.a.', 'High', 'Normal', 'n/a', 'high', '', 'Normal']
    columns['diabetes'] = ['No', 'yes', 'N/A', 'No', 'Y', 'Yes', 'NO', None, 'NaN', 'n', 'Yes']
    columns['dose'] = ['10kg', 'NaN', '15 mg once a day', '20mg', '20 Mg', '25g', 'NaN', None, '30 MG', '35', '40ml']

    return pd.DataFrame(columns)


def get_df_tagged():
    """
    Return the dataframe from get_df() with an additional first column
    named 'meta' that contains an empty string in every row.
    """
    df1 = get_df()
    #start with a frame that is filled with empty strings, so 'meta' is already set
    df2 = pd.DataFrame('', index=df1.index, columns=['meta', *df1.columns])
    #overwrite every column after 'meta' with the data of get_df()
    #NOTE(review): the resulting dtypes depend on this construct-then-assign sequence - keep as is
    df2.iloc[:, 1:] = df1.loc[:, :]
    return df2


df = qp.get_df()
display(df)

  

#pairs of (query code, expected result).
#the expected result is the part of the module level df that the query should select.
#an empty row list (df.loc[[], ...]) means that no row is expected to match.
param_pairs = [
    #type checks on a column that only contains strings
    (r'name  %%is str;',                 df.loc[:, ['name']]),
    (r'name  %%!is str;',                df.loc[[], ['name']]),
    (r'name  %%is num;',                 df.loc[[], ['name']]),
    (r'name  %%!is num;',                df.loc[:, ['name']]),
    (r'name  %%is na;',                  df.loc[[], ['name']]),
    (r'name  %%!is na;',                 df.loc[:, ['name']]),

    #numeric type checks on "age", with and without the strict flag
    (r'age   %%is int;',                 df.loc[[0,1,4,10], ['age']]),
    (r'age   %%strict is int;',          df.loc[[0,10], ['age']]),
    (r'age   %%is float;',               df.loc[[0,1,2,4,6,10], ['age']]),
    (r'age   %%strict is float;',        df.loc[[2], ['age']]),
    (r'age   %%is na;',                  df.loc[[2,3,6,8], ['age']]),

    #numeric type checks on "weight", with and without the strict flag
    (r'weight  %%is int;',               df.loc[[1,9,10], ['weight']]),
    (r'weight  %%strict is int;',        df.loc[[10], ['weight']]),
    (r'weight  %%is float;',             df.loc[[0,1,7,9,10], ['weight']]),
    (r'weight  %%strict is float;',      df.loc[[0], ['weight']]),
    (r'weight  %%is num;',               df.loc[[0,1,4,6,7,9,10], ['weight']]),
    (r'weight  %%strict is num;',        df.loc[[0,10], ['weight']]),
    (r'weight  %%is num;  &&!is na;',    df.loc[[0,1,7,9,10], ['weight']]),

    #boolean checks
    (r'height       %%is bool;',         df.loc[[6], ['height']]),
    (r'bp diastole  %%is bool;',         df.loc[[9], ['bp diastole']]),
    (r'diabetes     %%is bool;',         df.loc[[0,1,3,4,5,6,9,10], ['diabetes']]),
    (r'diabetes     %%strict is bool;',  df.loc[[], ['diabetes']]),

    #yes/no checks
    (r'diabetes  %%is yn;',              df.loc[[0,1,3,4,5,6,9,10], ['diabetes']]),
    (r'diabetes  %%is na;  //is yn;',    df.loc[:, ['diabetes']]),
    (r'diabetes  %%is yes;',             df.loc[[1,4,5,10], ['diabetes']]),
    (r'diabetes  %%is no;',              df.loc[[0,3,6,9], ['diabetes']]),

    #missing value checks, with and without the strict flag
    (r'cholesterol  %%is na;',           df.loc[[2,4,7,9], ['cholesterol']]),
    (r'age          %%is na;',           df.loc[[2,3,6,8], ['age']]),
    (r'age          %%strict is na;',           df.loc[[2,3], ['age']]),
    ]
def test(code, expected):
    """
    Run the query "code" on a fresh copy of the sample dataframe and
    assert that the selected part equals "expected".

    On failure the assertion message is the string diff of both frames.
    """
    source = get_df()
    selection = source.q(code)
    #compare the original values at the selected positions, not the query output itself
    actual = source.loc[selection.index, selection.columns]
    assert actual.equals(expected), qp.diff(actual, expected, output='str')

#run every query/expectation pair
for code, expected in param_pairs:
    test(code, expected)




# qlang

In [None]:
import pandas as pd
import numpy as np
import copy
import os
import sys
import shutil
import datetime
import qplib as qp
from qplib import log
pd.set_option('display.max_columns', None)

def format_symbols():
    """
    Read 'qplib/data/symbols.xlsx', mirror its entries and save the result
    as 'qplib/data/symbols.csv'.

    The workbook is read from a temporary copy ('symbols_temp.xlsx').
    The copy is removed again in any case, also when reading fails.

    For every row position i >= 2 and every position j <= i the value at
    (row i, column j) is copied to (row j, column i), where both rows and
    columns are addressed by the labels of the index.
    NOTE(review): the first two rows are skipped as "row" - presumably they
    hold descriptive entries and not compatibility values; confirm with the workbook.

    Returns the mirrored dataframe.
    """
    source_path = 'qplib/data/symbols.xlsx'
    temp_path = 'qplib/data/symbols_temp.xlsx'

    shutil.copy(source_path, temp_path)
    try:
        df = pd.read_excel(temp_path, index_col=0)
    finally:
        #the data is fully loaded (or reading failed), the copy is not needed anymore
        os.remove(temp_path)

    size = len(df.index)
    for i in range(2, size):
        row = df.index[i]
        for j in range(i+1):
            col = df.index[j]
            if row != col:
                df.loc[col, row] = df.loc[row, col]

    df.to_csv('qplib/data/symbols.csv')
    return df

df = format_symbols()

df

In [1]:
import pandas as pd
import numpy as np
import copy
import os
import sys
import shutil
import datetime
import qplib as qp
from qplib import log
pd.set_option('display.max_columns', None)
#set the module level verbosity of qp.qlang for this demo
qp.qlang.VERBOSITY = 3

df = qp.get_df()

#small tagged frame. it is not used by the queries in this cell
df1 = pd.DataFrame({
        'meta': ['', '', ''],
        'a': [-1, 0, 1],
        'b': [1, 2, 3]
        })
df1.index = [0, 1, 2]

#query 1: uses the "save" flag twice and the "load" flag afterwards, then sets a color
df.q(
    """
    id   %%each ?1   %%save 1
    id   %%each ?2   %%save 2

    id   %%load 1   &&load 2  $color=red
    """
    )


#query 2: changes the settings "verbosity" and "diff" inside of the query,
#tags a selection and ends with the "is any;" operator
df.q(
        r"""
        $ verbosity = 3
        $ diff = None

        id /date of birth   %%all?1  $tag1

        is any;
        """
        )

0,1,2,3,4
84,INFO,"no metadata column found in dataframe. creating new column named ""meta""",qp.qlang._metadata,2025-04-09 15:20:09.104871


Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose,meta
0,10001,John Doe,1995-01-02,-25.0,M,170,70.2,20,80,Normal,No,10kg,\n@ID@date of birth: 1
1,10002,Jane Smith,1990/09/14,30.0,F,175.5cm,68,130,85,Highe,yes,,\n@ID@date of birth: 1
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day,\n@ID@date of birth: 1
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg,\n@ID@date of birth: 1
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,,\n@ID@date of birth: 1


# temp

In [2]:
import pandas as pd
import numpy as np
import copy
import os
import sys
import shutil
import datetime
import qplib as qp
from qplib import log
pd.set_option('display.max_columns', None)

df = qp.get_df()


In [31]:

import numpy as np
import pandas as pd
import re
import qplib as qp

from IPython.display import display
from ipywidgets import widgets, interactive_output, HBox, VBox, fixed, Layout

from qplib.util import log
from qplib.types import _dict, _int, _float, _num, _bool, _datetime, _date, _na, _nk, _yn, _type
from qplib.pandas import _diff


#####################     settings     #####################

#default settings
VERBOSITY = 3
DIFF = None
INPLACE = False

#types that count as integer: python, numpy and pandas (nullable) variants
TYPES_INT = (
    int,
    np.int64, np.int32, np.int16, np.int8,
    pd.Int64Dtype, pd.Int32Dtype, pd.Int16Dtype, pd.Int8Dtype,
    )

#types that count as float: python, numpy and pandas (nullable) variants
TYPES_FLOAT = (
    float,
    np.float64, np.float32, np.float16,
    pd.Float64Dtype, pd.Float32Dtype,
    )

#types that count as number
TYPES_NUM = (
    int, float,
    np.int64, np.float64,
    np.int32, np.float32,
    np.int16, np.float16,
    np.int8,
    np.number,
    pd.Int64Dtype, pd.Float64Dtype,
    pd.Int32Dtype, pd.Float32Dtype,
    pd.Int16Dtype,
    pd.Int8Dtype,
    )

#types that count as boolean
TYPES_BOOL = (bool, np.bool_, pd.BooleanDtype)



##################     syntax symbols     ##################

def get_symbols():
    """
    Load the symbol table from 'qplib/data/symbols.csv'.

    Surrounding double quotes are removed from all row and column labels
    and the values 0 and 1 are replaced by False and True.
    """
    def unquote(label):
        return label.strip('"')

    table = pd.read_csv('qplib/data/symbols.csv', index_col=0)
    table.rename(index=unquote, columns=unquote, inplace=True)
    return table.replace(0, False).replace(1, True)

SYMBOLS = get_symbols()

class Symbol:
    """
    A single symbol of the query language syntax.

    A symbol has a glyph (the text used in a query), a name and a description.
    All additional keyword arguments are stored as attributes of the symbol.
    Comparisons with "<" and ">" compare the glyph with the given value.
    """
    def __init__(self, glyph, name, description, **kwargs):
        self.glyph = glyph
        self.name = name
        self.description = description
        #additional traits (eg: type_func, dtype) become plain attributes
        for trait in kwargs:
            setattr(self, trait, kwargs[trait])

    def details(self):
        parts = (
            'symbol:',
            f'glyph: "{self.glyph}"',
            f'name: "{self.name}"',
            f'description: "{self.description}"',
            )
        return '\n\t'.join(parts)

    def __repr__(self):
        return '"' + f'{self.glyph}' + '": ' + f'{self.name}'

    def __str__(self):
        return '"' + f'{self.glyph}' + '": ' + f'{self.name}'

    def __lt__(self, value):
        return self.glyph < value

    def __gt__(self, value):
        return self.glyph > value

class Symbols:
    """
    Multiple Symbols of the same category are collected in a Symbols object.

    A symbol can be looked up by its glyph or by its name,
    as attribute (eg: CONNECTORS.NEW_SELECT_ROWS) or as item (eg: CONNECTORS['%%']).
    A glyph takes precedence over a name and both take precedence over
    regular attributes of the object.
    Iterating yields the symbols in the order they were passed in.
    """
    def __init__(self, name, *symbols):
        self.name = name
        self.by_name = {symbol.name: symbol for symbol in symbols}
        self.by_glyph = {symbol.glyph: symbol for symbol in symbols}

    def __getattribute__(self, value):
        lookup = super().__getattribute__

        #the lookup tables and dunder attributes never refer to a symbol.
        #resolving them directly keeps copy.copy, copy.deepcopy and pickle working,
        #since those access "__dict__" etc. before the tables exist on the new instance.
        if value in ('by_name', 'by_glyph') or (value.startswith('__') and value.endswith('__')):
            return lookup(value)

        #read the tables from the instance dict so a missing table
        #(not fully initialized instance) does not raise here
        state = lookup('__dict__')
        by_glyph = state.get('by_glyph', {})
        if value in by_glyph:
            return by_glyph[value]
        by_name = state.get('by_name', {})
        if value in by_name:
            return by_name[value]
        return lookup(value)

    def __getitem__(self, key):
        """
        Return the symbol with the given glyph or name.
        Logs an error and returns None if no such symbol exists.
        """
        if key in self.by_glyph:
            return self.by_glyph[key]
        elif key in self.by_name:
            return self.by_name[key]
        else:
            log(f'error: symbol "{key}" not found in "{self.name}"', 'qp.qlang.Symbols.__getitem__', VERBOSITY)
            return None

    def __iter__(self):
        return iter(self.by_name.values())

    def __repr__(self):
        return f'{self.name}:\n\t' + '\n\t'.join([str(val) for key,val in self.by_name.items()])

    def __str__(self):
        return self.__repr__()

class Instruction:
    """
    One step of a query. Each query is built from sequential instructions.

    An instruction starts with its code and line number only.
    Connector, flags, operator, value and function are filled in later
    (see the comments in __init__).
    """
    def __init__(self, code='', line_num=None):
        #initial values
        self.line_num = line_num
        self.code = code

        #determined by tokenize()
        self.connector = None
        self.flags = set()
        self.operator = None
        self.value = None

        #determined by parse()
        self.function = None


    def __repr__(self):
        #collect one entry per line, joined with newline and tab at the end
        entries = [
            'Instruction:',
            f'line_num: {self.line_num}',
            f'code: {self.code}',
            f'connector: {self.connector}',
            ]
        entries.extend(f'flag: {flag}' for flag in self.flags)
        entries.append(f'operator: {self.operator}')
        entries.append(f'value: {self.value}')
        if self.function:
            entries.append(f'function: {self.function.__name__}')

        return '\n\t'.join(entries)

    def __str__(self):
        return self.__repr__()



#symbols that do not belong to a category
COMMENT = Symbol('#', 'COMMENT', 'comments out the rest of the line')
ESCAPE = Symbol('´', 'ESCAPE', 'escape the next character')


#connectors: doubled glyphs select rows, single glyphs select columns, "$" modifies
CONNECTORS = Symbols(
    'CONNECTORS',

    #select rows
    Symbol('%%', 'NEW_SELECT_ROWS', 'select rows. disregard previous row selection'),
    Symbol('&&', 'AND_SELECT_ROWS', 'this row selection condition AND the previous condition/s must be fulfilled'),
    Symbol('//', 'OR_SELECT_ROWS', 'this row selection condition OR the previous condition/s must be fulfilled'),

    #select cols
    Symbol('%', 'NEW_SELECT_COLS', 'select columns. disregard previous column selection'),
    Symbol('&', 'AND_SELECT_COLS', 'this column selection condition AND the previous condition/s must be fulfilled'),
    Symbol('/', 'OR_SELECT_COLS', 'this column selection condition OR the previous condition/s must be fulfilled'),

    #modify values
    Symbol('$', 'MODIFY', 'modify settings, metadata, format, headers, values or create new columns'),
    )

#groups of connectors
connectors_select_cols = {
    CONNECTORS.NEW_SELECT_COLS,
    CONNECTORS.AND_SELECT_COLS,
    CONNECTORS.OR_SELECT_COLS,
    }
connectors_select_rows = {
    CONNECTORS.NEW_SELECT_ROWS,
    CONNECTORS.AND_SELECT_ROWS,
    CONNECTORS.OR_SELECT_ROWS,
    }
connectors_select = connectors_select_cols | connectors_select_rows



#all flags of the query language.
#each entry is given as Symbol(glyph, name, description).
FLAGS = Symbols('FLAGS',

    #selection flags

    #select rows/values
    Symbol('any', 'ANY', 'select whole row if ANY value in the selected columns fulfills the condition'),
    Symbol('all', 'ALL', 'select whole row if ALL values in the selected columns fulfill the condition'),
    Symbol('idx', 'IDX', 'select whole row if the index of the row fulfills the condition'),
    Symbol('each', 'EACH', 'select each value (not the whole row) that fulfills the condition'),

    #negate the selection condition
    Symbol('!', 'NEGATE', 'negate/invert the selection condition'),

    #strict comparison
    Symbol('strict', 'STRICT', 'use strict comparison for selection condition (eg: case sensitive, strict typing)'),

    #save and load selections
    Symbol('save', 'SAVE_SELECTION', 'save current selection with given <name>. load using: "$load = <name>'),
    Symbol('load', 'LOAD_SELECTION', 'load a saved selection of rows/values (boolean mask). save using: "$save = <name>'),



    #modification flags

    #modify settings
    Symbol('verbosity', 'VERBOSITY', 'change the verbosity/logging level'),
    Symbol('diff', 'DIFF', 'change if and how the difference between the old and new dataframe is shown'),

    #set/modify metadata
    Symbol('meta', 'METADATA', 'modify the metadata of the selected rows/values'),
    Symbol('tag', 'TAG_METADATA', 'add a tag of the currently selected column(s) in the form of "\\n@<selected col>: <value>" to the column named "meta"',),

    #modify format
    Symbol('color', 'COLOR', 'change the color of the selected values'),
    Symbol('bg', 'BACKGROUND_COLOR', 'change the background color of the selected values'),
    Symbol('align', 'ALIGN', 'change the alignment of the selected values'),
    Symbol('width', 'WIDTH', 'change the width of the selected values'),
    Symbol('css', 'CSS', 'use css to format the selected values'),

    #modify data
    Symbol('val', 'VAL', 'modify selected values'),
    Symbol('header', 'HEADER', 'modify the headers of the selected columns'),
    Symbol('new', 'NEW_COL', 'create a new column with the selected values'),



    #multipurpose flags

    #evaluate a python expression
    Symbol('col', 'COL_EVAL', 'when used with the eval operator, evaluates on the whole column'),

    #use regex for matching and contains operations
    Symbol('regex', 'REGEX', 'use regex for equality and contains operator'),

    )

#flags that are used in selection instructions
flags_select = {
    FLAGS.ANY, FLAGS.ALL, FLAGS.IDX, FLAGS.EACH,
    FLAGS.NEGATE,
    FLAGS.STRICT,
    FLAGS.REGEX,
    FLAGS.SAVE_SELECTION, FLAGS.LOAD_SELECTION,
    }

#flags that define the scope of a row selection
flags_select_rows_scope = {FLAGS.ANY, FLAGS.ALL, FLAGS.IDX, FLAGS.EACH}

#flags that are used in modification instructions, grouped by purpose
flags_settings = {FLAGS.VERBOSITY, FLAGS.DIFF}
flags_metadata = {FLAGS.METADATA, FLAGS.TAG_METADATA}
flags_format = {
    FLAGS.COLOR,
    FLAGS.BACKGROUND_COLOR,
    FLAGS.ALIGN,
    FLAGS.WIDTH,
    FLAGS.CSS,
    }
flags_copy_df = {
    FLAGS.VAL,
    FLAGS.HEADER,
    FLAGS.NEW_COL,
    FLAGS.COL_EVAL,
    FLAGS.METADATA,
    FLAGS.TAG_METADATA,
    }

#all modification flags
flags_modify = flags_settings | flags_metadata | flags_format | flags_copy_df



#all operators of the query language.
#each entry is given as Symbol(glyph, name, description, **traits).
#the "to ..." operators additionally carry the traits "type_func" and (except "to na;") "dtype".
OPERATORS = Symbols('OPERATORS',

    #binary selection operators
    Symbol('>=', 'BIGGER_EQUAL', 'bigger or equal'),
    Symbol('<=', 'SMALLER_EQUAL', 'smaller or equal'),
    Symbol('>', 'BIGGER', 'bigger'),
    Symbol('<', 'SMALLER', 'smaller'),
    Symbol('==', 'EQUALS', 'equal to'),
    Symbol('?', 'CONTAINS', 'contains a string (not case sensitive)'),

    #unary selection operators
    Symbol('is any;', 'IS_ANY', 'is any value (use to reset selection)'),
    Symbol('is str;', 'IS_STR', 'is a string'),
    Symbol('is int;', 'IS_INT', 'is an integer'),
    Symbol('is float;', 'IS_FLOAT', 'is a float'),
    Symbol('is num;', 'IS_NUM', 'is a number'),
    Symbol('is bool;', 'IS_BOOL', 'is a boolean'),
    Symbol('is datetime;', 'IS_DATETIME', 'is a datetime'),
    Symbol('is date;', 'IS_DATE', 'is a date'),
    Symbol('is na;', 'IS_NA', 'is a missing value'),
    Symbol('is nk;', 'IS_NK', 'is not a known value'),
    Symbol('is yn;', 'IS_YN', 'is a value representing yes or no'),
    Symbol('is yes;', 'IS_YES', 'is a value representing yes'),
    Symbol('is no;', 'IS_NO', 'is a value representing no'),
    Symbol('is unique;', 'IS_UNIQUE', 'is a unique value'),
    Symbol('is first;', 'IS_FIRST', 'is the first value (of multiple values)'),
    Symbol('is last;', 'IS_LAST', 'is the last value (of multiple values)'),


    #binary modification operators
    Symbol('+=', 'ADD', 'append a string to the value (coerce to string if needed)'),

    #unary modification operators
    Symbol('sort;', 'SORT', 'sort values based on the selected column(s)'),
    Symbol('to str;', 'TO_STR', 'convert to string', type_func=str, dtype=str),
    Symbol('to int;', 'TO_INT', 'convert to integer', type_func=_int, dtype='Int64'),
    Symbol('to float;', 'TO_FLOAT', 'convert to float', type_func=_float, dtype='Float64'),
    Symbol('to num;', 'TO_NUM', 'convert to number', type_func=_num, dtype='object'),
    Symbol('to bool;', 'TO_BOOL', 'convert to boolean', type_func=_bool, dtype='bool'),
    Symbol('to datetime;', 'TO_DATETIME', 'convert to datetime', type_func=_datetime, dtype='datetime64[ns]'),
    Symbol('to date;', 'TO_DATE', 'convert to date', type_func=_date, dtype='datetime64[ns]'),
    Symbol('to na;', 'TO_NA', 'convert to missing value', type_func=_na),
    Symbol('to nk;', 'TO_NK', 'convert to not known value', type_func=_nk, dtype='object'),
    Symbol('to yn;', 'TO_YN', 'convert to yes or no value', type_func=_yn, dtype='object'),


    #multipurpose operators
    Symbol('=', 'SET', 'set values'),  #default. gets interpreted as EQUALS when used in selection instructions
    Symbol('~', 'EVAL', 'evaluate a python expression'),  #can be used for selection and modification

    )

#operators that are valid in selection instructions
operators_select = {
    OPERATORS.BIGGER_EQUAL, OPERATORS.SMALLER_EQUAL,
    OPERATORS.BIGGER, OPERATORS.SMALLER,
    OPERATORS.EQUALS,
    OPERATORS.SET,  #interpreted as EQUALS for selection instructions
    OPERATORS.CONTAINS,
    OPERATORS.EVAL,
    OPERATORS.IS_ANY,
    OPERATORS.IS_STR, OPERATORS.IS_INT, OPERATORS.IS_FLOAT, OPERATORS.IS_NUM,
    OPERATORS.IS_BOOL,
    OPERATORS.IS_DATETIME, OPERATORS.IS_DATE,
    OPERATORS.IS_NA, OPERATORS.IS_NK,
    OPERATORS.IS_YN, OPERATORS.IS_YES, OPERATORS.IS_NO,
    OPERATORS.IS_UNIQUE, OPERATORS.IS_FIRST, OPERATORS.IS_LAST,
    }

#operators that are valid in modification instructions
operators_modify = {
    OPERATORS.SET,
    OPERATORS.ADD,
    OPERATORS.SORT,
    OPERATORS.EVAL,
    OPERATORS.TO_STR, OPERATORS.TO_INT, OPERATORS.TO_FLOAT, OPERATORS.TO_NUM,
    OPERATORS.TO_BOOL,
    OPERATORS.TO_DATETIME, OPERATORS.TO_DATE,
    OPERATORS.TO_NA, OPERATORS.TO_NK,
    OPERATORS.TO_YN,
    }

#selection operators that do not take a value
operators_unary = {
    OPERATORS.IS_ANY,
    OPERATORS.IS_STR, OPERATORS.IS_INT, OPERATORS.IS_FLOAT, OPERATORS.IS_NUM,
    OPERATORS.IS_BOOL,
    OPERATORS.IS_DATETIME, OPERATORS.IS_DATE,
    OPERATORS.IS_NA, OPERATORS.IS_NK,
    OPERATORS.IS_YN, OPERATORS.IS_YES, OPERATORS.IS_NO,
    OPERATORS.IS_UNIQUE, OPERATORS.IS_FIRST, OPERATORS.IS_LAST,
    }

#operators that take a value
operators_binary = {
    OPERATORS.BIGGER_EQUAL, OPERATORS.SMALLER_EQUAL,
    OPERATORS.BIGGER, OPERATORS.SMALLER,
    OPERATORS.EQUALS,
    OPERATORS.SET,
    OPERATORS.CONTAINS,
    OPERATORS.EVAL,
    OPERATORS.ADD,
    }

#operators that are valid when modifying metadata
operators_metadata = {OPERATORS.SET, OPERATORS.ADD}

#operators that check the type of a value
operators_is_type = {
    OPERATORS.IS_STR, OPERATORS.IS_INT, OPERATORS.IS_FLOAT, OPERATORS.IS_NUM,
    OPERATORS.IS_BOOL,
    OPERATORS.IS_DATETIME, OPERATORS.IS_DATE,
    OPERATORS.IS_NA,
    }


  df = df.replace(0, False).replace(1, True)


In [32]:

#symbol categories in the order in which they appear in the table
categories = (
    ('connector', qp.qlang.CONNECTORS),
    ('operator', qp.qlang.OPERATORS),
    ('flag', qp.qlang.FLAGS),
    )

glyphs = [symbol.glyph for _, group in categories for symbol in group]

#four descriptive rows, followed by one row per glyph. one column per glyph
index = ['type', 'name', 'description', 'unary'] + glyphs

df = pd.DataFrame(np.zeros((len(index), len(glyphs))), columns=glyphs, index=index, dtype=object)

#fill in the descriptive rows for every symbol
for kind, group in categories:
    for symbol in group:
        df.loc['type', symbol.glyph] = kind
        df.loc['name', symbol.glyph] = symbol.name
        df.loc['description', symbol.glyph] = symbol.description

#mark the diagonal (glyph with itself) with 2
for glyph in glyphs:
    df.loc[glyph, glyph] = 2


#wrap all row and column labels in double quotes before exporting
df.rename(index=lambda x: f'"{x}"', columns=lambda x: f'"{x}"', inplace=True)
df.to_excel('qplib/data/symbols_raw.xlsx')

df

Unnamed: 0,"""%%""","""&&""","""//""","""%""","""&""","""/""","""$""",""">=""","""<=""",""">""","""<""","""==""","""?""","""is any;""","""is str;""","""is int;""","""is float;""","""is num;""","""is bool;""","""is datetime;""","""is date;""","""is na;""","""is nk;""","""is yn;""","""is yes;""","""is no;""","""is unique;""","""is first;""","""is last;""","""+=""","""sort;""","""to str;""","""to int;""","""to float;""","""to num;""","""to bool;""","""to datetime;""","""to date;""","""to na;""","""to nk;""","""to yn;""","""=""","""~""","""any""","""all""","""idx""","""each""","""!""","""strict""","""save""","""load""","""verbosity""","""diff""","""meta""","""tag""","""color""","""bg""","""align""","""width""","""css""","""val""","""header""","""new""","""col""","""regex"""
"""type""",connector,connector,connector,connector,connector,connector,connector,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,operator,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag,flag
"""name""",NEW_SELECT_ROWS,AND_SELECT_ROWS,OR_SELECT_ROWS,NEW_SELECT_COLS,AND_SELECT_COLS,OR_SELECT_COLS,MODIFY,BIGGER_EQUAL,SMALLER_EQUAL,BIGGER,SMALLER,EQUALS,CONTAINS,IS_ANY,IS_STR,IS_INT,IS_FLOAT,IS_NUM,IS_BOOL,IS_DATETIME,IS_DATE,IS_NA,IS_NK,IS_YN,IS_YES,IS_NO,IS_UNIQUE,IS_FIRST,IS_LAST,ADD,SORT,TO_STR,TO_INT,TO_FLOAT,TO_NUM,TO_BOOL,TO_DATETIME,TO_DATE,TO_NA,TO_NK,TO_YN,SET,EVAL,ANY,ALL,IDX,EACH,NEGATE,STRICT,SAVE_SELECTION,LOAD_SELECTION,VERBOSITY,DIFF,METADATA,TAG_METADATA,COLOR,BACKGROUND_COLOR,ALIGN,WIDTH,CSS,VAL,HEADER,NEW_COL,COL_EVAL,REGEX
"""description""",select rows. disregard previous row selection,this row selection condition AND the previous ...,this row selection condition OR the previous c...,select columns. disregard previous column sele...,this column selection condition AND the previo...,this column selection condition OR the previou...,"modify settings, metadata, format, headers, va...",bigger or equal,smaller or equal,bigger,smaller,equal to,contains a string (not case sensitive),is any value (use to reset selection),is a string,is an integer,is a float,is a number,is a boolean,is a datetime,is a date,is a missing value,is not a known value,is a value representing yes or no,is a value representing yes,is a value representing no,is a unique value,is the first value (of multiple values),is the last value (of multiple values),append a string to the value (coerce to string...,sort values based on the selected column(s),convert to string,convert to integer,convert to float,convert to number,convert to boolean,convert to datetime,convert to date,convert to missing value,convert to not known value,convert to yes or no value,set values,evaluate a python expression,select whole row if ANY value in the selected ...,select whole row if ALL values in the selected...,select whole row if the index of the row fulfi...,select each value (not the whole row) that ful...,negate/invert the selection condition,use strict comparison for selection condition ...,save current selection with given <name>. 
load...,load a saved selection of rows/values (boolean...,change the verbosity/logging level,change if and how the difference between the o...,modify the metadata of the selected rows/values,add a tag of the currently selected column(s) ...,change the color of the selected values,change the background color of the selected va...,change the alignment of the selected values,change the width of the selected values,use css to format the selected values,modify selected values,modify the headers of the selected columns,create a new column with the selected values,"when used with the eval operator, evaluates on...",use regex for equality and contains operator
"""unary""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""%%""",2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"""val""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,0.0,0.0,0.0,0.0
"""header""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,0.0,0.0,0.0
"""new""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,0.0,0.0
"""col""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,0.0


  df = df.replace(0, False).replace(1, True)


# 1

In [110]:

class Symbol:
    """
    A Symbol used in the syntax of the query language.

    Stores glyph, name and description. Every further keyword argument
    is attached to the symbol as an attribute of the same name.
    "<" and ">" compare the glyph of the symbol with the given value.
    """
    def __init__(self, glyph, name, description, **kwargs):
        self.glyph, self.name, self.description = glyph, name, description
        for trait, setting in kwargs.items():
            setattr(self, trait, setting)

    def details(self):
        text = 'symbol:'
        text += f'\n\tglyph: "{self.glyph}"'
        text += f'\n\tname: "{self.name}"'
        text += f'\n\tdescription: "{self.description}"'
        return text

    def __repr__(self):
        return '"{}": {}'.format(self.glyph, self.name)

    def __str__(self):
        return '"{}": {}'.format(self.glyph, self.name)

    def __lt__(self, value):
        return self.glyph < value

    def __gt__(self, value):
        return self.glyph > value


class Symbols:
    """
    Multiple Symbols of the same category are collected in a Symbols object.

    A symbol can be looked up by its glyph or by its name,
    as attribute (eg: OPERATORS.TO_NA) or as item (eg: OPERATORS['to na;']).
    A glyph takes precedence over a name and both take precedence over
    regular attributes of the object.
    Iterating yields the symbols in the order they were passed in.
    """
    def __init__(self, name, *symbols):
        self.name = name
        self.by_name = {symbol.name: symbol for symbol in symbols}
        self.by_glyph = {symbol.glyph: symbol for symbol in symbols}

    def __getattribute__(self, value):
        lookup = super().__getattribute__

        #the lookup tables and dunder attributes never refer to a symbol.
        #resolving them directly keeps copy.copy, copy.deepcopy and pickle working,
        #since those access "__dict__" etc. before the tables exist on the new instance.
        if value in ('by_name', 'by_glyph') or (value.startswith('__') and value.endswith('__')):
            return lookup(value)

        #read the tables from the instance dict so a missing table
        #(not fully initialized instance) does not raise here
        state = lookup('__dict__')
        by_glyph = state.get('by_glyph', {})
        if value in by_glyph:
            return by_glyph[value]
        by_name = state.get('by_name', {})
        if value in by_name:
            return by_name[value]
        return lookup(value)

    def __getitem__(self, key):
        """
        Return the symbol with the given glyph or name.
        Logs an error and returns None if no such symbol exists.
        """
        if key in self.by_glyph:
            return self.by_glyph[key]
        elif key in self.by_name:
            return self.by_name[key]
        else:
            log(f'error: symbol "{key}" not found in "{self.name}"', 'qp.qlang.Symbols.__getitem__', VERBOSITY)
            return None

    def __iter__(self):
        return iter(self.by_name.values())

    def __repr__(self):
        return f'{self.name}:\n\t' + '\n\t'.join([str(val) for key,val in self.by_name.items()])

    def __str__(self):
        return self.__repr__()


def get_symbols():
    """
    Load the symbol table from 'qplib/data/symbols.csv' and split it up.

    Surrounding double quotes are removed from all row and column labels.
    The values 0, 1 and 2 are replaced by False, True and True.

    Returns a tuple of:
        symbols: the whole table
        definitions: the columns that are not glyphs (the traits of each symbol)
        compatible: the columns that are glyphs (glyph x glyph table)
        connectors, operators, flags: Symbols objects built from all rows
            of the respective type
    """
    def unquote(label):
        return label.strip('"')

    raw = pd.read_csv('qplib/data/symbols.csv', index_col=0)
    raw.rename(index=unquote, columns=unquote, inplace=True)

    symbols = raw.replace(0, False).replace(1, True).replace(2, True)

    #every row is a glyph. columns that are not a glyph describe a trait
    glyphs = symbols.index
    traits = [column for column in symbols.columns if column not in glyphs]

    definitions = symbols.loc[glyphs, traits]
    compatible = symbols.loc[glyphs, glyphs]

    categories = (
        ('CONNECTORS', 'connector'),
        ('OPERATORS', 'operator'),
        ('FLAGS', 'flag'),
        )
    connectors, operators, flags = (
        Symbols(label, *symbols_of_type(definitions, kind))
        for label, kind in categories
        )

    return symbols, definitions, compatible, connectors, operators, flags

def symbols_of_type(definitions, symbol_type):
    """
    Create a Symbol for each row of "definitions" whose entry in the
    column 'type' equals "symbol_type".

    The row label is used as glyph. Every column of "definitions" is
    passed to the Symbol as keyword argument.
    Returns the list of created symbols.
    """
    matching = definitions[definitions['type'] == symbol_type]
    return [
        Symbol(glyph, **{trait: definitions.loc[glyph, trait] for trait in definitions.columns})
        for glyph in matching.index
        ]

SYMBOLS, DEFINITIONS, COMPATIBLE, CONNECTORS, OPERATORS, FLAGS = get_symbols()


OPERATORS.TO_NA.unary

  symbols = df.replace(0, False).replace(1, True).replace(2, True)


np.True_

# 2

In [None]:

def _get_symbols():
    df = pd.read_csv('qplib/data/symbols.csv', index_col=0)
    df.drop(index=['type', 'glyph', 'description'], inplace=True)
    df['glyph'] = df['glyph'].str.strip('"')
    symbols = df.replace('0', False).replace('1', True).replace('2', True)
    return symbols

SYMBOLS = _get_symbols()


  symbols = df.replace('0', False).replace('1', True).replace('2', True)


In [99]:
compatible = SYMBOLS.loc[SYMBOLS.index, SYMBOLS.index]

traits = ['unary', 'SET']

df = compatible.loc[traits, traits]

df

Unnamed: 0,unary,SET
unary,True,False
SET,False,True


In [100]:
%%timeit

df = compatible.loc[traits, traits]



403 μs ± 28 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [84]:
cards = pd.read_csv('misc/cards.csv')

  cards = pd.read_csv('misc/cards.csv')
