# qplib - a query language for pandas

The query language works by sequentially applying filter conditions and modification instructions to the dataframe.
Each condition/instruction starts with a connector ("%", "&", "/", "%%", "&&", "//", "$"), followed by some option flags, an operator and a value.
Each of these components is optional, with an associated default behaviour.

The examples will use a very small test dataset so that all filtering and modification is easily traceable.

Some instructions make use of colors, which might not render depending on where you view the notebook (e.g. GitHub).

In [1]:
import pandas as pd
import numpy as np
import qplib as qp
from qplib import log

pd.set_option('display.max_columns', None)

df = qp.get_df()
df

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
0,10001,John Doe,1995-01-02,-25,M,170,70.2,20,80,Normal,No,10kg
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
5,20003,Frank miller,06-30-1983,forty-five,m,185,75kg,125,75,High,Yes,25g
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35


# filter/select

## columns




In [2]:
#Select the column called "name":
#(Multiple equivalent examples are shown to demonstrate default behaviour)

df.q('name')
df.q('%name')
df.q('%=name')
df.q('%==name')
df.q('% == name')

Unnamed: 0,name
0,John Doe
1,Jane Smith
2,Alice Johnson
3,Bob Brown
4,eva white
5,Frank miller
6,Grace TAYLOR
7,Harry Clark
8,IVY GREEN
9,JAck Williams


In [3]:
#select all columns containing the string "bp":
df.q('?bp')

Unnamed: 0,bp systole,bp diastole
0,20,80
1,130,85
2,,
3,140,90mmHg
4,135mmhg,
5,125,75
6,NAN,
7,122,
8,,95
9,130,0


In [4]:
#Multiple selection conditions can be used by combining them with "&" or "/".

#Either condition must be fulfilled:
df.q('name  /?bp')

Unnamed: 0,name,bp systole,bp diastole
0,John Doe,20,80
1,Jane Smith,130,85
2,Alice Johnson,,
3,Bob Brown,140,90mmHg
4,eva white,135mmhg,
5,Frank miller,125,75
6,Grace TAYLOR,NAN,
7,Harry Clark,122,
8,IVY GREEN,,95
9,JAck Williams,130,0


In [5]:
#Both conditions must be fulfilled:
df.q('?bp  &?systole')

Unnamed: 0,bp systole
0,20
1,130
2,
3,140
4,135mmhg
5,125
6,NAN
7,122
8,
9,130


In [6]:
#Notice the warning when no columns fulfill both conditions.
df.q('name  &?bp')

0,1,2,3,4
84,WARNING,"no columns fulfill the condition in ""&?bp"" and the previous condition(s)",qp.qlang._select_cols,2025-04-25 13:08:12.311797


0
1
2
3
4
5
6
7
8
9
10


In [7]:
#"%" creates a new selection, discarding the previous one:
df.q(r'name   /?bp   %id')

Unnamed: 0,ID
0,10001
1,10002
2,10003
3,20001
4,20002
5,20003
6,30001
7,30002
8,30003
9,30004


In [8]:
#Reset selection by selecting everything:
df.q(r'id  /name   %is any;')

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
0,10001,John Doe,1995-01-02,-25,M,170,70.2,20,80,Normal,No,10kg
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
5,20003,Frank miller,06-30-1983,forty-five,m,185,75kg,125,75,High,Yes,25g
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35


## row filtering

Row filter conditions use the same connector symbols as column conditions, but doubled ("%%", "&&", "//"), so that the two are easy to distinguish.

In [9]:
#Select all rows where the value in the "id" column is greater than 20000:
df.q(r'%id    %%>20000')

Unnamed: 0,ID
3,20001
4,20002
5,20003
6,30001
7,30002
8,30003
9,30004
10,30005


In [10]:
#Select rows based on multiple conditions for the same column:
df.q(r'%id    %%>20000    &&<30003')

Unnamed: 0,ID
3,20001
4,20002
5,20003
6,30001
7,30002


In [11]:
#Creating a new column selection does not change the row selection:
df.q(r'%id    %%>20000    &&<30003   %name')

#Equivalent but more readable version:
df.q(
    r"""
    %id     %%>20000    &&<30003
    %name
    """
    )

Unnamed: 0,name
3,Bob Brown
4,eva white
5,Frank miller
6,Grace TAYLOR
7,Harry Clark


In [12]:
#Now let's add a third column selection connected to the second one:
df.q(
    r"""
    %id     %%>20000    &&<30003
    %name
    /id
    """
    )


Unnamed: 0,ID,name
3,20001,Bob Brown
4,20002,eva white
5,20003,Frank miller
6,30001,Grace TAYLOR
7,30002,Harry Clark


In [13]:
#This behaviour can be used to select rows using conditions on multiple columns.
df.q(
    r"""
    %id     %%>20000    &&<30003
    %name   &&?bob
    /id
    """
    )

Unnamed: 0,ID,name
3,20001,Bob Brown


In [14]:
#Reset selection by selecting everything:
df.q(
    r"""
    %id      %%>20000    &&<30003
    %name    &&?bob
    /id
    is any;  %%is any;
    """
    )

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
0,10001,John Doe,1995-01-02,-25,M,170,70.2,20,80,Normal,No,10kg
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
5,20003,Frank miller,06-30-1983,forty-five,m,185,75kg,125,75,High,Yes,25g
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35


## flags

A number of flags can be used to modify the behaviour of selection conditions.

In [15]:
#Negate a condition
df.q(r'%id    %%!>20000')

Unnamed: 0,ID
0,10001
1,10002
2,10003


In [16]:
#All values in the selected columns must fulfill the row filter condition:
df.q(r'weight  /height    %%all>0')

Unnamed: 0,height,weight
0,170,70.2


In [17]:
#Any value in the selected columns must fulfill the row filter condition (default behaviour):
df.q(r'weight  /height    %%any>10')

Unnamed: 0,height,weight
0,170,70.2
1,175.5cm,68
3,280,na
5,185,75kg
7,6ft 1in,80.3
9,,82
10,200,-65


In [18]:
#Select each value in the selected columns that fulfills the row filter condition:
#(using background color to highlight the selected values)
#(highlighting does not work in all notebook renderers, eg: github)
df.q(r'weight  /height    %%each>10   $bg=orange')

Unnamed: 0,height,weight
0,170,70.200000
1,175.5cm,68
3,280,na
5,185,75kg
7,6ft 1in,80.3
9,,82
10,200,-65


In [19]:
#Compare with the earlier "any" example, now with highlighting:
df.q(r'weight  /height    %%any>10   $bg=orange')

Unnamed: 0,height,weight
0,170,70.200000
1,175.5cm,68
3,280,na
5,185,75kg
7,6ft 1in,80.3
9,,82
10,200,-65


In [20]:
#The index must fulfill the row filter condition:
df.q(r'weight  /height    %%idx>5')

Unnamed: 0,height,weight
6,1,
7,6ft 1in,80.3
8,-10,130lbs
9,,82
10,200,-65


In [21]:
#Interpret the value for comparison as a regex:
df.q(r'name  %%regex=........')  #matches any name with 8 characters

Unnamed: 0,name
0,John Doe
10,john Doe


In [22]:
#Also works with substring search:

#Select all rows where the name contains "J" followed by any 3 characters and then whitespace:
df.q(r'name    %% regex ? J...\s')

Unnamed: 0,name
0,John Doe
1,Jane Smith
9,JAck Williams


In [23]:
#All selection flags:
qp.qlang.FLAGS.by_trait['select']

{"!: NEGATE",
 "all: ALL",
 "any: ANY",
 "col: COL_EVAL",
 "each: EACH",
 "idx: IDX",
 "load: LOAD_SELECTION",
 "regex: REGEX",
 "save: SAVE_SELECTION",
 "strict: STRICT"}

## type filtering

The query language was designed to handle very messy datasets where sometimes no strict typing (or any typing at all!) is enforced during data entry. Therefore, operators like "is date;" do not filter based on the types in the dataset (sometimes all values are strings), but rather on whether it makes sense for a value to be of a certain type. Obviously, what makes sense depends on the domain, and the assumptions made by qplib might not align with your use case.

Using the flag "strict" switches to strict type filtering.

In [24]:
#Let's take a look at our dirty data again:
df

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
0,10001,John Doe,1995-01-02,-25,M,170,70.2,20,80,Normal,No,10kg
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
5,20003,Frank miller,06-30-1983,forty-five,m,185,75kg,125,75,High,Yes,25g
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35


In [25]:
#We can see that "date of birth" is not a date, but a string.
#Let's see what qplib does with that:
df.q(r'date of birth    %%is date;')  #note that unary operators end with a semicolon

  result = pd.to_datetime(x, dayfirst=True)


Unnamed: 0,date of birth
0,1995-01-02
1,1990/09/14
2,1985.08.23
3,19800406
4,05-11-2007
5,06-30-1983
6,28-05-1975
7,1960Mar08
8,1955-Jan-09
9,1950 Sep 10


In [26]:
#both strings '40.0' and '20' are treated as an int by default
df.q(r'age  %%is int;')

Unnamed: 0,age
0,-25.0
1,30.0
4,40.0
10,35.0


In [27]:
#strict mode does not treat those strings as ints
df.q(r'age  %%strict is int;')

Unnamed: 0,age
0,-25
10,35


In [28]:
#70.2 is not treated as an int
df.q(r'weight  %%!is int;')

Unnamed: 0,weight
0,70.2
2,72.5lb
3,na
4,
5,75kg
6,
7,80.3
8,130lbs


## undefined behaviour

Because it expects very messy data, qplib uses a type of [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic) utilizing "True", "False" and "undefined". This means that, for example, numeric operators can be used on columns which also contain strings. As a result, ">=" is not necessarily the same as "!<" (negation of "<").

In [29]:
df.q(r'height  %%>0')

Unnamed: 0,height
0,170
3,280
5,185
6,1
10,200


In [30]:
df.q(r'height  %%<0')

Unnamed: 0,height
8,-10


In [31]:
df.q(r'height  %%!>0')

Unnamed: 0,height
1,175.5cm
2,
4,
7,6ft 1in
8,-10
9,


In [32]:
df.q(
    r"""
    height
        %%>0   $bg=lime
        %%<0   $bg=orange
        %%!>0  $color=red
        %%is any;
    """
    )

Unnamed: 0,height
0,170
1,175.5cm
2,
3,280
4,
5,185
6,1
7,6ft 1in
8,-10
9,


In [33]:
df.q(r'height  %%!<0')

Unnamed: 0,height
0,170
1,175.5cm
2,
3,280
4,
5,185
6,1
7,6ft 1in
9,
10,200


## saving selections

The simple linear syntax does not allow for nesting of conditions, but the same result can be achieved by saving the intermediate results in a variable.

In [34]:
#Selections can be saved using the "save" flag
df.q(
    r"""
    %id         %%>20000    &&<30003    %%save=1   #save selection to variable "1"
    %name       %%?bob      //?grace    %%save=2   #save selection to variable "2"
    %%load=1    &&load=2   #load both selections and combine them
    /id
    """
    )

Unnamed: 0,ID,name
3,20001,Bob Brown
6,30001,Grace TAYLOR


## more operators

In [35]:
#All operators for selection/filtering:
qp.qlang.OPERATORS.by_trait['select']

{"<: SMALLER",
 "<=: SMALLER_EQUAL",
 "==: EQUALS",
 ">: BIGGER",
 ">=: BIGGER_EQUAL",
 "?: CONTAINS",
 "is any;: IS_ANY",
 "is bool;: IS_BOOL",
 "is date;: IS_DATE",
 "is datetime;: IS_DATETIME",
 "is first;: IS_FIRST",
 "is float;: IS_FLOAT",
 "is int;: IS_INT",
 "is last;: IS_LAST",
 "is na;: IS_NA",
 "is nk;: IS_NK",
 "is no;: IS_NO",
 "is num;: IS_NUM",
 "is str;: IS_STR",
 "is unique;: IS_UNIQUE",
 "is yes;: IS_YES",
 "is yn;: IS_YN",
 "~: EVAL"}

# modify

All modification instructions use the connector "$" and do not modify data in place: any modification instruction which could affect the original df creates and returns a copy instead.

## format

In [36]:
#change color:
df.q('$color=red')

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
0,10001,John Doe,1995-01-02,-25,M,170,70.200000,20,80,Normal,No,10kg
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
5,20003,Frank miller,06-30-1983,forty-five,m,185,75kg,125,75,High,Yes,25g
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35


In [37]:
#change background color:
df.q('$bg=orange')

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
0,10001,John Doe,1995-01-02,-25,M,170,70.200000,20,80,Normal,No,10kg
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
5,20003,Frank miller,06-30-1983,forty-five,m,185,75kg,125,75,High,Yes,25g
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35


In [38]:
#Use to highlight selection:
df.q(
    r"""
    height      %%>180    $bg=orange
    is any;     %%is any;
    """
    )

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
0,10001,John Doe,1995-01-02,-25,M,170,70.200000,20,80,Normal,No,10kg
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
5,20003,Frank miller,06-30-1983,forty-five,m,185,75kg,125,75,High,Yes,25g
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35


In [39]:
#Highlight all rows where any value is NA:
df.q('%%any is na;  $bg=orange')
df.q('%%is na;  $bg=orange')  #default behaviour is equivalent to using the "any" flag

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35
10,30005,john Doe,1945 October 11,35,female,200,-65,45,,Normal,Yes,40ml


In [40]:
#Highlight each individual NA value:
df.q('%%each is na;  $bg=orange')

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,na,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,n.a.,Y,20 Mg
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,NAN,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35
10,30005,john Doe,1945 October 11,35,female,200,-65,45,,Normal,Yes,40ml


## values

Modification is applied to all values in the current selection.

In [41]:
#Modify whole column:
df.q('age  $val=na')
df.q('age  $=na')  #default behaviour is equivalent to using the "val" flag and the "=" operator

Unnamed: 0,age
0,na
1,na
2,na
3,na
4,na
5,na
6,na
7,na
8,na
9,na


In [42]:
#Set all NA values to "NA":
df.q(r'%%each is na;  $val=NA')

Unnamed: 0,ID,name,date of birth,age,gender,height,weight,bp systole,bp diastole,cholesterol,diabetes,dose
1,10002,Jane Smith,1990/09/14,30,F,175.5cm,68,130,85,Highe,yes,
2,10003,Alice Johnson,1985.08.23,,Female,,72.5lb,,,,,15 mg once a day
3,20001,Bob Brown,19800406,,Male,280,,140,90mmHg,GOOD,No,20mg
4,20002,eva white,05-11-2007,40.0,Other,,,135mmhg,,,Y,20 Mg
6,30001,Grace TAYLOR,28-05-1975,,ff,1,,,,Normal,NO,
7,30002,Harry Clark,1960Mar08,unk,,6ft 1in,80.3,122,,,,
8,30003,IVY GREEN,1955-Jan-09,,,-10,130lbs,,95,high,,30 MG
9,30004,JAck Williams,1950 Sep 10,unknown,Mal,,82,130,0,,n,35
10,30005,john Doe,1945 October 11,35,female,200,-65,45,,Normal,Yes,40ml


## headers

In [43]:
df.q('date of birth   $header=dob')

Unnamed: 0,dob
0,1995-01-02
1,1990/09/14
2,1985.08.23
3,19800406
4,05-11-2007
5,06-30-1983
6,28-05-1975
7,1960Mar08
8,1955-Jan-09
9,1950 Sep 10


## column

In [44]:
#Create and fill a new column:
df.q('$new=abc')

Unnamed: 0,new1
0,abc
1,abc
2,abc
3,abc
4,abc
5,abc
6,abc
7,abc
8,abc
9,abc


In [45]:
#Create, fill and rename a new column:
df.q('$new=abc  $header=new text')

Unnamed: 0,new text
0,abc
1,abc
2,abc
3,abc
4,abc
5,abc
6,abc
7,abc
8,abc
9,abc


# logging

qplib has a lightweight logging system, somewhere between actual logging and using print statements.

In [46]:
#logs from the current session (since importing qplib) can be found here:
logs = qp.log().copy()
logs

Unnamed: 0,level,text,context,time
0,DEBUG,df was checked. no problems found,qp.qlang.check_df,2025-04-25 13:08:12.185524
1,TRACE,"line ""name"" does not start with a connector, a...",qp.qlang.tokenize,2025-04-25 13:08:12.186450
2,TRACE,transformed code into raw instructions:\nInstr...,qp.qlang.tokenize,2025-04-25 13:08:12.186538
3,TRACE,"found ""CONNECTORS.NEW_SELECT_COLS"" in ""%name""",qp.qlang.extract_symbol,2025-04-25 13:08:12.186585
4,TRACE,"no operator found in ""%name"". using default """"...",qp.qlang.parse,2025-04-25 13:08:12.186633
...,...,...,...,...
874,TRACE,"found ""FLAGS.HEADER"" in ""header=new text""",qp.qlang.extract_symbol,2025-04-25 13:08:13.993128
875,TRACE,"found ""OPERATORS.SET"" in ""=new text""",qp.qlang.extract_symbol,2025-04-25 13:08:13.993142
876,DEBUG,"df will be copied since instruction ""$header=n...",qp.qlang.parse,2025-04-25 13:08:13.993172
877,DEBUG,parsed:\nInstruction:\n\tline_num: 0\n\tcode: ...,qp.qlang.parse,2025-04-25 13:08:13.993194


In [47]:
#since the logs are stored in a dataframe, we can use qplib to filter them:
logs.q('level  %%warning   $width=100px  %is any;')

Unnamed: 0,level,text,context,time
83,WARNING,"no columns fulfill the condition in ""&?bp"" and the previous condition(s)",qp.qlang._select_cols,2025-04-25 13:08:12.311797


In [48]:
#clear logs:
qp.log(clear=True)
logs = qp.log().copy()
logs

cleared all logs in qp.util.logs.


In [49]:
#by default, all levels are logged, but only warnings and errors are shown while using qplib:
df.q('name  &?bp')

0,1,2,3,4
14,WARNING,"no columns fulfill the condition in ""&?bp"" and the previous condition(s)",qp.qlang._select_cols,2025-04-25 13:08:14.092661


0
1
2
3
4
5
6
7
8
9
10


In [50]:
#show all log levels:
df.q(
    r"""
    $verbosity=5
    name  &?bp
    """
    )



0,1,2,3,4
23,TRACE,"found ""CONNECTORS.NEW_SELECT_COLS"" in ""%name """,qp.qlang.extract_symbol,2025-04-25 13:08:14.146764


0,1,2,3,4
24,TRACE,"no operator found in ""%name "". using default """"=: SET""""",qp.qlang.parse,2025-04-25 13:08:14.154424


0,1,2,3,4
25,TRACE,"""""=: SET"""" is interpreted as """"==: EQUALS"""" for selection instruction",qp.qlang.parse,2025-04-25 13:08:14.167907


0,1,2,3,4
26,DEBUG,"parsed: Instruction:  line_num: 2  code: %name connector: ""%: NEW_SELECT_COLS""  operator: ""==: EQUALS""  value: name  function: _select_cols",qp.qlang.parse,2025-04-25 13:08:14.178813


0,1,2,3,4
27,TRACE,"instruction ""%name "" is valid",qp.qlang.validate,2025-04-25 13:08:14.189974


0,1,2,3,4
28,TRACE,"value ""name"" is treated as type ""str"" for comparison",qp.qlang._filter_series,2025-04-25 13:08:14.197788


0,1,2,3,4
29,TRACE,"found ""CONNECTORS.AND_SELECT_COLS"" in ""&?bp""",qp.qlang.extract_symbol,2025-04-25 13:08:14.232770


0,1,2,3,4
30,TRACE,"found ""OPERATORS.CONTAINS"" in ""?bp""",qp.qlang.extract_symbol,2025-04-25 13:08:14.240313


0,1,2,3,4
31,DEBUG,"parsed: Instruction:  line_num: 2  code: &?bp  connector: ""&: AND_SELECT_COLS""  operator: ""?: CONTAINS""  value: bp  function: _select_cols",qp.qlang.parse,2025-04-25 13:08:14.252231


0,1,2,3,4
32,TRACE,"instruction ""&?bp"" is valid",qp.qlang.validate,2025-04-25 13:08:14.261088


0,1,2,3,4
33,WARNING,"no columns fulfill the condition in ""&?bp"" and the previous condition(s)",qp.qlang._select_cols,2025-04-25 13:08:14.290396


0
1
2
3
4
5
6
7
8
9
10


# syntax symbols

Syntax symbols and their relations are defined in a CSV file which gets read into a dataframe when importing qplib.

In [51]:
#all syntax symbols (and their traits):
defs = qp.qlang.DEFINITIONS
defs

Unnamed: 0,type,glyph,description,select,select_rows,select_rows_scope,select_cols,modify,unary,conversion,settings,metadata,format,copy_df,is_type,NEW_SELECT_ROWS,AND_SELECT_ROWS,OR_SELECT_ROWS,NEW_SELECT_COLS,AND_SELECT_COLS,OR_SELECT_COLS,MODIFY,BIGGER_EQUAL,SMALLER_EQUAL,BIGGER,SMALLER,EQUALS,CONTAINS,IS_ANY,IS_STR,IS_INT,IS_FLOAT,IS_NUM,IS_BOOL,IS_DATETIME,IS_DATE,IS_NA,IS_NK,IS_YN,IS_YES,IS_NO,IS_UNIQUE,IS_FIRST,IS_LAST,ADD,SET,EVAL,SORT,TO_STR,TO_INT,TO_FLOAT,TO_NUM,TO_BOOL,TO_DATETIME,TO_DATE,TO_NA,TO_NK,TO_YN,NEGATE,ANY,ALL,IDX,EACH,STRICT,SAVE_SELECTION,LOAD_SELECTION,VERBOSITY,DIFF,METADATA,TAG_METADATA,COLOR,BACKGROUND_COLOR,ALIGN,WIDTH,CSS,VAL,HEADER,NEW_COL,COL_EVAL,REGEX
select,trait,,,3,1,1,1,0,1,0,0,0,0,0,1,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,2
select_rows,trait,,,1,3,1,0,0,1,0,0,0,0,0,1,2,2,2,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,2
select_rows_scope,trait,,,1,1,3,0,0,1,0,0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,2,2,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1
select_cols,trait,,,1,0,0,3,0,1,0,0,0,0,0,1,0,0,0,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,2
modify,trait,,,0,0,0,0,3,1,1,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VAL,flag,val,modify selected values,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0
HEADER,flag,header,modify the headers of the selected columns,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0
NEW_COL,flag,new,create a new column with the selected values,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0
COL_EVAL,flag,col,"when used with the eval operator, evaluates on...",2,2,1,2,2,0,0,0,0,0,2,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0


In [52]:
#a value of 2 means that a symbol has this specific trait.
#eg: all of the following symbols are used for selection:
defs.q('select  %%2')

Unnamed: 0,select
NEW_SELECT_ROWS,2
AND_SELECT_ROWS,2
OR_SELECT_ROWS,2
NEW_SELECT_COLS,2
AND_SELECT_COLS,2
OR_SELECT_COLS,2
BIGGER_EQUAL,2
SMALLER_EQUAL,2
BIGGER,2
SMALLER,2


In [53]:
#a value of 1 means that 2 traits or symbols are compatible with each other.
#eg: the following symbols can be used with the negation flag:
defs.q('NEGATE  %%1')

Unnamed: 0,NEGATE
select_rows_scope,1
unary,1
is_type,1
NEW_SELECT_ROWS,1
AND_SELECT_ROWS,1
OR_SELECT_ROWS,1
NEW_SELECT_COLS,1
AND_SELECT_COLS,1
OR_SELECT_COLS,1
BIGGER_EQUAL,1
