[Reference](https://medium.com/@dreamferus/level-up-your-pandas-skills-with-query-and-eval-f065951162df)

In [1]:
import pandas as pd
import numpy as np

SEED = 42            # fixed so Restart & Run All reproduces the same frame
N_PER_GENDER = 100   # number of rows generated for each gender
# (gender label, mean of the height distribution, its standard deviation)
HEIGHT_PARAMS = (("Male", 178, 10), ("Female", 166, 8))


def make_people(seed=SEED, n_per_gender=N_PER_GENDER):
    """Generate synthetic person records: all males first, then all females.

    Args:
        seed: Seed for the NumPy random generator; the same seed always
            yields the same records.
        n_per_gender: Number of records to generate for each gender.

    Returns:
        A list of dicts with the keys "gender", "height" and "age". Heights
        are drawn from a per-gender normal distribution (see HEIGHT_PARAMS)
        and ages uniformly between 20 and 70.
    """
    rng = np.random.default_rng(seed)
    return [
        {
            "gender": gender,
            "height": rng.normal(mean, std),
            "age": rng.uniform(20, 70),
        }
        for gender, mean, std in HEIGHT_PARAMS
        for _ in range(n_per_gender)
    ]


data = make_people()
df = (
    pd.DataFrame(data)
    # shuffle the rows so the two genders are interleaved; seeded so the
    # row order is the same on every run
    .sample(frac=1.0, replace=False, random_state=SEED)
    .reset_index(drop=True)
)

In [2]:
# Plain-pandas baseline: build a boolean mask, pick the height column for the
# matching rows, then report [tallest, shortest].
is_female = df["gender"] == "Female"
in_age_band = df["age"].between(20, 30)  # inclusive on both ends
heights = df.loc[is_female & in_age_band, "height"]
[heights.max(), heights.min()]

[180.04916329760601, 153.87550882936793]

In [3]:
# Same selection as the boolean-mask cell, expressed as a query string;
# eval then computes both aggregates from a single expression.
young_women = df.query("gender == 'Female' and 20 <= age <= 30")
young_women.eval("height.max(), height.min()")

array([180.04916329760601, 153.87550882936793], dtype=object)

In [4]:
## THE INCORRECT WAY
# `df.age` is evaluated against the ORIGINAL, unfiltered `df`, so the values
# assigned here do not come from the male-only frame that `.assign` is
# called on.
a = df[df["gender"] == "Male"].reset_index(drop=True).assign(age=df.age-10)
# > This is incorrect, because in the final assign df.age refers to the
# original df and not the df with only males and reset index!

# How we should do it: `.pipe` passes the intermediate (male-only, re-indexed)
# frame to the lambda as `x`, so `x.age` is the column we actually mean.
b = (df[df["gender"] == "Male"].reset_index(drop=True)
    .pipe(lambda x: x.assign(age=x.age-10)))
# The two results are not equal, which demonstrates the bug in `a`.
assert not a.equals(b)

# Extend the correct chain with a filter on the adjusted age; note that `b`
# is rebound to this filtered result.
b = (df[df["gender"] == "Male"].reset_index(drop=True)
    .pipe(lambda x: x.assign(age=x.age-10))
    .pipe(lambda x: x[x["age"] > 30]))

In [5]:
# The same males -> age shift -> filter pipeline as `b`, written with
# query/eval strings instead of boolean masks and lambdas.
c = (
    df
    .query("gender == 'Male'")
    .reset_index(drop=True)
    .eval("age=age-10")  # assignment form: returns a new frame with age replaced
    .query("age > 30")   # filters on the shifted age
)
# Both formulations must produce the identical frame.
assert b.equals(c)

In [8]:
# from numpy import power
# import numpy as np

# # works
# df.query("@power(age, 2) > 1000")
# # doesn't work
# df.query("@np.power(age, 2) > 1000")

In [9]:
# from numpy import power

# # assigning a variable
# df.eval("age_2 = @power(age, 2)")

# # to assign multiple variables, put each assignment
# # on its own line of a multi-line string
# # note that the third assignment uses sqrt without
# # an @ prefix: eval supports a set of built-in math
# # functions that need no local-variable reference
# df.eval("""
#     age_2 = @power(age, 2)
#     age_sqrt = age**(1/2)
#     age_sqrt = sqrt(age)
# """) 
# # => returns df with 2 new columns: age_2 and age_sqrt
# #    (age_sqrt is assigned twice, so the second assignment wins)
# # as always adding inplace=True will modify the data frame directly

# df.eval("age+3") # returns column age + 3
# df.eval("[3, 2]") # => return [3, 2]
# df.eval("height.min()") # => return min height