In [34]:
import numexpr
import numpy as np
# Build the same boolean mask twice -- once with plain NumPy operators and once
# through numexpr -- and confirm the two results agree.
rng = np.random.RandomState(42)
x = rng.rand(1000000)
y = rng.rand(1000000)
mask = (x > 0.5) & (y < 0.5)
# The expression is passed to numexpr as a string that refers to x and y by name.
mask_numexpr = numexpr.evaluate('(x > 0.5) & (y < 0.5)')
# Boolean masks carry no rounding error, so compare them exactly instead of
# within the floating-point tolerance that np.allclose applies.
np.array_equal(mask, mask_numexpr)

array([False,  True,  True, ..., False, False, False])

In [20]:
import pandas as pd
# Four equally sized frames of uniform random values, drawn one after another
# from a single seeded generator so the cell is reproducible.
nrows = 100000
ncols = 100
rng = np.random.RandomState(42)
df1, df2, df3, df4 = [pd.DataFrame(rng.rand(nrows, ncols)) for _ in range(4)]



In [27]:
%timeit pd.eval('df1 + df2 + df3 + df4') 

77.4 ms ± 5.81 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [29]:
%timeit df1+df2+df3+df4

151 ms ± 9.68 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


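`pd.eval()` supports arithmetic operators, comparison operators (which yield Boolean results), bitwise operators (`&`, `|`), and the literal `and` / `or` keywords in Boolean expressions.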

### Column-wise operations with `DataFrame.eval()`

In [37]:
# A 1000-row frame with three named columns of uniform random values,
# drawn from the generator created earlier in the notebook.
df = pd.DataFrame(
    data=rng.rand(1000, 3),
    columns=["A", "B", "C"],
)
df.head()

Unnamed: 0,A,B,C
0,0.101812,0.298283,0.636572
1,0.435671,0.220578,0.990797
2,0.654224,0.820193,0.904737
3,0.137785,0.155398,0.30714
4,0.921909,0.707242,0.528478


In [39]:
# DataFrame.eval() is easier: columns can be referenced directly by name.

In [43]:
# Reference result computed with ordinary pandas operators.
result1 = (df["A"] + df["B"]) / (df["C"] - 1)
# The same expression, passed to pd.eval() as a string.
result2 = pd.eval('(df.A + df.B) / (df.C - 1)')
# Check that both approaches agree to floating-point tolerance.
np.allclose(result1, result2)

True

In [45]:
# With DataFrame.eval() the expression refers to columns by bare name.
result3 = df.eval("(A + B) / (C - 1)")
# Compare against the operator-based result from the previous cell.
np.allclose(result1, result3)

True

In [49]:
# Add column D computed from the existing columns. An assignment inside the
# eval() string creates the column; rebinding the returned frame (instead of
# passing inplace=True) avoids mutating the frame object that earlier cells
# displayed and keeps the step chainable.
df = df.eval('D = (A + B) / C')
df.head()

Unnamed: 0,A,B,C,D
0,0.101812,0.298283,0.636572,0.628515
1,0.435671,0.220578,0.990797,0.662344
2,0.654224,0.820193,0.904737,1.629663
3,0.137785,0.155398,0.30714,0.954558
4,0.921909,0.707242,0.528478,3.082725


In [51]:
# Despite its name, this is the mean taken across the columns of each row
# (axis=1); it is an ordinary Python variable, not a column of df.
column_mean = df.mean(axis=1)
result1 = df["A"] + column_mean
# The '@' prefix makes eval() look column_mean up as a Python variable
# rather than as a column name.
result2 = df.eval("A + @column_mean")
np.allclose(result1, result2)

True

The @ character here marks a variable name rather than a column name, and lets you efficiently evaluate expressions involving the two "namespaces": the namespace of columns, and the namespace of Python objects. Notice that this @ character is only supported by the DataFrame.eval() method, not by the pandas.eval() function, because the pandas.eval() function only has access to the one (Python) namespace.

In [54]:
# Row filtering with a boolean mask, written with plain indexing...
result1 = df[(df["A"] < 0.5) & (df["B"] < 0.5)]
# ...and as the equivalent expression evaluated by pd.eval().
result2 = pd.eval("df[(df.A < 0.5) & (df.B < 0.5)]")
np.allclose(result1, result2)

True

In [71]:
# DataFrame.eval() produces the boolean mask, which then selects the matching rows.
df.loc[df.eval("A<0.5 and B<0.5")]

Unnamed: 0,A,B,C,D
0,0.101812,0.298283,0.636572,0.628515
1,0.435671,0.220578,0.990797,0.662344
3,0.137785,0.155398,0.307140,0.954558
8,0.470473,0.422362,0.058994,15.134265
12,0.475457,0.049676,0.448709,1.170318
...,...,...,...,...
983,0.412349,0.163616,0.201851,2.853413
989,0.327237,0.108239,0.823618,0.528735
990,0.246177,0.098306,0.896117,0.384417
992,0.469971,0.448903,0.342198,2.685216


In [73]:
# Show the type of result1, the mask-filtered selection assigned in an earlier cell.
type(result1)

pandas.core.frame.DataFrame

In [75]:
# query() takes the row filter directly as a string expression over column names.
result2 = df.query("A < 0.5 and B < 0.5")
# Compare against result1 from the earlier masking cell.
np.allclose(result1, result2)

True