# Motivating query() and eval(): Compound Expressions

In [1]:
import numpy as np

# Two arrays of one million random samples each, drawn in order (x first,
# then y) from a generator seeded with 42 so the data is reproducible.
rng = np.random.RandomState(seed=42)
x, y = [rng.rand(1_000_000) for _ in range(2)]

# Time the vectorized element-wise addition of the two arrays.
%timeit x + y

9.65 ms ± 542 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [2]:
%timeit np.fromiter((xi+yi for xi, yi in zip(x,y)),dtype = x.dtype, count =len(x))

486 ms ± 49.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [3]:
# Compound condition: True where x is above 0.5 and y is below 0.5.
mask = np.logical_and(x > 0.5, y < 0.5)

In [4]:
# The same mask built in explicit steps: each comparison is stored in its own
# temporary array before the two are combined.
tmp1 = x > 0.5
tmp2 = y < 0.5
mask = np.logical_and(tmp1, tmp2)

In [5]:
import numexpr
# Evaluate the whole compound expression from a string with numexpr, then
# verify it against the NumPy mask. The values being compared come from
# comparison expressions (True/False), so use an exact element-wise equality
# check instead of allclose, whose relative/absolute tolerances are meant for
# floating-point data.
mask_numexpr = numexpr.evaluate("(x>0.5) & (y<0.5)")
np.array_equal(mask, mask_numexpr)

True

# pandas.eval() for Efficient Operations

In [6]:
import pandas as pd
# Four large frames of random values, all drawn in sequence from a single
# generator re-seeded with 42.
nrows, ncols = 100000, 100
rng = np.random.RandomState(42)
df1, df2, df3, df4 = [pd.DataFrame(rng.rand(nrows, ncols)) for _ in range(4)]

In [7]:
%timeit df1 + df2+ df3 + df4

168 ms ± 24.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
# Time the same four-frame sum, this time handed to pd.eval as a string expression.
%timeit pd.eval("df1 + df2 + df3 + df4")

56.8 ms ± 1.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
# Confirm that the operator form and the pd.eval form of the sum agree
# to within floating-point tolerance.
np.allclose(
    df1 + df2 + df3 + df4,
    pd.eval("df1+df2+df3+df4"),
)

True

## Operations Supported by pd.eval()

In [11]:
# Five small frames (100 rows x 3 columns) of random integers drawn from
# [0, 1000), reusing the existing generator; used by the operator examples.
df1, df2, df3, df4, df5 = [
    pd.DataFrame(rng.randint(0, 1000, size=(100, 3))) for _ in range(5)
]

### Arithmetic Operators

In [13]:
# Arithmetic: the expression written with pandas operators (grouping made
# explicit) ...
result1 = ((-df1 * df2) / (df3 + df4)) - df5
# ... and the same expression given to pd.eval as a string.
result2 = pd.eval("-df1 * df2 / (df3 + df4) - df5")
# The two results should match to within floating-point tolerance.
np.allclose(a=result1, b=result2)

True

### Comparison Operators

In [14]:
# Comparison: plain pandas needs each pairwise comparison joined with &,
# while the pd.eval string uses the chained form.
result1 = (df1 < df2) & (df2 <= df3) & (df3 != df4)
result2 = pd.eval('df1 < df2 <= df3 != df4')
# The results are built from comparisons (True/False values), so check them
# for exact equality rather than with allclose's floating-point tolerances.
np.array_equal(result1, result2)

True

### Bitwise Operators

In [15]:
# Bitwise: the same & / | combination of comparisons, once with pandas
# operators and once as a pd.eval string.
result1 = (df1 < 0.5) & (df2 < 0.5) | (df3 < df4)
result2 = pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 < df4)')
# The results are built from comparisons (True/False values), so check them
# for exact equality rather than with allclose's floating-point tolerances.
np.array_equal(result1, result2)

True

# DataFrame.eval() for Column-Wise Operations

In [16]:
# A 1000 x 3 frame of random floats with columns labelled A, B and C,
# drawn from the existing generator; show the first rows.
df = pd.DataFrame(data=rng.rand(1000, 3), columns=["A", "B", "C"])
df.head()

Unnamed: 0,A,B,C
0,0.061761,0.925463,0.99742
1,0.209863,0.280456,0.042148
2,0.738991,0.019046,0.715501
3,0.062857,0.516241,0.604588
4,0.204537,0.813392,0.244804


In [17]:
# Column formula written with explicit column lookups ...
result1 = (df["A"] + df["B"]) / (df["C"] - 1)
# ... and the same formula as a pd.eval string that reaches the columns
# through attribute access on df.
result2 = pd.eval("(df.A + df.B) / (df.C - 1)")
np.allclose(a=result1, b=result2)

True

In [18]:
# DataFrame.eval lets the expression name the columns directly, without the
# df. prefix; check it against the explicit-lookup result.
result3 = df.eval(expr='(A + B) / (C - 1)')
np.allclose(a=result1, b=result3)

True