# High performance

```python
mask = (x > 0.5) & (y < 0.5)
```

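Example: `df = df[mask]`

NumPy allocates every intermediate result in memory, so the expression above is roughly equivalent to: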

```python
tmp1 = (x > 0.5)
tmp2 = (y < 0.5)
mask = tmp1 & tmp2
```

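Alternatively, use `pd.eval("...")`: it evaluates the whole string expression element-wise using numexpr, without allocating full-size temporary arrays for each intermediate step.

This is most useful for compound expressions that combine several large arrays or DataFrames.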

In [3]:
import numpy as np
import pandas as pd

# Build four equally shaped frames of standard-normal noise to benchmark on.
nrows = 1_000_000
ncols = 100
df1, df2, df3, df4 = (
    pd.DataFrame(np.random.randn(nrows, ncols)) for _ in range(4)
)
df1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,-1.413877,1.196057,0.558456,1.904732,1.060966,-1.247035,-1.9128,0.494966,-0.807773,-0.506227,...,-1.17302,-0.558082,-0.080112,0.575089,-1.670474,0.890756,-0.588846,1.605141,-1.247288,0.115607
1,-0.207292,2.066073,0.922457,0.063423,0.777674,-0.317366,2.098534,0.169719,0.123358,1.827096,...,-1.355595,0.281215,-0.486158,-1.110272,1.072051,0.80768,0.15293,1.58151,-0.528713,0.786837
2,-0.852549,-0.011128,-2.559354,-2.105246,0.659456,-0.114802,0.349166,-0.705398,0.515848,-0.547832,...,-0.784568,0.318347,0.118804,-0.836228,0.475111,-0.498715,0.59874,-1.41078,-0.227739,0.309349
3,-0.49671,0.972605,0.446056,-0.918706,-0.346973,-1.024539,-0.023737,-1.327531,-0.027886,0.302788,...,-2.085853,1.438647,-0.281029,-0.655393,-0.085709,0.821655,0.172188,0.905978,-0.506872,-1.885563
4,0.54451,-0.292143,0.068029,0.668903,0.756693,-0.331481,0.277662,0.124358,-0.47621,-1.438507,...,-0.965087,-0.271741,-1.793447,-0.738675,2.726956,0.323555,-0.919744,-0.57001,-1.065592,-0.36148


In [4]:
# Baseline: ordinary operator arithmetic on the four DataFrames.
%timeit df1 + df2 + df3 + df4

1.24 s ± 27.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
# Same sum, but handed to pd.eval as a single string expression.
%timeit pd.eval("df1 + df2 + df3 + df4")

572 ms ± 25.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# Compute the sum both ways and confirm that the two results agree.
plain = df1 + df2 + df3 + df4
sum_eval = pd.eval("df1 + df2 + df3 + df4")
# allclose instead of exact equality: tolerates floating-point rounding
# differences between the two evaluation paths.
np.allclose(plain, sum_eval)


True

In [10]:
# DataFrame.eval(): evaluate an expression against the frame's own columns.
# np.random.randint excludes the upper bound, so pass 7 to allow a roll of 6.
rolls = pd.DataFrame(np.random.randint(1, 7, (6, 3)), columns=["Die1", "Die2", "Die3"])
# Assignment inside the expression adds the new "Sum" column to rolls.
rolls.eval("Sum = Die1 + Die2 + Die3", inplace=True)
rolls

Unnamed: 0,Die1,Die2,Die3,Sum
0,5,1,5,11
1,4,3,1,8
2,5,1,2,8
3,4,1,5,10
4,3,2,4,9
5,4,2,2,8


In [85]:
# Use variables: a local Python variable is referenced inside the
# expression string by prefixing its name with "@".

high = 10 
# Adds a boolean "Winner" column to rolls in place (True where Sum > high).
rolls.eval("Winner = Sum > @high", inplace = True)
rolls

Unnamed: 0,Die1,Die2,Die3,Sum,Winner
0,5,1,5,11,True
1,4,3,1,8,False
2,5,1,2,8,False
3,4,1,5,10,False
4,3,2,4,9,False
5,4,2,2,8,False


In [104]:
# Boolean-mask selection: keep the rows whose Sum does not exceed `high`.
rolls.loc[rolls["Sum"].le(high)]

Unnamed: 0,Die1,Die2,Die3,Sum,Winner
1,4,3,1,8,False
2,5,1,2,8,False
3,4,1,5,10,False
4,3,2,4,9,False
5,4,2,2,8,False


# Query

In [105]:
# Row filter written as a query string; "@high" refers to the Python variable `high`.
rolls.query("Sum <= @high")

Unnamed: 0,Die1,Die2,Die3,Sum,Winner
1,4,3,1,8,False
2,5,1,2,8,False
3,4,1,5,10,False
4,3,2,4,9,False
5,4,2,2,8,False
