# High-Performance Pandas: `eval()` and `query()`

mask = (x > 0.5) & (y < 0.5)

Example:
df = df[mask]
Evaluated the usual way, every intermediate result is held in memory as a separate variable:

```python
tmp1 = (x > 0.5)
tmp2 = (y < 0.5)
mask = tmp1 & tmp2
```

Alternatively, use `pd.eval("...")`: it evaluates the whole expression element-wise in one pass using numexpr, instead of materialising each intermediate result.

This is most useful for compound expressions, i.e. several operations combined in a single statement.

In [3]:
import pandas as pd
import numpy as np

# Build four equally shaped frames of random normal values to benchmark
# chained arithmetic against pd.eval.
nrows, ncols = 1_000_000, 100
df1, df2, df3, df4 = (
    pd.DataFrame(np.random.randn(nrows, ncols)) for _ in range(4)
)
df1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,-0.430286,0.368033,-0.405037,-0.081377,1.203113,-0.963116,0.85561,0.090909,1.134815,0.192952,...,1.400864,0.097015,-1.245528,-0.714713,0.051876,3.077737,-1.302702,-0.799351,-0.830002,0.558278
1,-1.142929,0.154876,-0.435105,-0.780848,-0.030583,1.356635,-0.749856,-0.805077,2.106134,0.08259,...,0.369797,0.816506,-1.452992,1.8816,-0.071023,-0.255898,0.626518,0.391179,0.93027,-0.181975
2,0.443519,1.054843,0.28384,0.444484,0.255824,0.53655,-0.175455,-0.109455,-0.575948,-1.120123,...,-0.219025,3.321252,0.368421,-1.123599,-0.551595,-1.959348,-1.797399,0.758465,-1.291171,1.720917
3,2.545863,1.04279,1.087288,0.337961,0.361818,0.037705,0.236604,1.680111,-1.004373,0.116887,...,-2.240301,0.106368,1.437631,1.100896,-1.369257,-1.111779,0.33875,-0.423102,0.460907,2.017007
4,0.560399,-0.541371,0.699892,0.263667,-1.507937,0.469659,1.876406,-0.587879,0.375981,0.576292,...,-0.769171,0.799461,-0.333139,0.300101,1.00453,-0.24867,-0.100842,0.219072,0.280177,-0.66532


In [11]:
%timeit df1 + df2 +df3 +df4
%timeit pd.eval("df1 + df2 +df3 +df4")

1.62 s ± 603 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
334 ms ± 11.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
# Compute the four-frame sum both ways: plain operators vs. pd.eval.
plain = df1 + df2 + df3 + df4
sum_eval = pd.eval("df1 + df2 +df3 +df4")

# Check that both approaches produce the same frame.
sum_eval.equals(plain)

True

In [15]:
# DataFrame.eval(): evaluate a string expression against the frame's columns.
# Simulate six throws of three six-sided dice. np.random.randint excludes the
# upper bound, so high=7 is required for the face value 6 to be possible.
# The "Sum = ..." assignment inside eval adds a new column holding the
# row-wise total; without inplace=True, eval returns the extended frame.
rolls = pd.DataFrame(
    np.random.randint(1, 7, size=(6, 3)),
    columns=["Die1", "Die2", "Die3"],
).eval("Sum = Die1 + Die2 + Die3")
rolls

Unnamed: 0,Die1,Die2,Die3,Sum
0,5,4,2,11
1,1,2,3,6
2,5,1,5,11
3,4,5,2,11
4,3,2,5,10
5,2,5,5,12


In [22]:
# Local Python variables are referenced inside the expression with an "@" prefix.
high = 11
rolls.eval("Winner = Sum > @high", inplace=True)
rolls

Unnamed: 0,Die1,Die2,Die3,Sum,Winner
0,5,4,2,11,False
1,1,2,3,6,False
2,5,1,5,11,False
3,4,5,2,11,False
4,3,2,5,10,False
5,2,5,5,12,True


In [23]:
# Conventional boolean-mask filtering: keep rows whose Sum does not exceed `high`.
rolls.loc[rolls["Sum"] <= high]

Unnamed: 0,Die1,Die2,Die3,Sum,Winner
0,5,4,2,11,False
1,1,2,3,6,False
2,5,1,5,11,False
3,4,5,2,11,False
4,3,2,5,10,False


# Query 

In [24]:
# Filter rows with a query string; "@high" refers to the Python variable `high`.
rolls.query("Sum <= @high")

Unnamed: 0,Die1,Die2,Die3,Sum,Winner
0,5,4,2,11,False
1,1,2,3,6,False
2,5,1,5,11,False
3,4,5,2,11,False
4,3,2,5,10,False
