## Resource allocation check
#WITHOUT PANDAS EVAL 

In [1]:
import numpy as np

In [3]:
rng = np.random.RandomState(10)
# The point of random state is that you can lock in on the random numbers generated..
#..so that with same input the output will remain consistent

In [4]:
x = rng.rand(10000)
y = rng.rand(10000)

%timeit x+y

The slowest run took 3925.60 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 7.64 µs per loop


## WITH PANDAS EVAL 
- Eval relies on numexpr package. It is time and resource saving. It doesnt create intermediate place holder arrays where extra resources get consumed
- Eval supports arithmetic operations, comparison operations, bitwise operations, Object attributes indexing. 

In [5]:
import pandas as pd

In [17]:
nrows, ncols = 10000,10
rng = np.random.RandomState(43)

df1, df2,df3,df4 = (pd.DataFrame(rng.rand(nrows,ncols))
                   for i in range(4))

In [18]:
%timeit df1 + df2 + df3 + df4

100 loops, best of 3: 2.24 ms per loop


In [19]:
%timeit pd.eval('df1+df2+df3+df4')

100 loops, best of 3: 6.04 ms per loop


In [25]:
%timeit('df1<df2) & (df2<=df3)&(df3!=df4')

10000000 loops, best of 3: 23.4 ns per loop


In [22]:

%timeit (df1 < 0.5) & (df2 < 0.5) | (df3 < df4)
%timeit pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 < df4)')

100 loops, best of 3: 4.59 ms per loop
100 loops, best of 3: 6.79 ms per loop


## EVAL Column Wise Operations - DataFrame.eval()

In [26]:
df = pd.DataFrame(rng.rand(1000,3), columns=['A','B','C'])

In [28]:
df.head()

Unnamed: 0,A,B,C
0,0.964792,0.493116,0.25123
1,0.453194,0.368565,0.58563
2,0.914452,0.955842,0.028324
3,0.332774,0.031081,0.268343
4,0.608275,0.198986,0.249062


In [29]:
res = (df['A']+df['B']/(df['C']-1))

In [38]:
res2 = pd.eval("(df.A+ df.B)/(df.C-1)")

In [44]:
res - res2

0        2.253294
1        1.546887
2        1.855559
3        0.787597
4        1.418295
5        1.426131
6        1.022928
7        2.304933
8        1.990502
9        1.146501
10       4.403925
11       4.644101
12       3.833645
13       0.546121
14       4.961431
15       1.803342
16       0.116779
17       1.370874
18       0.095605
19       0.508168
20       1.455460
21       1.253846
22       1.861547
23      43.139693
24      14.126903
25       1.324882
26       0.345778
27       1.308532
28       0.702844
29       0.269678
          ...    
970      1.970954
971      0.643178
972     13.730961
973      2.560381
974      2.691972
975      3.911819
976      1.346668
977      1.777123
978      1.511813
979      1.998093
980      7.204043
981      6.426819
982      1.040305
983      1.265489
984      1.991497
985      3.162604
986    249.012299
987      1.242646
988      1.848384
989      1.724639
990      2.813301
991      0.587423
992      2.184805
993      2.648471
994      1


## QUERY 

In [45]:
df.query('A < 0.5 and B>0.5')

Unnamed: 0,A,B,C
5,0.280100,0.748231,0.755591
9,0.398766,0.686458,0.466701
12,0.220194,0.752404,0.939063
16,0.056755,0.742099,0.054462
18,0.032883,0.887316,0.475725
27,0.034252,0.772278,0.973121
37,0.100324,0.710337,0.370694
38,0.396180,0.516032,0.768987
46,0.345891,0.929820,0.603101
64,0.012250,0.886698,0.275493
