In [16]:
import numpy as np
# Two independent vectors of one million uniform [0, 1) samples each,
# drawn from a generator seeded with 42 so the values are reproducible.
rng = np.random.RandomState(seed=42)
x = rng.random_sample(size=10**6)
y = rng.random_sample(size=10**6)
# Baseline: time the elementwise sum of the two NumPy arrays as one array expression.
%timeit x + y

3.46 ms ± 270 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [18]:
%timeit np.fromiter((xi+yi for xi,yi in zip(x,y)),dtype=x.dtype,count=len(x))

291 ms ± 15.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [19]:
# Elementwise boolean mask: True where x exceeds 0.5 and y is below 0.5.
mask = (x > 0.5) & (y < 0.5)

In [20]:
# The compound mask spelled out step by step: each comparison produces its
# own full-size boolean array before the two are combined.
tmp1 = np.greater(x, 0.5)
tmp2 = np.less(y, 0.5)
mask = np.logical_and(tmp1, tmp2)

In [22]:
import numexpr
# Evaluate the same compound comparison through numexpr; the expression
# string names the notebook variables x and y.
mask_numexpr = numexpr.evaluate('(x>0.5) & (y<0.5)')
# Both masks are boolean, so a float-tolerance comparison is meaningless:
# check that they are exactly equal element by element.
np.array_equal(mask, mask_numexpr)

True

In [23]:
import pandas as pd
# Four nrows x ncols frames of uniform [0, 1) samples, drawn one after
# another from a single generator re-seeded with 42.
nrows = 100000
ncols = 100
rng = np.random.RandomState(seed=42)
df1, df2, df3, df4 = [pd.DataFrame(rng.rand(nrows, ncols)) for _ in range(4)]

In [24]:
# Time the sum of the four frames written as ordinary pandas arithmetic.
%timeit df1 + df2 + df3 + df4

94.2 ms ± 5.23 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
# Time the same four-frame sum, this time passed to pd.eval as a string expression.
%timeit pd.eval('df1 +df2 +df3 +df4')

42.1 ms ± 2.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [26]:
# Confirm that the plain pandas sum and the pd.eval sum agree numerically.
np.allclose(
    df1 + df2 + df3 + df4,
    pd.eval('df1 +df2 +df3 +df4'),
)

True

In [27]:
# Rebind df1..df4 and add df5: five 100 x 3 frames of random integers
# drawn from rng with low=0 and high=1000.
df1, df2, df3, df4, df5 = [
    pd.DataFrame(rng.randint(0, 1000, size=(100, 3))) for _ in range(5)
]

In [28]:
# Arithmetic operators: the same expression evaluated from a pd.eval string
# and as ordinary pandas arithmetic, then compared numerically.
result2 = pd.eval('-df1 * df2 / (df3 + df4) - df5')
result1 = -df1 * df2 / (df3 + df4) - df5
np.allclose(result1, result2)

True

In [30]:
# Comparison operators combined with &: once as ordinary pandas expressions,
# once as a pd.eval string.
result1 = (df1 < df2) & (df2 <= df3) & (df3 != df4)
result2 = pd.eval('(df1 < df2) & (df2 <= df3) & (df3 != df4)')
# Both results are boolean frames, so check exact elementwise equality
# instead of a float-tolerance comparison.
np.array_equal(result1, result2)

True

In [31]:
# Bitwise & and | on boolean frames: once as ordinary pandas expressions,
# once as a pd.eval string.
result1 = (df1 < 0.5) & (df2 < 0.5) | (df3 < df4)
result2 = pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 < df4)')
# Both results are boolean frames, so check exact elementwise equality
# instead of a float-tolerance comparison.
np.array_equal(result1, result2)

True

In [32]:
# The same boolean expression written with the `and` / `or` keywords inside
# the pd.eval string.
result3 = pd.eval('(df1 < 0.5) and (df2 < 0.5) or (df3 < df4)')
# result1 and result3 are boolean frames, so check exact elementwise equality
# instead of a float-tolerance comparison.
np.array_equal(result1, result3)

True

In [33]:
# Display df2 (one of the integer frames) for reference.
df2

Unnamed: 0,0,1,2
0,75,15,719
1,741,587,37
2,879,695,688
3,475,110,918
4,806,420,361
...,...,...,...
95,928,616,851
96,863,990,828
97,679,689,539
98,649,945,826


In [34]:
# Display the transpose of df2: its rows become columns.
df2.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,75,741,879,475,806,238,818,879,576,591,...,594,889,655,672,433,928,863,679,649,922
1,15,587,695,110,420,819,283,290,891,898,...,572,936,343,847,377,616,990,689,945,387
2,719,37,688,918,361,403,615,629,690,482,...,68,29,562,991,273,851,828,539,826,502


In [35]:
# Column 0 of the transpose, i.e. the first row of df2, returned as a Series.
df2.T[0]

0     75
1     15
2    719
Name: 0, dtype: int32

In [36]:
# Attribute access (.T, .iloc) and indexing used inside a pd.eval string,
# compared with the same expression evaluated as ordinary pandas code.
result2 = pd.eval('df2.T[0] + df3.iloc[1]')
result1 = df2.T[0] + df3.iloc[1]
np.allclose(result1, result2)

True

In [37]:
# A 1000-row frame with columns A, B and C filled with uniform [0, 1)
# samples from rng; show the first five rows.
df = pd.DataFrame(data=rng.rand(1000, 3), columns=['A', 'B', 'C'])
df.head()

Unnamed: 0,A,B,C
0,0.375506,0.406939,0.069938
1,0.069087,0.235615,0.154374
2,0.677945,0.433839,0.652324
3,0.264038,0.808055,0.347197
4,0.589161,0.252418,0.557789


In [43]:
# Column arithmetic two ways: item access in ordinary pandas code, and
# attribute access on df inside a pd.eval string.
result1 = (df['A'] + df['B']) / (df['C'] - 1)
result2 = pd.eval("(df.A+df.B)/(df.C-1)")
np.allclose(result1, result2)

True

In [44]:
# DataFrame.eval lets the expression name the columns of df directly.
result3 = df.eval(expr='(A+B) / (C-1)')
np.allclose(result1, result3)

True

In [45]:
# Show the first five rows of df before a column is assigned through eval.
df.head()

Unnamed: 0,A,B,C
0,0.375506,0.406939,0.069938
1,0.069087,0.235615,0.154374
2,0.677945,0.433839,0.652324
3,0.264038,0.808055,0.347197
4,0.589161,0.252418,0.557789


In [46]:
# Assignment inside the expression: with inplace=True, df itself gains a
# column D computed from A, B and C.
df.eval(expr='D = (A + B) / C', inplace=True)
df.head()

Unnamed: 0,A,B,C,D
0,0.375506,0.406939,0.069938,11.18762
1,0.069087,0.235615,0.154374,1.973796
2,0.677945,0.433839,0.652324,1.704344
3,0.264038,0.808055,0.347197,3.087857
4,0.589161,0.252418,0.557789,1.508776


In [47]:
# Assigning to a column name again through eval writes column D of df in
# place with the new expression.
df.eval(expr='D = (A - B) / C', inplace=True)
df.head()

Unnamed: 0,A,B,C,D
0,0.375506,0.406939,0.069938,-0.449425
1,0.069087,0.235615,0.154374,-1.078728
2,0.677945,0.433839,0.652324,0.374209
3,0.264038,0.808055,0.347197,-1.566886
4,0.589161,0.252418,0.557789,0.603708


In [48]:
# Despite its name, column_mean holds the mean taken along axis 1: one value
# per row of df, averaged across that row's columns.
column_mean = df.mean(axis=1)
result1 = df['A'] + column_mean
# The '@' prefix makes the expression use the Python variable column_mean
# rather than look for a column of that name.
result2 = df.eval('A + @column_mean')
np.allclose(result1, result2)

True

In [49]:
# Display the Series computed by df.mean(1): one mean value per row of df.
column_mean

0      0.100740
1     -0.154913
2      0.534579
3     -0.036899
4      0.500769
         ...   
995    0.165847
996    0.100385
997    0.698249
998    0.571263
999    0.008088
Length: 1000, dtype: float64

In [50]:
# Row filtering: keep rows where both A and B are below 0.5, once with a
# boolean mask in ordinary pandas code and once through a pd.eval string.
result2 = pd.eval('df[(df.A < 0.5) & (df.B < 0.5)]')
result1 = df[(df.A < 0.5) & (df.B < 0.5)]
np.allclose(result1, result2)

True

In [51]:
# The same filter expressed with DataFrame.query, which names the columns
# directly in the expression string.
result3 = df.query(expr='A<0.5 and B < 0.5')
np.allclose(result1, result3)

True

In [52]:
# Filter on a threshold held in a Python variable: the mean of column C.
Cmean = df['C'].mean()
# In the query string, '@Cmean' refers to the Python variable above.
result2 = df.query('A < @Cmean and B < @Cmean')
result1 = df[(df.A < Cmean) & (df.B < Cmean)]
np.allclose(result1, result2)

True