In [1]:
import numpy as np

In [2]:
# Reproducible inputs: a seeded legacy RandomState feeds two successive
# draws of one million uniform samples each (x is drawn first, then y).
rng = np.random.RandomState(42)
x, y = (rng.rand(1000000) for _ in range(2))
# Baseline timing: add the two arrays element-wise in a single NumPy expression.
%timeit x+y

5.44 ms ± 112 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [4]:
# Same sum, but produced one pair at a time by a Python generator and
# collected with np.fromiter; timed for comparison with the expression above.
%timeit np.fromiter((xi + yi for xi, yi in zip(x,y)),dtype=x.dtype,count=len(x))

520 ms ± 23.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
# Element-wise mask: True where x is above 0.5 and y is below 0.5.
mask = np.logical_and(x > 0.5, y < 0.5)

In [6]:
# The same mask written out step by step: each comparison is stored in its
# own temporary array before the two are combined.
tmp1 = x > 0.5
tmp2 = y < 0.5
mask = tmp1 & tmp2

In [7]:
import numexpr

In [8]:
# Evaluate the same compound comparison from its string form with numexpr.
mask_numexpr = numexpr.evaluate('(x>0.5)&(y<0.5)')
# The masks hold True/False values, so check exact element-wise equality
# rather than closeness within a floating-point tolerance.
np.array_equal(mask, mask_numexpr)

True

In [10]:
import pandas as pd
# Four random frames of nrows x ncols values each, drawn one after another
# from a freshly seeded generator.
nrows = 1000000
ncols = 100
rng = np.random.RandomState(42)
df1, df2, df3, df4 = (
    pd.DataFrame(rng.rand(nrows, ncols)) for _ in range(4)
)

In [11]:
# Time the four-frame sum written as an ordinary Python expression.
%timeit df1 + df2 + df3 + df4

951 ms ± 189 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
# Time the same sum when the expression is handed to pd.eval as a string.
%timeit pd.eval('df1 + df2 + df3 + df4')

381 ms ± 13.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
# Confirm the plain expression and the pd.eval string give matching values.
np.allclose(
    df1 + df2 + df3 + df4,
    pd.eval('df1 + df2 + df3 + df4'),
)

True

In [14]:
# Rebind df1..df4 (and add df5) to small 100x3 integer frames with values
# drawn from [0, 1000).
df1, df2, df3, df4, df5 = (
    pd.DataFrame(rng.randint(0, 1000, (100, 3))) for _ in range(5)
)

In [15]:
# Arithmetic operators: compute the expression through pd.eval and directly
# in Python, then confirm that the two results agree.
result2 = pd.eval('-df1 *df2/(df3+df4)-df5')
result1 = -df1 * df2 / (df3 + df4) - df5
np.allclose(result1, result2)

True

In [17]:
# Comparison operators: the chained form inside pd.eval versus the explicit
# pairwise comparisons joined with &.
result1 = (df1 < df2) & (df2 <= df3) & (df3 != df4)
result2 = pd.eval('df1 < df2 <= df3 != df4')
# Comparison results are boolean, so test exact equality instead of
# closeness within a floating-point tolerance.
np.array_equal(result1, result2)

True

In [19]:
# Bitwise & and | on comparison results, directly and through pd.eval.
result1 = (df1 < 0.5) & (df2 < 0.5) | (df3 < df4)
result2 = pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 < df4)')
# Both results are boolean frames; check exact equality rather than
# closeness within a floating-point tolerance.
np.array_equal(result1, result2)

True

In [20]:
# Inside a pd.eval string the same logic can be spelled with the literal
# `and` / `or` keywords.
result3 = pd.eval('(df1 < 0.5) and (df2 < 0.5) or (df3 < df4)')
# Boolean results: compare for exact equality with the result1 computed in
# the preceding cell.
np.array_equal(result1, result3)

True

In [21]:
# Attribute access (.T, .iloc) and indexing inside a pd.eval string,
# checked against the same expression evaluated directly.
result2 = pd.eval('df2.T[0] + df3.iloc[1]')
result1 = df2.T[0] + df3.iloc[1]
np.allclose(result1, result2)

True

In [22]:
# A 1000x3 frame of uniform random values with labelled columns, used by the
# column-wise examples in the following cells.
df = pd.DataFrame(
    data=rng.rand(1000, 3),
    columns=['A', 'B', 'C'],
)
df.head()

Unnamed: 0,A,B,C
0,0.350317,0.229595,0.083845
1,0.328702,0.063538,0.083904
2,0.856127,0.340678,0.478523
3,0.808805,0.668873,0.830758
4,0.160789,0.025621,0.595533


In [23]:
# Column arithmetic: pd.eval reaches the columns as attributes of df, while
# the direct version uses bracket lookups.
result2 = pd.eval("(df.A + df.B) / (df.C - 1)")
result1 = (df['A'] + df['B']) / (df['C'] - 1)
np.allclose(result1, result2)

True

In [24]:
# DataFrame.eval lets the expression name the columns directly, with no
# `df.` prefix; compare with result1 from the preceding cell.
result3 = df.eval(expr='(A + B) / (C - 1)')
np.allclose(result1, result3)

True

In [25]:
# Show the first five rows before any column is added.
df.head(n=5)

Unnamed: 0,A,B,C
0,0.350317,0.229595,0.083845
1,0.328702,0.063538,0.083904
2,0.856127,0.340678,0.478523
3,0.808805,0.668873,0.830758
4,0.160789,0.025621,0.595533


In [26]:
# Assignment inside DataFrame.eval: with inplace=True the new column D is
# written onto df itself.
df.eval(expr='D = (A + B) / C', inplace=True)
df.head()

Unnamed: 0,A,B,C,D
0,0.350317,0.229595,0.083845,6.916444
1,0.328702,0.063538,0.083904,4.674883
2,0.856127,0.340678,0.478523,2.501041
3,0.808805,0.668873,0.830758,1.778709
4,0.160789,0.025621,0.595533,0.313013


In [27]:
# Assigning to the existing column D through eval replaces its values,
# again modifying df in place.
df.eval(expr='D = (A - B) / C', inplace=True)
df.head()

Unnamed: 0,A,B,C,D
0,0.350317,0.229595,0.083845,1.439825
1,0.328702,0.063538,0.083904,3.160335
2,0.856127,0.340678,0.478523,1.077166
3,0.808805,0.668873,0.830758,0.168439
4,0.160789,0.025621,0.595533,0.226969


In [28]:
# Despite its name, column_mean holds one value per row: the mean taken
# across the columns (axis=1).
column_mean = df.mean(axis=1)
# The @ prefix lets the eval string refer to a Python variable instead of a
# column of df.
result2 = df.eval('A + @column_mean')
result1 = df['A'] + column_mean
np.allclose(result1, result2)

True

In [29]:
# Row filtering: keep rows where both A and B are below 0.5, once directly
# and once with the whole indexing expression passed to pd.eval.
result2 = pd.eval('df[(df.A < 0.5) & (df.B < 0.5)]')
result1 = df.loc[(df.A < 0.5) & (df.B < 0.5)]
np.allclose(result1, result2)

True

In [30]:
# The same filter expressed with DataFrame.query; compared against result1
# from the preceding cell.
result2 = df.query(expr='A < 0.5 and B < 0.5')
np.allclose(result1, result2)

True

In [31]:
# Use the mean of column C as the threshold; inside query the Python
# variable is referenced with the @ prefix.
Cmean = df['C'].mean()
result2 = df.query('A < @Cmean and B < @Cmean')
result1 = df.loc[(df.A < Cmean) & (df.B < Cmean)]
np.allclose(result1, result2)

True