In [8]:
import numpy as np
import pandas as pd

In [9]:
rng = np.random.RandomState(42)
x = rng.rand(int(1E6))
y = rng.rand(int(1E6))

In [10]:
%timeit x + y

2.67 ms ± 85.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
%timeit np.fromiter((xi + yi for xi, yi in zip(x, y)), dtype=x.dtype, count=len(x))

202 ms ± 669 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
mask = (x > 0.5) & (y < 0.5)

In [13]:
tmp1 = (x < 0.5)
tmp2 = (y < 0.5)

In [14]:
mask = tmp1 & tmp2

In [15]:
!pip install numexpr





In [16]:
import numexpr

In [17]:
mask_numexpr = numexpr.evaluate('(x > 0.5) & (y < 0.5)')
np.allclose(mask, mask_numexpr)

False

In [18]:
nrows, ncols = 100000, 100

rng = np.random.RandomState(42)
df1, df2, df3, df4 = (pd.DataFrame(rng.rand(nrows, ncols)) for i in range (4))

In [19]:
%timeit df1 + df2 + df3 + df4

61.8 ms ± 686 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [20]:
%timeit pd.eval('df1 + df2 + df3 + df4')

27.4 ms ± 565 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [21]:
np.allclose(df1 + df2 + df3 + df4, pd.eval('df1 + df2 + df3 + df4'))

True

In [23]:
df1, df2, df3, df4, df5 = (pd.DataFrame(rng.randint(0, 1000, (100, 3))) for i in range(5))

In [24]:
result1 = -df1 * df2 / (df3 + df4) - df5
result2 = pd.eval('-df1 * df2 / (df3 + df4) - df5')

np.allclose(result1, result2)

True

In [25]:
result1 = (df1 < df2) & (df2 <= df3) & (df3 != df4)
result2 = pd.eval('(df1 < df2) & (df2 <= df3) & (df3 != df4)')

np.allclose(result1, result2)

True

In [26]:
result1 = (df1 < 0.5) & (df2 < 0.5) | (df3< df4)
result2 = pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3< df4)')

np.allclose(result1, result2)

True

In [27]:
result3 = pd.eval('(df1 < 0.5) and (df2 < 0.5) or (df3< df4)')

np.allclose(result1, result3)

True

In [28]:
result1 = df2.T[0] + df3.iloc[1]
result2 = pd.eval('df2.T[0] + df3.iloc[1]')

np.allclose(result1, result2)

True

In [30]:
df = pd.DataFrame(rng.rand(1000, 3), columns=['A', 'B', 'C'])
df.head()

Unnamed: 0,A,B,C
0,0.818708,0.993065,0.998051
1,0.007756,0.371926,0.229226
2,0.460028,0.189491,0.357344
3,0.993607,0.994859,0.828843
4,0.14108,0.862088,0.40973


In [32]:
res1 = (df['A'] + df['B']) / (df['C'] - 1)
res2 = pd.eval('(df.A + df.B) / (df.C - 1)')

np.allclose(res1, res2)

True

In [34]:
res3 = df.eval('(A + B) / (C - 1)')

np.allclose(res1, res3)

True

In [35]:
df.head()

Unnamed: 0,A,B,C
0,0.818708,0.993065,0.998051
1,0.007756,0.371926,0.229226
2,0.460028,0.189491,0.357344
3,0.993607,0.994859,0.828843
4,0.14108,0.862088,0.40973


In [36]:
df.eval('D = (A + B) / C', inplace=True)

In [37]:
df.head()

Unnamed: 0,A,B,C,D
0,0.818708,0.993065,0.998051,1.81531
1,0.007756,0.371926,0.229226,1.656368
2,0.460028,0.189491,0.357344,1.817626
3,0.993607,0.994859,0.828843,2.399087
4,0.14108,0.862088,0.40973,2.448366


In [38]:
df.eval('D = (A - B) / C', inplace=True)

In [39]:
df.head()

Unnamed: 0,A,B,C,D
0,0.818708,0.993065,0.998051,-0.174697
1,0.007756,0.371926,0.229226,-1.588697
2,0.460028,0.189491,0.357344,0.757077
3,0.993607,0.994859,0.828843,-0.001511
4,0.14108,0.862088,0.40973,-1.759719


In [42]:
column_mean = df.mean(1)
res1 = df['A'] + column_mean
res2 = df.eval('A + @column_mean')

np.allclose(res1, res2)

True

In [43]:
r1 = df[(df.A < 0.5) & (df.B < 0.5)]
r2 = pd.eval('df[(df.A < 0.5) & (df.B < 0.5)]')

np.allclose(r1, r2)

True

In [44]:
r2 = df.query('A < 0.5 and B < 0.5')

np.allclose(r1, r2)

True

In [46]:
cmean = df['C'].mean()

r1 = df[(df.A < cmean) & (df.B < cmean)]
r2 = df.query('A < @cmean and B < @cmean')

np.allclose(r1, r2)

True

In [48]:
x = df[(df.A < 0.5) & (df.B > 0.5)]

In [49]:
tmp1 = df.A < 0.5
tmp2 = df.B < 0.5
tmp3 = tmp1 & tmp2
x = df[tmp3]

In [50]:
df.values.nbytes

32000