In [4]:
import numpy as np
import pandas as pd



In [5]:
# Benchmark setup: four independent random frames of shape (nrows, ncols).
nrows, ncols = 100000, 100
rng = np.random.RandomState(42)  # fixed seed so every run builds the same frames

# Each generated frame gets its own name. The original unpack target listed
# df3 twice, which discarded one frame and left df4 undefined.
df1, df2, df3, df4 = (pd.DataFrame(rng.rand(nrows, ncols))
                      for _ in range(4))

In [6]:
# Time the sum evaluated directly by Python/pandas operators.
# NOTE(review): df3 appears twice in this sum -- confirm whether a fourth frame (df4) was intended.
%timeit df1+df2+df3+df3

72.6 ms ± 2.38 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
# Time the same sum when the expression string is handed to pd.eval().
# NOTE(review): df3 appears twice in this expression -- confirm whether df4 was intended.
%timeit pd.eval('df1+df2+df3+df3')

28.4 ms ± 535 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
# Confirm that the operator-based sum and the pd.eval() sum agree element-wise.
python_sum = df1 + df2 + df3 + df3
eval_sum = pd.eval('df1+df2+df3+df3')
np.allclose(python_sum, eval_sum)

True

### Operations supported by pd.eval()
As of Pandas v0.16, pd.eval() supports a wide range of operations. To demonstrate these, we'll use the following integer DataFrames:

In [10]:
# Rebind df1..df5 to five small integer frames (100 rows x 3 columns each),
# drawn in order from the shared `rng` generator.
df1, df2, df3, df4, df5 = [
    pd.DataFrame(rng.randint(0, 1000, (100, 3)))
    for _ in range(5)
]

### Arithmetic operators

In [13]:
# Show both frames with the notebook's rich display instead of print(),
# which flattens them into one plain-text blob.
display(df1, df2)

      0    1    2
0   180  112  748
1   447  205  487
2   656  100   98
3    90  450  613
4   529  224  530
..  ...  ...  ...
95   31  787  643
96  984  624  352
97  283  543  751
98    5  142  278
99  258  662  895

[100 rows x 3 columns]       0    1    2
0    75   15  719
1   741  587   37
2   879  695  688
3   475  110  918
4   806  420  361
..  ...  ...  ...
95  928  616  851
96  863  990  828
97  679  689  539
98  649  945  826
99  922  387  502

[100 rows x 3 columns]


In [11]:
# Evaluate the same arithmetic expression with pandas operators and with pd.eval().
result1 = (-df1 * df2) / (df3 + df4) - df5
result2 = pd.eval('-df1 * df2 / (df3 + df4) - df5')

# True when both evaluations agree element-wise.
np.allclose(result1, result2)

True

### Comparison operators

In [14]:
# Build the three pairwise comparisons separately, then AND them together.
lt_mask = df1 < df2
le_mask = df2 <= df3
ne_mask = df3 != df4
result1 = lt_mask & le_mask & ne_mask

# pd.eval() accepts the same test written as one chained comparison.
result2 = pd.eval('df1 < df2 <= df3 != df4')
np.allclose(result1, result2)

True

### Bitwise operators
pd.eval() supports the & and | bitwise operators:

In [15]:
# `&` binds tighter than `|`; the extra parentheses only make that grouping explicit.
result1 = ((df1 < 0.5) & (df2 < 0.5)) | (df3 < df4)
result2 = pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 < df4)')

# True when both evaluations agree element-wise.
np.allclose(result1, result2)

True

In [16]:
# The same expression spelled with the `and` / `or` keywords inside pd.eval().
# It is compared against `result1`, which the preceding bitwise-operator cell defines.
result3 = pd.eval("(df1 < 0.5) and (df2 < 0.5) or (df3 < df4)")
np.allclose(result1, result3)

True

### Object attributes and indices
pd.eval() supports access to object attributes via the obj.attr syntax, and indexes via the obj[index] syntax:

In [17]:
# Column 0 of df2's transpose (the row of df2 labelled 0) plus the row of df3 at position 1.
df2_row = df2.T[0]
df3_row = df3.iloc[1]
result1 = df2_row + df3_row

# The same attribute and index accesses work inside the pd.eval() expression string.
result2 = pd.eval('df2.T[0] + df3.iloc[1]')
np.allclose(result1, result2)

True

In [20]:
# Column 0 of df2's transpose, i.e. the row of df2 labelled 0.
df2.T[0]

0     75
1     15
2    719
Name: 0, dtype: int32

In [21]:
# Row of df3 at integer position 1.
df3.iloc[1]

0    766
1    714
2    218
Name: 1, dtype: int32

### Other operations
Other operations such as function calls, conditional statements, loops, and other more involved constructs are currently not implemented in `pd.eval()`. If you'd like to execute these more complicated types of expressions, you can use the Numexpr library itself.

### DataFrame.eval() for Column-Wise Operations
Just as Pandas has a top-level `pd.eval()` function, DataFrames have an `eval()` method that works in a similar way. The benefit of the `eval()` method is that columns can be referred to by name. We'll use this labeled array as an example:

In [22]:
# 1000 x 3 frame of uniform random floats with named columns A, B and C.
column_labels = list('ABC')
df = pd.DataFrame(data=rng.rand(1000, 3), columns=column_labels)
df.head()

Unnamed: 0,A,B,C
0,0.375506,0.406939,0.069938
1,0.069087,0.235615,0.154374
2,0.677945,0.433839,0.652324
3,0.264038,0.808055,0.347197
4,0.589161,0.252418,0.557789


In [23]:
# Reference computation using ordinary column indexing.
numerator = df['A'] + df['B']
denominator = df['C'] - 1
result1 = numerator / denominator

# With the top-level pd.eval(), columns are reached as attributes of df.
result2 = pd.eval("(df.A + df.B) / (df.C - 1)")
np.allclose(result1, result2)

True

In [24]:
# DataFrame.eval() lets the expression name the columns directly.
# It is compared against `result1`, which the preceding cell defines.
result3 = df.eval("(A + B) / (C - 1)")
np.allclose(result1, result3)

True

In [27]:
# Assignment inside the expression creates column D; inplace=True makes
# eval() modify df itself.
df.eval('D=(A + B) /C ', inplace=True)

# Preview the first rows, now including D.
df.head()

Unnamed: 0,A,B,C,D
0,0.375506,0.406939,0.069938,11.18762
1,0.069087,0.235615,0.154374,1.973796
2,0.677945,0.433839,0.652324,1.704344
3,0.264038,0.808055,0.347197,3.087857
4,0.589161,0.252418,0.557789,1.508776


In [29]:
# Overwrite the existing column D in place.
# `/` and `*` share precedence and group left to right, so the expression
# computes ((A + C) / 2) * B, not (A + C) / (2 * B).
df.eval('D = (A + C) / 2*B', inplace=True)

# Preview the first rows with the new D values.
df.head()

Unnamed: 0,A,B,C,D
0,0.375506,0.406939,0.069938,0.090634
1,0.069087,0.235615,0.154374,0.026325
2,0.677945,0.433839,0.652324,0.288561
3,0.264038,0.808055,0.347197,0.246956
4,0.589161,0.252418,0.557789,0.144756


### Local variables in DataFrame.eval()

In the example below, the `@` character marks a variable name rather than a column name, and lets you efficiently evaluate expressions involving the two "namespaces": the namespace of columns and the namespace of Python objects. Note that the `@` character is only supported by the `DataFrame.eval()` method, not by the `pandas.eval()` function, because `pandas.eval()` only has access to the one (Python) namespace.

In [33]:
# Despite its name, column_mean holds one mean per row (axis=1 averages across the columns).
column_mean = df.mean(axis=1)
result1 = df.A + column_mean

# Inside DataFrame.eval(), `@column_mean` refers to the Python variable above.
result2 = df.eval('A + @column_mean')
np.allclose(result1, result2)

True

In [31]:
# Per-row mean across the columns of df (the positional argument 1 is the axis).
df.mean(1)

0      0.235754
1      0.121350
2      0.513167
3      0.416561
4      0.386031
         ...   
995    0.142210
996    0.489254
997    0.916626
998    0.484622
999    0.412103
Length: 1000, dtype: float64

In [32]:
# Display result2; the recorded values match A plus the per-row mean,
# i.e. the output of df.eval('A + @column_mean').
result2

0      0.611261
1      0.190438
2      1.191112
3      0.680599
4      0.975192
         ...   
995    0.224856
996    0.498080
997    1.823896
998    1.243617
999    0.781364
Length: 1000, dtype: float64

### DataFrame.query() Method
The DataFrame has another method based on evaluated strings, called the `query()` method. Consider the following:


# Query the columns of a DataFrame with a boolean expression.