## Evaluate and Filter Data

This notebook explains methods of evaluating expressions and querying data in pandas. Takeaways from this notebook:

1. How to efficiently evaluate expressions in pandas using the `pd.eval()` function and the `DataFrame.eval()` method.
2. How to efficiently filter data using the `DataFrame.query()` method.

In [6]:
import numpy as np
import pandas as pd

### eval() method

In [5]:
# Reuse the same advertising revenue file and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../data/advertising.csv')
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,,,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,,
4,180.8,,58.4,17.9


In [17]:
# Baseline: add the TV, Radio and Newspaper columns with ordinary
# pandas arithmetic.
result1 = df.TV + df.Radio + df.Newspaper

# The same sum, written as a string expression and evaluated by pd.eval().
result2 = pd.eval("df.TV + df.Radio + df.Newspaper")

# Both approaches must agree; equal_nan=True makes NaNs that sit at the
# same positions in both results compare as equal.
np.allclose(result1, result2, equal_nan=True)

True

In [19]:
# An assignment inside the expression creates a new column (or overwrites
# an existing one with the same name).
# Rebind `df` to the frame returned by eval() instead of passing
# inplace=True, so the frame object loaded earlier is not mutated in place.
df = df.eval('Total = TV + Radio + Newspaper')
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales,Total
0,230.1,37.8,69.2,22.1,337.1
1,44.5,,,10.4,
2,17.2,45.9,69.3,12.0,132.4
3,151.5,41.3,,,
4,180.8,,58.4,17.9,


In [21]:
# Local Python variables are referenced inside an eval() expression with
# the `@` prefix.
# avg_tv_rev is the scalar mean of the TV column.
avg_tv_rev = df['TV'].mean()
result1 = df['TV'] + avg_tv_rev
result2 = df.eval('TV + @avg_tv_rev')
# equal_nan=True so that NaNs at the same positions in both results
# compare as equal.
np.allclose(result1, result2, equal_nan=True)

True

### query() method

In [23]:
# Keep only the rows whose TV revenue exceeds the mean of the TV column;
# the `@` prefix lets the query string read the local variable.
avg_tv_rev = df.TV.mean()
df.query('TV > @avg_tv_rev')

Unnamed: 0,TV,Radio,Newspaper,Sales,Total
0,230.1,37.8,69.2,22.1,337.1
3,151.5,41.3,,,
4,180.8,,58.4,17.9,
9,199.8,2.6,21.2,15.6,223.6
11,214.7,24.0,4.0,,242.7
...,...,...,...,...,...
193,166.8,42.0,3.6,19.6,212.4
194,149.7,35.6,6.0,17.3,191.3
197,177.0,9.3,6.4,14.8,192.7
198,283.6,42.0,66.2,25.5,391.8


In [24]:
# Combine two conditions: TV above its mean and Radio below its mean.
# avg_tv_rev is defined in an earlier cell.
avg_radio_rev = df.Radio.mean()
df.query('TV > @avg_tv_rev & Radio < @avg_radio_rev')

Unnamed: 0,TV,Radio,Newspaper,Sales,Total
9,199.8,2.6,21.2,15.6,223.6
21,237.4,5.1,23.5,17.5,266.0
23,228.3,16.9,26.2,20.5,271.4
25,262.9,3.5,19.5,17.0,285.9
27,240.1,16.7,22.9,20.9,279.7
33,265.6,20.0,0.3,17.4,285.9
35,290.7,4.1,8.5,17.8,303.3
40,202.5,22.3,31.6,16.6,256.4
43,206.9,8.4,26.4,17.9,241.7
45,175.1,22.5,31.5,16.1,229.1
