In [2]:
import pandas as pd

In [6]:
# Column-oriented sample data: every key is a column label and every list
# holds that column's values, one entry per row.
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": ["corey@gmail.com", "jane@gmail.com", "john@gmail.com"],
}

# Build the dataframe used by every cell below.
my_df = pd.DataFrame(data=people)
my_df  # last expression of the cell, so the notebook renders the frame

Unnamed: 0,first,last,email
0,Corey,Schafer,corey@gmail.com
1,Jane,Doe,jane@gmail.com
2,John,Doe,john@gmail.com


### While filtering, we get a series of boolean values

This means that we can filter by manually passing a series of boolean values into the dataframe

In [8]:
# Filtering with a condition: comparing a column to a value
# yields a boolean Series with one True/False entry per row
my_df['last'] == 'Doe'

0    False
1     True
2     True
Name: last, dtype: bool

`True` marks the rows that satisfy the condition; `False` marks the rows that do not.

In [9]:
# Store the boolean mask in a variable so it can be reused
filt = (my_df['last'] == 'Doe')
filt
# For a single comparison like this one, the surrounding () are optional;
# they are only there to make the line easier to read.


0    False
1     True
2     True
Name: last, dtype: bool

In [12]:
# Apply the boolean mask to the dataframe
my_df[filt]  # returns only the rows where the mask is True

Unnamed: 0,first,last,email
1,Jane,Doe,jane@gmail.com
2,John,Doe,john@gmail.com


In [14]:
# Alternative way to do that:
# write the condition inline, without assigning the mask to a variable first

my_df[my_df['last']=='Doe']

Unnamed: 0,first,last,email
1,Jane,Doe,jane@gmail.com
2,John,Doe,john@gmail.com


## We can select columns after filtering the rows
Selecting columns means choosing which columns to show <br>
We can do this by passing a column label, or a list of column labels, in a second pair of square brackets `[]` (a slice inside plain `[]` selects rows, not columns; to slice columns by label use `.loc`)

In [21]:
# Select columns after filtering the rows: the second [] takes a list of column labels.
# This chained form is fine for reading data; when assigning values,
# use my_df.loc[rows, cols] instead of chained indexing.
my_df[filt][['email', 'first']]

Unnamed: 0,email,first
1,jane@gmail.com,Jane
2,john@gmail.com,John


## We can also pass the filter into the .loc[] indexer
It will produce the exact same result

In [19]:
my_df.loc[filt]

# Selects the same rows as my_df[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,jane@gmail.com
2,John,Doe,john@gmail.com


### We can also filter the columns subsequently after filtering the rows

Remember: the `.loc[]` indexer takes the column label, a list of labels, or a label slice as its second argument.


In [18]:
my_df.loc[filt, 'email']

# Returns the rows where last name == 'Doe', showing only the 'email' column.
# Because a single column label (not a list) is passed, the result is a Series.

1    jane@gmail.com
2    john@gmail.com
Name: email, dtype: object

## Using the logical operators (& and | and ~)

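Note: We cannot use Python's built-in `and` / `or` keywords here, because they do not work element-wise on a Series (they raise a `ValueError` about an ambiguous truth value).<br>
We use & and | for element-wise AND and OR operations, wrapping each comparison in parentheses <br>
And ~ for inversion (element-wise NOT)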

### Using the & operator

In [24]:
# Filtering rows where the last name is Doe AND the first name is John
filt2 = (my_df['last'] == 'Doe') & (my_df['first'] == 'John')

# The () around each comparison are required here: & binds more tightly than ==,
# so without them Python would try to evaluate 'Doe' & my_df['first'] first.

In [27]:
# Keep only the rows where both conditions of filt2 hold
my_df[filt2]

Unnamed: 0,first,last,email
2,John,Doe,john@gmail.com


In [31]:
# Alternative: pass the same mask to the .loc[] indexer
my_df.loc[filt2]

Unnamed: 0,first,last,email
2,John,Doe,john@gmail.com


In [32]:
# Filter the rows first, then choose which columns to show with a second []
my_df[filt2][['first', 'last']]

Unnamed: 0,first,last
2,John,Doe


In [34]:
# Doing the same thing with the .loc[] indexer:
# the mask selects the rows, the list selects the columns
my_df.loc[filt2, ['first','last']]

Unnamed: 0,first,last
2,John,Doe


### Using the | Operator

In [36]:
# Rows where the last name is Schafer OR the first name is John
# (a row is kept if at least one of the two conditions is True)
filt3 = (my_df['last'] == 'Schafer') | (my_df['first'] == 'John') 

In [37]:
# Keep the rows where at least one condition of filt3 holds
my_df[filt3]

Unnamed: 0,first,last,email
0,Corey,Schafer,corey@gmail.com
2,John,Doe,john@gmail.com


In [38]:
# Doing the same thing with the .loc[] indexer
my_df.loc[filt3]

Unnamed: 0,first,last,email
0,Corey,Schafer,corey@gmail.com
2,John,Doe,john@gmail.com


### Negating the filter
#### Using the ~ operator

We can use the ~ operator to negate any filter

In [40]:
# ~ flips every True/False in the mask, so this keeps the rows where
# the last name is not Schafer and the first name is not John
my_df[~filt3]

Unnamed: 0,first,last,email
1,Jane,Doe,jane@gmail.com
