# Pandas for data filtering

Just as we filter rows in SQL with a `WHERE` clause, pandas provides several ways to filter a DataFrame so that analysis can be performed on a specific subset of the data.

Fetch the data and load it into a pandas DataFrame:

In [1]:
import pandas as pd
 
# Download the Iris data file and build the working DataFrame in one chain:
#   1. parse the comma-separated file, which has no header row of its own,
#   2. label the five columns,
#   3. discard any row whose fields are all missing (e.g. an empty line at
#      the end of the file).
df = (
    pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        sep=',',
        header=None,
    )
    .set_axis(['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class'], axis=1)
    .dropna(how="all")
)

In [2]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Preview the last five rows of the frame.
df.tail(n=5)

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [11]:
# Simple filter: a boolean mask keeps only the rows where petal_len is
# greater than 4.5; show the first five matches.
df.loc[df['petal_len'] > 4.5].head(n=5)

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
50,7.0,3.2,4.7,1.4,Iris-versicolor
52,6.9,3.1,4.9,1.5,Iris-versicolor
54,6.5,2.8,4.6,1.5,Iris-versicolor
56,6.3,3.3,4.7,1.6,Iris-versicolor
58,6.6,2.9,4.6,1.3,Iris-versicolor


In [12]:
# Filter with the AND operator (`&`): keep rows where petal_len is greater
# than 4.5 AND less than 5.5. Each comparison is wrapped in parentheses
# because `&` binds more tightly than `>` and `<`.
df.loc[
    (df['petal_len'] > 4.5)
    & (df['petal_len'] < 5.5)
].head(n=5)

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
50,7.0,3.2,4.7,1.4,Iris-versicolor
52,6.9,3.1,4.9,1.5,Iris-versicolor
54,6.5,2.8,4.6,1.5,Iris-versicolor
56,6.3,3.3,4.7,1.6,Iris-versicolor
58,6.6,2.9,4.6,1.3,Iris-versicolor


In [9]:
# Filter with the OR operator (`|`): select rows where petal_len is greater
# than 5.5 OR less than 2.0. Each comparison is wrapped in parentheses because
# `|` binds more tightly than `>` and `<`.
df[(df['petal_len'] > 5.5) | (df['petal_len'] < 2.0)].head(5)

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [13]:
# Filter with the NOT operator (`~`): invert the boolean mask so that every
# class (Iris flower type) except Iris-virginica is kept.
df.loc[~(df['class'] == 'Iris-virginica')].head(n=5)

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
