# Фильтрация данных на основе условий (Conditional Filtering)

## Импортируем библиотеки и загружаем файл

In [1]:
import numpy as np
import pandas as pd
# Cap how many rows pandas displays; longer results are truncated with "..." in the outputs below.
pd.options.display.max_rows = 5

In [2]:
# Load the tips dataset into a DataFrame; the relative path means the file must be in the working directory.
df = pd.read_csv('tips.csv')

In [3]:
# Preview the first rows to see the available columns and their values.
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


## Условия

In [4]:
# Element-wise comparison: produces a boolean Series that is True where total_bill exceeds 30.
bool_series = df['total_bill'] > 30

In [5]:
# Boolean-mask indexing: keep only the rows where the mask is True.
df[bool_series]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
11,35.26,5.00,Female,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969,Sun6686
23,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239
...,...,...,...,...,...,...,...,...,...,...,...
237,32.83,1.17,Male,Yes,Sat,Dinner,2,16.42,Thomas Brown,4284722681265508,Sat2929
238,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727,Sat9777


In [6]:
# Build the boolean mask inline and filter in a single expression.
df[df['total_bill']>30]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
11,35.26,5.00,Female,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969,Sun6686
23,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239
...,...,...,...,...,...,...,...,...,...,...,...
237,32.83,1.17,Male,Yes,Sat,Dinner,2,16.42,Thomas Brown,4284722681265508,Sat2929
238,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727,Sat9777


In [7]:
# Equality comparison against a string: keep rows whose sex is 'Male'.
df[df['sex'] == 'Male']

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
...,...,...,...,...,...,...,...,...,...,...,...
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


## Несколько условий

Последовательность шагов:

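* Пишем условия
* Берём каждое условие в скобки: у операторов & и | приоритет выше, чем у операторов сравнения (>, ==, != и т.д.), поэтому без скобок выражение будет разобрано неправильно
* Соединяем их с помощью операторов | или & (ключевые слова Python `or` и `and` для объектов Series не подходят — они вызывают ошибку ValueError):
    * | - логическое ИЛИ (OR) - когда хотя бы одно из условий должно быть истинно (True)
    * & - логическое И (AND) - когда оба условия должны быть истинны (True)
* Также есть оператор ~ (тильда) - это оператор NOT, он заменяет значение истинности на противоположное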

In [8]:
# AND: each condition is parenthesized and the masks are combined with &,
# so only rows satisfying both conditions are kept.
df[(df['total_bill'] > 30) & (df['sex']=='Male')]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
23,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239
39,31.27,5.00,Male,No,Sat,Dinner,3,10.42,Mr. Brandon Berry,6011525851069856,Sat6373
...,...,...,...,...,...,...,...,...,...,...,...
212,48.33,9.00,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590
237,32.83,1.17,Male,Yes,Sat,Dinner,2,16.42,Thomas Brown,4284722681265508,Sat2929


In [9]:
# ~ inverts the second mask: bill over 30 AND sex is NOT 'Male'.
df[(df['total_bill'] > 30) & ~(df['sex']=='Male')]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
11,35.26,5.00,Female,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969,Sun6686
52,34.81,5.20,Female,No,Sun,Dinner,4,8.70,Emily Daniel,4291280793094374,Sun6165
...,...,...,...,...,...,...,...,...,...,...,...
219,30.14,3.09,Female,Yes,Sat,Dinner,4,7.54,Shelby House,502097403252,Sat8863
238,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727,Sat9777


In [10]:
# Same kind of selection written with != instead of inverting an == mask with ~.
df[(df['total_bill'] > 30) & (df['sex']!='Male')]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
11,35.26,5.00,Female,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969,Sun6686
52,34.81,5.20,Female,No,Sun,Dinner,4,8.70,Emily Daniel,4291280793094374,Sun6165
...,...,...,...,...,...,...,...,...,...,...,...
219,30.14,3.09,Female,Yes,Sat,Dinner,4,7.54,Shelby House,502097403252,Sat8863
238,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727,Sat9777


In [11]:
# Weekend: Sunday or Saturday (| keeps rows where at least one condition is True)
df[(df['day'] =='Sun') | (df['day']=='Sat')]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
...,...,...,...,...,...,...,...,...,...,...,...
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


## Фильтрация с помощью метода isin()

С помощью метода .isin() можно фильтровать данные по набору из нескольких указанных значений.  
Название "isin" означает "is in", то есть это аналог оператора IN в SQL или оператора `in` в Python.

In [12]:
# Values to match against.
options = ['Sat','Sun']
# Boolean Series: True where the day is one of the listed options.
df['day'].isin(options)

0       True
1       True
       ...  
242     True
243    False
Name: day, Length: 244, dtype: bool

In [13]:
# Use the isin() mask to filter rows; the list of values is passed inline here.
df[df['day'].isin(['Sat','Sun'])]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
...,...,...,...,...,...,...,...,...,...,...,...
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17
