In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read sales.csv (path is relative to the current working directory)
# into the DataFrame that every later cell filters.
sales = pd.read_csv(filepath_or_buffer='sales.csv')
# Bare trailing expression: the notebook renders the frame as a table.
sales

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
0,Amy,North,Q1,24971,84
1,Amy,South,Q2,25749,557
2,Amy,East,Q3,24437,95
3,Amy,West,Q4,25355,706
4,Bob,North,Q1,25320,231
5,Bob,South,Q2,25999,84
6,Bob,East,Q3,22639,260
7,Bob,West,Q4,23949,109
8,Chuck,North,Q1,20280,453
9,Chuck,South,Q2,21584,114


### Method 1

In [3]:
# SQL-style "IN": keep only the rows whose SalesRep is 'Amy' or 'Bob'.
# isin() yields a boolean mask aligned with the frame; .loc applies it row-wise.
sales.loc[sales['SalesRep'].isin(['Amy', 'Bob'])]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
0,Amy,North,Q1,24971,84
1,Amy,South,Q2,25749,557
2,Amy,East,Q3,24437,95
3,Amy,West,Q4,25355,706
4,Bob,North,Q1,25320,231
5,Bob,South,Q2,25999,84
6,Bob,East,Q3,22639,260
7,Bob,West,Q4,23949,109


In [4]:
# SQL-style "NOT IN": exclude the rows whose SalesRep is 'Amy' or 'Bob'.
# The leading ~ flips the boolean mask produced by isin().
sales.loc[~sales['SalesRep'].isin(['Amy', 'Bob'])]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
8,Chuck,North,Q1,20280,453
9,Chuck,South,Q2,21584,114
10,Chuck,East,Q3,19625,83
11,Chuck,West,Q4,19832,70
12,Doug,North,Q1,25150,242
13,Doug,South,Q2,29061,146
14,Doug,East,Q3,27113,120
15,Doug,West,Q4,25953,81


### Method 2

In [5]:
# "IN" with a named list: store the SalesReps to keep in a variable,
# then pass that variable to isin().
filter_SalesRep = ['Amy', 'Bob']
sales.loc[sales['SalesRep'].isin(filter_SalesRep)]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
0,Amy,North,Q1,24971,84
1,Amy,South,Q2,25749,557
2,Amy,East,Q3,24437,95
3,Amy,West,Q4,25355,706
4,Bob,North,Q1,25320,231
5,Bob,South,Q2,25999,84
6,Bob,East,Q3,22639,260
7,Bob,West,Q4,23949,109


In [6]:
# "NOT IN" with a named list: store the SalesReps to exclude in a variable,
# build the isin() mask from it, and negate the mask with ~.
filter_SalesRep = ['Amy', 'Bob']
sales.loc[~sales['SalesRep'].isin(filter_SalesRep)]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
8,Chuck,North,Q1,20280,453
9,Chuck,South,Q2,21584,114
10,Chuck,East,Q3,19625,83
11,Chuck,West,Q4,19832,70
12,Doug,North,Q1,25150,242
13,Doug,South,Q2,29061,146
14,Doug,East,Q3,27113,120
15,Doug,West,Q4,25953,81


### Method 3 - Multiple Criteria

In [7]:
# "IN" on two columns at once: give each column its own list of allowed
# values and combine the two boolean masks with & (element-wise AND).
filter_SalesRep = ['Amy', 'Bob']
filter_Region = ['North', 'West']
sales.loc[
    sales['SalesRep'].isin(filter_SalesRep)
    & sales['Region'].isin(filter_Region)
]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
0,Amy,North,Q1,24971,84
3,Amy,West,Q4,25355,706
4,Bob,North,Q1,25320,231
7,Bob,West,Q4,23949,109


In [8]:
# "IN" on one column combined with "NOT IN" on another:
# prefix the mask of the column whose values you want to exclude with ~.
filter_SalesRep = ['Amy', 'Bob']
filter_Region = ['North', 'West']
sales.loc[
    sales['SalesRep'].isin(filter_SalesRep)
    & ~sales['Region'].isin(filter_Region)
]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
1,Amy,South,Q2,25749,557
2,Amy,East,Q3,24437,95
5,Bob,South,Q2,25999,84
6,Bob,East,Q3,22639,260


### Method 4 - Using Numpy (can be faster on small frames; verify with %timeit)

In [9]:
# "IN" via NumPy: np.isin returns a boolean array with one entry per
# SalesRep value, which is then used as a row mask.
filter_SalesRep = ['Amy', 'Bob']
sales.loc[np.isin(sales['SalesRep'], filter_SalesRep)]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
0,Amy,North,Q1,24971,84
1,Amy,South,Q2,25749,557
2,Amy,East,Q3,24437,95
3,Amy,West,Q4,25355,706
4,Bob,North,Q1,25320,231
5,Bob,South,Q2,25999,84
6,Bob,East,Q3,22639,260
7,Bob,West,Q4,23949,109


In [10]:
# "NOT IN" via NumPy: invert=True makes np.isin return True for the
# values that are absent from the list.
filter_SalesRep = ['Amy', 'Bob']
sales.loc[np.isin(sales['SalesRep'], filter_SalesRep, invert=True)]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
8,Chuck,North,Q1,20280,453
9,Chuck,South,Q2,21584,114
10,Chuck,East,Q3,19625,83
11,Chuck,West,Q4,19832,70
12,Doug,North,Q1,25150,242
13,Doug,South,Q2,29061,146
14,Doug,East,Q3,27113,120
15,Doug,West,Q4,25953,81


### Method 5 - Using List Comprehensions (can be much faster on small frames; verify with %timeit)

In [11]:
# "IN" via a list comprehension: build a plain Python list of booleans,
# one per SalesRep value, and index the frame with it.
filter_SalesRep = ['Amy', 'Bob']
sales[[rep in filter_SalesRep for rep in sales['SalesRep']]]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
0,Amy,North,Q1,24971,84
1,Amy,South,Q2,25749,557
2,Amy,East,Q3,24437,95
3,Amy,West,Q4,25355,706
4,Bob,North,Q1,25320,231
5,Bob,South,Q2,25999,84
6,Bob,East,Q3,22639,260
7,Bob,West,Q4,23949,109


In [14]:
# "NOT IN" via a list comprehension: the boolean list is True for every
# SalesRep value that does not appear in the list.
filter_SalesRep = ['Amy', 'Bob']
sales[[rep not in filter_SalesRep for rep in sales['SalesRep']]]

Unnamed: 0,SalesRep,Region,Month,Sales,Units Sold
8,Chuck,North,Q1,20280,453
9,Chuck,South,Q2,21584,114
10,Chuck,East,Q3,19625,83
11,Chuck,West,Q4,19832,70
12,Doug,North,Q1,25150,242
13,Doug,South,Q2,29061,146
14,Doug,East,Q3,27113,120
15,Doug,West,Q4,25953,81


### Method 6 - Select rows where the value is present in any of the selected columns

In [12]:
# Read games.csv (path is relative to the current working directory)
# into the DataFrame used by the cells that follow.
games = pd.read_csv(filepath_or_buffer='games.csv')
# Bare trailing expression: the notebook renders the frame as a table.
games

Unnamed: 0,SalesRep,Game1,Game2,Game3,Game4
0,Amy,Yes,No,No,No
1,Bob,No,Yes,No,No
2,Chuck,No,No,No,Yes
3,Doug,Yes,No,No,No


In [16]:
# Preview only the two columns that the next cell searches.
games.loc[:, ['Game2', 'Game4']]

Unnamed: 0,Game2,Game4
0,No,No
1,Yes,No
2,No,Yes
3,No,No


In [14]:
# Keep a row when at least one of Game2 / Game4 holds a value listed in
# criteria: isin() gives a boolean frame, any() collapses it across columns.
criteria = ['Yes']
games.loc[games[['Game2', 'Game4']].isin(criteria).any(axis='columns')]

Unnamed: 0,SalesRep,Game1,Game2,Game3,Game4
1,Bob,No,Yes,No,No
2,Chuck,No,No,No,Yes


### END