## Data Selection & Filtering

### Selecting rows & columns

In [1]:
import pandas as pd 

# Load the student records; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='students.csv')
# Take an independent (deep) copy of the freshly loaded frame.
df_original = df.copy(deep=True)

In [2]:
# Display the loaded table in full; a bare last expression gets the rich DataFrame rendering.
df

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79
6,7,Usman,21,M,Islamabad,73,68,70
7,8,Maria,23,F,Multan,89,92,94


In [3]:
# Single brackets with one label return that column as a Series.
df['Name']

0       Ali
1      Sara
2    Hassan
3    Ayesha
4      Omar
5      Zara
6     Usman
7     Maria
Name: Name, dtype: object

In [4]:
# Indexing with a list of labels returns a DataFrame holding just those columns.
name_and_city = ['Name', 'City']
df[name_and_city]

Unnamed: 0,Name,City
0,Ali,Lahore
1,Sara,Karachi
2,Hassan,Islamabad
3,Ayesha,Multan
4,Omar,Lahore
5,Zara,Karachi
6,Usman,Islamabad
7,Maria,Multan


In [5]:
# .loc slices by index label and keeps both endpoints, so rows labelled 0 through 3 come back.
df.loc[0:3]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89


In [6]:
# Label-based selection on both axes: rows 0 through 2, restricted to two columns.
wanted_columns = ['Name', 'Gender']
df.loc[0:2, wanted_columns]

Unnamed: 0,Name,Gender
0,Ali,M
1,Sara,F
2,Hassan,M


In [7]:
# .iloc is purely positional: the row at position 1 and the columns at
# positions 2 and 5 (Age and Math in this table).
column_positions = [2, 5]
df.iloc[1, column_positions]

Age     22
Math    88
Name: 1, dtype: object

In [8]:
# One row label plus one column label yields a single scalar value.
df.loc[0, 'Name']

'Ali'

In [9]:
# .at reads exactly one cell, addressed by row label and column label.
df.at[0, 'Gender']

'M'

In [10]:
# .iat reads exactly one cell by integer position: row 4, column 3 (Gender in this table).
df.iat[4, 3]

'M'

### Filtering with Conditions

In [11]:
# Boolean mask: True where the Math score is strictly above 85.
math_above_85 = df['Math'] > 85
df[math_above_85]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
3,4,Ayesha,23,F,Multan,92,95,89
7,8,Maria,23,F,Multan,89,92,94


In [12]:
# Keep only the rows whose Gender value is exactly 'F'.
is_female = df['Gender'] == 'F'
df[is_female]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
3,4,Ayesha,23,F,Multan,92,95,89
5,6,Zara,22,F,Karachi,81,85,79
7,8,Maria,23,F,Multan,89,92,94


In [13]:
# Show the whole table again as a reference point for the filters that follow.
df

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79
6,7,Usman,21,M,Islamabad,73,68,70
7,8,Maria,23,F,Multan,89,92,94


In [14]:
# Rows for students whose City is exactly 'Lahore'.
from_lahore = df['City'] == 'Lahore'
df[from_lahore]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
4,5,Omar,20,M,Lahore,55,60,58


In [15]:
# Keep only the rows whose Gender value is exactly 'F'.
# NOTE(review): the same filter already appears earlier in this notebook.
is_female = df['Gender'] == 'F'
df[is_female]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
3,4,Ayesha,23,F,Multan,92,95,89
5,6,Zara,22,F,Karachi,81,85,79
7,8,Maria,23,F,Multan,89,92,94


In [16]:
# Boolean mask: True where the Math score is strictly above 80.
math_above_80 = df['Math'] > 80
df[math_above_80]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
3,4,Ayesha,23,F,Multan,92,95,89
5,6,Zara,22,F,Karachi,81,85,79
7,8,Maria,23,F,Multan,89,92,94


In [17]:
# between() includes both endpoints by default, so a Science score of exactly 70 qualifies.
science_70_to_90 = df['Science'].between(70, 90)
df[science_70_to_90]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
5,6,Zara,22,F,Karachi,81,85,79
6,7,Usman,21,M,Islamabad,73,68,70


In [18]:
# isin() tests membership: keep rows whose City is any of the listed values.
target_cities = ['Karachi', 'Multan']
df[df['City'].isin(target_cities)]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
3,4,Ayesha,23,F,Multan,92,95,89
5,6,Zara,22,F,Karachi,81,85,79
7,8,Maria,23,F,Multan,89,92,94


In [19]:
# Boolean mask: True where the English score is strictly below 70.
english_below_70 = df['English'] < 70
df[english_below_70]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
4,5,Omar,20,M,Lahore,55,60,58
6,7,Usman,21,M,Islamabad,73,68,70


In [20]:
# Combine two masks with & (element-wise AND): both scores must be strictly above 85.
math_above_85 = df['Math'] > 85
science_above_85 = df['Science'] > 85
df[math_above_85 & science_above_85]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
3,4,Ayesha,23,F,Multan,92,95,89
7,8,Maria,23,F,Multan,89,92,94


In [21]:
# Exclude every row whose City is exactly 'Karachi'.
not_karachi = df['City'] != 'Karachi'
df[not_karachi]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
6,7,Usman,21,M,Islamabad,73,68,70
7,8,Maria,23,F,Multan,89,92,94


In [22]:
# Students older than 21 whose Math score is below 80 (no row in this table meets both).
older_than_21 = df['Age'] > 21
math_below_80 = df['Math'] < 80
df[older_than_21 & math_below_80]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science


In [23]:
# Show the whole table again as a reference point for the filters that follow.
df

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79
6,7,Usman,21,M,Islamabad,73,68,70
7,8,Maria,23,F,Multan,89,92,94


In [24]:
# Male students whose English score is strictly below 70.
is_male = df['Gender'] == 'M'
english_below_70 = df['English'] < 70
df[is_male & english_below_70]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
4,5,Omar,20,M,Lahore,55,60,58
6,7,Usman,21,M,Islamabad,73,68,70


In [25]:
# .loc takes a row mask and a column label together: names of students who are
# not from Lahore and scored at least 75 in Math.
outside_lahore = df['City'] != 'Lahore'
math_at_least_75 = df['Math'] >= 75
df.loc[outside_lahore & math_at_least_75, 'Name']

1      Sara
3    Ayesha
5      Zara
7     Maria
Name: Name, dtype: object

In [26]:
# ~ inverts the mask: keep rows whose Science score falls outside the 60-80 range.
science_60_to_80 = df['Science'].between(60, 80)
df[~science_60_to_80]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
7,8,Maria,23,F,Multan,89,92,94


In [27]:
# Cities whose name begins with an uppercase 'K'.
# na=False makes a missing City count as "no match"; without it the mask would
# contain NaN and boolean indexing would raise instead of filtering.
city_starts_with_k = df['City'].str.startswith('K', na=False)
df[city_starts_with_k]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
5,6,Zara,22,F,Karachi,81,85,79


In [28]:
# Names containing a lowercase 'm'. The match is case-sensitive, so a name whose
# only 'M' is uppercase is not selected.
# regex=False treats the pattern as a literal substring rather than a regular expression;
# na=False makes a missing Name count as "no match" so the mask stays purely boolean.
name_has_m = df['Name'].str.contains('m', regex=False, na=False)
df[name_has_m]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
4,5,Omar,20,M,Lahore,55,60,58
6,7,Usman,21,M,Islamabad,73,68,70


In [29]:
# Female students from Islamabad (no row in this table meets both conditions).
is_female = df['Gender'] == 'F'
from_islamabad = df['City'] == 'Islamabad'
df[is_female & from_islamabad]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science


In [30]:
# Math strictly above 80 together with English strictly below 90.
math_above_80 = df['Math'] > 80
english_below_90 = df['English'] < 90
df[math_above_80 & english_below_90]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
5,6,Zara,22,F,Karachi,81,85,79


In [31]:
# Filter rows and pick columns in one .loc call: Science of 85 or more, three columns shown.
science_at_least_85 = df['Science'] >= 85
df.loc[science_at_least_85, ['Name', 'City', 'Math']]

Unnamed: 0,Name,City,Math
1,Sara,Karachi,88
3,Ayesha,Multan,92
7,Maria,Multan,89


In [32]:
# Membership test on a numeric column: students aged exactly 20 or 22.
target_ages = [20, 22]
df[df['Age'].isin(target_ages)]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79


In [33]:
# Show the whole table again as a reference point for the filters that follow.
df

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79
6,7,Usman,21,M,Islamabad,73,68,70
7,8,Maria,23,F,Multan,89,92,94


In [34]:
# Male students whose English score is strictly below 70.
# NOTE(review): the same filter already appears earlier in this notebook.
is_male = df['Gender'] == 'M'
english_below_70 = df['English'] < 70
df[is_male & english_below_70]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
4,5,Omar,20,M,Lahore,55,60,58
6,7,Usman,21,M,Islamabad,73,68,70


In [35]:
# Names of students from Lahore who scored at least 75 in Math.
from_lahore = df['City'] == 'Lahore'
math_at_least_75 = df['Math'] >= 75
df.loc[from_lahore & math_at_least_75, 'Name']

0    Ali
Name: Name, dtype: object

In [36]:
# ~ inverts the mask: keep rows whose Science score falls outside the 60-80 range.
# NOTE(review): the same filter already appears earlier in this notebook (there without .loc).
science_60_to_80 = df['Science'].between(60, 80)
df.loc[~science_60_to_80]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
7,8,Maria,23,F,Multan,89,92,94


In [37]:
# Cities whose name begins with an uppercase 'K'.
# na=False makes a missing City count as "no match"; without it the mask would
# contain NaN and boolean indexing would raise instead of filtering.
# NOTE(review): the same filter already appears earlier in this notebook.
city_starts_with_k = df['City'].str.startswith('K', na=False)
df[city_starts_with_k]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
5,6,Zara,22,F,Karachi,81,85,79


In [38]:
# Names containing a lowercase 'm'. The match is case-sensitive, so a name whose
# only 'M' is uppercase is not selected.
# regex=False treats the pattern as a literal substring rather than a regular expression;
# na=False makes a missing Name count as "no match" so the mask stays purely boolean.
# NOTE(review): the same filter already appears earlier in this notebook.
name_has_m = df['Name'].str.contains('m', regex=False, na=False)
df[name_has_m]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
4,5,Omar,20,M,Lahore,55,60,58
6,7,Usman,21,M,Islamabad,73,68,70


In [39]:
# Female students from Islamabad (no row in this table meets both conditions).
# NOTE(review): the same filter already appears earlier in this notebook.
is_female = df['Gender'] == 'F'
from_islamabad = df['City'] == 'Islamabad'
df[is_female & from_islamabad]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science


In [40]:
# Math strictly above 80 together with English strictly below 90.
# NOTE(review): the same filter already appears earlier in this notebook.
math_above_80 = df['Math'] > 80
english_below_90 = df['English'] < 90
df[math_above_80 & english_below_90]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
5,6,Zara,22,F,Karachi,81,85,79


In [41]:
# Filter rows and pick columns in one .loc call: Science of 85 or more, three columns shown.
# NOTE(review): the same selection already appears earlier in this notebook.
science_at_least_85 = df['Science'] >= 85
selected_columns = ['Name', 'City', 'Math']
df.loc[science_at_least_85, selected_columns]

Unnamed: 0,Name,City,Math
1,Sara,Karachi,88
3,Ayesha,Multan,92
7,Maria,Multan,89


In [42]:
# Membership test on a numeric column: students aged exactly 20 or 22.
# NOTE(review): the same filter already appears earlier in this notebook.
target_ages = [20, 22]
df[df['Age'].isin(target_ages)]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79


In [43]:
# Filter on a derived value: the Science and Math scores added together must exceed 160.
science_plus_math = df['Science'] + df['Math']
df[science_plus_math > 160]

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
3,4,Ayesha,23,F,Multan,92,95,89
7,8,Maria,23,F,Multan,89,92,94


### Filtering with the .query() method

In [44]:
# Show the whole table again as a reference point for the query() examples that follow.
df

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79
6,7,Usman,21,M,Islamabad,73,68,70
7,8,Maria,23,F,Multan,89,92,94


In [45]:
# query() evaluates a string expression against the column names;
# here both conditions must hold for a row to be kept.
older_than_20_in_karachi = "Age > 20 and City == 'Karachi'"
df.query(older_than_20_in_karachi)

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
1,2,Sara,22,F,Karachi,88,90,85
5,6,Zara,22,F,Karachi,81,85,79


In [46]:
# Inside query(), `in` performs a membership test: students aged exactly 20 or 22.
age_is_20_or_22 = "Age in [20,22] "
df.query(age_is_20_or_22)

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79
