7. DataFrame Filtering (advanced conditions & multiple columns)

In [2]:
import pandas as pd

df = pd.DataFrame({
    "Name": ["arman","akansha","zoya","pavitra","mehak","abhishek","manav"],
    "Age": [20,21,19,22,20,21,23],
    "Marks": [80,90,85,70,88,70,72],
    "City": ["Delhi","Delhi","Mumbai","Delhi","Mumbai","Noida","Delhi"]
})


In [3]:
# 1 Basic filtering (single condition)
df[df["Marks"]>80] # marks greater than 80

Unnamed: 0,Name,Age,Marks,City
1,akansha,21,90,Delhi
2,zoya,19,85,Mumbai
4,mehak,20,88,Mumbai


In [6]:
# Age == 21
df[df["Age"] == 21]
# Rule: df[ condition ]


Unnamed: 0,Name,Age,Marks,City
1,akansha,21,90,Delhi
5,abhishek,21,70,Noida


In [8]:
# 2 Filtering + selecting columns (MOST USED)
df.loc[df["Marks"]>80,["Name","Marks"]]

Unnamed: 0,Name,Marks
1,akansha,90
2,zoya,85
4,mehak,88


In [11]:
# Multiple conditions (AND / OR)
# AND (&)
df.loc[(df["Marks"]>80) & (df["Age"]>20)]

Unnamed: 0,Name,Age,Marks,City
1,akansha,21,90,Delhi


In [12]:
# OR (|)
df.loc[(df["Marks"]>80)| (df["City"]=="Noida")]

Unnamed: 0,Name,Age,Marks,City
1,akansha,21,90,Delhi
2,zoya,19,85,Mumbai
4,mehak,20,88,Mumbai
5,abhishek,21,70,Noida


In [13]:
# isin() – specific values filter
# City in Delhi or Mumbai
df[df["City"].isin(["Delhi","Mumbai"])]

Unnamed: 0,Name,Age,Marks,City
0,arman,20,80,Delhi
1,akansha,21,90,Delhi
2,zoya,19,85,Mumbai
3,pavitra,22,70,Delhi
4,mehak,20,88,Mumbai
6,manav,23,72,Delhi


In [14]:
# Marks in [70, 80]
df[df["Marks"].isin([70,80])]

Unnamed: 0,Name,Age,Marks,City
0,arman,20,80,Delhi
3,pavitra,22,70,Delhi
5,abhishek,21,70,Noida


In [15]:
# String-based filtering (VERY IMPORTANT)
# Names starting with “a”
df[df["Name"].str.startswith("a")]

Unnamed: 0,Name,Age,Marks,City
0,arman,20,80,Delhi
1,akansha,21,90,Delhi
5,abhishek,21,70,Noida


In [16]:
# Names containing “av”
df[df["Name"].str.contains("av")]

Unnamed: 0,Name,Age,Marks,City
3,pavitra,22,70,Delhi
6,manav,23,72,Delhi


In [17]:
# Case-insensitive contains
df[df["Name"].str.contains("A",case=False)]

Unnamed: 0,Name,Age,Marks,City
0,arman,20,80,Delhi
1,akansha,21,90,Delhi
2,zoya,19,85,Mumbai
3,pavitra,22,70,Delhi
4,mehak,20,88,Mumbai
5,abhishek,21,70,Noida
6,manav,23,72,Delhi


In [18]:
# Range filtering (between)
df[df["Marks"].between(75,90)] # Inclusive by default (75 & 90 included)

Unnamed: 0,Name,Age,Marks,City
0,arman,20,80,Delhi
1,akansha,21,90,Delhi
2,zoya,19,85,Mumbai
4,mehak,20,88,Mumbai


In [21]:
# Negation (NOT condition)
df[~(df["City"]== "Delhi")]

Unnamed: 0,Name,Age,Marks,City
2,zoya,19,85,Mumbai
4,mehak,20,88,Mumbai
5,abhishek,21,70,Noida


In [23]:
# Filtering with query() (clean & readable)     ,Looks like SQL — great for readability.
df.query("Marks > 80 and Age >= 20")

Unnamed: 0,Name,Age,Marks,City
1,akansha,21,90,Delhi
4,mehak,20,88,Mumbai


In [24]:
df.query("City == 'Delhi' or City == 'Noida' ")

Unnamed: 0,Name,Age,Marks,City
0,arman,20,80,Delhi
1,akansha,21,90,Delhi
3,pavitra,22,70,Delhi
5,abhishek,21,70,Noida
6,manav,23,72,Delhi


In [26]:
# Filter + Update (real-life cleaning)
df.loc[df["Marks"]<75,"Marks"] = 75
df

Unnamed: 0,Name,Age,Marks,City
0,arman,20,80,Delhi
1,akansha,21,90,Delhi
2,zoya,19,85,Mumbai
3,pavitra,22,75,Delhi
4,mehak,20,88,Mumbai
5,abhishek,21,75,Noida
6,manav,23,75,Delhi


In [29]:
# Give +5 grace to Delhi students
df.loc[df["City"]=='Delhi','Marks'] += 5
df

Unnamed: 0,Name,Age,Marks,City
0,arman,20,90,Delhi
1,akansha,21,100,Delhi
2,zoya,19,85,Mumbai
3,pavitra,22,85,Delhi
4,mehak,20,88,Mumbai
5,abhishek,21,75,Noida
6,manav,23,85,Delhi


In [34]:
# Real-life example
# Shortlist students:
# Marks >= 80 AND City = Delhi
shortlist = df.loc[(df["Marks"]>=80) & (df["City"]=='Delhi'), ['Name','Marks','City']]
shortlist


Unnamed: 0,Name,Marks,City
0,arman,90,Delhi
1,akansha,100,Delhi
3,pavitra,85,Delhi
6,manav,85,Delhi


In [35]:
# Practice DataFrame (Employees)
import pandas as pd

emp_df = pd.DataFrame({
    "EmpID": [101, 102, 103, 104, 105, 106],
    "Name": ["Aman", "Riya", "Kunal", "Sneha", "Vikram", "Pooja"],
    "Department": ["IT", "HR", "IT", "Finance", "HR", "IT"],
    "Salary": [55000, 48000, 62000, 70000, 45000, 58000],
    "Experience": [2, 1, 4, 6, 1, 3]
})

emp_df


Unnamed: 0,EmpID,Name,Department,Salary,Experience
0,101,Aman,IT,55000,2
1,102,Riya,HR,48000,1
2,103,Kunal,IT,62000,4
3,104,Sneha,Finance,70000,6
4,105,Vikram,HR,45000,1
5,106,Pooja,IT,58000,3


In [None]:
# 1️. Employees with Salary > 55000
emp_df.loc[(emp_df["Salary"]>55000),["Name","Salary"]]
# emp_df.loc[(emp_df["Salary"]>55000),["EmpID","Name","Salary"]]

Unnamed: 0,Name,Salary
2,Kunal,62000
3,Sneha,70000
5,Pooja,58000


In [42]:
# 2. Employees from IT department
emp_df.loc[(emp_df["Department"]=='IT'),["Name","Department"]]

Unnamed: 0,Name,Department
0,Aman,IT
2,Kunal,IT
5,Pooja,IT


In [43]:
# Name & Salary of employees with Experience ≥ 3
print(emp_df.loc[(emp_df["Experience"]>=3),["Name","Salary","Experience"]])

    Name  Salary  Experience
2  Kunal   62000           4
3  Sneha   70000           6
5  Pooja   58000           3


In [57]:
# # Increase salary by 10% for IT employees
emp_df.loc[(emp_df["Department"]=='IT'),["Salary"]] *= 1.10
print(emp_df)

   EmpID    Name Department   Salary  Experience
0    101    Aman         IT  60522.0           2
1    102    Riya         HR  48000.0           1
2    103   Kunal         IT  68222.0           4
3    104   Sneha    Finance  70000.0           6
4    105  Vikram         HR  45000.0           1
5    106   Pooja         IT  63822.0           3


  emp_df.loc[(emp_df["Department"]=='IT'),["Salary"]] *= 1.10


In [58]:
# # Employees with Salary between 50k and 65k
print(emp_df.loc[(emp_df["Salary"]>50000) & (emp_df["Salary"]<65000)])

   EmpID   Name Department   Salary  Experience
0    101   Aman         IT  60522.0           2
5    106  Pooja         IT  63822.0           3


In [59]:
# Select first 3 rows using iloc
print(emp_df.iloc[0:3])

   EmpID   Name Department   Salary  Experience
0    101   Aman         IT  60522.0           2
1    102   Riya         HR  48000.0           1
2    103  Kunal         IT  68222.0           4


In [60]:
# Select Name & Department columns using loc
print(emp_df.loc[:,["Name","Department"]])

     Name Department
0    Aman         IT
1    Riya         HR
2   Kunal         IT
3   Sneha    Finance
4  Vikram         HR
5   Pooja         IT
