# Data Cleaning Notebook

This notebook demonstrates three common data-cleaning steps with pandas: filling **missing values** (mean imputation), dropping **duplicate rows**, and removing **outliers** (IQR rule).

In [None]:
import pandas as pd

# Step 1: Create sample data with missing values, duplicates, and outliers
# The frame is deliberately dirty so that every later step has something to clean:
#   - Age has missing entries (None is stored as NaN, so the column is float),
#   - the two "Qadeer" rows agree in every column, i.e. they form a true duplicate row
#     (duplicated()/drop_duplicates() compare whole rows, so a repeated name alone
#     would not count as a duplicate),
#   - Salary holds one extreme value.
df = pd.DataFrame({
    "Name": ["Abeer", "Qadeer", "Umer", "Sehrish", "Khani", "Qadeer"],
    "Age": [25, None, 28, 26, 35, None],
    "Salary": [50000, 60000, 55000, 65000, 700000, 60000]  # 700000 is an outlier
})

print("Original Data:")
print(df)

In [None]:
# Step 2: Handle Missing Values
# Report the number of nulls per column before imputing anything.
print("\nMissing values before cleaning:")
print(df.isnull().sum())

# Impute missing ages with the column mean (mean() skips NaN by default).
# The filled column is assigned back to df instead of calling
# df["Age"].fillna(..., inplace=True): that form operates on a selected column,
# which triggers a chained-assignment FutureWarning on pandas 2.x and leaves df
# unchanged once Copy-on-Write is active.
df["Age"] = df["Age"].fillna(df["Age"].mean())

print("\nAfter filling missing values:")
print(df)

In [None]:
# Step 3: Handle Duplicates
# duplicated() marks a row only when every column equals an earlier row.
print("\nDuplicate rows count before cleaning:", df.duplicated().sum())

# Rebind df to the de-duplicated frame rather than mutating it in place; the first
# occurrence of each duplicated row is kept and the original index labels are retained.
df = df.drop_duplicates()

print("\nAfter removing duplicates:")
print(df)

In [None]:
# Step 4: Handle Outliers (IQR method)
# Quartiles of the Salary column and the spread between them.
salary = df["Salary"]
Q1, Q3 = salary.quantile(0.25), salary.quantile(0.75)
IQR = Q3 - Q1

# Fences sit 1.5 * IQR outside the quartiles; rows whose salary lies on or
# between the fences are kept (between() is inclusive on both ends).
lower_fence = Q1 - 1.5*IQR
upper_fence = Q3 + 1.5*IQR
df = df[salary.between(lower_fence, upper_fence)]

print("\nAfter removing outliers:", df, sep="\n")