In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.DataFrame({
    "Name": ["Aman", "Riya", "Kunal", "Sneha", "Vikram"],
    "Age": [22, np.nan, 24, np.nan, 28],
    "Salary": [50000, 48000, np.nan, 70000, np.nan],
    "City": ["Delhi", None, "Mumbai", None, "Pune"]
})

In [4]:
# Check missing values

# Find where values are missing

# Count missing values column-wise

df.isnull()

Unnamed: 0,Name,Age,Salary,City
0,False,False,False,False
1,False,True,False,True
2,False,False,True,False
3,False,True,False,True
4,False,False,True,False


In [5]:
df.isnull().sum()

Name      0
Age       2
Salary    2
City      2
dtype: int64

In [7]:
# Check total missing values
df.isnull().sum().sum()

np.int64(6)

In [8]:
# Check rows with at least one missing value
df.isnull().any(axis = 1)

0    False
1     True
2     True
3     True
4     True
dtype: bool

In [9]:
# Fill missing Age with mean
df["Age"].fillna(df["Age"].mean(),inplace = True)
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Age"].fillna(df["Age"].mean(),inplace = True)


Unnamed: 0,Name,Age,Salary,City
0,Aman,22.0,50000.0,Delhi
1,Riya,24.666667,48000.0,
2,Kunal,24.0,,Mumbai
3,Sneha,24.666667,70000.0,
4,Vikram,28.0,,Pune


In [11]:
# Fill missing Salary with median
df["Salary"].fillna(df["Salary"].median(),inplace= True)
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Salary"].fillna(df["Salary"].median(),inplace= True)


Unnamed: 0,Name,Age,Salary,City
0,Aman,22.0,50000.0,Delhi
1,Riya,24.666667,48000.0,
2,Kunal,24.0,50000.0,Mumbai
3,Sneha,24.666667,70000.0,
4,Vikram,28.0,50000.0,Pune


In [12]:
# Fill missing City with "Unknown"
df["City"].fillna("unknown",inplace=True)
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["City"].fillna("unknown",inplace=True)


Unnamed: 0,Name,Age,Salary,City
0,Aman,22.0,50000.0,Delhi
1,Riya,24.666667,48000.0,unknown
2,Kunal,24.0,50000.0,Mumbai
3,Sneha,24.666667,70000.0,unknown
4,Vikram,28.0,50000.0,Pune


In [13]:
# Verify again:
df.isnull()

Unnamed: 0,Name,Age,Salary,City
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,False,False,False,False


In [14]:
# Drop rows where Salary is missing
df.dropna(subset=["Salary"])

Unnamed: 0,Name,Age,Salary,City
0,Aman,22.0,50000.0,Delhi
1,Riya,24.666667,48000.0,unknown
2,Kunal,24.0,50000.0,Mumbai
3,Sneha,24.666667,70000.0,unknown
4,Vikram,28.0,50000.0,Pune


In [15]:
# Fill remaining missing values using forward fill
df.fillna(method="ffill")

  df.fillna(method="ffill")


Unnamed: 0,Name,Age,Salary,City
0,Aman,22.0,50000.0,Delhi
1,Riya,24.666667,48000.0,unknown
2,Kunal,24.0,50000.0,Mumbai
3,Sneha,24.666667,70000.0,unknown
4,Vikram,28.0,50000.0,Pune


In [16]:
# Create a column Age_Group

# Age ≥ 25 → "Senior"

# Else → "Junior"

df["Age_Group"] = 'Junior'
df.loc[df["Age"]>=25,"Age_Group"] = "senior"
df

Unnamed: 0,Name,Age,Salary,City,Age_Group
0,Aman,22.0,50000.0,Delhi,Junior
1,Riya,24.666667,48000.0,unknown,Junior
2,Kunal,24.0,50000.0,Mumbai,Junior
3,Sneha,24.666667,70000.0,unknown,Junior
4,Vikram,28.0,50000.0,Pune,senior


In [17]:
# Replace all remaining NaN with 0
df.fillna(0,inplace=True)

In [18]:
df

Unnamed: 0,Name,Age,Salary,City,Age_Group
0,Aman,22.0,50000.0,Delhi,Junior
1,Riya,24.666667,48000.0,unknown,Junior
2,Kunal,24.0,50000.0,Mumbai,Junior
3,Sneha,24.666667,70000.0,unknown,Junior
4,Vikram,28.0,50000.0,Pune,senior
