In [14]:
import pandas as pd
import numpy as np

In [15]:
# Build a five-row student table in which Age, Grade and Score
# each have exactly one missing (NaN) entry.
data = {
    'Name': ['Alice', 'Bob', 'Carol', 'Dave', 'Eve'],
    'Age': [20, np.nan, 19, 21, 22],          # Bob's age is missing
    'Grade': ['A', 'B', np.nan, 'B', 'A'],    # Carol's grade is missing
    'Score': [90, 85, 78, np.nan, 88],        # Dave's score is missing
}
df = pd.DataFrame(data)

# Count the NaN entries column by column before any imputation.
missing_per_column = df.isna().sum()

print("Original dataset:")
print(df)
print(f"\nMissing values per column:\n{missing_per_column}")

Original dataset:
    Name   Age Grade  Score
0  Alice  20.0     A   90.0
1    Bob   NaN     B   85.0
2  Carol  19.0   NaN   78.0
3   Dave  21.0     B    NaN
4    Eve  22.0     A   88.0

Missing values per column:
Name     0
Age      1
Grade    1
Score    1
dtype: int64


In [16]:
# Impute each numeric column with its own median; median() skips NaN,
# so the fill value is computed from the observed entries only.
for numeric_column in ('Age', 'Score'):
    column_median = df[numeric_column].median()
    df[numeric_column] = df[numeric_column].fillna(column_median)

print("After filling numeric values with median:")
print(df)


After filling numeric values with median:
    Name   Age Grade  Score
0  Alice  20.0     A   90.0
1    Bob  20.5     B   85.0
2  Carol  19.0   NaN   78.0
3   Dave  21.0     B   86.5
4    Eve  22.0     A   88.0


In [17]:
# Impute the missing Grade with the column's most frequent value.
# In this dataset 'A' and 'B' both occur twice, so mode() returns both
# and [0] takes the first of them, which is 'A' here.
grade_modes = df['Grade'].mode()
grade_mode = grade_modes[0]
df['Grade'] = df['Grade'].fillna(grade_mode)

print("After filling categorical values with mode:")
print(df)

After filling categorical values with mode:
    Name   Age Grade  Score
0  Alice  20.0     A   90.0
1    Bob  20.5     B   85.0
2  Carol  19.0     A   78.0
3   Dave  21.0     B   86.5
4    Eve  22.0     A   88.0


In [18]:
# Verify all missing values handled.
# Printing alone can never fail, so the check is also enforced with an
# assertion: a fresh run stops here if any column still contains NaN.
remaining_missing = df.isnull().sum()

print("Final verification - Missing values per column:")
print(remaining_missing)

# Report only the offending columns if the imputation was incomplete.
assert remaining_missing.sum() == 0, (
    f"Unhandled missing values remain:\n{remaining_missing[remaining_missing > 0]}"
)

print("\nFinal dataset:")
print(df)

Final verification - Missing values per column:
Name     0
Age      0
Grade    0
Score    0
dtype: int64

Final dataset:
    Name   Age Grade  Score
0  Alice  20.0     A   90.0
1    Bob  20.5     B   85.0
2  Carol  19.0     A   78.0
3   Dave  21.0     B   86.5
4    Eve  22.0     A   88.0
