In [1]:
# Detect & Handle Missing Data in Pandas

import pandas as pd
import numpy as np

# Sample DataFrame with missing values.
# 'Age' and 'Score' are numeric columns with NaN gaps; 'Gender' is a text
# column with one missing entry; 'Name' is complete.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [25, np.nan, 30, 22, np.nan],
    'Gender': ['F', 'M', np.nan, 'M', 'F'],
    'Score': [85, 90, np.nan, 88, 95],
}

df = pd.DataFrame(data)

# Display the original DataFrame
print("Original DataFrame:")
print(df)

# 1. Detect missing values: isnull() yields a boolean mask, and summing it
#    column-wise counts the missing entries per column.
print("\nMissing values in each column:")
print(df.isnull().sum())

# 2. Drop rows with any missing values (returns a new frame; df is untouched).
df_dropped = df.dropna()
print("\nDataFrame after dropping rows with missing values:")
print(df_dropped)

# 3. Fill missing values with column-specific replacements.
#    Work on a copy so the original df stays available for the later steps.
df_filled = df.copy()
df_filled['Age'] = df_filled['Age'].fillna(df_filled['Age'].mean())  # Fill Age with mean
df_filled['Gender'] = df_filled['Gender'].fillna('Unknown')          # Fill Gender with 'Unknown'
df_filled['Score'] = df_filled['Score'].fillna(df_filled['Score'].median())  # Fill Score with median
print("\nDataFrame after filling missing values:")
print(df_filled)

# 4. Forward fill: propagate the last valid value downwards.
#    Uses DataFrame.ffill() rather than fillna(method='ffill'); the `method`
#    keyword of fillna is deprecated (FutureWarning since pandas 2.1).
df_ffill = df.ffill()
print("\nDataFrame after forward fill:")
print(df_ffill)

# 5. Backward fill: propagate the next valid value upwards.
#    Uses DataFrame.bfill() for the same reason as above. A missing value in
#    the last row has nothing after it to copy from, so it stays NaN.
df_bfill = df.bfill()
print("\nDataFrame after backward fill:")
print(df_bfill)

# 6. Interpolate missing values in the numeric columns only
#    (interpolate() is called with its defaults on each column).
df_interpolated = df.copy()
df_interpolated['Age'] = df_interpolated['Age'].interpolate()
df_interpolated['Score'] = df_interpolated['Score'].interpolate()
print("\nDataFrame after interpolation:")
print(df_interpolated)

Original DataFrame:
      Name   Age Gender  Score
0    Alice  25.0      F   85.0
1      Bob   NaN      M   90.0
2  Charlie  30.0    NaN    NaN
3    David  22.0      M   88.0
4      Eve   NaN      F   95.0

Missing values in each column:
Name      0
Age       2
Gender    1
Score     1
dtype: int64

DataFrame after dropping rows with missing values:
    Name   Age Gender  Score
0  Alice  25.0      F   85.0
3  David  22.0      M   88.0

DataFrame after filling missing values:
      Name        Age   Gender  Score
0    Alice  25.000000        F   85.0
1      Bob  25.666667        M   90.0
2  Charlie  30.000000  Unknown   89.0
3    David  22.000000        M   88.0
4      Eve  25.666667        F   95.0

DataFrame after forward fill:
      Name   Age Gender  Score
0    Alice  25.0      F   85.0
1      Bob  25.0      M   90.0
2  Charlie  30.0      M   90.0
3    David  22.0      M   88.0
4      Eve  22.0      F   95.0

DataFrame after backward fill:
      Name   Age Gender  Score
0    Alice  25.0      F   85.0
1      Bob  30.0      M   90.0
2  Charlie  30.0      M   88.0
3    David  22.0      M   88.0
4      Eve   NaN      F   95.0

  df_ffill = df.fillna(method='ffill')
  df_bfill = df.fillna(method='bfill')
