**Imports**

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import json
import simplejson
from pathlib import Path
from typing import List, Dict, Tuple

**Code**

In [43]:
# Toy survey frame: income arrives as text with placeholder tokens,
# pregnancy_weeks mixes None with numeric entries, and age has one NaN.
df_missing = pd.DataFrame(
    data=dict(
        user_id=[1, 2, 3, 4, 5, 6],
        income_reported=["55000", "62000", "N/A", "not reported", None, "0"],
        pregnancy_weeks=[None, None, 20, 32, None, 0],  # not applicable vs. real values
        age=[25, 30, np.nan, 40, 35, 28],
    )
)

# Average age over the known values (mean() skips NaN); taken before imputing.
meanage = df_missing["age"].mean()

df_missing = df_missing.assign(
    # income: blank out the placeholder strings (including "0"), then parse as numbers
    income_reported=lambda frame: pd.to_numeric(
        frame["income_reported"].mask(
            frame["income_reported"].isin(["not reported", "N/A", "0"])
        )
    ),
    # pregnancy weeks: a literal 0 is turned into NaN
    pregnancy_weeks=lambda frame: frame["pregnancy_weeks"].mask(
        frame["pregnancy_weeks"].eq(0)
    ),
    # age: fill the gap with the mean computed above
    age=lambda frame: frame["age"].fillna(meanage),
)

# show the cleaned frame
print(df_missing)

   user_id  income_reported  pregnancy_weeks   age
0        1          55000.0              NaN  25.0
1        2          62000.0              NaN  30.0
2        3              NaN             20.0  31.6
3        4              NaN             32.0  40.0
4        5              NaN              NaN  35.0
5        6              NaN              NaN  28.0


In [61]:
# Frame with ordinary gaps (NaN / None) in every column.
df = pd.DataFrame(
    data=dict(
        age=[25, 30, np.nan, 40],
        income=[50000, None, 55000, 60000],
        city=["Riyadh", "Jeddah", None, None],
    )
)

# Frame where missing income is written as text placeholders.
df_missing = pd.DataFrame(
    data=dict(
        user_id=[1, 2, 3, 4, 5],
        income_reported=["55000", "N/A", "62000", "not reported", "?"],
    )
)

# Tokens that should count as missing.
custom_missing = ["N/A", "NA", "not reported", "unknown", "?"]

# Build a normalised copy; df_missing itself keeps the raw strings.
df_norm = df_missing.assign(
    income_reported=lambda frame: frame["income_reported"].mask(
        frame["income_reported"].isin(custom_missing)
    )
)

print(df_norm)

# Same report for both frames. Note: isna().mean() is a 0-1 fraction of
# missing rows, even though it is printed under the "percentage" label.
for frame in (df_norm, df):
    null_flags = frame.isna()
    print("\nMissing per column:\n", null_flags.sum(), "\nMissing percentage:\n", null_flags.mean())

   user_id income_reported
0        1           55000
1        2             NaN
2        3           62000
3        4             NaN
4        5             NaN

Missing per column:
 user_id            0
income_reported    3
dtype: int64 
Missing percentage:
 user_id            0.0
income_reported    0.6
dtype: float64

Missing per column:
 age       1
income    1
city      2
dtype: int64 
Missing percentage:
 age       0.25
income    0.25
city      0.50
dtype: float64


In [None]:
#==========================
# Sample Data with Missing Values
#==========================
# Six users; income, age and city each have two gaps written as None.
df_norm = pd.DataFrame(
    data=dict(
        user_id=[1, 2, 3, 4, 5, 6],
        income_reported=[55000, 62000, None, None, 48000, 0],
        age=[25, 30, None, 45, 28, None],
        city=["Riyadh", None, "Jeddah", "Riyadh", None, "Dammam"],
    )
)

#==========================
# Function: Column-level Missing Summary
#==========================
def missing_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise missing values per column.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to inspect; it is not modified.

    Returns
    -------
    pd.DataFrame
        One row per column of ``df`` (indexed by column name) with
        ``missing_count`` (number of cells flagged by ``isna()``) and
        ``missing_percentage`` (share of such cells on a 0-100 scale,
        rounded to one decimal). Rows are ordered from most to least
        missing; columns with equal percentages keep the order they have
        in ``df``.
    """
    null_mask = df.isna()  # computed once, reused for both statistics

    report = pd.DataFrame({
        "missing_count" : null_mask.sum(),
        "missing_percentage" : (null_mask.mean() * 100).round(1),
    })

    # Ties are common here (several columns with the same share of gaps).
    # The default quicksort does not guarantee their relative order, so a
    # stable algorithm is requested to keep the report deterministic.
    return report.sort_values("missing_percentage", ascending = False, kind = "mergesort")

#==========================
# Run Missingness Analysis
#==========================

# Per-column report (taken before the helper column below is added)
summary = missing_summary(df_norm)

# Row-level missing count, stored as an extra column
df_norm["missing_per_row"] = df_norm.isnull().sum(axis = "columns")

#==========================
# Display Results
#==========================
for banner, table in (
    ("---=== Column-level Missingness Summary ===---\n", summary),
    ("\n---=== DataFrame with Row-level Missingness Summary ===---\n", df_norm),
):
    print(banner)
    print(table)

---=== Column-level Missingness Summary ===---

                 missing_count  missing_percentage
income_reported              2                33.3
age                          2                33.3
city                         2                33.3
user_id                      0                 0.0

---=== DataFrame with Row-level Missingness Summary ===---

   user_id  income_reported   age    city  missing_per_row
0        1          55000.0  25.0  Riyadh                0
1        2          62000.0  30.0    None                1
2        3              NaN   NaN  Jeddah                2
3        4              NaN  45.0  Riyadh                1
4        5          48000.0  28.0    None                1
5        6              0.0   NaN  Dammam                1


In [None]:
#==========================
# Sample Data with Missing Values
#==========================
# Same six-user sample as before: two gaps each in income, age and city.
df_norm = pd.DataFrame(
    data=dict(
        user_id=[1, 2, 3, 4, 5, 6],
        income_reported=[55000, 62000, None, None, 48000, 0],
        age=[25, 30, None, 45, 28, None],
        city=["Riyadh", None, "Jeddah", "Riyadh", None, "Dammam"],
    )
)

# Banner, the frame itself, then its (rows, columns) shape, one per line.
print("===== Original Data =====", df_norm, f"Shape : {df_norm.shape}", sep="\n")

#==========================
# Basic Dropping Patterns 
#==========================
