In [1]:
import pandas as pd
import numpy as np

# Sample patient records for the audit demo: five rows, three integer
# columns and two text columns. Some Patient_Name values carry leading or
# trailing spaces.
data = dict(
    Patient_ID=[101, 102, 103, 104, 105],
    Age=[25, 30, 40, 35, 50],
    Blood_Pressure=[120, 130, 110, 110, 125],
    Blood_Type=['O+', 'A+', 'O+', 'B+', 'A+'],
    Patient_Name=[' Alice ', 'Bob', 'Charlie', ' David', 'Eve '],
)
# Column-oriented mapping -> DataFrame (one column per key, in key order).
df = pd.DataFrame.from_dict(data)

# 🔍 Define the quality check function
def data_quality_report(df):
    """Print a quick data-quality audit of ``df`` to standard output.

    Sections, in order: missing-value count per column, column dtypes,
    number of duplicated rows, ``describe()`` summary statistics, and the
    unique values of each object-dtype column.

    Args:
        df: pandas DataFrame to audit; it is only read, never modified.

    Returns:
        None. The report is printed, not returned.
    """
    # Each table is interpolated into a single string. Passing it as a
    # separate print() argument would insert the separator space in front of
    # the table's first line and misalign it with the rows below.
    print(f"🔍 Missing Values:\n{df.isnull().sum()}\n")
    print(f"📊 Data Types:\n{df.dtypes}\n")
    print(f"🗂 Duplicate Rows: {df.duplicated().sum()}\n")

    # describe() raises ValueError on a frame without columns; report that
    # case explicitly instead of aborting the rest of the audit.
    if len(df.columns) == 0:
        summary = "(no columns to summarize)"
    else:
        summary = df.describe()
    print(f"📈 Summary Stats:\n{summary}\n")

    # NOTE(review): only object-dtype columns are listed; text columns stored
    # under a dedicated string dtype would be skipped — confirm whether that
    # matters for the pandas version in use.
    for col in df.select_dtypes(include=['object']).columns:
        print(f"🔢 Unique values in {col}:\n{df[col].unique()}\n")

# ✅ Audit the frame as loaded, before any cleanup is applied
data_quality_report(df)

# 🧼 Example cleanup: trim whitespace around names, upper-case blood types
raw_names = df['Patient_Name']
df['Patient_Name'] = raw_names.str.strip()
raw_blood_types = df['Blood_Type']
df['Blood_Type'] = raw_blood_types.str.upper()


🔍 Missing Values:
 Patient_ID        0
Age               0
Blood_Pressure    0
Blood_Type        0
Patient_Name      0
dtype: int64 

📊 Data Types:
 Patient_ID         int64
Age                int64
Blood_Pressure     int64
Blood_Type        object
Patient_Name      object
dtype: object 

🗂 Duplicate Rows: 0 

📈 Summary Stats:
        Patient_ID        Age  Blood_Pressure
count    5.000000   5.000000        5.000000
mean   103.000000  36.000000      119.000000
std      1.581139   9.617692        8.944272
min    101.000000  25.000000      110.000000
25%    102.000000  30.000000      110.000000
50%    103.000000  35.000000      120.000000
75%    104.000000  40.000000      125.000000
max    105.000000  50.000000      130.000000 

🔢 Unique values in Blood_Type:
 ['O+' 'A+' 'B+'] 

🔢 Unique values in Patient_Name:
 [' Alice ' 'Bob' 'Charlie' ' David' 'Eve '] 

