# In [None]:
import pandas as pd
import numpy as np

# Define expected columns and their valid ranges or formats
expected_columns = ['Name', 'Age', 'Email', 'JoinDate']
valid_age_range = (0, 120)
email_pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$'


def compute_quality_metrics(df, now=None):
    """Build a per-column data-quality report for ``df``.

    The report has one row per entry of ``expected_columns`` and four
    percentage columns:

    * ``Completeness (%)`` - share of non-null entries.
    * ``Uniqueness (%)`` - distinct non-null values divided by the total
      number of rows.
    * ``Validity (%)`` - ``Age`` inside ``valid_age_range`` and ``Email``
      matching ``email_pattern``; NaN for the other columns.
    * ``Consistency (%)`` - ``JoinDate`` not later than ``now``; NaN for the
      other columns.

    Null or unparseable values count as failing the validity and
    consistency checks, so every percentage uses the full row count as its
    denominator.

    Args:
        df: DataFrame containing at least the columns in
            ``expected_columns``.
        now: Reference timestamp for the future-date check. Defaults to
            ``pd.Timestamp.now()``.

    Returns:
        A DataFrame indexed by ``expected_columns`` holding the metrics.

    Raises:
        KeyError: If any expected column is absent from ``df``.
    """
    missing = [col for col in expected_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Dataset is missing expected columns: {missing}")

    subset = df[expected_columns]

    # Completeness: Percentage of non-null entries per column
    completeness = subset.notnull().mean() * 100

    # Uniqueness: Percentage of unique entries per column
    uniqueness = subset.nunique() / len(df) * 100

    # Validity: Percentage of entries conforming to expected formats or ranges.
    # Coerce first so non-numeric ages count as invalid instead of raising.
    ages = pd.to_numeric(df['Age'], errors='coerce')
    valid_age = ages.between(*valid_age_range).mean() * 100

    # Cast to the string dtype so an all-null (float) column still has a .str
    # accessor; na=False scores null emails as invalid, matching how null
    # ages and dates are treated.
    emails = df['Email'].astype('string')
    valid_email = emails.str.match(email_pattern, na=False).mean() * 100

    # Consistency: Example check - 'JoinDate' should not be in the future.
    # Unparseable dates become NaT, which compares False and so fails the check.
    current_date = pd.Timestamp.now() if now is None else pd.Timestamp(now)
    join_dates = pd.to_datetime(df['JoinDate'], errors='coerce')
    consistent_joindate = (join_dates <= current_date).mean() * 100

    # Key the single-column metrics by name so they cannot drift out of step
    # with the order of expected_columns; unlisted columns align to NaN.
    validity = pd.Series({'Age': valid_age, 'Email': valid_email}, dtype='float64')
    consistency = pd.Series({'JoinDate': consistent_joindate}, dtype='float64')

    # Compile the metrics into a DataFrame
    return pd.DataFrame({
        'Completeness (%)': completeness,
        'Uniqueness (%)': uniqueness,
        'Validity (%)': validity,
        'Consistency (%)': consistency,
    }, index=expected_columns)


# Runs when executed as a script or notebook cell, but not on import.
if __name__ == "__main__":
    # Load your dataset
    df = pd.read_csv('your_dataset.csv')  # Replace with your actual data source

    quality_metrics = compute_quality_metrics(df)

    print(quality_metrics)