In [None]:
import pandas as pd

# ---------- Utility Functions ----------

def validate_columns(df, columns):
    """Check if required columns exist in the DataFrame.

    Args:
        df: DataFrame whose column labels are inspected.
        columns: A single column name, or an iterable of column names.

    Returns:
        True if every requested name is present in ``df.columns`` (this is
        vacuously True for an empty iterable), otherwise False.
    """
    # A bare string is one column name. Iterating it would test each
    # character against df.columns instead of the name itself.
    if isinstance(columns, str):
        columns = [columns]
    return all(col in df.columns for col in columns)

def calculate_completeness(df, columns):
    """Return the share of non-null cells across the given columns.

    The null fraction is computed per column and then averaged over the
    columns. Gives None when ``validate_columns`` reports that a requested
    column is absent from the DataFrame.
    """
    if validate_columns(df, columns):
        null_share_per_column = df[columns].isnull().mean()
        return 1 - null_share_per_column.mean()
    return None

def calculate_uniqueness(df, column):
    """Return the fraction of rows in ``column`` that are not repeats.

    A row counts as a repeat when ``Series.duplicated`` (default settings)
    flags it, i.e. its value already appeared earlier in the column.
    Gives None when the column is absent from the DataFrame.
    """
    if column in df.columns:
        repeat_flags = df[column].duplicated()
        return 1 - repeat_flags.mean()
    return None

def calculate_consistency(df, column, pattern=None):
    """Return a consistency score for one column.

    With a truthy ``pattern``, the score is the mean of
    ``Series.str.match(pattern)`` over the column. Otherwise it is the
    fraction of non-null values that are ``str`` instances. Gives None
    when the column is absent from the DataFrame.
    """
    if column not in df.columns:
        return None

    values = df[column]
    if not pattern:
        # No pattern supplied: only check that non-null entries are strings.
        def is_text(value):
            return isinstance(value, str)

        return values.dropna().apply(is_text).mean()

    match_flags = values.str.match(pattern)
    return match_flags.mean()

def quality_summary(df, metrics_config):
    """Compute data quality metrics for a dataset using config.

    ``metrics_config`` maps a metric name ('completeness', 'uniqueness' or
    'consistency') to the column argument handed to the matching
    calculator. Metric names outside that set are skipped. If a calculator
    raises, the metric's entry becomes an "Error: ..." string instead of a
    score.
    """
    calculators = {
        'completeness': calculate_completeness,
        'uniqueness': calculate_uniqueness,
        'consistency': calculate_consistency,
    }

    results = {}
    for name, target in metrics_config.items():
        compute = calculators.get(name)
        if compute is None:
            # Unrecognized metric names produce no entry in the summary.
            continue
        try:
            results[name] = compute(df, target)
        except Exception as exc:
            results[name] = f"Error: {str(exc)}"
    return results

# ---------- Dataset 1: Customer Data ----------

# Sample customer records: one email and one name are missing, and one
# email address appears twice.
customer_data = pd.DataFrame(dict(
    customer_id=[1, 2, 3, 4, 5],
    email=['a@example.com', 'b@example.com', None, 'd@example.com', 'd@example.com'],
    name=['Alice', 'Bob', 'Charlie', None, 'Eve'],
))

# Metric name -> column(s) that metric is evaluated on.
customer_config = dict(
    completeness=['email', 'name'],
    uniqueness='email',
    consistency='email',
)

customer_quality = quality_summary(df=customer_data, metrics_config=customer_config)

# ---------- Dataset 2: Sales Data ----------

# Sample sales transactions: one transaction date is missing.
sales_data = pd.DataFrame(dict(
    transaction_id=[1001, 1002, 1003, 1004, 1005],
    transaction_date=['2024-01-01', None, '2024-01-03', '2024-01-04', '2024-01-05'],
    amount=[100, 200, 300, 400, 500],
))

# Metric name -> column(s) that metric is evaluated on.
sales_config = dict(
    completeness=['transaction_date'],
    uniqueness='transaction_id',
    consistency='transaction_date',
)

sales_quality = quality_summary(df=sales_data, metrics_config=sales_config)

# ---------- Dataset 3: Employee Registry ----------

# Sample employee registry: one department is missing and one email
# address appears twice.
employee_data = pd.DataFrame(dict(
    employee_id=[1, 2, 3, 4, 5],
    department=['HR', 'IT', None, 'Finance', 'HR'],
    email=['x@corp.com', 'y@corp.com', 'z@corp.com', 'w@corp.com', 'x@corp.com'],
))

# Metric name -> column(s) that metric is evaluated on.
employee_config = dict(
    completeness=['department'],
    uniqueness='email',
    consistency='department',
)

employee_quality = quality_summary(df=employee_data, metrics_config=employee_config)

# ---------- Final Output ----------

# Print each dataset's heading followed by its metrics dictionary.
for _heading, _metrics in (
    ("Customer Data Quality Metrics:\n", customer_quality),
    ("\nSales Data Quality Metrics:\n", sales_quality),
    ("\nEmployee Registry Data Quality Metrics:\n", employee_quality),
):
    print(_heading, _metrics)


Customer Data Quality Metrics:
 {'completeness': np.float64(0.8), 'uniqueness': np.float64(0.8), 'consistency': np.float64(1.0)}

Sales Data Quality Metrics:
 {'completeness': np.float64(0.8), 'uniqueness': np.float64(1.0), 'consistency': np.float64(1.0)}

Employee Registry Data Quality Metrics:
 {'completeness': np.float64(0.8), 'uniqueness': np.float64(0.8), 'consistency': np.float64(1.0)}
