## Data Quality Framework Implementation

**Description**: Implement a simple data quality measurement framework, based on ISO 8000 principles, that scores a dataset on four key dimensions: completeness, uniqueness, validity, and consistency.

In [1]:
# Data quality framework: runnable pandas implementation of completeness, uniqueness, validity, and consistency checks.
import pandas as pd
import numpy as np

# Sample dataset with deliberately planted quality problems, one per dimension.
data = {
    # id 5 appears twice -> uniqueness violation on the key column
    'id': [1, 2, 3, 4, 5, 5],

    # one missing name -> completeness violation
    'name': ['Alice', 'Bob', 'Charlie', 'David', None, 'Eve'],

    # 'charlie@example' has no dot in its domain -> validity violation
    'email': [
        'alice@example.com',
        'bob@example.com',
        'charlie@example',
        'david@example.com',
        'eve@example.com',
        'eve@example.com',
    ],

    # one missing age -> completeness violation (also fails the non-negative check)
    'age': [25, 30, 35, None, 40, 40],
}
df = pd.DataFrame(data)

# Define expected valid formats
def is_valid_email(email):
    """Return True when ``email`` has a plausible ``local@domain.tld`` shape.

    This is a lightweight structural check, not full RFC 5322 validation.
    An address is accepted only if it:

    * is a ``str`` (``None``, ``NaN`` and other non-strings are rejected),
    * contains exactly one ``@`` and no whitespace,
    * has a non-empty local part, and
    * has a domain made of at least two non-empty dot-separated labels
      (so ``'a@b'``, ``'a@.com'``, ``'a@b.'`` and ``'a@b..c'`` are rejected).

    Parameters
    ----------
    email : object
        Candidate value, typically one cell of an email column.

    Returns
    -------
    bool
    """
    if not isinstance(email, str):
        return False
    if email.count('@') != 1 or any(ch.isspace() for ch in email):
        return False

    local_part, domain = email.split('@')
    if not local_part:
        return False

    # An empty label means a leading, trailing, or doubled dot in the domain.
    labels = domain.split('.')
    return len(labels) >= 2 and all(labels)

# Data Quality Metrics
def calculate_completeness(df):
    """Compute, for every column, the fraction of cells that are not missing.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    pandas.Series
        Indexed by column name; each value is the share of non-null entries.
    """
    present_mask = df.notna()
    return present_mask.mean()

def calculate_uniqueness(df, key_cols):
    """Return the fraction of rows that repeat an earlier row on ``key_cols``.

    Note that, despite the name, this is the *duplicate* rate: 0.0 means every
    key is unique. Subtract the result from 1 to get a uniqueness score.

    Parameters
    ----------
    df : pandas.DataFrame
    key_cols : column label or list of labels
        Columns that together identify a row; passed as ``subset`` to
        ``DataFrame.duplicated``.

    Returns
    -------
    float
    """
    repeated_rows = df.duplicated(subset=key_cols)
    return repeated_rows.mean()

def calculate_validity(df):
    """Score format validity for the columns that have a validator.

    Currently only the ``email`` column is checked, using ``is_valid_email``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``email`` column.

    Returns
    -------
    dict
        ``{'email': fraction of rows whose email passes is_valid_email}``.
    """
    email_ok = df['email'].apply(is_valid_email)
    validity_scores = {}
    validity_scores['email'] = email_ok.mean()
    return validity_scores

def calculate_consistency(df):
    """Run the rule-based consistency checks and return their pass rates.

    The only rule at present is that ``age`` must be non-negative. A missing
    age does not satisfy the rule, so it counts as a failure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``age`` column.

    Returns
    -------
    dict
        ``{'age_non_negative': fraction of rows with age >= 0}``.
    """
    # fillna(False) covers nullable dtypes, where comparing a missing value yields NA.
    age_passes = df['age'].ge(0).fillna(False)
    return {'age_non_negative': age_passes.mean()}

def assess_data_quality(df, key_cols=None):
    """Assemble a data quality report covering four dimensions.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset to assess. Must contain the columns the individual checks
        read (``email`` and ``age``) as well as every column in ``key_cols``.
    key_cols : list of column labels, optional
        Columns that together identify a row, used for the uniqueness score.
        Defaults to ``['id']``.

    Returns
    -------
    dict
        Keys, in order:

        * ``'completeness'`` - dict of column name -> fraction non-null
        * ``'uniqueness'``   - float, proportion of rows that do not repeat
          an earlier row on ``key_cols``
        * ``'validity'``     - dict of check name -> pass rate
        * ``'consistency'``  - dict of check name -> pass rate
    """
    if key_cols is None:
        key_cols = ['id']

    return {
        'completeness': calculate_completeness(df).to_dict(),
        # calculate_uniqueness returns the duplicate rate; invert it to get
        # the proportion of unique rows.
        'uniqueness': 1 - calculate_uniqueness(df, key_cols=key_cols),
        'validity': calculate_validity(df),
        'consistency': calculate_consistency(df),
    }

# Run the assessment on the sample dataset and print one section per dimension.
quality_report = assess_data_quality(df)
print("Data Quality Assessment Report:")
for category, metrics in quality_report.items():
    print(f"\n{category.upper()}:")
    # Categories that hold a single number rather than a dict are printed
    # under the label 'overall'.
    if isinstance(metrics, dict):
        metric_rows = metrics.items()
    else:
        metric_rows = [('overall', metrics)]
    for key, value in metric_rows:
        print(f"  {key}: {value:.2f}")

Data Quality Assessment Report:

COMPLETENESS:
  id: 1.00
  name: 0.83
  email: 1.00
  age: 0.83

UNIQUENESS:
  overall: 0.83

VALIDITY:
  email: 0.83

CONSISTENCY:
  age_non_negative: 0.83
