## Data Quality Framework Implementation

**Description**: Implement a simple data quality measurement framework, based on ISO 8000 principles, that scores a dataset on key quality dimensions: completeness, uniqueness, consistency, accuracy, and timeliness.

In [None]:
# Conceptual data quality framework, written as runnable Python (pandas):
import pandas as pd
class DataQualityFramework:
    """Score a pandas DataFrame on common data quality dimensions.

    Each ``assess_*`` method computes a ratio (higher is better) and stores
    it in ``self.results`` under the dimension's name; ``report()`` returns
    that mapping.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        """Store the frame to assess and start with an empty result set."""
        self.df = df
        self.results = {}

    def assess_completeness(self) -> None:
        """Record, per column, the share of non-null values.

        The result is stored under ``'Completeness'`` as a pandas Series
        indexed by column name.
        """
        completeness = 1 - self.df.isnull().mean()
        self.results['Completeness'] = completeness

    def assess_uniqueness(self, subset=None) -> None:
        """Record the share of rows that do not repeat an earlier row.

        Args:
            subset: Column label(s) used to identify duplicates; ``None``
                compares entire rows.
        """
        # duplicated() flags every occurrence after the first, so the first
        # row of each duplicate group still counts as unique.
        uniqueness = 1 - self.df.duplicated(subset=subset).mean()
        self.results['Uniqueness'] = uniqueness

    def assess_consistency(self, column, valid_values) -> None:
        """Record the share of values in ``column`` found in ``valid_values``.

        Results are kept per column under ``'Consistency'``.
        """
        consistency = self.df[column].isin(valid_values).mean()
        self.results.setdefault('Consistency', {})[column] = consistency

    def assess_accuracy(self, column, validation_func) -> None:
        """Record the share of values in ``column`` accepted by a validator.

        Args:
            column: Column to validate.
            validation_func: Callable applied to each value; its results are
                averaged, so it should return a bool (or 0/1).

        Results are kept per column under ``'Accuracy'``.
        """
        accuracy = self.df[column].apply(validation_func).mean()
        self.results.setdefault('Accuracy', {})[column] = accuracy

    def assess_timeliness(self, column, max_age_days, reference_date=None) -> None:
        """Record the share of timestamps no older than ``max_age_days``.

        Args:
            column: Datetime column to check.
            max_age_days: Maximum allowed age, in days, relative to the
                reference date.
            reference_date: Date to measure age against (anything
                ``pd.Timestamp`` accepts). Defaults to today's local date.
                Pass a fixed value to get reproducible scores.

        The score is stored under ``'Timeliness'`` as a single value, so a
        later call for another column replaces the earlier result.
        """
        if reference_date is None:
            today = pd.Timestamp.now().normalize()
        else:
            # Truncate to midnight so an explicit reference behaves like the
            # date-only default.
            today = pd.Timestamp(reference_date).normalize()
        cutoff = today - pd.Timedelta(days=max_age_days)
        timeliness = (self.df[column] >= cutoff).mean()
        self.results['Timeliness'] = timeliness

    def report(self):
        """Return the dict of all scores recorded so far."""
        return self.results
# Small sample with deliberate defects: a repeated id, a missing name,
# a status outside the allowed vocabulary, and an email without '@'.
data = dict(
    id=[1, 2, 2, 4],
    name=['Alice', 'Bob', 'Bob', None],
    status=['active', 'inactive', 'active', 'archived'],
    email=['a@example.com', 'b@example.com', 'b@example.com', 'bademail'],
    last_updated=pd.to_datetime(
        ['2025-04-01', '2025-04-30', '2025-03-01', '2023-12-01']
    ),
)
df = pd.DataFrame(data)

# Run each dimension check in turn, then print the collected scores.
dq = DataQualityFramework(df)
dq.assess_completeness()
dq.assess_uniqueness(subset=['id'])
dq.assess_consistency(column='status', valid_values=['active', 'inactive'])
dq.assess_accuracy(
    column='email',
    validation_func=lambda value: '@' in str(value) and '.' in str(value),
)
dq.assess_timeliness(column='last_updated', max_age_days=60)
print(dq.report())
# Expected report for the sample data above, kept as a bare dict literal for
# reference only (evaluating it does not touch the framework or its results).
# NOTE(review): 'Timeliness' is measured against the current date, so 0.5 only
# holds around the time this was written; 'Completeness' is shown here as a
# plain dict, while assess_completeness stores a pandas Series.
{
  'Completeness': {'id': 1.0, 'name': 0.75, 'status': 1.0, 'email': 1.0, 'last_updated': 1.0},
  'Uniqueness': 0.75,
  'Consistency': {'status': 0.75},
  'Accuracy': {'email': 0.75},
  'Timeliness': 0.5
}

{'Completeness': id              1.00
name            0.75
status          1.00
email           1.00
last_updated    1.00
dtype: float64, 'Uniqueness': np.float64(0.75), 'Consistency': {'status': np.float64(0.75)}, 'Accuracy': {'email': np.float64(0.75)}, 'Timeliness': np.float64(0.5)}


{'Completeness': {'id': 1.0,
  'name': 0.75,
  'status': 1.0,
  'email': 1.0,
  'last_updated': 1.0},
 'Uniqueness': 0.75,
 'Consistency': {'status': 0.75},
 'Accuracy': {'email': 0.75},
 'Timeliness': 0.5}