In [10]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Create a sample DataFrame
# The records are written one customer per row and then transposed into
# the column-oriented dict that pandas consumes; np.nan marks a missing value.
_sample_columns = ('CustomerID', 'Name', 'Age', 'Email', 'PurchaseAmount')
_sample_rows = [
    (101, 'Alice', 25, 'alice@example.com', 250.0),
    (102, 'Bob', np.nan, 'bob@example.com', 150.0),
    (103, 'Charlie', 35, 'charlie@example.com', np.nan),
    (104, 'David', 45, 'david@example.com', 300.0),
    (105, 'Eve', np.nan, 'eve@example.com', 200.0),
]
data = {
    column_name: list(column_values)
    for column_name, column_values in zip(_sample_columns, zip(*_sample_rows))
}

df = pd.DataFrame(data)

# Display the original DataFrame (heading and table, one per line)
print("Original DataFrame:", df, sep="\n")

# Define metadata for each column
# Each spec row reads: (column, description, data type, constraints).
# The rows are expanded into one dict per column, keyed by _metadata_fields.
_metadata_fields = ('Description', 'DataType', 'Constraints')
_column_specs = [
    ('CustomerID', 'Unique identifier for each customer', 'Integer', 'Not Null, Unique'),
    ('Name', 'Name of the customer', 'String', 'Not Null'),
    ('Age', 'Age of the customer', 'Integer', 'Nullable, Age > 0'),
    ('Email', 'Email address of the customer', 'String', 'Not Null, Unique'),
    ('PurchaseAmount', 'Total purchase amount by the customer', 'Float', 'Nullable, PurchaseAmount >= 0'),
]
metadata = {
    column_name: dict(zip(_metadata_fields, attributes))
    for column_name, *attributes in _column_specs
}

# Function to display metadata
def display_metadata(metadata_dict):
    """Print a readable listing of per-column metadata.

    Args:
        metadata_dict: Mapping of column name to a mapping of attribute
            name to attribute value.

    Output is a heading, then for each column a blank line, a
    ``Column: <name>`` line, and one indented ``<attribute>: <value>``
    line per attribute, in the mappings' iteration order.
    """
    print("\nMetadata Information:")
    for column_name, attributes in metadata_dict.items():
        print("\nColumn: {}".format(column_name))
        for label, text in attributes.items():
            print("  {}: {}".format(label, text))

# Display the metadata
display_metadata(metadata)

# Data Quality Checks based on metadata

# Check for missing values: count the null cells in every column
missing_per_column = df.isna().sum()
print("\nMissing Values per Column:", missing_per_column, sep="\n")

# Check for unique constraints
# Only columns whose metadata mentions 'Unique' are inspected; missing
# values are dropped first so they never count as duplicates.
print("\nChecking Unique Constraints:")
for column_name, spec in metadata.items():
    if 'Unique' not in spec['Constraints']:
        continue
    present_values = df[column_name].dropna()
    if present_values.is_unique:
        outcome = "Passed (All values are unique)"
    else:
        outcome = "Failed (Duplicate values found)"
    print(f"  {column_name}: {outcome}")

# Check for data type consistency
#
# Every non-null cell of a column is tested against the 'DataType' label
# declared in the metadata. The test is value-based rather than a plain
# issubclass() on the Python type because:
#   * pandas stores an integer column that contains NaN as float64, so a
#     nullable 'Integer' column holds floats such as 25.0; these are accepted
#     as long as they are whole numbers;
#   * NumPy scalars (np.int64, np.float32, ...) are not all subclasses of the
#     built-in int/float;
#   * bool is a subclass of int but is not a valid 'Integer' value.


def _is_integer_value(value):
    """Return True for an integer, or a float holding a whole number."""
    if pd.api.types.is_integer(value):
        return True
    return pd.api.types.is_float(value) and float(value).is_integer()


def _is_string_value(value):
    """Return True when the value is a text string."""
    return isinstance(value, str)


# Built once, outside the loop: metadata 'DataType' label -> per-value test.
type_validators = {
    'Integer': _is_integer_value,
    'Float': pd.api.types.is_float,
    'String': _is_string_value,
}

print("\nData Type Consistency Check:")
for column, meta in metadata.items():
    expected_type = meta['DataType']
    validator = type_validators.get(expected_type)
    if validator is None:
        print(f"  {column}: Skipped (Unknown expected data type)")
        continue
    # Missing values are governed by the Nullable/Not Null constraint,
    # not by the data type, so they are excluded here.
    if all(validator(value) for value in df[column].dropna()):
        print(f"  {column}: Passed (Data type matches {expected_type})")
    else:
        print(f"  {column}: Failed (Data type does not match {expected_type})")

# Check for value constraints
# Each rule: (constraint text to look for, test that flags violating cells,
# failure message, success message). Rules are applied in this order to
# every column whose constraint string contains the rule's text.
value_rules = (
    (
        'Age > 0',
        lambda series: series <= 0,
        "Failed (Contains non-positive ages)",
        "Passed (All ages are positive)",
    ),
    (
        'PurchaseAmount >= 0',
        lambda series: series < 0,
        "Failed (Contains negative purchase amounts)",
        "Passed (All purchase amounts are non-negative)",
    ),
)

print("\nValue Constraints Check:")
for column_name, spec in metadata.items():
    declared = spec['Constraints']
    for rule_text, violates, failure_note, success_note in value_rules:
        if rule_text not in declared:
            continue
        # NaN compares False, so missing values never count as violations.
        verdict = failure_note if violates(df[column_name]).any() else success_note
        print(f"  {column_name}: {verdict}")

Original DataFrame:
   CustomerID     Name   Age                Email  PurchaseAmount
0         101    Alice  25.0    alice@example.com           250.0
1         102      Bob   NaN      bob@example.com           150.0
2         103  Charlie  35.0  charlie@example.com             NaN
3         104    David  45.0    david@example.com           300.0
4         105      Eve   NaN      eve@example.com           200.0

Metadata Information:

Column: CustomerID
  Description: Unique identifier for each customer
  DataType: Integer
  Constraints: Not Null, Unique

Column: Name
  Description: Name of the customer
  DataType: String
  Constraints: Not Null

Column: Age
  Description: Age of the customer
  DataType: Integer
  Constraints: Nullable, Age > 0

Column: Email
  Description: Email address of the customer
  DataType: String
  Constraints: Not Null, Unique

Column: PurchaseAmount
  Description: Total purchase amount by the customer
  DataType: Float
  Constraints: Nullable, PurchaseAmoun