### Metadata Management for Data Quality
**Description**: Store and use metadata to manage data quality in a pipeline.

**Steps**:
1. Load metadata
2. Load data
3. Use metadata to validate data quality
4. Show valid data


In [None]:
# write your code from here

import pandas as pd

# Step 1: Load metadata (this could be from a config, JSON, or DB — here we use a dictionary)
# Metadata defines expected column names, types, value ranges, etc.
metadata = {
    "columns": {
        "id": {"type": "int", "required": True},
        "name": {"type": "str", "required": True},
        "age": {"type": "int", "min": 18, "max": 99, "required": True},
    }
}

# Step 2: Load data (simulating input to the pipeline)
data = {
    "id": [1, 2, 3, 4],
    "name": ["Alice", "Bob", None, "Dave"],
    "age": [25, 17, 35, 100],
}

df = pd.DataFrame(data)
print("Raw Data:")
print(df)


# Step 3: Use metadata to validate data quality
def validity_mask(frame, column_rules):
    """Return a boolean Series marking the rows of *frame* that satisfy *column_rules*.

    Args:
        frame: DataFrame to validate.
        column_rules: Mapping of column name to a rule dict. Recognised keys:
            ``"required"`` (bool, default False), ``"type"`` (``"int"`` or
            ``"str"``; other values are not type-checked), ``"min"`` and
            ``"max"`` (inclusive bounds).

    Returns:
        A boolean Series indexed like *frame*; True means the row passed
        every rule.

    Raises:
        KeyError: If a column marked as required is absent from *frame*.
    """
    mask = pd.Series(True, index=frame.index, dtype=bool)

    for col, rules in column_rules.items():
        required = rules.get("required", False)

        if col not in frame.columns:
            if required:
                raise KeyError(f"Required column missing from data: {col!r}")
            # An optional column that is absent has nothing to validate.
            continue

        series = frame[col]
        present = series.notna()
        ok = pd.Series(True, index=frame.index, dtype=bool)

        # Range checks run on `comparable`; for numeric types this is the
        # coerced column so numeric-looking strings do not raise TypeError.
        comparable = series
        expected_type = rules.get("type")
        if expected_type == "int":
            numeric = pd.to_numeric(series, errors="coerce")
            # A null elsewhere in the column makes pandas store ints as
            # floats, so accept any whole-number value rather than checking
            # the dtype. NaN and inf fail the modulo test.
            ok &= numeric.notna() & (numeric % 1 == 0)
            comparable = numeric
        elif expected_type == "str":
            ok &= series.map(lambda value: isinstance(value, str)).astype(bool)

        if "min" in rules:
            ok &= comparable >= rules["min"]
        if "max" in rules:
            ok &= comparable <= rules["max"]

        if required:
            mask &= present & ok
        else:
            # Optional values may be null; only present values are checked.
            mask &= ~present | ok

    return mask


# Apply all rules with a single mask instead of re-filtering per rule.
df = df[validity_mask(df, metadata["columns"])]

# Step 4: Show valid data
print("\nValidated Data (According to Metadata):")
print(df)
