### Metadata Management for Data Quality
**Description**: Store and use metadata to manage data quality in a pipeline.

**Steps**:
1. Load metadata
2. Load data
3. Use metadata to validate data quality
4. Show valid data


In [None]:
# write your code from here
import pandas as pd
import numpy as np
# Column-level quality rules, keyed by column name. Each rule declares the
# expected dtype and nullability, plus optional inclusive numeric bounds
# ('min_value'/'max_value') or a whitelist of values ('allowed_values').
metadata = {
    'customer_id': {
        'type': 'int64',
        'nullable': False,
        'min_value': 1000,
        'max_value': 9999,
    },
    'customer_name': {
        'type': 'object',
        'nullable': False,
    },
    'age': {
        'type': 'int64',
        'nullable': True,
        'min_value': 18,
        'max_value': 99,
    },
    'city': {
        'type': 'object',
        'nullable': False,
        'allowed_values': [
            'New York',
            'Los Angeles',
            'Chicago',
            'Houston',
            'Miami',
        ],
    },
    'purchase_amount': {
        'type': 'float64',
        'nullable': False,
        'min_value': 0.0,
        'max_value': 10000.0,
    },
    'is_vip': {
        'type': 'bool',
        'nullable': True,
    },
}

# Sample records. Several rows deliberately break the rules above (missing
# name, out-of-range age/amount, unknown city, null values).
data = {
    'customer_id': [
        1001, 1002, 1003, 1004, 1005,
        1006, 1007, 1008, 1009, 1010,
    ],
    'customer_name': [
        'Alice', 'Bob', None, 'David', 'Eve',
        'Frank', 'Grace', 'Henry', 'Ivy', 'Jack',
    ],
    'age': [
        25, 30, 15, 40, np.nan,
        60, 75, 100, 22, 33,
    ],
    'city': [
        'New York', 'Los Angeles', 'Paris', 'Houston', 'Miami',
        'London', 'Chicago', 'New York', 'Dallas', 'Los Angeles',
    ],
    'purchase_amount': [
        50.50, 120.00, -10.00, np.nan, 500.00,
        12000.00, 75.25, 300.00, 10.00, 9999.99,
    ],
    'is_vip': [
        True, False, True, False, None,
        True, False, True, False, True,
    ],
}

df = pd.DataFrame(data)
def _fits_dtype(value, scalar_type):
    """Return True when *value* converts to *scalar_type* without changing."""
    try:
        return bool(scalar_type(value) == value)
    except (TypeError, ValueError, OverflowError):
        return False


def validate_data_with_metadata(dataframe, metadata):
    """Return the rows of *dataframe* that satisfy every rule in *metadata*.

    Each metadata entry maps a column name to a dict of rules:

    * ``'type'`` (optional): dtype the column is expected to have. The column
      is cast to it when the cast is possible and does not destroy nulls;
      otherwise the column is left as-is and values are checked one by one.
    * ``'nullable'`` (optional, default ``True``): when false, rows with a
      null in this column are rejected.
    * ``'min_value'`` / ``'max_value'`` (optional, independent): inclusive
      bounds; rows outside them are rejected.
    * ``'allowed_values'`` (optional): rows whose value is not in this
      collection are rejected.

    Null values are judged only by the ``'nullable'`` rule; the type, range
    and allowed-values checks skip them. A column listed in *metadata* but
    absent from *dataframe* invalidates every row. Diagnostics are printed to
    stdout.

    Args:
        dataframe: Frame to validate. It is not modified.
        metadata: Mapping of column name to rule dict as described above.

    Returns:
        A DataFrame holding only the valid rows (original index labels kept),
        with successfully cast columns carrying their declared dtype.

    Raises:
        TypeError: If a ``'type'`` entry is not a dtype pandas understands
            and the column could not be cast to it.
    """
    # Work on a private copy so the dtype casts below never leak to the caller.
    dataframe = dataframe.copy()
    valid_rows = pd.Series(True, index=dataframe.index)

    for col_name, col_meta in metadata.items():
        if col_name not in dataframe.columns:
            print(f"ERROR:Column'{col_name}'missing.")
            valid_rows = pd.Series(False, index=dataframe.index)
            continue

        series = dataframe[col_name]
        # Capture nulls BEFORE casting: e.g. astype('bool') turns None into
        # False, which would hide nulls from the nullable check.
        null_rows = series.isna()
        coerce_for_range = False

        expected = col_meta.get('type')
        if expected is not None and series.dtype != expected:
            try:
                converted = series.astype(expected)
            except (ValueError, TypeError):
                converted = None
            if converted is not None and (converted.isna() == null_rows).all():
                dataframe[col_name] = converted
                series = converted
            else:
                # Either the cast raised (e.g. NaN in an int64 column) or it
                # would have silently replaced nulls. Keep the raw column and
                # judge each non-null value on whether it fits the dtype.
                print(f"WARNING:Column'{col_name}'typecastfailed.Somevaluesmaybemarkedinvalid.")
                target = pd.api.types.pandas_dtype(expected)
                fits = series.map(
                    lambda value: _fits_dtype(value, target.type)
                ).astype(bool)
                valid_rows &= fits | null_rows
                coerce_for_range = pd.api.types.is_numeric_dtype(target)

        if not col_meta.get('nullable', True) and null_rows.any():
            print(f"ERROR:Column'{col_name}'hasnullvaluesbutisnotnullable.")
            valid_rows &= ~null_rows

        lower = col_meta.get('min_value')
        upper = col_meta.get('max_value')
        if lower is not None or upper is not None:
            # After a failed numeric cast the column may hold non-numeric
            # objects; coerce them to NaN so the comparison cannot raise
            # (those rows were already rejected by the type check).
            values = pd.to_numeric(series, errors='coerce') if coerce_for_range else series
            invalid_range_rows = pd.Series(False, index=dataframe.index)
            if lower is not None:
                invalid_range_rows |= values < lower
            if upper is not None:
                invalid_range_rows |= values > upper
            if invalid_range_rows.any():
                low_text = '-inf' if lower is None else lower
                high_text = 'inf' if upper is None else upper
                print(f"ERROR:Column'{col_name}'hasvaluesoutsiderange[{low_text}-{high_text}].")
                valid_rows &= ~invalid_range_rows

        allowed = col_meta.get('allowed_values')
        if allowed is not None:
            # Nulls are the nullable rule's business, not a whitelist miss.
            invalid_values_rows = ~series.isin(allowed) & ~null_rows
            if invalid_values_rows.any():
                print(f"ERROR:Column'{col_name}'hasvaluesnotinallowedset:{allowed}.")
                valid_rows &= ~invalid_values_rows

    return dataframe[valid_rows]
# Validate a copy so the original frame stays available for comparison.
valid_df = validate_data_with_metadata(df.copy(), metadata)

# Print the same two reports (head + info) for the raw and the filtered frame.
for label, frame in (("Original", df), ("Valid", valid_df)):
    print(f"\n{label}DataFrameHead:")
    print(frame.head())
    print(f"\n{label}DataFrameInfo:")
    frame.info()

valid_count = len(valid_df)
total_count = len(df)
print(f"\nNumberofValidRows:{valid_count}out of{total_count}.")

SyntaxError: invalid syntax (662044575.py, line 27)