## Ensuring Consistency in Multi-source Data Integration

**Description**: Validate the integration of two datasets, `products_A.csv` and `products_B.csv`, and ensure that the product "category" information is consistent between them.

In [4]:
# Write your code from here
import pandas as pd
import os

def load_csv_with_validation(filepath, required_columns):
    """
    Load a CSV file and validate its existence, readability, and schema.

    Args:
        filepath: Path of the CSV file to read.
        required_columns: Column names that must be present. Any iterable of
            names (set, list, tuple) is accepted; a single string is treated
            as one column name.

    Returns:
        The loaded pandas DataFrame.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
        ValueError: If the file cannot be read/parsed, or if any required
            column is missing.
        TypeError: If 'product_id' is present but not numeric, or 'category'
            is present but not of string/object type.
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"File '{filepath}' does not exist.")

    try:
        df = pd.read_csv(filepath)
    except Exception as e:
        # Keep the original exception chained so the root cause stays visible.
        raise ValueError(f"Error reading '{filepath}': {e}") from e

    # Normalize to a set so callers are not forced to pass a set themselves.
    if isinstance(required_columns, str):
        required = {required_columns}
    else:
        required = set(required_columns)

    missing = required.difference(df.columns)
    if missing:
        raise ValueError(
            f"Missing columns in '{filepath}': {sorted(map(str, missing))}. "
            f"Required: {sorted(map(str, required))}, Found: {list(df.columns)}"
        )

    # Validate data types only for columns that actually exist, so a caller
    # that does not require them gets no spurious KeyError.
    if 'product_id' in df.columns and not pd.api.types.is_numeric_dtype(df['product_id']):
        raise TypeError("Column 'product_id' must be numeric.")
    if 'category' in df.columns and not pd.api.types.is_string_dtype(df['category']):
        raise TypeError("Column 'category' must be of string/object type.")

    return df


def check_category_consistency(df1, df2):
    """
    Compare 'category' values for matching 'product_id' in two dataframes.

    Args:
        df1: DataFrame with 'product_id', 'product_name' and 'category'.
        df2: DataFrame with at least 'product_id' and 'category'.

    Returns:
        DataFrame with columns 'product_id', 'product_name_a', 'category_a'
        and 'category_b', one row per matched product whose categories
        differ. Products whose category is missing in BOTH inputs are not
        reported; a category missing on only one side is.
    """
    # Merge only the columns that are needed, so the result schema does not
    # depend on which other columns the two inputs happen to share.
    left = df1[['product_id', 'product_name', 'category']]
    right = df2[['product_id', 'category']]
    merged = pd.merge(left, right, on='product_id', suffixes=('_a', '_b'))
    merged = merged.rename(columns={'product_name': 'product_name_a'})

    cat_a = merged['category_a']
    cat_b = merged['category_b']
    # NaN != NaN is True, so exclude rows where both sides are missing.
    both_missing = cat_a.isna() & cat_b.isna()
    inconsistent = merged[(cat_a != cat_b) & ~both_missing]
    return inconsistent[['product_id', 'product_name_a', 'category_a', 'category_b']]


# --- Script entry point: load both product files and report category mismatches ---
if __name__ == "__main__":
    required_cols = {'product_id', 'product_name', 'category'}
    try:
        # Load A first, then B; a failure on either aborts the comparison.
        df_a, df_b = (
            load_csv_with_validation(csv_path, required_cols)
            for csv_path in ("products_A.csv", "products_B.csv")
        )
        inconsistencies = check_category_consistency(df_a, df_b)
    except Exception as err:
        # Top-level boundary: report any failure instead of a traceback.
        print(f"❌ Error: {err}")
    else:
        try:
            if not inconsistencies.empty:
                print("⚠️ Category inconsistencies found:\n")
                print(inconsistencies)
            else:
                print("✅ All categories match between the two datasets.")
        except Exception as err:
            print(f"❌ Error: {err}")


⚠️ Category inconsistencies found:

   product_id product_name_a   category_a   category_b
1         102          Mouse  Accessories  Peripherals
