## Ensuring Consistency in Multi-source Data Integration

**Description**: Validate the integration of two datasets, `products_A.csv` and `products_B.csv`, by ensuring that each product's `category` value is consistent across both sources.

In [1]:
import pandas as pd

# Sample data for products_A.csv
data_a = {
    "product_id": [101, 102, 103, 104],
    "product_name": ["Pen", "Notebook", "Pencil", "Eraser"],
    "category": ["Stationery", "Stationery", "Stationery", "Stationery"]
}
df_a = pd.DataFrame(data_a)

# Sample data for products_B.csv (with one inconsistent category for product_id=103)
data_b = {
    "product_id": [101, 102, 103, 104],
    "product_name": ["Pen", "Notebook", "Pencil", "Eraser"],
    "category": ["Stationery", "Stationery", "Office Supply", "Stationery"]
}
df_b = pd.DataFrame(data_b)


def find_unmatched_products(left: pd.DataFrame, right: pd.DataFrame,
                            key: str = "product_id") -> pd.DataFrame:
    """Return the rows of ``left`` whose ``key`` value does not occur in ``right``.

    The inner merge used for the category comparison keeps only keys present
    in both frames, so products that exist in just one source would otherwise
    disappear from the validation without any report.

    Parameters
    ----------
    left : pd.DataFrame
        Frame whose rows are checked.
    right : pd.DataFrame
        Frame that supplies the set of known key values.
    key : str, default "product_id"
        Column holding the join key; must exist in both frames.

    Returns
    -------
    pd.DataFrame
        Subset of ``left`` (original index and columns) with no match in ``right``.
    """
    return left[~left[key].isin(right[key])]


# Step 1: Merge datasets on product_id.
# The default inner join keeps only products that appear in both sources.
merged_df = df_a.merge(df_b, on="product_id", suffixes=('_A', '_B'))

# Step 2: Identify category mismatches among the products present in both sources
inconsistencies = merged_df[merged_df["category_A"] != merged_df["category_B"]]

# Step 3: Identify products present in only one source (invisible to the inner merge)
only_in_a = find_unmatched_products(df_a, df_b)
only_in_b = find_unmatched_products(df_b, df_a)

# Step 4: Display results
print("Inconsistent Category Entries Between A and B:")
print(inconsistencies[["product_id", "product_name_A", "category_A", "category_B"]])

# One-sided products are reported only when they exist, so a fully matched
# pair of datasets produces the category report alone.
if not only_in_a.empty:
    print("\nProducts Present Only in A:")
    print(only_in_a)
if not only_in_b.empty:
    print("\nProducts Present Only in B:")
    print(only_in_b)

Inconsistent Category Entries Between A and B:
   product_id product_name_A  category_A     category_B
2         103         Pencil  Stationery  Office Supply
