## Find Conflicting Values Across Datasets

**Description**: You have two datasets, `crm_customers.csv` and `erp_customers.csv`. Find the customers whose "email" values conflict between the two datasets.

In [None]:


import pandas as pd
def find_conflicting_emails(crm_file, erp_file):
    """
    Finds customers with conflicting "email" information across two datasets
    based on a common identifier ("customer_id").

    Only customer IDs present in both files are compared (inner join). A row
    counts as a conflict when the two email values differ, including the case
    where exactly one side is missing. Rows where the email is missing in
    BOTH files are not treated as conflicts.

    Args:
        crm_file (str): Path to the CSV file containing CRM customer data.
        erp_file (str): Path to the CSV file containing ERP customer data.

    Returns:
        pandas.DataFrame or None:
            - A DataFrame with columns customer_id, email_crm and email_erp,
              one row per conflicting customer.
            - An empty DataFrame if the files share no customer IDs or if no
              conflicts are found.
            - None if a file is not found or a required column
              ("customer_id" or "email") is missing from either file.
    """
    try:
        crm_df = pd.read_csv(crm_file)
        erp_df = pd.read_csv(erp_file)
    except FileNotFoundError as e:
        print(f"Error: One or both files not found: {e}")
        return None
    if 'customer_id' not in crm_df.columns or 'email' not in crm_df.columns:
        print("Error: 'customer_id' or 'email' column missing in CRM data.")
        return None
    if 'customer_id' not in erp_df.columns or 'email' not in erp_df.columns:
        print("Error: 'customer_id' or 'email' column missing in ERP data.")
        return None
    # Inner join: customers that exist in only one system are dropped here.
    merged_df = pd.merge(crm_df, erp_df, on='customer_id', suffixes=('_crm', '_erp'))
    if merged_df.empty:
        print("No matching customer IDs found between the CRM and ERP datasets.")
        return pd.DataFrame()
    crm_email = merged_df['email_crm']
    erp_email = merged_df['email_erp']
    # NaN != NaN evaluates to True, so a plain inequality would report
    # customers whose email is blank in both files as conflicts. Mask those
    # rows out explicitly.
    both_missing = crm_email.isna() & erp_email.isna()
    conflict_mask = (crm_email != erp_email) & ~both_missing
    conflicting_emails_df = merged_df[conflict_mask]
    if not conflicting_emails_df.empty:
        print("Customers with conflicting email information:")
        return conflicting_emails_df[['customer_id', 'email_crm', 'email_erp']]
    else:
        print("No conflicting email information found across the datasets for matching customers.")
        return pd.DataFrame()
# Demo: build two small sample datasets, write them to CSV, and run the check.
# IDs 4 (CRM only) and 6 (ERP only) have no counterpart in the other dataset;
# ID 2 is the only shared customer whose email differs between the two.
crm_data = {
    'customer_id': [1, 2, 3, 4, 5],
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'email': [
        'alice@example.com',
        'bob@example.net',
        'charlie@example.org',
        'david@example.com',
        'eve@example.net',
    ],
}
erp_data = {
    'customer_id': [1, 2, 3, 6, 5],
    'product': ['A', 'B', 'C', 'D', 'E'],
    'email': [
        'alice@example.com',
        'bob@example.com',
        'charlie@example.org',
        'frank@example.org',
        'eve@example.net',
    ],
}

crm_df = pd.DataFrame(crm_data)
erp_df = pd.DataFrame(erp_data)

# Write both frames to the working directory so the function can read them
# back from disk, without the pandas index column.
for frame, path in ((crm_df, 'crm_customers.csv'), (erp_df, 'erp_customers.csv')):
    frame.to_csv(path, index=False)

conflicts_report = find_conflicting_emails('crm_customers.csv', 'erp_customers.csv')
# None signals a load/validation failure; anything else is printed.
if conflicts_report is not None:
    print(conflicts_report)

Customers with conflicting email information:
   customer_id        email_crm        email_erp
1            2  bob@example.net  bob@example.com
