# In [None]:
import pandas as pd

# Fields to check for consistency
fields_to_check = ['email', 'name', 'category']


def detect_conflicts(merged, fields):
    """Flag, per row and per field, where the two sources disagree.

    Args:
        merged: DataFrame holding an ``id`` column plus, for every entry in
            *fields*, the columns ``<field>_df1`` and ``<field>_df2``.
        fields: Iterable of base field names to compare.

    Returns:
        DataFrame with the ``id`` column and one boolean
        ``<field>_conflict`` column per field, indexed like *merged*.

    Raises:
        KeyError: If ``id`` or one of the suffixed columns is absent.
    """
    flags = {'id': merged['id']}
    for field in fields:
        left = merged[f'{field}_df1']
        right = merged[f'{field}_df2']
        # A plain `!=` reports missing-vs-missing as "different", so rows
        # where both sources are empty would be flagged as conflicts even
        # though the sources agree. A value present on only one side is
        # still treated as a conflict.
        both_missing = left.isna() & right.isna()
        flags[f'{field}_conflict'] = (left != right) & ~both_missing
    return pd.DataFrame(flags)


def resolve_conflicts(merged, fields):
    """Build the resolved frame by trusting the ``_df1`` value of each field.

    Args:
        merged: DataFrame holding ``id`` and a ``<field>_df1`` column for
            every entry in *fields*.
        fields: Iterable of base field names to resolve.

    Returns:
        New DataFrame with the columns ``id`` followed by *fields*; the
        input frame is not modified.

    Raises:
        KeyError: If ``id`` or one of the ``<field>_df1`` columns is absent.
    """
    resolved = merged.copy()
    for field in fields:
        # dataset_1 always wins; a missing dataset_1 value stays missing
        # (use the `_df2` column here instead if dataset_2 is preferred).
        resolved[field] = resolved[f'{field}_df1']
    return resolved[['id'] + list(fields)]


# The names below are still bound at module level when the file is executed
# as a script or notebook cell; the guard only keeps the file I/O from
# running when the helpers are imported.
if __name__ == "__main__":
    # Load both datasets
    df1 = pd.read_csv("dataset_1.csv")
    df2 = pd.read_csv("dataset_2.csv")

    # Merge on a common key (e.g., 'id'). pd.merge defaults to an inner
    # join, so ids present in only one dataset are dropped here.
    merged = pd.merge(df1, df2, on='id', suffixes=('_df1', '_df2'))

    # Detect conflicts
    conflicts = detect_conflicts(merged, fields_to_check)

    # Get rows with any conflict
    conflict_columns = [col for col in conflicts.columns if col.endswith('_conflict')]
    conflict_mask = conflicts[conflict_columns].any(axis=1)
    conflicting_records = merged[conflict_mask]

    print("Conflicting records:")
    print(conflicting_records[
        ['id']
        + [f'{field}_df1' for field in fields_to_check]
        + [f'{field}_df2' for field in fields_to_check]
    ])

    # Optional: Resolve conflicts by trusting one source (dataset_1)
    resolved = resolve_conflicts(merged, fields_to_check)

    # Save cleaned/resolved dataset
    resolved.to_csv("resolved_dataset.csv", index=False)