# In [None]:
import pandas as pd

# Customer data-quality walkthrough: count missing contact fields, list
# customers that share an email address, and print a short summary report.

# Columns whose completeness is audited in Part 1 and in the final report.
CONTACT_COLUMNS = ['Email', 'Phone']


def duplicate_mask(frame, column='Email', keep='first'):
    """Return a boolean mask marking rows whose ``column`` value is repeated.

    Missing values are never flagged. ``DataFrame.duplicated`` treats missing
    entries as equal to each other, so without the ``notna`` guard two
    customers who both lack an email would be reported as duplicates of one
    another, mixing up "missing" and "duplicate" in the report.

    Args:
        frame: DataFrame that contains ``column``.
        column: Name of the column checked for repeated values.
        keep: Forwarded to ``DataFrame.duplicated``. ``'first'`` / ``'last'``
            leave one occurrence unmarked; ``False`` marks every occurrence.

    Returns:
        Boolean Series aligned with ``frame.index``.
    """
    has_value = frame[column].notna()
    repeated = frame.duplicated(subset=[column], keep=keep)
    return has_value & repeated


# Part 1: Load a Dataset & Check Missing Values
# Simulate the 'customer_data.csv' dataset as an example
data = {
    'CustomerID': [1, 2, 3, 4, 5],
    'Email': ['john.doe@example.com', 'jane.smith@example.com', None, 'alice.jones@example.com', 'john.doe@example.com'],
    'Phone': ['1234567890', None, '2345678901', '3456789012', '1234567890'],
    'Name': ['John Doe', 'Jane Smith', 'Mike Taylor', 'Alice Jones', 'John Doe'],
}
df = pd.DataFrame(data)

# Per-column count of missing entries in the contact columns
missing_values = df[CONTACT_COLUMNS].isnull().sum()

# Print missing values for the 'Email' and 'Phone' columns
print("Missing values in 'Email' and 'Phone' columns:")
print(missing_values)

# Part 2: Identify Duplicates & Inconsistencies
# keep=False marks every row of a repeated email, so all candidate duplicate
# customer records are listed side by side (not just the later occurrences).
duplicate_emails = df[duplicate_mask(df, 'Email', keep=False)]

# Print duplicate emails (if any)
print("\nDuplicate Emails in the Customer Dataset:")
print(duplicate_emails[['CustomerID', 'Email']])

# Part 3: Generate a Data Quality Report
# 1. Missing Values Summary for 'Email' and 'Phone'
#    (same figures as Part 1; reused instead of recomputed)
missing_values_summary = missing_values

# 2. Number of Duplicate Emails
#    Default keep='first' counts only the extra occurrences, i.e. how many
#    rows would be removed if each email were kept once.
duplicate_email_count = duplicate_mask(df, 'Email').sum()

# 3. Data Quality Summary (Customer Dataset)
report = {
    "Missing Values": missing_values_summary,
    "Duplicate Emails": duplicate_email_count,
}

# Print the Data Quality Report
print("\nData Quality Report for Customer Dataset:")
print("-------------------------------------------------")
print("Missing Values:")
print(report["Missing Values"])
print("\nNumber of Duplicate Emails:", report["Duplicate Emails"])