In [None]:

import pandas as pd
import numpy as np

def calculate_score(value, total):
    """Return the ratio ``value / total`` rounded to two decimal places.

    When ``total`` is not greater than zero the ratio is undefined, so 0.0
    is returned instead of dividing.
    """
    if total > 0:
        return round(value / total, 2)
    return 0.0

def assess_customer_data(df):
    """Print completeness, uniqueness and consistency scores for customer data.

    Args:
        df: DataFrame containing the columns 'Name', 'Email', 'Country',
            'Date of Birth' and 'Phone Number'.

    The three scores and their rounded average ("Overall DQS") are printed
    to stdout; nothing is returned.

    * Completeness: share of required fields that hold a value. Null values
      and blank / whitespace-only strings both count as missing.
    * Uniqueness: number of distinct emails divided by the number of records.
    * Consistency: share of records whose country, after upper-casing and
      stripping, equals "USA".
    """
    print("=== Customer Dataset Quality Metrics ===")
    required_columns = ['Name', 'Email', 'Country', 'Date of Birth', 'Phone Number']
    total_records = df.shape[0]
    # Derive the field count from the column list so the two cannot drift apart.
    total_fields = total_records * len(required_columns)

    # notnull() alone treats '' as a present value, which inflates the score
    # for data where a missing entry is stored as an empty string. Count a
    # field as filled only when it is non-null and not blank.
    filled_fields = 0
    for column in required_columns:
        values = df[column]
        is_filled = values.notnull() & values.astype(str).str.strip().ne("")
        filled_fields += is_filled.sum()
    completeness = calculate_score(filled_fields, total_fields)

    unique_emails = df['Email'].nunique()
    uniqueness = calculate_score(unique_emails, total_records)

    standardized_country = df['Country'].str.upper().str.strip()
    consistency = calculate_score((standardized_country == "USA").sum(), total_records)

    dqs = round((completeness + uniqueness + consistency) / 3, 2)
    print(f"Completeness Score: {completeness}")
    print(f"Uniqueness Score: {uniqueness}")
    print(f"Consistency Score: {consistency}")
    print(f"Overall DQS: {dqs}")

def assess_shop_data(df):
    """Print accuracy, timeliness and integrity scores for online-shop orders.

    Args:
        df: DataFrame containing the columns 'Price', 'Order Delay (Days)'
            and 'Customer Exists'.

    Each score is a fraction of the row count:

    * Accuracy: rows whose 'Price' is greater than zero.
    * Timeliness: rows whose 'Order Delay (Days)' is at most one.
    * Integrity: sum of the 'Customer Exists' column.

    The scores and their rounded average are printed; nothing is returned.
    """
    print("=== Online Shop Dataset Quality Metrics ===")

    positive_price = df['Price'] > 0
    accuracy = calculate_score(df.loc[positive_price].shape[0], df.shape[0])

    on_time = df['Order Delay (Days)'] <= 1
    timeliness = calculate_score(df.loc[on_time].shape[0], df.shape[0])

    known_customers = df['Customer Exists'].sum()
    integrity = calculate_score(known_customers, df.shape[0])

    average = (accuracy + timeliness + integrity) / 3
    dqs = round(average, 2)

    # One print call with a newline separator emits the same four lines.
    print(
        f"Accuracy Score: {accuracy}",
        f"Timeliness Score: {timeliness}",
        f"Integrity Score: {integrity}",
        f"Overall DQS: {dqs}",
        sep="\n",
    )
def assess_financial_data(df):
    """Print validity, precision and accessibility scores for transactions.

    Args:
        df: DataFrame containing the columns 'Transaction Date', 'Amount'
            and 'Accessible'.

    Each score is a fraction of the row count:

    * Validity: 'Transaction Date' values that ``pd.to_datetime`` can parse.
    * Precision: 'Amount' values whose ``str()`` form has exactly two
      characters after the last decimal point.
    * Accessibility: sum of the 'Accessible' column.

    The scores and their rounded average are printed; nothing is returned.
    """

    def chars_after_point(amount):
        # Number of characters following the last '.' in str(amount),
        # or -1 when the text contains no '.' at all.
        text = str(amount)
        dot_at = text.rfind('.')
        if dot_at == -1:
            return -1
        return len(text) - 1 - dot_at

    print("=== Financial Dataset Quality Metrics ===")
    row_count = df.shape[0]

    # errors='coerce' turns unparseable dates into NaT, which notnull() excludes.
    parsed_dates = pd.to_datetime(df['Transaction Date'], errors='coerce')
    validity = calculate_score(parsed_dates.notnull().sum(), row_count)

    two_decimal_amounts = df['Amount'].apply(chars_after_point).eq(2).sum()
    precision = calculate_score(two_decimal_amounts, row_count)

    accessible_rows = df['Accessible'].sum()
    accessibility = calculate_score(accessible_rows, row_count)

    average = (validity + precision + accessibility) / 3
    dqs = round(average, 2)

    # One print call with a newline separator emits the same four lines.
    print(
        f"Validity Score: {validity}",
        f"Precision Score: {precision}",
        f"Accessibility Score: {accessibility}",
        f"Overall DQS: {dqs}",
        sep="\n",
    )
# Sample customer records: two rows share an email address, the country is
# spelled three different ways, and one date of birth and one phone number
# are blank strings.
customer_df = pd.DataFrame.from_dict({
    'Customer ID': [1, 2, 3, 4],
    'Name': ['John', 'Alice', 'Bob', 'Alice'],
    'Email': ['john@example.com', 'alice@example.com', 'bob@example.com', 'alice@example.com'],
    'Country': ['USA', 'US', 'USA', 'usa'],
    'Date of Birth': ['1985-06-21', '', '1990-09-09', '1988-01-01'],
    'Phone Number': ['1234567890', '9876543210', '', '9876543210'],
})

# Sample shop orders: the second order has a zero price, a three-day delay
# and no matching customer.
shop_df = pd.DataFrame.from_dict({
    'Order ID': [101, 102, 103],
    'Price': [20.5, 0.0, 15.0],
    'Order Delay (Days)': [0, 3, 1],
    'Customer Exists': [True, False, True],
})

# Sample transactions: the second row has an unparseable date and is marked
# as not accessible.
financial_df = pd.DataFrame.from_dict({
    'Transaction ID': [1001, 1002, 1003],
    'Transaction Date': ['2024-04-30', 'invalid-date', '2024-03-15'],
    'Amount': [123.45, 200, 10.9],
    'Accessible': [True, False, True],
})

# Print the quality report for each sample dataset in turn.
assess_customer_data(customer_df)
assess_shop_data(shop_df)
assess_financial_data(financial_df)





=== Customer Dataset Quality Metrics ===
Completeness Score: 1.0
Uniqueness Score: 0.75
Consistency Score: 0.75
Overall DQS: 0.83
=== Online Shop Dataset Quality Metrics ===
Accuracy Score: 0.67
Timeliness Score: 0.67
Integrity Score: 0.67
Overall DQS: 0.67
=== Financial Dataset Quality Metrics ===
Validity Score: 0.67
Precision Score: 0.33
Accessibility Score: 0.67
Overall DQS: 0.56
