## Format Compliance for Email

**Description**: Check if emails in a list are valid based on a basic regex pattern.

## Format Compliance for Email

**Description**: Check if emails in a list are valid based on a basic regex pattern.

In [1]:
# Write your code from here
import re

# Fixture addresses: a mix of well-formed and malformed entries
emails = [
    'john.doe@example.com',     # dotted local part
    'jane_smith@company.net',   # underscore in local part
    'invalid-email@.com',       # domain starts with a dot
    'noatsymbol.com',           # no "@" at all
    'user@domain',              # domain has no dot-separated suffix
    'user@domain.co.uk',        # multi-label domain
]

# Basic regex pattern for email validation:
#   local part - one or more word characters, dots, or hyphens
#   "@"
#   domain     - one or more word characters, dots, or hyphens,
#                then a dot and a final run of word characters
# This is a shape check only; it does not implement the full address grammar.
email_pattern = re.compile(r'^[\w\.-]+@[\w\.-]+\.\w+$')

def validate_emails(email_list):
    """Map each entry of ``email_list`` to whether it matches ``email_pattern``.

    Args:
        email_list: Iterable of candidate addresses. Entries become dictionary
            keys, so they must be hashable.

    Returns:
        dict: ``{entry: bool}`` in first-seen order. Duplicate entries collapse
        into a single key. Non-string entries (e.g. ``None``) map to ``False``
        instead of raising ``TypeError``.
    """
    # fullmatch, not match: with match, the trailing "$" also succeeds just
    # before a final newline, so "a@b.com\n" would be reported as valid.
    return {
        email: isinstance(email, str) and email_pattern.fullmatch(email) is not None
        for email in email_list
    }

# Run the check over the sample addresses, then emit one verdict line per address
validation_results = validate_emails(emails)
for email in validation_results:
    is_valid = validation_results[email]
    print(f"{email}: {'Valid' if is_valid else 'Invalid'}")


john.doe@example.com: Valid
jane_smith@company.net: Valid
invalid-email@.com: Invalid
noatsymbol.com: Invalid
user@domain: Invalid
user@domain.co.uk: Valid


## Format Compliance for Phone Numbers

**Description**: Verify if the phone numbers follow a specific pattern.

In [2]:
import pandas as pd
import re

# -------------------------------------
# Step 1: Simulate a dataset with phone numbers
# -------------------------------------
data = {
    'customer_id': [1, 2, 3, 4, 5, 6],
    'customer_name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Evan', 'Fay'],
    'phone_number': [
        '123-456-7890',     # valid
        '(123) 456-7890',   # valid
        '1234567890',       # valid
        '123.456.7890',     # valid
        '123-45-67890',     # invalid
        'abc-def-ghij'      # invalid
    ]
}

df = pd.DataFrame(data)

# -------------------------------------
# Step 2: Define regex pattern for valid phone numbers
# -------------------------------------
# Layout: a 3-digit area code written either as "(ddd)" with an optional
# whitespace character after it, or as "ddd" with an optional "-" or "."
# after it; then 3 digits, an optional "-" or ".", and 4 digits.
# Each separator is optional and chosen independently, so mixed styles such
# as "123-456.7890" also pass.
pattern = re.compile(r'^(\(\d{3}\)\s?|\d{3}[-.]?)\d{3}[-.]?\d{4}$')

def is_valid_phone(value):
    """Return True when ``value`` is a string that matches ``pattern`` in full.

    Non-string values (None, NaN, numbers) are reported as invalid instead of
    raising ``TypeError``. ``fullmatch`` is used because ``match`` with a
    trailing ``$`` also accepts a string that ends in a newline.
    """
    return isinstance(value, str) and pattern.fullmatch(value) is not None

# -------------------------------------
# Step 3: Apply validation
# -------------------------------------
# astype(bool) keeps the column boolean even when the frame has no rows,
# so that a later "~" on the column keeps working.
df['is_valid'] = df['phone_number'].apply(is_valid_phone).astype(bool)

# -------------------------------------
# Step 4: Report
# -------------------------------------
# Headline figures: rows that passed, rows that failed, and the pass percentage
valid_count = df['is_valid'].sum()
invalid_count = len(df) - valid_count
valid_rate = valid_count / len(df) * 100

print(
    f"Total Phone Numbers     : {len(df)}",
    f"Valid Phone Numbers     : {valid_count}",
    f"Invalid Phone Numbers   : {invalid_count}",
    f"Format Compliance Rate  : {valid_rate:.1f}%",
    sep="\n",
)

# Either confirm a clean run, or list the failing rows and export them to CSV
invalid_entries = df[~df['is_valid']]
if invalid_entries.empty:
    print("\n All phone numbers are valid.")
else:
    print("\n Invalid Phone Numbers:")
    print(
        invalid_entries.loc[:, ['customer_id', 'customer_name', 'phone_number']]
        .to_string(index=False)
    )
    # The export keeps every column of the failing rows, including 'is_valid'
    invalid_entries.to_csv("invalid_phone_numbers.csv", index=False)
    print("\n Invalid phone numbers saved to 'invalid_phone_numbers.csv'")


Total Phone Numbers     : 6
Valid Phone Numbers     : 4
Invalid Phone Numbers   : 2
Format Compliance Rate  : 66.7%

 Invalid Phone Numbers:
 customer_id customer_name phone_number
           5          Evan 123-45-67890
           6           Fay abc-def-ghij

 Invalid phone numbers saved to 'invalid_phone_numbers.csv'


## Checking Date Format Validity

**Description**: Ensure the dates in a list adhere to a specific format (e.g., YYYY-MM-DD).

In [3]:
import pandas as pd
from datetime import datetime

# ---------------------------------------
# Step 1: Simulated dataset with dates
# ---------------------------------------
# Two well-formed dates, two impossible calendar dates, and two strings
# written in a different layout.
data = dict(
    record_id=list(range(1, 7)),
    event_date=[
        '2024-05-01',   # valid
        '2024-12-31',   # valid
        '2024-02-30',   # invalid - there is no Feb 30
        '05-01-2024',   # invalid format
        '2024/06/15',   # invalid format
        '2024-13-01',   # invalid - there is no month 13
    ],
)

df = pd.DataFrame(data)

# ---------------------------------------
# Step 2: Define date format to validate
# ---------------------------------------
# strptime-style directive string for YYYY-MM-DD
required_format = "%Y-%m-%d"

def validate_date(date_str, fmt):
    """Return True when ``date_str`` is a real date written exactly in ``fmt``.

    Args:
        date_str: Candidate value. Anything that is not a ``str`` (None, NaN,
            numbers) is reported as invalid rather than raising ``TypeError``.
        fmt: ``strptime``/``strftime`` format string, e.g. ``"%Y-%m-%d"``.

    Returns:
        bool: True only if the value parses under ``fmt`` and re-formatting
        the parsed date with ``fmt`` reproduces the input exactly.
    """
    if not isinstance(date_str, str):
        return False
    try:
        parsed = datetime.strptime(date_str, fmt)
    except ValueError:
        # Wrong layout, or a date that does not exist (Feb 30, month 13, ...).
        return False
    # strptime tolerates missing zero padding ("2024-5-1" parses under
    # "%Y-%m-%d"), so parsing alone does not prove format compliance.
    # Re-format and compare to enforce the exact layout.
    # NOTE(review): the round trip is also case-sensitive for textual
    # directives (%b, %A, %p), and years below 1000 may fail on platforms
    # whose strftime does not zero-pad %Y - confirm if such data is expected.
    return parsed.strftime(fmt) == date_str

# ---------------------------------------
# Step 3: Apply validation
# ---------------------------------------
# Each cell is passed to validate_date together with the required format
df['is_valid_date'] = df['event_date'].apply(validate_date, args=(required_format,))

# ---------------------------------------
# Step 4: Report Results
# ---------------------------------------
# Headline figures: rows that passed, rows that failed, and the pass percentage
valid_count = df['is_valid_date'].sum()
invalid_count = len(df) - valid_count
valid_rate = valid_count / len(df) * 100

print(
    f"Total Dates Checked     : {len(df)}",
    f"Valid Dates             : {valid_count}",
    f"Invalid Dates           : {invalid_count}",
    f"Date Format Compliance  : {valid_rate:.1f}%",
    sep="\n",
)

# Either confirm a clean run, or list the failing rows and export them to CSV
invalid_dates = df[~df['is_valid_date']]
if invalid_dates.empty:
    print("\n All dates are valid and properly formatted.")
else:
    print("\n Invalid Dates:")
    print(invalid_dates.loc[:, ['record_id', 'event_date']].to_string(index=False))
    # The export keeps every column of the failing rows, including 'is_valid_date'
    invalid_dates.to_csv("invalid_dates_report.csv", index=False)
    print("\n Invalid dates saved to 'invalid_dates_report.csv'")


Total Dates Checked     : 6
Valid Dates             : 2
Invalid Dates           : 4
Date Format Compliance  : 33.3%

 Invalid Dates:
 record_id event_date
         3 2024-02-30
         4 05-01-2024
         5 2024/06/15
         6 2024-13-01

 Invalid dates saved to 'invalid_dates_report.csv'
