In [1]:
import pandas as pd
import re

# Sample data including date, age, and email columns
data = {
    "date": ["01/12/2022", "2022-05-20", "March 3, 2023", "2023/04/15"],
    "age": [25, -3, 40, 0],
    "email": ["john.doe@example.com", "invalid-email", "alice@mail.com", "bob@website"]
}
df = pd.DataFrame(data)

# 13. Date Format Standardization to YYYY-MM-DD
df['date'] = pd.to_datetime(df['date'], errors='coerce').dt.strftime('%Y-%m-%d')

# 14. Numeric Constraints Enforcement (age > 0)
df['age'] = df['age'].apply(lambda x: x if x > 0 else None)

# 15. String Format Checks: Validate email format using regex
email_pattern = re.compile(r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$")

def validate_email(email):
    return email if email_pattern.match(email) else None

df['email'] = df['email'].apply(validate_email)

print(df)


         date   age                 email
0  2022-01-12  25.0  john.doe@example.com
1         NaN   NaN                  None
2         NaN  40.0        alice@mail.com
3         NaN   NaN                  None


In [2]:
import pandas as pd
import re

# Sample data with inconsistent date formats, phone numbers, and mixed case text
data = {
    "date": ["01/12/22", "2022-5-20", "March 3, 2023", "15-Apr-2023", "2023/04/15"],
    "phone": ["123-456-7890", "1234567890", "(123)456-7890", "123.456.7890", "+1 123 456 7890"],
    "name": ["Alice Smith", "bob JOHNSON", "Charlie Brown", "diana WHITE", "Eve Black"]
}
df = pd.DataFrame(data)

# 16. Standardizing Date Formats (all to YYYY-MM-DD)
df['date'] = pd.to_datetime(df['date'], errors='coerce').dt.strftime('%Y-%m-%d')

# 17. Standardize phone numbers to (123) 456-7890 format
def standardize_phone(phone):
    digits = re.sub(r'\D', '', phone)  # remove all non-digit characters
    if len(digits) == 11 and digits.startswith('1'):
        digits = digits[1:]  # remove leading country code if US number
    if len(digits) == 10:
        return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
    return None  # invalid phone number

df['phone'] = df['phone'].apply(standardize_phone)

# 18. Convert all text entries in 'name' to uppercase
df['name'] = df['name'].str.upper()

print(df)


         date           phone           name
0  2022-01-12  (123) 456-7890    ALICE SMITH
1  2022-05-20  (123) 456-7890    BOB JOHNSON
2  2023-03-03  (123) 456-7890  CHARLIE BROWN
3  2023-04-15  (123) 456-7890    DIANA WHITE
4  2023-04-15  (123) 456-7890      EVE BLACK


  df['date'] = pd.to_datetime(df['date'], errors='coerce').dt.strftime('%Y-%m-%d')
