In [None]:
#| default_exp test

# Validation 

This notebook tests the output of my transformation.<br>
<br>

I'm going to use the `pytest` library to help.

In [None]:
#| export
import pandas as pd
import re
from virtuous_interview.utils import valid_email, validate_us_phone_number
from virtuous_interview.solution_pd import postal_code_pattern
from nbdev.showdoc import *

In [None]:
#| exports
# Read the three transformed CSV outputs; blank cells are loaded as '' rather
# than NaN so that the checks below can compare against the empty string.
final_contacts, final_contact_methods, final_gifts = (
    pd.read_csv(csv_path).fillna('')
    for csv_path in (
        'data/final_contacts.csv',
        'data/final_contact_methods.csv',
        'data/final_gifts.csv',
    )
)

# Helpers

In [None]:
#| export
def missing_no_required_fields(df: pd.DataFrame, columns: list):
    """Assert that none of the given required columns contains a missing value.

    A value counts as missing when it is the empty string ``''`` or a null
    (``NaN`` / ``None``).

    Args:
        df: Dataframe to check.
        columns: Names of the columns that must be fully populated.

    Raises:
        AssertionError: For the first column in ``columns`` that holds a
            missing value; the message names that column.
    """
    for column in columns:
        values = df[column]
        has_missing = bool(((values == '') | values.isna()).any())
        # Use `not` rather than `~`: bitwise inversion of a plain Python bool
        # yields -1 / -2, which are both truthy and would never fail the assert.
        assert not has_missing, f"Missing values found in column '{column}'"


<br>

In [None]:
#| export
def email_is_valid(s):
    """Check an email value, treating the empty string as acceptable.

    Any non-empty value is handed to `valid_email` and its result is
    returned unchanged.
    """
    return True if s == '' else valid_email(s)

<br>

In [None]:
#| export
def number_is_valid(s):
    """Check a US phone number value, treating the empty string as acceptable.

    A non-empty value is rejected when `validate_us_phone_number` returns ''
    for it, and accepted otherwise.
    """
    if s == '':
        return True
    checked = validate_us_phone_number(s)
    return not (checked == '')

<br>

In [None]:
#| export
def zip_is_valid(p):
    """Validate a postal code against `postal_code_pattern`.

    The value is converted to text first. A trailing '.0' (what a code looks
    like after being parsed as a float) is removed before matching. An empty
    value is accepted.

    Args:
        p: Postal code value; any type accepted by `str()`.

    Returns:
        True when the value is empty or matches `postal_code_pattern` from
        the start of the string, otherwise False.
    """
    s = str(p)
    # Strip the float artefact only at the end of the value. Removing '.0'
    # anywhere would turn malformed input such as '123.045' into '12345'
    # and let it pass validation.
    if s.endswith('.0'):
        s = s[:-2]
    if s == '':
        return True
    return bool(re.match(postal_code_pattern, s))

<br>

# Testing Column Names

Contacts Column Names

In [None]:
#| export
def test_contacts_columns():
    """Test that the final_contacts dataframe has the correct columns."""
    assert final_contacts.columns.tolist() == [
    'LegacyContactId', 'LegacyIndividualId', 'ContactType', 'ContactName',
    'FirstName', 
    'LastName', 'SecondaryLegacyIndividualId', 'SecondaryFirstName',
    'SecondaryLastName', 'HomePhone', 'HomeEmail', 'Address1', 
    'City', 'State', 'PostalCode', 'IsPrivate', 'IsDeceased',
    ]

<br>

In [None]:
# Run the contacts column check when this cell executes.
test_contacts_columns()

Gift Column Names

In [None]:
#| export
def test_gifts_columns():
    """Validate the columns of the final gifts dataframe"""
    assert final_gifts.columns.tolist() == ['LegacyContactId', 'LegacyGiftId', 'GiftType', 'GiftDate',
           'GiftAmount', 'Notes', 'CreditCardType', 'Project1Code',
           'Project2Code', 'LegacyPledgeID']

<br>

In [None]:
# Run the gifts column check when this cell executes.
test_gifts_columns()

Contact Method Column Names

In [None]:
#| export
def test_contact_method_columns():
    """Validate the columns of the final_contact_methods dataframe"""
    assert final_contact_methods.columns.tolist() == ['LegacyContactId', 'Type', 'Value']

<br>

In [None]:
# Run the contact-method column check when this cell executes.
test_contact_method_columns()

# Validating Contacts Table

## Validating Required Fields Are Not Missing

In [None]:
#| export
def test_contact_required_fields():
    """Validate that all required fields are present in the final_contacts dataframe."""
    missing_no_required_fields(final_contacts, ['LegacyContactId', 'LegacyIndividualId', 'ContactType', 'FirstName', 'LastName'])

<br>

In [None]:
# Run the contacts required-field check when this cell executes.
test_contact_required_fields()

## Validating ContactType

In [None]:
#| export
def test_contacts_contact_type():
    """Validates that the ContactType column only contains the values 'Household' and 'Organization'"""
    assert final_contacts.ContactType.isin(['Household', 'Organization']).all()

<br>

In [None]:
# Run the ContactType check when this cell executes.
test_contacts_contact_type()

## Validating Email

In [None]:
#| export
def test_contact_email():
    """Validates that all emails are valid"""
    assert final_contacts.HomeEmail.apply(email_is_valid).all()

<br>

In [None]:
# Run the email check when this cell executes.
test_contact_email()

## Validating Phone Number

In [None]:
#| export
def test_contact_phone_number_valid():
    """Validate that all phone numbers are valid"""
    assert final_contacts.HomePhone.apply(number_is_valid).all()

<br>

In [None]:
# Run the phone number check when this cell executes.
test_contact_phone_number_valid()

## Validating Postal

In [None]:
#| export
def test_contact_valid_zip():
    """Validates that all contacts have a valid zip code"""
    assert final_contacts.PostalCode.apply(zip_is_valid).all()

<br>

In [None]:
# Run the postal code check when this cell executes.
test_contact_valid_zip()

## Validating Deceased

In [None]:
#| export
def test_contact_deceased():
    """Validate that all contacts are either deceased or not deceased."""
    assert final_contacts.IsDeceased.isin([True, False]).all()

<br>

In [None]:
# Run the IsDeceased check when this cell executes.
test_contact_deceased()

# Validating Gifts

## Validating Required Fields Are Not Missing

In [None]:
#| export
def test_gift_required_fields():
    """Validate that all required fields are present in the gift data."""
    missing_no_required_fields(final_gifts, ['LegacyContactId', 'LegacyGiftId', 'GiftType', 'GiftDate', 'GiftAmount', 'LegacyPledgeID'])

<br>

In [None]:
# Run the gifts required-field check when this cell executes.
test_gift_required_fields()

## Validating GiftType

In [None]:
#| export
def test_gift_type():
    """Test that all gift types are valid"""
    assert final_gifts.GiftType.isin(['Cash', 'Check', 'Credit', 'Other',  'Reversing Transaction']).all()

<br>

In [None]:
# Run the GiftType check when this cell executes.
test_gift_type()

## Validating GiftAmount

In [None]:
#| export
def test_gift_amount_is_float():
    """Test that the GiftAmount column is a float"""
    assert final_gifts['GiftAmount'].dtype == 'float64'

<br>

In [None]:
# Run the GiftAmount dtype check when this cell executes.
test_gift_amount_is_float()

## Validating CreditCardType

In [None]:
#| export
def test_credit_card_type():
    """Test that all credit card types are valid."""
    assert final_gifts.CreditCardType.isin(['Visa', 'Mastercard', 'AMEX', 'Discover', '']).all()

<br>

In [None]:
# Run the CreditCardType check when this cell executes.
test_credit_card_type()

## Validating PledgeId

In [None]:
#| export
def test_gift_pledge_id():
    """Test that the number of gifts is equal to the number of unique pledge IDs"""
    assert len(final_gifts) == len(final_gifts.LegacyPledgeID.unique())

<br>

In [None]:
# Run the LegacyPledgeID uniqueness check when this cell executes.
test_gift_pledge_id()

# Validating ContactMethods

## Validating Required Fields Are Not Missing

In [None]:
#| export
def test_contact_method_required_fields():
    
    missing_no_required_fields(final_contact_methods, ['LegacyContactId', 'Type', 'Value'])

<br>

In [None]:
# Run the contact-method required-field check when this cell executes.
test_contact_method_required_fields()

## Validating Type

In [None]:
#| export
def test_contact_method_type():
    """Test that all contact methods are one of the three types"""
    assert final_contact_methods.Type.isin(['HomePhone', 'HomeEmail', 'Fax']).all()

<br>

In [None]:
# Run the contact-method Type check when this cell executes.
test_contact_method_type()

In [None]:
# | hide
import nbdev

In [None]:
# | hide
# Ask nbdev to export this notebook; presumably this writes the `#| export`
# cells to the module named by `default_exp` — confirm against the nbdev config.
nbdev.nbdev_export('04_Test.ipynb')