# Patronage Relationships: End-to-End Test Suite
This notebook demonstrates and tests the reusable Spark functions in the `patronage_relationships` package using sample data. Use this as a template for validation and onboarding.

In [0]:
# Import required libraries and the reusable package
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType
from datetime import datetime
# NOTE(review): the helper functions called in the cells below (filter_psa,
# join_psa_institution, build_json_correlation_table, ...) are not imported by
# name anywhere in this notebook, so they presumably arrive through this
# wildcard import. Consider importing them explicitly so their origin is visible.
from patronage_relationships.core import *

# Reuse the active SparkSession if one exists; otherwise create a new one.
# Every fixture DataFrame in this notebook is built through this `spark` handle.
spark = SparkSession.builder.getOrCreate()

In [0]:
# Sample DataFrames for testing
# PSA correlation fixture. The schema comes first so that each tuple in
# psa_data can be read column by column against it. All columns are nullable
# (the default), which the None in the ICN2 row relies on.
psa_schema = (
    StructType()
    .add("MVIPersonICN", StringType())
    .add("MVITreatingFacilityInstitutionSID", IntegerType())
    .add("TreatingFacilityPersonIdentifier", StringType())
    .add("ActiveMergedIdentifier", StringType())
    .add("CorrelationModifiedDateTime", TimestampType())
)

psa_data = [
    # ICN1 at institution 1001: one row dated 2024-01-01, followed by the same
    # row dated 2024-01-02 twice (the last two tuples are exact duplicates).
    ("ICN1", 1001, "12345", "Active", datetime(2024, 1, 1, 10, 0)),
    ("ICN1", 1001, "12345", "Active", datetime(2024, 1, 2, 10, 0)),
    ("ICN1", 1001, "12345", "Active", datetime(2024, 1, 2, 10, 0)),
    # ICN1 again with the same person identifier, but at institution 1002.
    ("ICN1", 1002, "12345", "Active", datetime(2024, 1, 2, 10, 0)),
    # ICN2 shares institution 1002 and has a null ActiveMergedIdentifier.
    ("ICN2", 1002, "54321", None, datetime(2024, 1, 1, 11, 0)),
    # ICN3 at institution 1003 under two different person identifiers:
    # an Inactive row from 2024 and an Active row from 2025.
    ("ICN3", 1003, "99999", "Inactive", datetime(2024, 1, 1, 12, 0)),
    ("ICN3", 1003, "12345", "Active", datetime(2025, 1, 1, 12, 0)),
]

psa_df = spark.createDataFrame(psa_data, schema=psa_schema)

# Person fixture: exactly one row for each ICN that appears in psa_data
# (ICN1, ICN2, ICN3), each with its own status code and ingestion timestamp.
person_schema = (
    StructType()
    .add("MVIPersonICN", StringType())
    .add("ICNStatus", StringType())
    .add("calc_IngestionTimestamp", TimestampType())
)

person_data = [
    ("ICN1", "A", datetime(2024, 1, 2, 10, 0)),
    ("ICN2", "B", datetime(2024, 1, 1, 11, 0)),
    ("ICN3", "C", datetime(2024, 1, 1, 12, 0)),
]

person_df = spark.createDataFrame(person_data, schema=person_schema)

# Institution fixture: the SIDs 1001-1003 are the same values used in the
# MVITreatingFacilityInstitutionSID column of psa_data, each paired with an
# institution code.
institution_schema = (
    StructType()
    .add("MVIInstitutionSID", IntegerType())
    .add("InstitutionCode", StringType())
)

institution_data = [
    (1001, "200CORP"),
    (1002, "200DOD"),
    (1003, "200VETS"),
]

institution_df = spark.createDataFrame(institution_data, schema=institution_schema)

In [0]:
# filter_psa: apply it to the raw PSA fixture and print the result.
filtered_psa = filter_psa(psa_df)
# Spelled-out defaults of show(): at most 20 rows, long cell values truncated.
filtered_psa.show(n=20, truncate=True)

In [0]:
# filter_person: apply it to the raw person fixture and print the result.
filtered_person = filter_person(person_df)
# Spelled-out defaults of show(): at most 20 rows, long cell values truncated.
filtered_person.show(n=20, truncate=True)

In [0]:
# filter_institution: apply it to the raw institution fixture and print the result.
filtered_institution = filter_institution(institution_df)
# Spelled-out defaults of show(): at most 20 rows, long cell values truncated.
filtered_institution.show(n=20, truncate=True)

In [0]:
# join_psa_institution: feed it the two filtered frames produced by the
# filter_psa and filter_institution cells, then print the joined result.
joined = join_psa_institution(filtered_psa, filtered_institution)
# Spelled-out defaults of show(): at most 20 rows, long cell values truncated.
joined.show(n=20, truncate=True)

In [0]:
# find_duplicate_relationships: called on the joined frame; its return value is
# unpacked into two DataFrames, which are printed one after the other.
person_institution_dups, institution_tfpi_dups = find_duplicate_relationships(joined)
# Spelled-out defaults of show(): at most 20 rows, long cell values truncated.
person_institution_dups.show(n=20, truncate=True)
institution_tfpi_dups.show(n=20, truncate=True)

In [0]:
# remove_duplicate_relationships: takes the joined frame plus both duplicate
# frames from the find_duplicate_relationships cell; its return value is
# unpacked into clean_corr and dup_corr, which are printed in that order.
clean_corr, dup_corr = remove_duplicate_relationships(
    joined,
    person_institution_dups,
    institution_tfpi_dups,
)
# Spelled-out defaults of show(): at most 20 rows, long cell values truncated.
clean_corr.show(n=20, truncate=True)
dup_corr.show(n=20, truncate=True)

In [0]:
# get_latest_correlation_date: called on the joined frame (the one built
# before duplicate removal); the result is printed.
latest_date = get_latest_correlation_date(joined)
# Spelled-out defaults of show(): at most 20 rows, long cell values truncated.
latest_date.show(n=20, truncate=True)

In [0]:
# build_correlation_lookup_table: combines the latest-date frame, the
# de-duplicated correlations and the filtered person frame, then prints the result.
lookup_table = build_correlation_lookup_table(
    latest_date,
    clean_corr,
    filtered_person,
)
# Spelled-out defaults of show(): at most 20 rows, long cell values truncated.
lookup_table.show(n=20, truncate=True)

In [0]:
# build_json_correlation_table: same three inputs as the lookup-table cell.
json_table = build_json_correlation_table(
    latest_date,
    clean_corr,
    filtered_person,
)
# truncate=False prints every cell value in full; n=20 is show()'s default row limit.
json_table.show(n=20, truncate=False)