# Introduction

The following notebook orchestrates the migration of ADPDX Accounts into Salesforce.


# Order of Loading

1. Vicariates
1. Organizations
1. Religious Parents
1. Religious Communities
1. Religious Superiors
1. Contacts
1. Contact > Register Entries
1. Contact > Education Affiliations
1. Contact > Ecclesial Affiliations
1. Affiliations



# Setup Enviro


In [None]:
# !conda install -y simple-salesforce
# !conda install -y email_validator
# !conda install -y python-dotenv
# !conda install import-ipynb

In [None]:
import pandas as pd
print(pd.__version__)

In [None]:
# enviro setup

import pandas as pd
import numpy as np

from datetime import datetime
now = datetime.now()

from simple_salesforce import Salesforce

In [None]:
# import environment variables (SF login credentials)
from dotenv import load_dotenv
import os

load_dotenv()

In [None]:
# Global Variables { run: "auto", vertical-output: true, display-mode: "both" }

target_enviro = "adpdx_devpro" # @param {type:"string"}

# The `run_upserts` variable controls whether or not upserts to Salesforce are executed when the notebook is run.
run_upserts = "True" # @param ["True", "False"]

In [None]:
load_dotenv()

# ADPDX dev_pro credentials
sf_user = os.getenv('ADPDX_QA_USER')
print(sf_user)
sf_password = os.getenv('ADPDX_QA_PASS')
print(sf_password)
sf_token = os.getenv('ADPDX_QA_TOKEN')
print(sf_token)

# instantiate a SF session object
sf = Salesforce(domain='test', username=sf_user, password=sf_password, security_token=sf_token)

## UDFs


In [None]:
# General notebook UDFs

import json
import csv
from datetime import datetime
from simple_salesforce import Salesforce

# Job ID Incrementer
def update_job_id(file_name):
    # Open the file in read mode and get the current job ID
    with open(file_name, 'r') as file:
        current_job_id = int(file.readline())

    # Increment the job ID
    new_job_id = current_job_id + 1

    # Open the file in write mode and update the job ID
    with open(file_name, 'w') as file:
        file.write(str(new_job_id))

    # Return the new job ID
    return new_job_id


def concat_columns(df, columns, new_column, separator='_'):
    """
    Concatenates the values from specified columns into a single string
    with the specified separator and populates a new column in the DataFrame.

    Args:
    - df: pandas DataFrame
    - columns: list of column names to concatenate
    - new_column: name of the new column to be created
    - separator: separator to use between concatenated values (default is '_')

    Returns:
    - Updated pandas DataFrame with the new column
    """
    df[new_column] = df[columns].astype(str).apply(lambda x: separator.join(x), axis=1)
    return df


def convert_non_serializables(data):
    """Convert non-serializable objects to serializable formats."""
    for key, value in data.items():
        try:
            if isinstance(value, (datetime, date)):
                data[key] = value.isoformat()
            elif isinstance(value, float) and np.isnan(value):
                data[key] = None
            elif pd.isna(value):
                data[key] = None
            elif isinstance(value, (int, bool, str, bool)):
                data[key] = value
            else:
                data[key] = str(value)  # Convert other types to string
        except Exception as e:
            print(f"Error processing key: {key}, value: {value}, error: {e}")
    return data

In [None]:
# Query, merge data with SF data  

import pandas as pd
from simple_salesforce import Salesforce
from simple_salesforce.exceptions import SalesforceMalformedRequest, SalesforceError

def find_salesforce_record_id(sf, df, column_to_search, sf_object_name, sf_field_name, new_column_name, match_behavior='first'):
    """
    Find Salesforce record IDs for a DataFrame column and add a new column with the Salesforce record IDs.

    Parameters:
    sf (Salesforce): The Salesforce connection instance.
    df (pd.DataFrame): The pandas DataFrame containing data.
    column_to_search (str): The column name in the DataFrame to search against Salesforce.
    sf_object_name (str): The Salesforce object name (e.g., 'Contact').
    sf_field_name (str): The field name in Salesforce to match.
    new_column_name (str): The name for the new DataFrame column to hold Salesforce record IDs.
    match_behavior (str): Behavior when multiple matches found ('first' or 'alert').

    Returns:
    pd.DataFrame: The original DataFrame with the new column containing Salesforce record IDs.

    Example usage:
    df_contact_staging = find_salesforce_record_id(sf, df_contact_staging, 'Link_to_Religious_Community', 'Contact', 'Archdpdx_Migration_Id__c', 'New_Column_Name', match_behavior='alert')

    """
    if column_to_search not in df.columns:
        raise ValueError(f"Column '{column_to_search}' not found in DataFrame.")

    df[new_column_name] = None
    multiple_matches_found = False

    unique_values = df[column_to_search].dropna().unique()
    chunk_size = 1000  # Adjust chunk size as needed

    for start in range(0, len(unique_values), chunk_size):
        chunk_values = unique_values[start:start + chunk_size]
        chunk_values_str = ", ".join([f"'{val}'" for val in chunk_values])

        soql_query = f"SELECT Id, {sf_field_name} FROM {sf_object_name} WHERE {sf_field_name} IN ({chunk_values_str})"
        
        try:
            query_result = sf.query_all(soql_query)
        except SalesforceMalformedRequest as e:
            raise ValueError(f"Malformed request error: {e.content}")
        except SalesforceError as e:
            raise ValueError(f"Salesforce error: {e.content}")

        id_mapping = {}
        for record in query_result['records']:
            key = record[sf_field_name]
            if key in id_mapping:
                multiple_matches_found = True
                if match_behavior == 'first':
                    continue  # Skip subsequent matches if 'first' behavior is selected
            id_mapping[key] = record['Id']

        df[new_column_name] = df[column_to_search].map(id_mapping)

    if multiple_matches_found and match_behavior == 'alert':
        print("Alert: Multiple matches found for some records.")

    return df


def get_recordtype_id(df_recordTypes, developer_name, sobject_type, namespace):
    """
    Retrieves the Record Type ID for a specific Developer Name, SObject Type, and Namespace.

    Parameters:
    df_recordTypes (pd.DataFrame): The DataFrame containing Salesforce Record Types.
    developer_name (str): The DeveloperName to filter by.
    sobject_type (str): The SObjectType to filter by.
    namespace (str): The Namespace to filter by.

    Returns:
    str: The Record Type ID if a match is found, otherwise raises an error.

    Example: 
    religious_recordtype_id = get_recordtype_id(df_sf_recordTypes, 'Religious', 'Account', 'mbfc')
    """
    try:
        recordtype_id = df_recordTypes.loc[
            (df_recordTypes['DeveloperName'] == developer_name) & 
            (df_recordTypes['SobjectType'] == sobject_type) &
            (df_recordTypes['NamespacePrefix'] == namespace),
            'Id'
        ].iloc[0]  # Retrieve the first match
        
        return recordtype_id
    except IndexError:
        raise ValueError(f"No record type found for DeveloperName '{developer_name}', SObjectType '{sobject_type}', and Namespace '{namespace}'")


# Add a Salesforce record ID column to a DataFrame based on matching external ID field values
def add_salesforce_record_ids(sf, dataframe, df_column_name, sf_object_name, sf_external_id_field, new_column_name, chunk_size=1000):
    """
    Add a Salesforce record ID column to a DataFrame based on matching external ID field values.

    Parameters:
    sf (Salesforce): The Salesforce connection instance.
    dataframe (pd.DataFrame): The pandas DataFrame containing data to match.
    df_column_name (str): The column name in the DataFrame to match with Salesforce.
    sf_object_name (str): The Salesforce object name (e.g., 'Contact').
    sf_external_id_field (str): The external ID field in Salesforce to match.
    new_column_name (str): The name for the new DataFrame column to hold Salesforce record IDs.
    chunk_size (int): The number of records to include in each chunk for querying Salesforce.

    Returns:
    pd.DataFrame: The original DataFrame with the new column containing Salesforce record IDs.
    """
    # Ensure the dataframe column name exists in the dataframe
    if df_column_name not in dataframe.columns:
        raise ValueError(f"Column '{df_column_name}' not found in DataFrame.")
    
    # Create a set of unique values from the specified DataFrame column
    unique_values = dataframe[df_column_name].dropna().unique()
    
    id_mapping = {}
    
    # Process the unique values in chunks
    for start in range(0, len(unique_values), chunk_size):
        chunk_values = unique_values[start:start + chunk_size]
        chunk_values_str = ", ".join([f"'{val}'" for val in chunk_values])
        
        soql_query = f"SELECT Id, {sf_external_id_field} FROM {sf_object_name} WHERE {sf_external_id_field} IN ({chunk_values_str})"
        
        try:
            query_result = sf.query_all(soql_query)
        except SalesforceMalformedRequest as e:
            raise ValueError(f"Malformed request error: {e.content}")
        except SalesforceError as e:
            raise ValueError(f"Salesforce error: {e.content}")
        
        # Update the id_mapping with results from the current chunk
        id_mapping.update({record[sf_external_id_field]: record['Id'] for record in query_result['records']})
    
    # Map the Salesforce record IDs to the DataFrame
    dataframe[new_column_name] = dataframe[df_column_name].map(id_mapping)
    
    return dataframe

In [None]:
# Upsert to SF

import pandas as pd
import numpy as np
from simple_salesforce import Salesforce, SalesforceMalformedRequest, SalesforceError
from datetime import datetime, date

# Gets or creates a Diocesan account based on the Account Name
def get_or_create_diocesan_account(sf, account_name):
    """
    Searches for an account by name, returns the ID if found,
    otherwise creates the account with RecordType 'Church' and 'mbfc__Church_Type__c' set to 'Diocese',
    and then returns the new ID.

    Parameters:
    sf (Salesforce): Salesforce connection object
    account_name (str): The name of the account to search for or create

    Returns:
    str: The ID of the found or created account
    """

    # Query for the Record Type ID using the Developer Name 'Church'
    record_type_query = "SELECT Id FROM RecordType WHERE SobjectType = 'Account' AND DeveloperName = 'Church' LIMIT 1"
    record_type_result = sf.query(record_type_query)
    if record_type_result['records']:
        record_type_id = record_type_result['records'][0]['Id']
    else:
        raise ValueError("No RecordType found with DeveloperName 'Church'")

    # Search for the Account by name
    account_query = f"SELECT Id FROM Account WHERE Name = '{account_name}' LIMIT 1"
    account_result = sf.query(account_query)
    
    if account_result['records']:
        # Account found, return the ID
        return account_result['records'][0]['Id']
    else:
        # Account not found, create a new Account
        account_data = {
            'Name': account_name,
            'RecordTypeId': record_type_id,
            'mbfc__Church_Type__c': 'Diocese'
        }
        new_account = sf.Account.create(account_data)
        return new_account['id']
    
    from simple_salesforce import Salesforce

# improved version of the get_or_create_diocesan_account function
def get_or_create_account(sf, account_name, record_type_dev_name, church_type):
    """
    Searches for an account by name, returns the ID if found,
    otherwise creates the account with the specified Record Type and Church Type,
    and then returns the new ID.

    Parameters:
    sf (Salesforce): Salesforce connection object
    account_name (str): The name of the account to search for or create
    record_type_dev_name (str): The developer name of the Record Type to use for creating the account
    church_type (str): The Church Type to set for the new account

    Returns:
    str: The ID of the found or created account

    Example usage: 
    sf = Salesforce(username='your_username', password='your_password', security_token='your_security_token')
    account_id = get_or_create_account(sf, 'Diocese of Calgary', 'Church', 'Diocese')
    print(f"Account ID: {account_id}")
    """

    # Query for the Record Type ID using the provided developer name
    record_type_query = f"SELECT Id FROM RecordType WHERE SobjectType = 'Account' AND DeveloperName = '{record_type_dev_name}' LIMIT 1"
    record_type_result = sf.query(record_type_query)
    if record_type_result['records']:
        record_type_id = record_type_result['records'][0]['Id']
    else:
        raise ValueError(f"No RecordType found with DeveloperName '{record_type_dev_name}'")

    # Search for the Account by name
    account_query = f"SELECT Id FROM Account WHERE Name = '{account_name}' LIMIT 1"
    account_result = sf.query(account_query)
    
    if account_result['records']:
        # Account found, return the ID
        return account_result['records'][0]['Id']
    else:
        # Account not found, create a new Account
        account_data = {
            'Name': account_name,
            'RecordTypeId': record_type_id,
            'mbfc__Church_Type__c': church_type
        }
        new_account = sf.Account.create(account_data)
        return new_account['id']


# def upsert_to_salesforce(sf, dataframe, object_name, external_id_field):
#     """
#     Upsert records to Salesforce from a pandas DataFrame.

#     Parameters:
#     sf (Salesforce): The Salesforce connection instance.
#     dataframe (pd.DataFrame): The pandas DataFrame containing data to upsert.
#     object_name (str): The Salesforce object name (e.g., 'Contact').
#     external_id_field (str): The external ID field used for upserts.
#     """
#     successful_upserts = 0
#     failed_upserts = 0

#     # Replace placeholder values with None in the DataFrame
#     dataframe.replace({None: pd.NA, ' ': None, '': None}, inplace=True)

#     # Convert DataFrame to a list of dictionaries
#     data_to_upsert = dataframe.to_dict(orient='records')

#     for data in data_to_upsert:
#         try:
#             data = convert_non_serializables(data)
#             external_id = data.pop(external_id_field)

#             # Perform upsert using only the External ID
#             response = getattr(sf, object_name).upsert(f'{external_id_field}/{external_id}', data)
#             successful_upserts += 1
#             print(f"Successfully upserted {object_name} with External ID: {external_id}")
#         except SalesforceMalformedRequest as e:
#             failed_upserts += 1
#             print(f"Malformed request error when upserting {object_name} with External ID: {external_id}. Error: {e.content}")
#         except SalesforceError as e:
#             failed_upserts += 1
#             print(f"Salesforce error when upserting {object_name} with External ID: {external_id}. Error: {e.content}")
#         except Exception as e:
#             failed_upserts += 1
#             print(f"Failed to upsert {object_name} with External ID: {external_id}. Error: {e}")

#     print(f"Upsert completed. Successful upserts: {successful_upserts}, Failed upserts: {failed_upserts}")


def upsert_to_salesforce_bulk(sf, dataframe, object_name, external_id_field, results_log_file, batch_size=100):
    """
    Upsert records to Salesforce from a pandas DataFrame using the Bulk API.

    Parameters:
    sf (Salesforce): The Salesforce connection instance.
    dataframe (pd.DataFrame): The pandas DataFrame containing data to upsert.
    object_name (str): The Salesforce object name (e.g., 'Contact').
    external_id_field (str): The external ID field used for upserts.
    results_log_file (str): The file name where the full upsert results will be logged.
    batch_size (int): The number of records to include in each batch.
    """
    successful_upserts = 0
    failed_upserts = 0
    batch_number = 0

    # Replace placeholder values with None in the DataFrame
    dataframe.replace({pd.NA: None, ' ': None, '': None}, inplace=True)

    # Convert DataFrame to a list of dictionaries
    data_to_upsert = dataframe.to_dict(orient='records')

    # Open the results log file in 'write' mode to truncate/overwrite existing data
    with open(results_log_file, 'w') as results_log:
        writer = csv.writer(results_log)
        writer.writerow(['Batch Number', 'Record', 'Success', 'Error'])  # Write the headers

        # Process data in batches
        for i in range(0, len(data_to_upsert), batch_size):
            batch_number += 1
            batch_data = data_to_upsert[i:i + batch_size]
            batch_data = [convert_non_serializables(record) for record in batch_data]

            try:
                # Perform bulk upsert
                response = sf.bulk.__getattr__(object_name).upsert(batch_data, external_id_field=external_id_field)

                for index, res in enumerate(response):
                    if res['success']:
                        successful_upserts += 1
                        writer.writerow([batch_number, json.dumps(batch_data[index]), 'True', ''])
                    else:
                        failed_upserts += 1
                        writer.writerow([batch_number, json.dumps(batch_data[index]), 'False', json.dumps(res['errors'])])

            except SalesforceMalformedRequest as e:
                failed_upserts += len(batch_data)
                writer.writerow([batch_number, '', 'False', f"Malformed request: {e.content}"])
                for record in batch_data:
                    writer.writerow([batch_number, json.dumps(record), 'False', f"Failed record due to malformed request"])

            except SalesforceError as e:
                failed_upserts += len(batch_data)
                writer.writerow([batch_number, '', 'False', f"Salesforce error: {e.content}"])
                for record in batch_data:
                    writer.writerow([batch_number, json.dumps(record), 'False', f"Failed record due to Salesforce error"])

            except Exception as e:
                failed_upserts += len(batch_data)
                writer.writerow([batch_number, '', 'False', f"Unexpected error: {str(e)}"])
                for record in batch_data:
                    writer.writerow([batch_number, json.dumps(record), 'False', f"Failed record due to unexpected error"])

            # Progress monitoring
            print(f"Batch {batch_number} processed: {successful_upserts} successful, {failed_upserts} failed.")

    # Final summary message
    total_records = len(data_to_upsert)
    total_batches = batch_number
    print(f"Upsert completed. Total records processed: {total_records}, Batches: {total_batches}, "
          f"Successful upserts: {successful_upserts}, Failed upserts: {failed_upserts}")

## Extract Salesforce xref data

The following cells downloads all records from the target Salesforce enviro for the following objects:

- RecordTypes
- Users
- Accounts
- Contacts


In [None]:
# Get or create the Diocesan Account and get its ID
diocesan_account_id = get_or_create_account(sf, 'Archdiocese of Portland in Oregon', 'Church', 'Diocese')

print(f"Account ID: {diocesan_account_id}")

In [None]:
# get all ACTIVE SF users

sf_users = sf.query('Select Alias, FirstName, LastName, Username, id from User WHERE IsActive = True')
df_sf_users = pd.DataFrame(sf_users['records'])
df_sf_users = df_sf_users.drop(columns = 'attributes')

In [None]:
# get all SF Record Types
get_all_recordTypes = 'Select Id, Name, DeveloperName, sObjecttype, namespaceprefix from RecordType'

# get list of records, add to dataframe
sf_recordTypes = sf.query(get_all_recordTypes)
df_sf_recordTypes = pd.DataFrame(sf_recordTypes['records'])
df_sf_recordTypes = df_sf_recordTypes.drop(columns = 'attributes')

# Drop NPSP's 'Organization' record type  
df_sf_recordTypes = df_sf_recordTypes[
    ~((df_sf_recordTypes['Name'] == 'Organization') & (df_sf_recordTypes['NamespacePrefix'].isna()))
]

# Create a dictionary mapping 'DeveloperName' to 'Id' for faster lookup
record_types_mapping = df_sf_recordTypes.set_index('DeveloperName')['Id'].to_dict()

In [None]:
# get SF Accounts
get_all_accounts = 'Select id, Name, RecordTypeId, Type, mbfc__Parish_Code__c, Job_Id__c, Archdpdx_Migration_Id__c from Account'

# get list of records, add to dataframe
sf_accounts = sf.query(get_all_accounts)
df_sf_accounts = pd.DataFrame(sf_accounts['records'])
df_sf_accounts = df_sf_accounts.drop(columns = 'attributes')
df_sf_accounts.shape

In [None]:
# get SF Contacts
get_all_contacts = 'Select id, Name, npe01__Type_of_Account__c, RecordTypeId, Archdpdx_Migration_Id__c, CreatedById from Contact'

# get list of records, add to dataframe
sf_contacts = sf.query(get_all_contacts)
df_sf_contacts = pd.DataFrame(sf_contacts['records'])
# df_sf_contacts = df_sf_contacts.drop(columns = 'attributes')
df_sf_contacts.shape

# ACCOUNTS


## Extract


### Load ArchdPDX csvs as DataFrames

ADPDX data for organizations is held in 6 tables, all of which will be migrated into Salesforce's Accounts object.


In [None]:
df_offices = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/Offices.csv', skiprows= lambda x: x in [1])
df_offices["src_table"] = 'Offices'
df_offices["AccountRecordType"] = 'Organization'
df_offices.rename({
    "Common Name": "Name",
    "Name": "Formal_Name__c"
    }, axis="columns", inplace=True)

df_offices

In [None]:
df_parishes = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/Parishes.csv', dtype={'Vicariate': 'object', 'Established': 'str', 'Mission Of': 'str'}, skiprows= lambda x: x in [1])
df_parishes["src_table"] = 'Parishes'
df_parishes["AccountRecordType"] = 'Church'
# df_parishes.rename({"Parish Formal Name": "Account Name"}, axis="columns", inplace=True)
df_parishes.rename({
                    "Parish Formal Name": "Formal_Name__c",
                    "Common Name": "Name",
                    'Mission Of': 'Parent_Parish'
                }, axis="columns", inplace=True)

df_parishes

In [None]:
df_religious = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/RelCommunities.csv', skiprows= lambda x: x in [1])
df_religious["src_table"] = 'RelCommunities'
df_religious["AccountRecordType"] = 'Religious'
df_religious.rename({
                    "Community Name": "Formal_Name__c",
                    "Common Name": "Name"
                     }, axis="columns", inplace=True)

df_religious.sample(10)

In [None]:
df_schools = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/Schools.csv', skiprows= lambda x: x in [1])
df_schools["src_table"] = 'Schools'
df_schools["AccountRecordType"] = 'Organization'
df_schools.rename({
                    "School Name": "Formal_Name__c",
                    "Common Name": "Name",
                    'Parish Link': 'Parent_Parish'
                    
                    }, axis="columns", inplace=True)

df_schools.sample(10)

In [None]:
df_vicariates = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/Vicariates.csv', skiprows= lambda x: x in [1])
df_vicariates["src_table"] = 'Vicariates'
df_vicariates["AccountRecordType"] = 'Deanery'
# As we want to designate the Common Name as what will be the Account Name in Salesforce, we are renaming these columns in a different pattern than prior CSVs.
df_vicariates.rename({"Common Name": "Name"}, axis="columns", inplace=True)

df_vicariates.sample(10)

In [None]:
df_newman = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/NewmanCenters.csv', skiprows= lambda x: x in [1])
df_newman["src_table"] = 'NewmanCenters'
df_newman["AccountRecordType"] = 'Organization'
df_newman.rename({
                    "Newman Center Name": "Formal_Name__c",
                    "Common Name": "Name",
                    "Newman Center City": "Mailing Address City2"
                  }, axis="columns", inplace=True)

df_newman

Each of the 6 tables has an overlapping but distinct set of columns, making it challenging to conform these tables into a single staging table.

In addition, columns that correspond to the same field in salesforce are named differently in each table (eg. 'Parish City' vs. 'Religious City' vs. 'Newman Center City')


In [None]:
print('TABLE: (ROWS, COLUMNS)\n')

print(f'Offices:    {df_offices.shape}')
print(f'Parishes:   {df_parishes.shape}')
print(f'Religious:  {df_religious.shape}')
print(f'Schools:    {df_schools.shape}')
print(f'Vicariates: {df_vicariates.shape}')
print(f'Newman Ctr: {df_newman.shape}')

### Merge DFs into a single Accounts DF

This step takes 6 different tables and combines them into a single Accounts table for cleaning and staging.


In [None]:
# init list of DataFrames
src_accounts = [df_offices, df_parishes, df_religious, df_schools, df_vicariates, df_newman]

# concats the various Account dataframes into one large table
accounts = pd.concat(src_accounts, ignore_index=True)

## Transform


Time to do some table column renaming and re-organizing!


In [None]:
# renames columns headers to consolidate account names into SF-conformed data model
accounts.rename({"Common Name": "Name, City"}, axis="columns", inplace=True)

accounts.rename(
    columns={
        # 'Account Name': 'Name',
        'Mailing Address': 'BillingStreet1',
        'Mailing Address 2': 'BillingStreet2',
        'Mailing Address City': 'BillingCity',
        'Mailing Address State': 'BillingState',
        'Mailing Address Postal Code': 'BillingPostalCode',
        'Mailing Address Country': 'BillingCountry',
        'Email': 'mbfc__Email__c',
        'Web Site': 'Website',
        'Order Common Name': 'mbfc__Abbreviation__c',
        'Order Letters': 'mbfc__Religious_Suffix__c',
        'Men or Women': 'mbfc__Type_Members__c',
        'Archdiocese Assigns Clergy': 'Archdiocese_Assigns_Clergy__c',
        'Locator Description': 'Locator_Description__c',
        'Established': 'mbfc__Date_Established__c',
        'County': 'County__c',
        'Disabled Access': 'Disabled_Access__c',
        'Sanctuary Capacity': 'Sanctuary_Capacity__c',
        'Miles to Pastoral Centre': 'Miles_to_Pastoral_Centre__c',
        'Archdiocesan School Code': 'Archdiocesan_School_Code__c',
        'Grades Provided': 'Grades_Provided__c'
    },
    inplace=True
)


# reorder column order
col = accounts.pop('Name')
accounts.insert(2, col.name, col)

col = accounts.pop('Parish Name')
accounts.insert(3, col.name, col)

col = accounts.pop('AccountRecordType')
accounts.insert(1, col.name, col)



In [None]:
accounts.columns

In [None]:
accounts[accounts.BillingStreet2.isna() == False]

In [None]:
# merge two Non-Latin columns into one 
accounts['Non_Latin__c'] = accounts['Non-Latin'].combine_first(accounts['Non-Latin Rite']) 

# Rename the 'Non_Latin__c' field to 'mbfc__Non_Latin__c'
accounts.rename(columns={'Non_Latin__c': 'mbfc__Non_Latin__c'}, inplace=True)


In [None]:
# export merged tables DESCRIPTION to CSV for mapping
accounts.describe(include='all').transpose().to_csv(f'/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/working/accounts.csv')
accounts.describe(include='all').transpose()

In [None]:
# Create a single BillingAddress field

# Concatenate the two columns with CHAR(10) as separator
accounts['BillingStreet'] = accounts[['BillingStreet1', 'BillingStreet2']].apply(lambda x: '\n'.join(x.dropna()), axis=1)

# Drop the original columns
accounts.drop(columns=['BillingStreet1', 'BillingStreet2'], inplace=True)

In [None]:
# Handle boolean fields

boolean_columns_to_convert = [
    'Archdiocese_Assigns_Clergy__c', 
    'mbfc__Non_Latin__c', 
    'Disabled_Access__c', 
    ]

# Convert 'Yes'/'No' to True/False
accounts[boolean_columns_to_convert] = accounts[boolean_columns_to_convert].replace({'Yes': True, 'No': False, None: False})



In [None]:
accounts[boolean_columns_to_convert].sample(10)

In [None]:
# Religious Order fields > conform to new data model

# Apply logic to create new columns
accounts['Religious_Secular_Order__c'] = accounts.apply(
    lambda x: 'Religious Order' if x['Religious Order'] == 'Yes' else ('Secular Order' if x['Secular Order'] == 'Yes' else None), axis=1
)

accounts['Pontifical_or_Diocesan_Order__c'] = accounts.apply(
    lambda x: 'Diocesan Order' if x['Diocesan Order'] == 'Yes' else ('Pontifical Order' if x['Pontifical Order'] == 'Yes' else None), axis=1
)

accounts.drop(columns=['Religious Order', 'Secular Order', 'Diocesan Order', 'Pontifical Order'], inplace=True)

In [None]:
print(accounts['mbfc__Date_Established__c'].dtype)

In [None]:
# Handle Date fields that are only YYYY

# Ensure all values in 'mbfc__Date_Established__c' are strings
accounts['mbfc__Date_Established__c'] = accounts['mbfc__Date_Established__c'].astype(str)

# Define a function to transform valid year values
def transform_year(year):
    if pd.notna(year) and year.replace('.', '', 1).isdigit() and len(year.split('.')[0]) == 4:
        return pd.to_datetime(year.split('.')[0] + '-01-01')
    else:
        return pd.NaT

# Apply the function to the 'mbfc__Date_Established__c' column
accounts['mbfc__Date_Established__c'] = accounts['mbfc__Date_Established__c'].apply(transform_year)


In [None]:
accounts['mbfc__Date_Established__c'].sample(10)

In [None]:
accounts[accounts.src_table == 'Schools'].Parent_Parish

In [None]:
# Format Parent_Parish field

# Remove instances of '0'
accounts.Parent_Parish = accounts.Parent_Parish.apply(lambda x: '' if x == 0 else x)


In [None]:
# Append prefix
accounts['Parent_Parish'] = accounts['Parent_Parish'].apply(lambda x: 'Parishes_' + str(x) if pd.notna(x) and x != '' else x)

In [None]:
# Check final results, in particular the 'Schools' records
accounts.Parent_Parish[(accounts.Parent_Parish.isna() == False) & (accounts["src_table"] == "Schools")].sample(10)

In [None]:
# Replace Parent_Parish unique ids with SF records
add_salesforce_record_ids(sf, accounts, "Parent_Parish", "Account", "Archdpdx_Migration_Id__c", "Parent_Parish__c", 10 )

In [None]:
# ParentID field

accounts['ParentId'] = accounts['Parent_Parish__c']

# Verify results
accounts[accounts.Parent_Parish__c.isna() == False]


### AccountRecordType & ChurchType


In [None]:
#Sets all rows where AccountRecordType is Church as a Parish.
accounts.loc[accounts['AccountRecordType'] == 'Church', 'mbfc__Church_Type__c'] = 'Parish'
accounts[accounts['AccountRecordType'] == 'Church'].head(5)


In [None]:
# Update 'mbfc__organization_type__c' field for rows where the source table is 'Schools'
accounts.loc[accounts['src_table'] == 'Schools', 'mbfc__Organization_Type__c'] = 'School'

### Generate ExternalId


In [None]:
# Generate an External ID
columns_to_concate = ['src_table', 'Record Number']
accounts = concat_columns(accounts, columns_to_concate, 'Archdpdx_Migration_Id__c', separator='_')

In [None]:
# set Deanery RecordTypeId to the Church RecordTypeId
# map in RecordTypeIds
accounts['RecordTypeId'] = accounts['AccountRecordType'].map(record_types_mapping)
record_types_mapping

## Load


### Generate a new Job ID


In [None]:
# increment to the job_id
file_name = '/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/jobs/job_id'
curr_job_id = update_job_id(file_name)
print(f"New job ID: {curr_job_id}")

# add/update account DF with job_id
accounts["Job_Id__c"] = curr_job_id


### A) Vicariates


In [None]:
# Get Account Group RecordTypeID
deanery_recordTypeId = df_sf_recordTypes.loc[
    (df_sf_recordTypes['DeveloperName'] == 'Deanery') & (df_sf_recordTypes['SobjectType'] == 'Account'),
    'Id'
    ].iloc[0]  # Use .iloc[0] to get the first item if you're expecting exactly one match


# Insert Vicariates holding account
vicariate_account = sf.Account.upsert('Archdpdx_Migration_Id__c/Vicariates_Holding_Acc',
    {
    "Name": "Vicariates",
    "ParentId": diocesan_account_id,
    "mbfc__Diocese__c": diocesan_account_id,
    "RecordTypeId": deanery_recordTypeId,
    # "mbfc__Group_Type__c": 'Office',
    "Job_Id__c": curr_job_id
    }
)

# Get Vicariate Holding Acc's SF ID (as an upsert doesn't return the actual record ID)
vicariate_account = sf.Account.get_by_custom_id('Archdpdx_Migration_Id__c', 'Vicariates_Holding_Acc')
vicariate_account_id = vicariate_account['Id']

vicariate_account_id

In [None]:
# Prepare Vicariates staging DF

vicariates = accounts[accounts['AccountRecordType'] == 'Deanery']


vicariates = vicariates[[
    'Record Number',
    'Name',
    # 'AccountRecordType',
    'Job_Id__c',
    'Archdpdx_Migration_Id__c',
    'RecordTypeId'
    ]]

# add parentid
vicariates["mbfc__Diocese__c"] = diocesan_account_id
vicariates['ParentId'] = vicariate_account_id
# vicariates['mbfc__Church_Type__c'] = 'Deanery'
vicariates['RecordTypeId'] = deanery_recordTypeId

vicariates.rename(columns={
        # 'Name, City': 'Name',
        'External_Id': 'Archdpdx_Migration_Id__c'
    }, inplace=True)

vicariates.reset_index()
vicariates.set_index('Record Number', inplace=True)

vicariates

#### Export Vicariates to CSV


In [None]:
# export to CSV
vicariates.to_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/staging/vicariates_staging.csv')


#### Upsert Vicariates


In [None]:
bulk_data = []
for row in vicariates.itertuples(index=False):
    d = row._asdict()
    # del d['Index']
    bulk_data.append(d)

if run_upserts == 'True':
    vicariate_upsert = sf.bulk.Account.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=100, use_serial=False)
    upserts = pd.DataFrame(vicariate_upsert)

    print(upserts)
    

In [None]:
# Generate an Errors log
import csv

keys = vicariate_upsert[0].keys()

with open('results_files/vicariate_results.csv', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, keys)
    writer.writeheader()
    writer.writerows(vicariate_upsert)

In [None]:
# Get Vicariate records from SF

sf_deaneries = sf.query("SELECT Archdpdx_Migration_Id__c, Id FROM Account WHERE RecordType.DeveloperName = 'Deanery'")

df_sf_deaneries = pd.DataFrame(sf_deaneries['records'])
df_sf_deaneries = df_sf_deaneries.drop(columns = 'attributes')

df_sf_deaneries

# Creates a dict of Vicariate unique ids to the new Salesforce record IDs, so can populate on latter Account records
vicariate_sf_recordids = df_sf_deaneries.set_index('Archdpdx_Migration_Id__c')['Id'].to_dict()
vicariate_sf_recordids

### B) Parishes, Schools, Organizations


In [None]:
# Create acc_main (accounts excluding Deaneries (already handled) and Religious (to be handled differently, after))
acc_main = accounts[accounts['AccountRecordType'] != 'Deanery']
acc_main = acc_main[acc_main['AccountRecordType'] != 'Religious']

acc_main.loc[acc_main['AccountRecordType'] == 'Church', 'Vicariate_Ext_Id'] = 'Vicariates_' + acc_main['Vicariate']

In [None]:
acc_main.sample(5)

In [None]:
# map in Deaneries
acc_main['mbfc__Deanery__c'] = acc_main.Vicariate_Ext_Id.map(vicariate_sf_recordids)

acc_main[acc_main['AccountRecordType'] == 'Church']['mbfc__Deanery__c']

In [None]:
# Clean up NaN values

acc_main.fillna('', inplace=True)

In [None]:
# Generate Schedule text from all Schedule columns

def create_account_schedule(row):
    account_schedule = []
    for i in range(1, 8):
        head_col = f'Schedule {i} Head'
        text_col = f'Schedule {i} Text'
        
        head = row[head_col]
        text = row[text_col]
        
        if pd.notnull(head) or pd.notnull(text):
            if pd.notnull(head):
                account_schedule.append(f"<p><strong>{head}</strong></p>")
            if pd.notnull(text):
                account_schedule.append(f"<p>{text}</p>")
            account_schedule.append("<p><br></p>")
    
    # Join all parts into a single string
    return "".join(account_schedule).strip()

acc_main['mbfc__Mass_Times__c'] = acc_main.apply(create_account_schedule, axis=1)



In [None]:
acc_main['mbfc__Mass_Times__c'].sample(15)

In [None]:
acc_main

In [None]:
# Create 'account_staging' df (drop extraneous columns)

accounts_staging = acc_main[[
    'Name',
    'Formal_Name__c',
    'RecordTypeId',
    'mbfc__Church_Type__c',
    'mbfc__Deanery__c',
    'BillingStreet',
    'BillingCity',
    'BillingState',
    'BillingPostalCode',
    'BillingCountry',
    'Phone',
    'Fax',
    'mbfc__Email__c',
    'Website',
    'mbfc__Mass_Times__c',
    'mbfc__Abbreviation__c',
    'mbfc__Religious_Suffix__c',
    'mbfc__Type_Members__c',
    'Description',
    'Archdiocese_Assigns_Clergy__c', # Boolean fields
    'mbfc__Non_Latin__c', 
    'Disabled_Access__c', 
    'Locator_Description__c',
    'Parent_Parish__c',
    'mbfc__Date_Established__c',
    'County__c',
    'Sanctuary_Capacity__c',
    # 'Miles_to_Pastoral_Centre__c',
    'Religious_Secular_Order__c',
    'Pontifical_or_Diocesan_Order__c',
    'Archdiocesan_School_Code__c',
    'Grades_Provided__c',
    'Job_Id__c',
    'Archdpdx_Migration_Id__c',
    'mbfc__Organization_Type__c',
    'ParentId'  # Later, check whether or not can upsert using external ID using this field


    ]]

In [None]:
accounts_staging

#### Create Parishes Holding Acc for acc heirarchy


In [None]:
# Upsert a Parishes holding account

# Get Account Group RecordTypeID
group_recordTypeId = df_sf_recordTypes.loc[
    (df_sf_recordTypes['DeveloperName'] == 'Group') & (df_sf_recordTypes['SobjectType'] == 'Account'),
    'Id'
    ].iloc[0]  # Use .iloc[0] to get the first item if you're expecting exactly one match


# Insert Vicariates holding account
parish_holding_account = sf.Account.upsert('Archdpdx_Migration_Id__c/Parishes_Holding_Acc',
    {
    "Name": "Parishes",
    "ParentId": diocesan_account_id,
    "RecordTypeId": group_recordTypeId,
    "Job_Id__c": curr_job_id,
    "mbfc__Group_Type__c": "Office"
    }
)

# Get Vicariate Holding Acc's SF ID (as an upsert doesn't return the actual record ID)

parish_holding_account = sf.Account.get_by_custom_id('Archdpdx_Migration_Id__c', 'Parishes_Holding_Acc')

parishes_holding_account_id = parish_holding_account['Id']

parishes_holding_account_id

In [None]:
# Set the ParentId for all Parish records

# accounts_staging['ParentId'] = None # Commented this out as (a) the field already exists and it was blanking out pre-existing values.

accounts_staging['ParentId']= accounts_staging.apply(
    lambda row: parishes_holding_account_id if row['mbfc__Church_Type__c'] == 'Parish' else row['ParentId'], axis=1
)

accounts_staging.sample(10)


#### Upsert Accounts (TBD )


In [None]:
# send accounts_staging to csv
accounts_staging.to_csv('staging_files/accounts_staging.csv', encoding='utf-8-sig')

In [None]:

# # Rename columns apis
# accounts_staging = accounts_staging.rename(columns={'Parent_Parish__c': 'Parent_Parish__r'})  # Later on, attempt to include 'ParentId' (which, as a standard SF field, might not work)

# # Reformat values to match what SF api requires
# accounts_staging['Parent_Parish__r'] = accounts_staging.apply(lambda x: "{'Archdpdx_Migration_Id__c': '" + x['Parent_Parish__r'] + "'}" if pd.notna(x['Parent_Parish__r']) and x['Parent_Parish__r'] != 'None' and x['Parent_Parish__r'] != '' else None, axis=1)




In [None]:
accounts_staging

In [None]:
# accounts_staging[accounts_staging.Parent_Parish__r.isnull() == False]["Parent_Parish__r"]

In [None]:
print(accounts_staging['mbfc__Date_Established__c'].dtype)

In [None]:

# Convert datetime to string in the desired format
accounts_staging['mbfc__Date_Established__c'] = accounts_staging['mbfc__Date_Established__c'].dt.strftime('%Y-%m-%d')

In [None]:
# Upsert using new function

accounts_upsert2 = upsert_to_salesforce_bulk(sf, accounts_staging, 'Account', 'Archdpdx_Migration_Id__c', 'results_files/accounts_results.csv', batch_size=100)

In [None]:
# Extract SF Account records

sf_accounts = sf.query('Select id, Name, RecordTypeId, mbfc__Church_Type__c, Archdpdx_Migration_Id__c, Job_Id__c from Account WHERE Job_Id__c != null')
sf_accounts = pd.DataFrame(sf_accounts['records'])
sf_accounts = sf_accounts.drop(columns = 'attributes')
sf_accounts

### C) Religious Institutes (Parents)


This section prepares and upserts parent Religious Congregation accounts for each row in RelCommunities table.

Dataframes >>
- acc_religious
- acc_religious_2
- acc_religious_parents

In [None]:
"""
- 'acc_religious' DF: create unique_id of religious parents
- create 'acc_religious_orders' DF , upsert into SF
- extract accounts from Salesforce, create dict (external_ID : account_ID)
- map parent ids onto religious child accounts DF in main DF
- 'acc_religious' > staging DF ('acc_religious')
    - drop unnecessary columns
    - upsert create DF of religious children, upsert into SF with
"""

# Create a new DF of all Religious accounts
acc_religious = accounts[accounts['AccountRecordType'] == 'Religious']

# Create a simplified external ID field for Parent Accounts
acc_religious['Archdpdx_Migration_Id__c'] = acc_religious['Order Full Name'].apply(
    lambda x: x.lower().replace(' ', '')[:40]
)

acc_religious_2 = acc_religious

# Create a DF for only parent religious order accounts
acc_religious_parents = acc_religious_2[[
    'Order Full Name', 
    # 'Name', 
    'mbfc__Abbreviation__c', 
    'mbfc__Religious_Suffix__c', 
    'mbfc__Type_Members__c', 
    'Archdpdx_Migration_Id__c',
    'Pontifical_or_Diocesan_Order__c',
    'Religious_Secular_Order__c',
    ]]

# Drop duplicate rows of the same parent Religious Order (becuase there are more than 1 local community of a particular order)
acc_religious_parents.drop_duplicates('Order Full Name', inplace=True)

# Manipulate the 'Name' field to remove any comma and subsequent text
# acc_religious_parents['Name'] = acc_religious_parents['Name'].str.split(',').str[0]

# How many remaining rows after dropping duplicates?
print(acc_religious_parents.shape)

# Rename columns
acc_religious_parents = acc_religious_parents.rename(columns={
    # 'Order Full Name': 'Description',
    'Order Full Name': 'Name'
    })

# Drop NA
acc_religious_parents.fillna('', inplace=True)

acc_religious_parents


In [None]:
acc_religious_parents['mbfc__Religious_Type__c'] = 'Congregation'

In [None]:
# Get Religious RecordTypeId
religious_recordtype_id = get_recordtype_id(df_sf_recordTypes, 'Religious', 'Account', 'mbfc')

religious_recordtype_id

In [None]:
# Set recordType to 'Religious'

religious_recordtype_id = df_sf_recordTypes.loc[
    (df_sf_recordTypes['DeveloperName'] == 'Religious') & (df_sf_recordTypes['SobjectType'] == 'Account'),
    'Id'
    ].iloc[0]  # Use .iloc[0] to get the first item if you're expecting exactly one match

print(religious_recordtype_id)

acc_religious_parents['RecordTypeId'] = religious_recordtype_id

acc_religious_parents.sample(10)

In [None]:
# Send to CSV
acc_religious_parents.to_csv('staging_files/religious_order_staging.csv', encoding='utf-8-sig')

In [None]:
# Upsert to Salesforce
bulk_data = []
for row in acc_religious_parents.itertuples(index=False):
    d = row._asdict()
    # del d['Index']
    bulk_data.append(d)

if run_upserts == 'True':
    religious_order_upsert = sf.bulk.Account.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=100, use_serial=False)
    df_rel_order_upsert = pd.DataFrame(religious_order_upsert)

df_rel_order_upsert

In [None]:
# Generate an Errors log
import csv

keys = religious_order_upsert[0].keys()

with open('results_files/religious_order_results.csv', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, keys)
    writer.writeheader()
    writer.writerows(religious_order_upsert)

In [None]:
# get SF Accounts
get_all_rel_accounts = f"Select id, Name, RecordTypeId, Type, Archdpdx_Migration_Id__c from Account where RecordTypeID = '{religious_recordtype_id}'"

print(religious_recordtype_id)

# get list of records, add to dataframe
sf_accounts = sf.query(get_all_rel_accounts)
df_sf_accounts = pd.DataFrame(sf_accounts['records'])
df_sf_accounts = df_sf_accounts.drop(columns = 'attributes')

df_sf_accounts.sample(10)

In [None]:
religious_order_mapping = df_sf_accounts.set_index('Archdpdx_Migration_Id__c')['Id'].to_dict()
# religious_order_mapping

### D) Religious Communities


This section stages the 'relCommunities' tables as Religious Account records. 

Dataframes:
- acc_religious_staging
- acc_religious_staging_2 

In [None]:
acc_religious_staging = (acc_religious
                         .rename(columns={'Archdpdx_Migration_Id__c' : 'Parent_Archdpdx_Migration_Id__c'})
)

acc_religious_staging['ParentId'] = acc_religious_staging['Parent_Archdpdx_Migration_Id__c'].map(religious_order_mapping)

In [None]:
pd.set_option('display.max_columns', None)

In [None]:
# Enrich the data

acc_religious_staging['mbfc__Religious_Type__c'] = 'Local Community'
acc_religious_staging['Archdpdx_Migration_Id__c'] = 'RelCommunities_' + acc_religious_staging['Record Number'].astype('str')
acc_religious_staging['RecordTypeId'] = religious_recordtype_id
# acc_religious_staging.drop(columns='Name', inplace=True)
# acc_religious_staging.rename(columns={
#     'Name, City': 'Name'
# }, inplace=True)

In [None]:
acc_religious_staging.sample(5)

In [None]:
acc_religious_staging_2 = acc_religious_staging[[
    'Name',
    'RecordTypeId',
    'mbfc__Religious_Type__c',
    'BillingStreet',
    'BillingCity',
    'BillingState',
    'BillingPostalCode',
    'BillingCountry',
    'Phone',
    'Fax',
    'mbfc__Email__c',
    'Website',
    'mbfc__Abbreviation__c',
    'mbfc__Religious_Suffix__c',
    'mbfc__Type_Members__c',
    'Description',
    'Job_Id__c',
    'ParentId',
    'Archdpdx_Migration_Id__c'
    ]]

acc_religious_staging_2.sample(5)

In [None]:
# Final Cleanup

acc_religious_staging_2 = acc_religious_staging_2.fillna('')

In [None]:
acc_religious_staging_2

In [None]:
# Send to CSV
acc_religious_staging_2.to_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/staging/religious_community_staging.csv', encoding='utf-8-sig')

In [None]:
upsert_to_salesforce_bulk(sf, acc_religious_staging_2, 'Account', 'Archdpdx_Migration_Id__c', 'results_files/religious_comm_results.csv', 100)


### E) Religious Superiors


In [None]:
acc_rel_superiors = acc_religious_2[[
    'Name',
    'Major Superior Name',
    'Major Superior Phone',
    'Major Superior Email',
    'Archdpdx_Migration_Id__c']].copy()


acc_rel_superiors['AccountId'] = acc_rel_superiors.Archdpdx_Migration_Id__c.map(religious_order_mapping)

acc_rel_superiors.sample(5)

In [None]:
def parse_names(df, column_name):
    # Convert all non-string entries to strings (handling NaN and other data types)
    df[column_name] = df[column_name].fillna('').apply(str)

    # Create a new DataFrame to store the name parts
    name_parts = pd.DataFrame()

    # Parse each name in the column
    name_parts['First Name'] = df[column_name].apply(lambda x: HumanName(x).first if x.strip() != '' else '')
    name_parts['Last Name'] = df[column_name].apply(lambda x: HumanName(x).last if x.strip() != '' else '')
    name_parts['Middle Name'] = df[column_name].apply(lambda x: HumanName(x).middle if x.strip() != '' else '')
    name_parts['Title'] = df[column_name].apply(lambda x: HumanName(x).title if x.strip() != '' else '')
    name_parts['Suffix'] = df[column_name].apply(lambda x: HumanName(x).suffix if x.strip() != '' else '')
    name_parts['Nickname'] = df[column_name].apply(lambda x: HumanName(x).nickname if x.strip() != '' else '')

    # Combine the original DataFrame with the name parts DataFrame
    result_df = pd.concat([df, name_parts], axis=1)
    return result_df



In [None]:
!pip install nameparser
from nameparser import HumanName
from nameparser.config import CONSTANTS

# Add dataset-specific Titles and Suffix constants for parsing
CONSTANTS.titles.add('Rev.', 'Very Rev.', 'Very Rev', 'Sr.', 'Sr. ', 'Very', 'Bishop')
CONSTANTS.suffix_acronyms.add('FRS', 'OMI', 'OSA', 'OCD', 'OFM', 'OP', 'OC', 'FSE', 'OMV', 'SDB', 'SM', 'SFX', 'SP', 'OP', 'O.S.M', 'OSM' 'SNJM', 'OSF', 'HMRF', 'DD', 'CSJP', 'SDD', 'BVM', 'BVM - President', 'SJ')



In [None]:
# Parse Complex Names
acc_rel_superiors_parsed = parse_names(acc_rel_superiors, 'Major Superior Name')

In [None]:
acc_rel_superiors_staging = acc_rel_superiors_parsed.fillna('')

acc_rel_superiors_staging['Archdpdx_Migration_Id__c'] = acc_rel_superiors_staging['Major Superior Name'].apply(lambda x: x.replace(' ','').lower())

# Rename columns
acc_rel_superiors_staging = acc_rel_superiors_staging.rename(columns={
    'Major Superior Phone': 'Phone',
    'Major Superior Email': 'Email',
    'Title': 'Salutation',
    'First Name': 'FirstName',
    'Middle Name': 'MiddleName',
    'Last Name': 'LastName'
})

# Add job id
acc_rel_superiors_staging['Archdpdx_Job_Id__c'] = curr_job_id

# Drop columns
acc_rel_superiors_staging = acc_rel_superiors_staging.drop(columns=['Name', 'Major Superior Name', 'Nickname'])

# Drop empty rows
acc_rel_superiors_staging = acc_rel_superiors_staging[acc_rel_superiors_staging['LastName'].str.strip() != '']

acc_rel_superiors_staging.sample(10)

In [None]:
acc_rel_superiors_staging

In [None]:
# Send to CSV
acc_rel_superiors_staging.to_csv('staging_files/religious_superiors_staging.csv', encoding='utf-8-sig')

In [None]:
# Upsert to Salesforce

def find_existing_contact(sf, first_name, last_name):
    query = f"SELECT Id, Archdpdx_Migration_Id__c FROM Contact WHERE FirstName = '{first_name}' AND LastName = '{last_name}'"
    result = sf.query(query)
    return result['records']



bulk_data = []
for row in acc_rel_superiors_staging.itertuples(index=False):
    d = row._asdict()
    existing_contacts = find_existing_contact(sf, d['FirstName'], d['LastName'])
    if existing_contacts:
        # Update existing contact with external ID
        d['Id'] = existing_contacts[0]['Id']
        bulk_data.append(d)
    else:
        bulk_data.append(d)


if run_upserts == 'True':
    religious_superior_upsert = sf.bulk.Contact.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=100, use_serial=False)
    df_rel_superior_upsert = pd.DataFrame(religious_superior_upsert)

df_rel_superior_upsert

In [None]:
# Upsert Religious Superiors to Salesforce
upsert_to_salesforce_bulk(
    sf, 
    acc_rel_superiors_staging, 
    'Contact', 
    'Archdpdx_Migration_Id__c', 
    'results_files/religious_superiors_results.csv', 
    100
)

# CONTACTS


## Extract


In [885]:
import pandas as pd
df_contacts = (pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/People.csv')
               .set_index('Record Number', verify_integrity=True)
               .drop(index='recNum') # Drops the extra row that replicates the labels
               .rename(columns=lambda x: x.replace(' ', '_')) # Remove whitespace in column names
)

df_contacts.sample(10)


Unnamed: 0_level_0,Common_Name,Sort_Name,Type(s),Clergy_Status,Religious_Status,Login_ID,Password,Password_Must_be_Changed,Access_Permission,Spouse,Title,Salutation,Christian_Name,Nickname,Middle_Name(s),Surname,Suffix,Mailing_Address,Mailing_Address_2,Mailing_Address_City,Mailing_Address_State,Mailing_Address_Province,Mailing_Address_Postal_Code,Mailing_Address_Country,Private_Address,Private_Address_2,Private_Address_City,Private_Address_State,Private_Address_Province,Private_Address_Postal_Code,Private_Address_Country,Preferred_Address,Work_Phone,Home_Phone,Cell_Phone,Preferred_Phone,Work_Email,Archdiocesan_Email,Home_Email,Preferred_Email,Directory_Include,Directory_Include_Middle_Name,Directory_Include_Suffix,Suppress_From_Reports,Seminarian_Student_Debt,Seminarian_Medical_Benefits,Send_Group_Mail_and_Email,Birth_Date,Place_of_Birth,Foreign_Born,...,Diaconal_Ordination_Date,Diaconal_Ordination_Place,Diaconal_Ordination_Prelate,Presbyteral_Ordination_Date,Presbyteral_Ordination_Place,Presbyteral_Ordination_Prelate,Episcopal_Ordination_Date,Episcopal_Ordination_Place,Episcopal_Ordination_Prelate,Ordination_Diocese,Incardinated_From_Diocese,Incardinated_From_Date,Incardinated_Now,Excardinated_To_Diocese,Excardinated_To_Date,Letter_of_Good_Standing_Date,Religious_In_Archdiocese_Date,Faculties,Faculties_Granted_Date,Faculties_Restricted_Date,Faculties_Withdrawn_Date,Last_Retreat_Date,Last_Educ_Requirement_Date,Policy_Manual_Acknowledgement_Date,Harassment_Prevention_Course_Date,Standards_of_Conduct_Date,Last_Background_Check_Date,Last_Child_Protection_Training_Date,Out_of_Diocese_Date,Senior_Status_Date,Laicized_Date,Deceased_Date,Languages,Coverage_Availability,Advanced_Directive_Date,End_of_Life_Plan_Date,Will_Date,Will_Note,CIC_489_File,Registered_Parish,CARA_Ethnicity,Seminarian_Status,Other_Diaconal_Ministry,Spiritual_Director_Authorized,Link_to_Religious_Community,Place_of_Work,Volunteer_Place,Type_of_Work,Work_Load,Work_Title
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
571,Rev. Amancio Rodrigues,rodrigues amancio,Priest,Senior Status,,arodrigues,d1e9d9dfd01f2d88c094a80203f330bb1147c55fdc47ea...,No,,0,Rev.,Fr.,Amancio,,,Rodrigues,,53 SE 171st Ave,,Portland,OR,,97233.0,,,,,,,,,Do Not Include,,,503-252-4194,Do Not Include,,,amanciorod5@yahoo.com,Do Not Include,Yes,No,No,No,0,,No,1941-08-13,"Goa, India",Yes,...,,,,1966-12-21,"Pilar, Goa, India",Most Rev. Francisco da Piedade Rebelo,,,,"Goa, India","Goa, India",1975-01-04,Archdiocese of Portland in Oregon,,,,,General,,,,,,,,2019-04-04,2019-04-05,2019-04-04,,2012-02-07,,,"Portuguese, Konkawin",Not Active. Do Not Call.,,,1980-05-03,,,0,,,,,0,,,,,
2254,Mr. Nichlas Schaal,schaal nichlas,Staff,,,,,,,0,Mr.,Mr.,Nichlas,,,Schaal,,St. Anthony Parish,9905 SW McKenzie St,Tigard,OR,,97223.0,,,,,,,,,,503-639-4179 x125,,,,nschaal@satigard.org,,,,,,,,0,,Yes,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
1772,Mr. Mike Douglass,douglass mike,Staff,,,,,,,0,Mr.,Mr.,Mike,,,Douglass,,St. James Parish,1145 NE 1st St,McMinnville,OR,,97128.0,,,,,,,,,,503-472-5232 x233,,,,mdouglass@stjamesmac.com,,mfdouglass@gmail.com (preferred),,,,,,0,,Yes,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
2433,"Sr. Rosemary Antonich, SNJM",antonich rosemary,Religious,,Deceased,,,,,0,Sr.,Sr.,Rosemary,,,Antonich,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021-03-21,,,,,,,,0,,,,,44,,,,,
3131,Mr. Daniel Plesha,plesha daniel,Staff,,,,,,,0,Mr.,Mr.,Daniel,,,Plesha,,Ascension Parish,743 SE 76th Ave,Portland,OR,,97215.0,,,,,,,,,,503-256-3897,,,,dplesha@ascensionpdx.org,,,,,,,,0,,Yes,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
1833,Mrs. Meaghen Igloria,igloria meaghen,Staff,,,,,,,0,Mrs.,Mrs.,Meaghen,,,Igloria,,Holy Redeemer Parish,25 N Rosa Parks Way,Portland,OR,,97217.0,,,,,,,,,,503-285-4539 x512,,,,migloria@holyredeemerpdx.org,,,,,,,,0,,Yes,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
1883,Mr. Alex Chan,chan alex,Staff,,,,,,,0,Mr.,Mr.,Alex,,,Chan,,St. Mary’s Cathedral Parish,1716 NW Davis St,Portland,OR,,97209.0,,,,,,,,,,503-228-4397,,,,hello@cathedralpdx.org,,,,,,,,0,,Yes,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
139,Mr. Stuart Ragnone,ragnone stuart edward,Archive,,,,,,,0,Mr.,Mr.,Stuart,,Edward,Ragnone,,103 SW Sandlewood Loop,,Gresham,OR,,97030.0,,,,,,,,,,503-891-9535,503-490-8334,,,,,seragnone@comcast.net,,,No,No,,0,,,1961-12-29,Greenville SC,No,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
1767,Ms. Pat Bauer,bauer pat,Staff,,,,,,,0,Ms.,Ms.,Pat,,,Bauer,,St. Augustine Parish,1151 NW Inlet Ave,Lincoln City,OR,,97367.0,,,,,,,,,,503-548-8061,,,,staugustinechurch@lincolncitycoast.com,,,,,,,,0,,Yes,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
703,"Rev. James Laudwein, SJ",laudwein james,"Priest,Religious",Transferred Out,Transferred Out,,,,,0,Rev.,Fr.,James,,,Laudwein,,,,,,,,,,,,,,,,,503-221-2324,,,,,,jlaudwein@jesuits.org,,No,No,No,No,0,,No,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,0,,,,,60,,,,,


#### Get Photos


In [886]:
import os
import pandas as pd

# def list_jpeg_files(directory):
#     data = []
#     for filename in os.listdir(directory):
#         if filename.endswith(".jpeg") or filename.endswith(".jpg"):  # Checking for jpeg files
#             full_path = os.path.join(directory, filename)
#             data.append({'Filename': filename, 'Full Path': full_path})
#     return pd.DataFrame(data)

# # Specify your directory
# directory = '/content/drive/Shareddrives/Clients/ADPDX (Portland)/Data/Clergy DB/sql_backup/archdpdx.info backups/public_html/people/graphics/portraits/large'
# jpeg_files_df = list_jpeg_files(directory)


In [887]:
# # Query for the Library
# library_query = "SELECT Id, Name FROM ContentWorkspace WHERE Name = 'ADPDX Person Profile Photos'"
# library_result = sf.query(library_query)

# # Check if the library exists and get its ID
# if library_result['records']:
#     library_id = library_result['records'][0]['Id']
#     print(f"Library ID: {library_id}")

#     # Query for the Folder within the Library
#     folder_query = f"SELECT Id, Name FROM ContentFolder WHERE ParentContentFolderId = '{library_id}'"
#     folder_result = sf.query(folder_query)

#     # Check if the folder exists and get its ID
#     if folder_result['records']:
#         folder_id = folder_result['records'][0]['Id']
#         print(f"Folder ID: {folder_id}")
#     else:
#         print("Folder 'Large JPEGs' not found in the library.")
# else:
#     print("Library 'ADPDX Person Profile Photos' not found.")

## Analysis

Here we check the various columns and their types, count where values exist, count of unique values, sample data, etc.

DF shape:

- 142 columns
- 3017 rows


In [888]:
# Check the original shape of the imported CSV
print(f"Shape of original data set: {df_contacts.shape}")

# export to csv a list of the contact fields with count, unique, top, freq
contacts_describe = df_contacts.describe(include='all').transpose()
contacts_describe.to_csv(f'/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/analysis/contacts_describe.csv')

df_contacts.describe(include='all').transpose()  #initial analysis of the Contacts table

Shape of original data set: (3142, 140)


Unnamed: 0,count,unique,top,freq
Common_Name,3142,3137,Mr. Joseph Nguyen,2
Sort_Name,3142,3135,nguyen anthony,3
Type(s),3142,31,Staff,1204
Clergy_Status,1164,9,Transferred Out,477
Religious_Status,915,4,Active,452
...,...,...,...,...
Place_of_Work,288,146,Mount Angel Abbey,37
Volunteer_Place,56,49,Mary’s Woods,4
Type_of_Work,296,120,Pastoral Ministry,31
Work_Load,284,2,Full Time,249


In [889]:
#How many spousal links are there? 

spouses_count = df_contacts[(df_contacts['Spouse'].notna()) & (df_contacts['Spouse'] != '0')].shape[0]
spouses_count

238

In [890]:
unique_reg_parishes = df_contacts['Registered_Parish'].value_counts()
unique_reg_parishes


0      3068
11        6
62        5
65        4
46        4
100       3
9         3
53        2
139       2
42        2
83        2
3         2
55        2
99        2
109       2
26        1
40        1
23        1
120       1
14        1
73        1
70        1
103       1
29        1
91        1
145       1
142       1
68        1
93        1
54        1
44        1
7         1
63        1
122       1
108       1
30        1
125       1
16        1
33        1
124       1
84        1
147       1
32        1
52        1
1         1
126       1
135       1
61        1
Name: Registered_Parish, dtype: int64

In [891]:
unique_languages = df_contacts['Languages'].unique()
unique_languages

array([nan, 'Spanish', 'English,Spanish', 'Igbo', 'English, Spanish',
       'Spanish, Mayaqeqchi', 'Spanish (Mass only)',
       'Latin Mass and written translation. Read French, Italian, Spanish.',
       'Hindi, Konkani, Tamil',
       'French (fluent), Spanish (beginner), Latin (beginner)',
       'German, Spanish, Italian, French', 'Kiswahili, Kichagga',
       'Spanish (English is second language)',
       'German, Spanish, Italian, Latin Mass',
       'English, Spanish, Italian', 'Spanish, Italian', 'English',
       'Bicolango, Tagalog, Spanish', 'Spanish, Italian, Latin Mass',
       'Italian', 'Tagalog, English, Spanish',
       'French, Italian, Aramaic (modern), Spanish', 'Vietnamese',
       'German, Spanish', 'English,Spanish,Italian',
       'Conversant in Italian and Spanish, some facility with Latin and German',
       'English, Spanish, Latin Mass', 'Italian, Spanish',
       'Konkani, Hindi, Marathi, Spanish',
       'Tagalog, Bicol, Spanish (Mass only)', 'Spanish, E

In [892]:
# import re
# import numpy as np


# def deduplicate_languages(list_languages):
#     # Define a regular expression pattern to match periods and punctuation
#     punctuation_pattern = r'[.,!?;:"]'

#     # Flatten the array and filter out NaN values
#     flattened_languages = [re.sub(punctuation_pattern, '', lang) for sublist in list_languages if pd.notna(sublist) for lang in sublist.split(',')]

#     # Deduplicate the list of languages
#     unique_languages = list(set(flattened_languages))

#     return unique_languages


# # Example usage:
# unique_languages = deduplicate_languages(unique_languages)
# print(unique_languages)


## Transform


In [893]:
# list of columns NOT to be migrated as Contact attributes
misc_columns_to_drop = [
    'Password',
    'Password_Must_be_Changed',
    'Sort_Name'
]

affiliation_columns = [
    'Baptism_Date',
    'Place_of_Baptism',
    'Confirmation_Date',
    'Place_of_Confirmation',
    'Received_Date',
    'Parish_of_Record',
    'Marriage_Date',
    'Place_of_Marriage',
    'Date_of_First_Vows',
    'Date_of_Final_Vows',
    'Reader_Date',
    'Acolyte_Date',
    'Bachelor_Degree_Year',
    'Bachelor_Degree_Type',
    'Bachelor_Degree_Institution',
    'Graduate_1_Degree_Institution',
    'Graduate_1_Degree_Type',
    'Graduate_1_Degree_Year',
    'Graduate_2_Degree_Institution',
    'Graduate_2_Degree_Type',
    'Graduate_2_Degree_Year',
    'Graduate_3_Degree_Institution',
    'Graduate_3_Degree_Type',
    'Graduate_3_Degree_Year',
    'Graduate_4_Degree_Institution',
    'Graduate_4_Degree_Type',
    'Graduate_4_Degree_Year',
    'Diaconal_Ordination_Date',
    'Diaconal_Ordination_Place',
    'Diaconal_Ordination_Prelate',
    'Presbyteral_Ordination_Date',
    'Presbyteral_Ordination_Place',
    'Presbyteral_Ordination_Prelate',
    'Episcopal_Ordination_Date',
    'Episcopal_Ordination_Place',
    'Episcopal_Ordination_Prelate',
    'Incardinated_From_Date',
    'Incardinated_From_Diocese',
    'Excardinated_To_Diocese',
    'Excardinated_To_Date',
    'Faculties',
    'Faculties_Granted_Date',
    'Faculties_Restricted_Date',
    'Faculties_Withdrawn_Date',
]

# These fields need to be KEPT but while building the SF upsert flow these are dropped temporarily until mapping logic is included.
# TODO

fields_not_yet_mapped = [
    'Common_Name', # is a concat, does not need to be mapped
    'Mailing_Address_Province', # non-impactful, can leave as dropped
    'Private_Address_Province', # non-impactful, can leave as dropped
    #'Social_Security_Account_Number__c',  # The data is encrypted
    # 'Serving_Now', # TBD - not sure if this is a field that needs to be migrated
    'Ordination_Diocese', # TBD... isn't this covered by Reg. Entry records? 
    'Registered_Parish' # decided will not migration on 2/21/2025

]

In [894]:
# UDF to combine multiple Mailing Street Address lines into one
def combine_addresses(row, *columns):
    address_parts = []
    for col in columns:
        value = row[col]
        if pd.notnull(value):  # Check for non-null values
            address_parts.append(str(value))  # Convert to string
    return '\n'.join(address_parts)  # '\n' for line break

In [895]:
df_contact_staging = (df_contacts
                      .drop(columns='Salutation')
                      .rename(columns={
                          'Clergy_Status' : 'ADPDX_Clergy_Status__c',
                          'Religious_Status' : 'ADPDX_Religious_Status__c',
                          'Login_ID' : 'ADPDX_Login_ID__c',
                          'Access_Permission': 'ADPDX_Access_Permission__c',
                          'Title': 'Salutation',
                          'Christian_Name': 'FirstName',
                          'Middle_Name(s)': 'MiddleName',
                          'Surname': 'LastName',
                          'Suffix': 'Suffix',
                          'Preferred_Address': 'Preferred_Address__c',
                          'Mailing_Address_City': 'MailingCity',
                          'Mailing_Address_State': 'MailingState',
                          'Mailing_Address_Postal_Code': 'MailingPostalCode',
                          'Mailing_Address_Country': 'MailingCountry',
                          'Private_Address_City': 'OtherCity',
                          'Private_Address_State': 'OtherState',
                          'Private_Address_Postal_Code': 'OtherPostalCode',
                          'Private_Address_Country': 'OtherCountry',
                          'Work_Phone': 'npe01__WorkPhone__c',
                          'Home_Phone': 'HomePhone',
                          'Cell_Phone': 'MobilePhone',
                          'Preferred_Phone': 'npe01__PreferredPhone__c',
                          # IF Preferred phone contains, 'do not publish'
                          'Work_Email' : 'npe01__WorkEmail__c',
                          'Archdiocesan_Email': 'npe01__AlternateEmail__c',
                          'Home_Email': 'npe01__HomeEmail__c',
                          'Preferred_Email': 'npe01__Preferred_Email__c',
                          # IF Preferred email contains 'do not publish''
                          'Directory_Include': 'Directory_Include__c',
                          'Directory_Include_Middle_Name': 'Directory_Include_Middle_Name__c',
                          'Directory_Include_Suffix': 'Directory_Include_Suffix__c',
                          'Suppress_From_Reports': 'Suppress_From_Reports__c',
                          'Send_Group_Mail_and_Email': 'Send_Group_Mail_and_Email__c',
                          'Birth_Date': 'Birthdate',
                          'Place_of_Birth': 'mbfc__Place_of_Birth__c',
                          'Foreign_Born': 'Foreign_Born__c',
                          'Foreign_Citizenship': 'Foreign_Citizenship__c',
                          'Immigration_Status': 'Immigration_Status__c',
                          'Passport/Visa_Expiration_Date': 'Passport_Visa_Expiration_Date__c',
                          'Social_Security_Account_Number': 'Social_Security_Number__c',
                          'Deceased_Date': 'mbfc__Date_of_Death__c',
                          'Out_of_Diocese_Date': 'mbfc__Date_Left_Diocese__c', 
                          'CARA_Ethnicity': 'adpdx_CARA_Ethnicity__c',
                          'Seminarian_Status': 'adpdx_Seminarian_Status__c',
                          'Other_Diaconal_Ministry': 'adpdx_Other_Diaconal_Ministry__c',
                          'Spiritual_Director_Authorized': 'adpdx_Spiritual_Director_Authorized__c',
                          'Place_of_Work': 'adpdx_Place_of_Work__c',
                          'Volunteer_Place': 'adpdx_Volunteer_Place__c',
                          'Type_of_Work': 'adpdx_Type_of_Work__c',
                          'Work_Load': 'adpdx_Work_Load__c',
                          'Work_Title': 'adpdx_Work_Title__c',
                          'Coverage_Availability': 'adpdx_Coverage_Availability__c', 
                          'Advanced_Directive_Date': 'adpdx_Advanced_Directive_Date__c',
                          'End_of_Life_Plan_Date': 'adpdx_End_of_Life_Plan_Date__c',
                          'Will_Date': 'adpdx_Will_Date__c',
                          'Will_Note': 'adpdx_Will_Note__c',
                          'CIC_489_File': 'adpdx_CIC_489_File__c',
                          'Senior_Status_Date': 'adpdx_Senior_Status_Date__c', 
                          'Laicized_Date': 'adpdx_Laicized_Date__c',
                          'Seminarian_Student_Debt': 'adpdx_Seminarian_Student_Debt__c',
                          'Seminarian_Medical_Benefits': 'adpdx_Seminarian_Medical_Benefits__c',
                          'Candidacy_Date': 'adpdx_Candidacy_Date__c',
                          'Accepted_to_Formation_Date': 'adpdx_Accepted_to_Formation_Date__c',
                          'Formation_Withdrawn_Date': 'adpdx_Formation_Withdrawn_Date__c',
                          'Formation_Deferred_Date': 'adpdx_Formation_Deferred_Date__c',
                          'Formation_Terminated_Date': 'adpdx_Formation_Terminated_Date__c',
                          'Terminate_or_Defer_Note': 'adpdx_Terminate_or_Defer_Note__c',
                          'CARA_Highest_Ed_Level': 'adpdx_CARA_Highest_Ed_Level__c',
                          'Letter_of_Good_Standing_Date': 'adpdx_Letter_of_Good_Standing__c',
                          'Religious_In_Archdiocese_Date': 'mbfc__Date_of_Arrival_in_Diocese__c',
                          'Last_Retreat_Date': 'adpdx_Last_Retreat_Date__c',
                          'Last_Educ_Requirement_Date': 'adpdx_Last_Educ_Requirement_Date__c',
                          'Policy_Manual_Acknowledgement_Date': 'adpdx_Policy_Manual_Acknowledgement_Date__c',
                          'Harassment_Prevention_Course_Date': 'adpdx_Harassment_Prevention_Course_Date__c',
                          'Standards_of_Conduct_Date': 'adpdx_Standards_of_Conduct_Date__c',
                          'Last_Background_Check_Date': 'adpdx_Last_Background_Check_Date__c',
                          'Last_Child_Protection_Training_Date': 'adpdx_Last_Child_Protection_Training__c',
                          'Languages': 'Languages__c',
                          'Nickname': 'adpdx_Preferred_Name__c',
                          'Father_Full_Name': 'Father_Full_Name__c',
                          'Mother_Full_Maiden_Name': 'Mother_Full_Maiden_Name__c'

                          })
                      .assign(Bi_Ritual__c=lambda x: x['Type(s)'].str.contains('Biritual'))
                      .assign(Non_Latin_Rite__c=lambda x: x['Type(s)'].str.contains('Non-Latin Rite'))
                      .assign(adpdx_Discerner_Aspirant_for_Diaconate__c=lambda x: x['Type(s)'].str.contains('Diaconate'))
                      .assign(adpdx_Is_Seminarian__c=lambda x: x['Type(s)'].str.contains('Seminar'))
                      
                      .assign(Archdpdx_Migration_Id__c=lambda x: x.index)
                      .assign(MailingStreet=lambda x: x.apply(lambda row: combine_addresses(row, 'Mailing_Address', 'Mailing_Address_2'), axis=1))
                      .drop(columns=['Mailing_Address', 'Mailing_Address_2'])  # Optional: Drop original columns if not needed
                      .assign(OtherStreet=lambda x: x.apply(lambda row: combine_addresses(row, 'Private_Address', 'Private_Address_2'), axis=1))
                      .drop(columns=['Private_Address', 'Private_Address_2'])  # Optional: Drop original columns if not needed
                      .drop(columns=misc_columns_to_drop)
                      .drop(columns=affiliation_columns)
                      .drop(columns=fields_not_yet_mapped)

        )


In [896]:
# For use in the 'Contacts > Spouses' section
df_contact_staging_spouses = df_contact_staging.copy(deep=True)


In [897]:
df_contact_staging_spouses['Spouse']

Record Number
3178    0
2766    0
2337    0
3244    0
3295    0
       ..
1670    0
2755    0
1962    0
2202    0
1866    0
Name: Spouse, Length: 3142, dtype: object

In [898]:
df_contact_staging.drop(columns=['Spouse'], inplace=True)

In [899]:
df_contact_staging.columns

Index(['Type(s)', 'ADPDX_Clergy_Status__c', 'ADPDX_Religious_Status__c', 'ADPDX_Login_ID__c', 'ADPDX_Access_Permission__c', 'Salutation', 'FirstName', 'adpdx_Preferred_Name__c', 'MiddleName', 'LastName', 'Suffix', 'MailingCity', 'MailingState', 'MailingPostalCode', 'MailingCountry', 'OtherCity', 'OtherState', 'OtherPostalCode', 'OtherCountry', 'Preferred_Address__c', 'npe01__WorkPhone__c', 'HomePhone', 'MobilePhone', 'npe01__PreferredPhone__c', 'npe01__WorkEmail__c', 'npe01__AlternateEmail__c', 'npe01__HomeEmail__c', 'npe01__Preferred_Email__c', 'Directory_Include__c', 'Directory_Include_Middle_Name__c', 'Directory_Include_Suffix__c', 'Suppress_From_Reports__c', 'adpdx_Seminarian_Student_Debt__c', 'adpdx_Seminarian_Medical_Benefits__c', 'Send_Group_Mail_and_Email__c', 'Birthdate', 'mbfc__Place_of_Birth__c', 'Foreign_Born__c', 'Father_Full_Name__c', 'Mother_Full_Maiden_Name__c', 'Foreign_Citizenship__c', 'Immigration_Status__c', 'Passport_Visa_Expiration_Date__c',
       'Social_Securit

In [900]:
df_contact_staging.MailingStreet.sample(10)

Record Number
836                      St. Anne Parish\n1131 NE 10th St
1230                                                     
1349                                                     
1119                                                     
1975            Resurrection Parish\n21060 SW Stafford Rd
1984                       St. Anthony Parish\nPO Box 770
886      St. Michael the Archangel Parish\n424 SW Mill St
2496                                      2134 NE 78th St
3311    Regis St. Mary Catholic High School\n550 W Reg...
1308                                                     
Name: MailingStreet, dtype: object

### Gender

In [901]:
# Function to determine gender based on 'Type(s)' field
def determine_gender(row):
    if any(x in row['Type(s)'] for x in ['Bishop', 'Priest', 'Transitional Deacon', 'Permanent Deacon', 'Diaconate Formation', 'Diaconate Inquirer', 'Seminarian', 'Seminary Applicant']):
        return 'Male'
    elif 'Religious' in row['Type(s)'] and 'Br.' in row['Salutation']:
        return 'Male'
    elif 'Wife' in row['Type(s)'] or ('Religious' in row['Type(s)'] and 'Sr.' in row['Salutation']):
        return 'Female'
    elif any(x in row['Type(s)'] for x in ['Staff', 'Archive']):
        if row['Salutation'] == 'Ms.':
            return 'Female'
        elif row['Salutation'] == 'Mr.':
            return 'Male'
    else:
        return ''

In [902]:
# Apply the function to the DataFrame to create the new column
df_contact_staging['mbfc__gender__c'] = df_contact_staging.apply(determine_gender, axis=1)


In [903]:
df_contact_staging['mbfc__gender__c'].value_counts()

Male      1789
Female    1314
             4
Name: mbfc__gender__c, dtype: int64

### SSNs



In [904]:
# Import the CSV file with the first column as an integer
ssn_data2 = pd.read_csv(
    '/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/decodeSSNs.csv'
)

ssn_data2.set_index("Person recNum", inplace=True)

ssn_data2.drop(columns=['Name', 'ssan Field'], inplace=True)

ssn_data2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 278 entries, 601 to 787
Data columns (total 1 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   SSAN decrypted  278 non-null    object
dtypes: object(1)
memory usage: 4.3+ KB


In [905]:
ssn_data2.index = ssn_data2.index.astype(df_contact_staging.index.dtype)

print(df_contact_staging.index.dtype)
print(ssn_data2.index.dtype)

object
object


In [906]:
# Wrap the index values of ssn_data2 in double quotes
ssn_data2.index = ssn_data2.index.map(lambda x: f'{x}')

In [907]:
# Merge the data frames based on their indexes
df_contact_staging = df_contact_staging.merge(
    ssn_data2,
    how='left',
    left_index=True,
    right_index=True
)


In [908]:
df_contact_staging

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c,SSAN decrypted
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1
3178,Staff,,,,,Ms.,Cenolina,,,Abarca,,Portland,OR,97209,,,,,,,503-222-4086 x103,,,,cenolina.hernandez@stpatrickpdx.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3178,St. Patrick Parish\n1623 NW 19th Ave,,Female,
2766,Priest,Transferred Out,,,,Rev.,Stephen,,Ozovehe,Abaukaka,,Tualatin,OR,97062,,Portland,OR,97202,,Mailing,503-430-7699,,773-733-3772,Work,,,abstoz@yahoo.com,,Yes,,,No,0,,No,1967-06-07,,,,,,,,,,,,,,,,"Diocese of Lokoja, Nigeria",,,,,,2022-05-30,2021-11-03,2021-11-04,2022-11-24,2023-01-16,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2766,Brighton Hospice Office\n8050 SW Warm Springs ...,5802 SW Milwaukie Ave Apt 4,Male,
2337,Staff,,,,,Mr.,Rogelio,,,Acevedo,,Portland,OR,97229,,,,,,,503-644-5264,,,,facilities@stpius.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2337,St. Pius X Parish\n1280 NW Saltzman Rd,,Male,
3244,Staff,,,,,Mr.,Sean,,,Ackroyd,,Corvallis,OR,97330,,,,,,,541-757-1988,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3244,St. Mary Parish\n501 NW 25th St,,Male,
3295,Staff,,,,,Ms.,Sherril,,,Acton,,Eugene,OR,97401,,,,,,,541-686-2234 x1524,,,,sacton@marisths.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3295,Marist Catholic High School\n1900 Kingsley Rd,,Female,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,Staff,,,,,Ms.,Jenny,,,Zomerdyk,,Central Point,OR,97502,,,,,,,541-664-1050,,,,churchoffice@shepherdcatholic.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1670,Shepherd of the Valley Parish\n600 Beebe Rd,,Female,
2755,Religious,,Active,dzorrilla,,Br.,Daniel,,,Zorrilla,,Saint Benedict,OR,97373,,,,,,,503-845-1181,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,2021-08-01,,,,,,2019-06-28,2021-10-10,,,,,,,,,,,,,,,,14,,,,,,False,False,False,False,2755,Félix Rougier House of Studies\nPO Box 499,,Male,
1962,Staff,,,,,Ms.,Kim,,,Zuber,,Sublimity,OR,97385,,,,,,,503-769-5664,,,,boniface@wvi.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1962,St. Boniface Parish\n375 SE Church St,,Female,
2202,Staff,,,,,Ms.,Agnes,,,Zueger,,Lake Oswego,OR,97034,,,,,,,503-636-7687,,,,agnesz@ollparish.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2202,Our Lady of the Lake Parish\n650 A Ave,,Female,


In [909]:

# Replace the Social Security Number in df_contact_staging
df_contact_staging.drop
df_contact_staging['Social_Security_Number__c'] = df_contact_staging['SSAN decrypted']

# # Drop the temporary columns if necessary
df_contact_staging.drop(columns=['SSAN decrypted'], inplace=True)

df_contact_staging

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1
3178,Staff,,,,,Ms.,Cenolina,,,Abarca,,Portland,OR,97209,,,,,,,503-222-4086 x103,,,,cenolina.hernandez@stpatrickpdx.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3178,St. Patrick Parish\n1623 NW 19th Ave,,Female
2766,Priest,Transferred Out,,,,Rev.,Stephen,,Ozovehe,Abaukaka,,Tualatin,OR,97062,,Portland,OR,97202,,Mailing,503-430-7699,,773-733-3772,Work,,,abstoz@yahoo.com,,Yes,,,No,0,,No,1967-06-07,,,,,,,,,,,,,,,,"Diocese of Lokoja, Nigeria",,,,,,2022-05-30,2021-11-03,2021-11-04,2022-11-24,2023-01-16,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2766,Brighton Hospice Office\n8050 SW Warm Springs ...,5802 SW Milwaukie Ave Apt 4,Male
2337,Staff,,,,,Mr.,Rogelio,,,Acevedo,,Portland,OR,97229,,,,,,,503-644-5264,,,,facilities@stpius.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2337,St. Pius X Parish\n1280 NW Saltzman Rd,,Male
3244,Staff,,,,,Mr.,Sean,,,Ackroyd,,Corvallis,OR,97330,,,,,,,541-757-1988,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3244,St. Mary Parish\n501 NW 25th St,,Male
3295,Staff,,,,,Ms.,Sherril,,,Acton,,Eugene,OR,97401,,,,,,,541-686-2234 x1524,,,,sacton@marisths.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3295,Marist Catholic High School\n1900 Kingsley Rd,,Female
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,Staff,,,,,Ms.,Jenny,,,Zomerdyk,,Central Point,OR,97502,,,,,,,541-664-1050,,,,churchoffice@shepherdcatholic.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1670,Shepherd of the Valley Parish\n600 Beebe Rd,,Female
2755,Religious,,Active,dzorrilla,,Br.,Daniel,,,Zorrilla,,Saint Benedict,OR,97373,,,,,,,503-845-1181,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,2021-08-01,,,,,,2019-06-28,2021-10-10,,,,,,,,,,,,,,,,14,,,,,,False,False,False,False,2755,Félix Rougier House of Studies\nPO Box 499,,Male
1962,Staff,,,,,Ms.,Kim,,,Zuber,,Sublimity,OR,97385,,,,,,,503-769-5664,,,,boniface@wvi.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1962,St. Boniface Parish\n375 SE Church St,,Female
2202,Staff,,,,,Ms.,Agnes,,,Zueger,,Lake Oswego,OR,97034,,,,,,,503-636-7687,,,,agnesz@ollparish.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2202,Our Lady of the Lake Parish\n650 A Ave,,Female


In [910]:
filtered_df = df_contact_staging[df_contact_staging['Social_Security_Number__c'].notna() & (df_contact_staging['Social_Security_Number__c'] != '')]
filtered_df

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1
601,Priest,Faculties Withdrawn,,,,Rev.,Daniel,,Edmund,Adams,,,,,,Alameda,CA,94501,,,,,,,,,,,No,No,No,Yes,0,,No,1954-10-24,"Portland, OR",No,John T. Adams,Harriet C. Kelly,,,,544-70-6289,,,,,,,,Archdiocese of Portland in Oregon,,,,,,,,,,2016-08-10,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,601,,2150 Clinton Ave Apt C,Male
1592,Archive,,,,,Mr.,Jesus,,Daniel,Aguirre-Alvarez,,St. Benedict,OR,97373,,Eugene,OR,97402,,,,,541-556-4888,,,,jesus.aguirrealvarez.5@gmail.com,,,,,,0,,,1999-06-22,"Eugene, OR",No,Analcleto Aguirre Soberanis,Franciela Alvarez Cardoso,,,2019-01-21,543-55-4316,,,,2020-12-31,,,,,,,,,,,2019-10-19,2019-05-20,2020-10-11,,,,,"English,Spanish",,,,,,,,College II,,,0,,,,,,False,False,False,False,1592,Mount Angel Seminary\nOne Abbey Dr,555 N Danebo Ave Spc 132,Male
557,Priest,Active,,aahamefule,,Rev.,Anthony,,Chijioke,Ahamefule,,Bandon,OR,97411,,,,,,,541-329-0697,,443-500-3061,Work,,aahamefule@archdpdx.org,,,Yes,No,No,No,0,,Yes,1984-08-23,"Amucha, Imo State, Nigeria",Yes,Francis Ahamefule,Agatha Adaocha Ahamefule,Nigeria,,2021-03-11,665-36-1307,,2013-10-24,,,,,,Archdiocese of Portland in Oregon,,,,,,2023-05-05,2022-03-11,2022-08-02,2025-02-07,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,557,Holy Trinity Parish\n355 Oregon Ave SE,,Male
672,Priest,Active,,jalaeto,,Rev.,Justus,,Odira,Alaeto,,Brookings,OR,97415,,,,,,,541-469-2313,,503-868-8266,,pastor@sosstc.org,jalaeto@archdpdx.org,,Work,Yes,No,No,No,0,,Yes,1976-05-04,"Orlu, Nigeria",Yes,Daniel Uzoma Alaeto,Cecilia Ereonu Anah,Nigeria,Permanent Resident,,543-71-8505,,2009-10-22,,,,,,Archdiocese of Portland in Oregon,,,,,,2023-02-15,2015-11-10,2022-12-26,2024-12-15,,,,,Igbo,,,,,,,,,,,0,,,,,,False,False,False,False,672,Star of the Sea Parish\n820 Old Country Rd,,Male
577,Priest,Senior Status,,ballbright,,Rev.,Brian,,,Allbright,,Wilsonville,OR,97070,,,,,,,,,503-853-6093,,,ballbright@archdpdx.org,,,Yes,No,No,No,0,,Yes,1953-01-11,USA,No,Jake Elias Allbright,Evelyn Martha Dehler,,,,543-66-1209,,,,,,,,Archdiocese of Portland in Oregon,,,,,,2022-04-14,2015-07-30,2024-01-09,2024-09-25,,2023-07-01,,,"Spanish, Mayaqeqchi",,,2021-05-13,,,,,,,,0,,,,,,False,False,False,False,577,11715 SW Valencia Lane\n#306,,Male
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,Priest,Senior Status,,tyurchak,,Rev.,Thomas,,,Yurchak,,Beaverton,OR,97078,,,,,,Mailing,,541-514-3007,,,,tyurchak@archdpdx.org,tdjyurchak@outlook.com,Do Not Include,Yes,No,No,No,0,,Yes,1949-07-07,USA,No,,,,,,540-60-9916,,,,,,,,Archdiocese of Portland in Oregon,,,,,,2022-05-19,2016-11-17,2022-07-20,2024-11-22,,2019-06-30,,,German,,,,,,,,,,,0,,,,,,False,False,False,False,862,4655 SW St John Vianney Way\nApt A,,Male
600,Priest,Active,,czach,,Rev.,Charles,,,Zach,,Lake Oswego,OR,97034,,,,,,,503-636-7687,,503-730-5772,,frcharlesz@ollparish.com,czach@archdpdx.org,,,Yes,No,No,No,0,,Yes,1946-09-17,USA,No,,,,,,505-58-5881,,,,,,,,Archdiocese of Portland in Oregon,,,,,,2022-04-19,2015-11-24,2022-11-22,2024-11-22,,,,,Spanish,,,,,,,,,,,0,,,,,,False,False,False,False,600,Our Lady of the Lake Parish\n650 A Ave,,Male
623,Priest,Active,,dzegar,,Rev.,David,,,Zegar,,Portland,OR,97211,,,,,,,503-281-4429 x1,,503-951-3695,,,,dzegar@standrewchurch.com,,Yes,No,No,No,0,,Yes,1953-01-02,USA,No,,,,,,540-60-2518,,,,,,,,Archdiocese of Portland in Oregon,,,,,,2022-05-24,2016-05-25,2023-03-15,2024-12-19,,,,,Spanish,,,,,,,,,,,0,,,,,,False,False,False,False,623,St. Andrew Parish\n806 NE Alberta St,,Male
661,Priest,Active,,gzerr,,Rev.,Gary,,,Zerr,,Keizer,OR,97303,,,,,,,503-393-5323 x301,,971-218-5239,Work,frgary@sainteds.com,,fathergary@me.com,Work,Yes,No,No,No,0,,Yes,1956-09-05,USA,No,,,,,,546-19-2604,,,,,,,,Archdiocese of Portland in Oregon,,,,,,2022-05-04,2015-12-03,2022-11-28,2024-07-02,,,,,Spanish (Mass only),,,2019-09-01,2019-09-23,,,,,,,0,,,,,,False,False,False,False,661,St. Edward Parish\n5303 River Rd N,,Male


### Private Address Handling


In [911]:
# If 'OtherStreet' is not null, then set Secondary Address Type to 'Private'.  This is because the 'OtherAddress' fields all come from the 'Private' address fields in source system. 
df_contact_staging['npe01__Secondary_Address_Type__c'] = df_contact_staging['OtherStreet'].apply(lambda x: 'Private' if pd.notnull(x) else None)


### Handle Boolean Fields


In [912]:
boolean_columns_to_convert = ['Foreign_Born__c', 'Directory_Include__c', 'Directory_Include_Middle_Name__c', 'Directory_Include_Suffix__c',
       'Suppress_From_Reports__c', 'Send_Group_Mail_and_Email__c', ]

df_contact_staging[boolean_columns_to_convert] = df_contact_staging[boolean_columns_to_convert].replace({'Yes': True, 'No': False})


In [913]:
df_contact_staging[boolean_columns_to_convert] = df_contact_staging[boolean_columns_to_convert].fillna(False)

df_contact_staging[boolean_columns_to_convert].sample(5)

Unnamed: 0_level_0,Foreign_Born__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,Send_Group_Mail_and_Email__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
269,True,True,False,False,False,True
1765,False,False,False,False,False,True
1795,False,False,False,False,False,True
2514,False,False,False,False,False,False
2023,False,True,False,False,False,True


### Set Contact Record Type


In [914]:
# Set Record Type

# Go down row by row and check the 'Type(s)' columns, check for certain words that are keys in a dictionary, and
# the that row's 'Type(s)' field contains a string that is in the a key in a dictionary the update another columns
# called 'ContactRecordType' with the paired value.

contact_type_map = {
    'Bishop': 'Priest',
    'Priest': 'Priest',
    'Transitional Deacon': 'Permanent_Deacon',
    'Permanent Deacon': 'Permanent_Deacon',
    'Seminarian': 'Lay_Person',
    'Diaconate Formation': 'Lay_Person',
    'Seminary Applicant': 'Lay_Person',
    'Diaconate Inquirer': 'Lay_Person',
    'Wife': 'Lay_Person',
    'Religious': 'Religious',
    'Staff': 'Lay_Person',
    'Seminary Applicant': 'Lay_Person',
    'Archive': 'Lay_Person'
}

def update_contact_record_type(row):
    for key, value in contact_type_map.items():
        if key in row['Type(s)']:
            return value
    return None

df_contact_staging['ContactRecordType'] = df_contact_staging.apply(update_contact_record_type, axis=1)

In [915]:
# Set Candidate_Type__c field

candidate_types = ['Seminary Applicant', 'Diaconate Inquirer', 'Diaconate Formation', 'Seminarian']

# Function to update the 'candidate_type__c' column based on 'Type(s)' field
def update_candidate_type(row):
    if row['Type(s)'] in candidate_types:
        return row['Type(s)']
    return ''

# Apply the function to the DataFrame to create the new column
df_contact_staging['candidate_type__c'] = df_contact_staging.apply(update_candidate_type, axis=1)


In [916]:
# Map in the RecordTypeIDs
df_contact_staging['RecordTypeID'] = df_contact_staging['ContactRecordType'].map(record_types_mapping)

### Ecclesial Status & Ministerial Status


In [917]:
df_contact_staging

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c,npe01__Secondary_Address_Type__c,ContactRecordType,candidate_type__c,RecordTypeID
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1
3178,Staff,,,,,Ms.,Cenolina,,,Abarca,,Portland,OR,97209,,,,,,,503-222-4086 x103,,,,cenolina.hernandez@stpatrickpdx.org,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3178,St. Patrick Parish\n1623 NW 19th Ave,,Female,Private,Lay_Person,,012Ps000002fU3DIAU
2766,Priest,Transferred Out,,,,Rev.,Stephen,,Ozovehe,Abaukaka,,Tualatin,OR,97062,,Portland,OR,97202,,Mailing,503-430-7699,,773-733-3772,Work,,,abstoz@yahoo.com,,True,False,False,False,0,,False,1967-06-07,,False,,,,,,,,,,,,,,"Diocese of Lokoja, Nigeria",,,,,,2022-05-30,2021-11-03,2021-11-04,2022-11-24,2023-01-16,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2766,Brighton Hospice Office\n8050 SW Warm Springs ...,5802 SW Milwaukie Ave Apt 4,Male,Private,Priest,,012Ps000002fU3FIAU
2337,Staff,,,,,Mr.,Rogelio,,,Acevedo,,Portland,OR,97229,,,,,,,503-644-5264,,,,facilities@stpius.org,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2337,St. Pius X Parish\n1280 NW Saltzman Rd,,Male,Private,Lay_Person,,012Ps000002fU3DIAU
3244,Staff,,,,,Mr.,Sean,,,Ackroyd,,Corvallis,OR,97330,,,,,,,541-757-1988,,,,,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3244,St. Mary Parish\n501 NW 25th St,,Male,Private,Lay_Person,,012Ps000002fU3DIAU
3295,Staff,,,,,Ms.,Sherril,,,Acton,,Eugene,OR,97401,,,,,,,541-686-2234 x1524,,,,sacton@marisths.org,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3295,Marist Catholic High School\n1900 Kingsley Rd,,Female,Private,Lay_Person,,012Ps000002fU3DIAU
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,Staff,,,,,Ms.,Jenny,,,Zomerdyk,,Central Point,OR,97502,,,,,,,541-664-1050,,,,churchoffice@shepherdcatholic.com,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1670,Shepherd of the Valley Parish\n600 Beebe Rd,,Female,Private,Lay_Person,,012Ps000002fU3DIAU
2755,Religious,,Active,dzorrilla,,Br.,Daniel,,,Zorrilla,,Saint Benedict,OR,97373,,,,,,,503-845-1181,,,,,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,2021-08-01,,,,,,2019-06-28,2021-10-10,,,,,,,,,,,,,,,,14,,,,,,False,False,False,False,2755,Félix Rougier House of Studies\nPO Box 499,,Male,Private,Religious,,012Ps000002fU3GIAU
1962,Staff,,,,,Ms.,Kim,,,Zuber,,Sublimity,OR,97385,,,,,,,503-769-5664,,,,boniface@wvi.com,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1962,St. Boniface Parish\n375 SE Church St,,Female,Private,Lay_Person,,012Ps000002fU3DIAU
2202,Staff,,,,,Ms.,Agnes,,,Zueger,,Lake Oswego,OR,97034,,,,,,,503-636-7687,,,,agnesz@ollparish.com,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2202,Our Lady of the Lake Parish\n650 A Ave,,Female,Private,Lay_Person,,012Ps000002fU3DIAU


In [918]:
def determine_ecclesial_status(df):
    def ecclesial_status(row):
        if pd.notna(row['ADPDX_Clergy_Status__c']) and 'Laicized' in row['ADPDX_Clergy_Status__c']:
            return 'Laicized'
        # elif pd.notna(row['ADPDX_Clergy_Status__c']) and 'Faculties Withdrawn' in row['ADPDX_Clergy_Status__c']:
        #     return 'Faculties Withdrawn'
        elif pd.notna(row['Type(s)']) and 'Bishop' in row['Type(s)']:
            return 'Bishop/Archbishop'
        elif pd.notna(row['Type(s)']) and 'Priest,Religious' in row['Type(s)']:
            return 'Priest - Religious'
        elif pd.notna(row['Type(s)']) and 'Priest' in row['Type(s)'] and (not pd.isna(row['Foreign_Citizenship__c']) or row['Incardinated_Now'] != 'Archdiocese of Portland in Oregon'):
            return 'Priest - Extern'
        elif pd.notna(row['Type(s)']) and 'Priest' in row['Type(s)'] and (pd.isna(row['Foreign_Citizenship__c']) and row['Incardinated_Now'] == 'Archdiocese of Portland in Oregon'):
            return 'Priest - Diocesan'
        elif pd.notna(row['Type(s)']) and 'Transitional Deacon' in row['Type(s)']:
            return 'Transitional Deacon'
        elif pd.notna(row['Type(s)']) and 'Permanent Deacon' in row['Type(s)']:
            return 'Permanent Deacon'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Religious' and row['Salutation'] == 'Br.':
            return 'Professed Male Religious (Non-Priest)'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Religious' and row['Salutation'] == 'Sr.':
            return 'Professed Female Religious'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Religious,Staff' and row['Salutation'] == 'Sr.':
            return 'Professed Female Religious'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Wife':
            return 'Lay Person'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Staff':
            return 'Lay Person'
        elif row['Salutation'] == 'Mr.':
            return 'Lay Person'
        elif row['Salutation'] == 'Ms.':
            return 'Lay Person'
        elif row['Salutation'] == 'Mrs.':
            return 'Lay Person'
        elif row['Salutation'] == 'Rev.':
            return 'Unknown'
        else:
            return None

    df['mbfc__Ecclesial_Status__c'] = df.apply(ecclesial_status, axis=1)
    return df


df_contact_staging = determine_ecclesial_status(df_contact_staging)

In [919]:
df_contact_staging.sample(20)

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c,npe01__Secondary_Address_Type__c,ContactRecordType,candidate_type__c,RecordTypeID,mbfc__Ecclesial_Status__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1
1787,Staff,,,,,Ms.,Linda,,,Mainard,,Milwaukie,OR,97222.0,,,,,,,503-659-8835,,,,lmainard@gmail.com,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1787,Christ the King Parish\n7414 SE Michael Dr,,Female,Private,Lay_Person,,012Ps000002fU3DIAU,Lay Person
2780,Wife,,,klivingood,,Mrs.,Kristie,,Ann,Livingood,,,,,,Eugene,OR,97401.0,,,,,,,,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2780,,1932 Lake Isle Dr,Female,Private,Lay_Person,,012Ps000002fU3DIAU,Lay Person
2653,Religious,,Active,,,Sr.,Louis Marie,,,Premazzi,,Beaverton,OR,97003.0,,,,,,,503-649-7127,,,,,,,,True,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,35,Our Lady of Peace Retreat Center,,Various,Full Time,Team,False,False,False,False,2653,Sisters of Our Lady of Sorrows\n3600 SW 170th Ave,,Female,Private,Religious,,012Ps000002fU3GIAU,Professed Female Religious
1193,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,Dan,,,Mai,,,,,,,,,,,,,,,,,,,False,False,False,False,0,,False,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,60,,,,,,False,False,False,False,1193,,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious
2046,Archive,,,,,Mr.,John,,Anthony,Mance,,Lebanon,OR,97355.0,,,,,,,,,541-409-5009,,,,jamance62@gmail.com,,False,False,False,False,0,,False,1962-08-08,"Phoenix, AZ",False,Rudolph Francis Mance,Lynda Marie Tillman,,,,,,,,2021-09-15,,(From type Diaconate Inquirer.) (From type Dia...,,,,,,,,,,2020-12-04,2021-01-15,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2046,276 S 7th St,,Male,Private,Lay_Person,,012Ps000002fU3DIAU,Lay Person
1041,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,William,,,Edens,,,,,,,,,,,,,,,,,,,False,False,False,False,0,,False,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,68,,,,,,False,False,False,False,1041,,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious
1499,Staff,,,,,Ms.,Priscilla,,,Rodriguez,,Portland,OR,97221.0,,,,,,,503-292-6621,,,,prodriguez@ccpdxor.com,prodriguez@archdpdx.org,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1499,Mt. Calvary Cemetery\n333 SW Skyline Blvd,,Female,Private,Lay_Person,,012Ps000002fU3DIAU,Lay Person
3223,Staff,,,,,Mr.,Justin,,,Robertson,,Gresham,OR,97030.0,,,,,,,503-665-9129,,,,,jrobertson@archdpdx.org,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3223,St. Henry Parish\n346 NW 1st St,,Male,Private,Lay_Person,,012Ps000002fU3DIAU,Lay Person
783,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,Nhat,Joseph,Hoang,Dang,,,,,,,,,,,,,,,,,,,False,False,False,False,0,,False,1966-08-05,Vietnam,True,,,,,,,,,,,,,,,,,,,,,,,,2018-03-01,,,,,,,,,,,,,,,21,,,,,,False,False,False,False,783,,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious
2377,Staff,,,,,Mr.,David,,,Johnson,,North Bend,OR,97459.0,,,,,,,541-756-0633,,,,,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2377,Holy Redeemer Parish\n2250 16th St,,Male,Private,Lay_Person,,012Ps000002fU3DIAU,Lay Person


In [920]:
# This function is no longer used due to ADPDX's custom enhancement in which a Flow automatically updates this status. 

def determine_ministerial_status(df):
    def ministerial_status(row):
        if row['ADPDX_Clergy_Status__c'] == 'Deceased':
            return 'Deceased'
        elif row['ADPDX_Clergy_Status__c'] == 'Active':
            return 'Active in Ministry'
        elif row['ADPDX_Clergy_Status__c'] == 'Inactive':
            return 'Inactive'
        elif row['ADPDX_Clergy_Status__c'] == 'Senior Status':
            return 'Senior Status'
        elif row['ADPDX_Clergy_Status__c'] == 'Faculties Withdrawn':
            return 'Faculties Withdrawn'
        elif row['ADPDX_Clergy_Status__c'] == 'Transferred Out':
            return 'Left Diocese'
        elif row['ADPDX_Clergy_Status__c'] == 'Unassigned':
            return 'Unassigned'
        elif row['ADPDX_Clergy_Status__c'] == 'Laicized':
            return 'Laicized'
        else:
            return 'Unknown'
        
    df['mbfc__Ministerial_Status__c'] = df.apply(ministerial_status, axis=1)
    return df

# df_contact_staging = determine_ministerial_status(df_contact_staging)

### Preferred Phone & Email


In [921]:
df_contact_staging['npe01__PreferredPhone__c'] = df_contact_staging['npe01__PreferredPhone__c'].fillna('Work')
df_contact_staging['npe01__Preferred_Email__c'] = df_contact_staging['npe01__Preferred_Email__c'].fillna('Work')

### Remove Addresses for Lay People


In [922]:
def remove_mailing_address_for_lay_person(row):
    if row['ContactRecordType'] == 'Lay_Person':
        row['MailingStreet'] = ''
        row['MailingCity'] = ''
        row['MailingState'] = ''
        row['MailingPostalCode'] = ''
        row['MailingCountry'] = ''
    return row

df_contact_staging = df_contact_staging.apply(remove_mailing_address_for_lay_person, axis=1)

### Religious Congregation

In this section, for those Contacts who have a value in the `Link to Religious Community` source field we need to populate the `mbfc__Religious_Order__c` target field in Salesforce with the correct Religious Community's parent account - the Religious Congregation.

NOTE: In the source data, there is no differentiation between a child Religious Community and a parent Religious Order, there is only one record for the Religious Comnmunity. In MF360 we represent these Accounts separately so we need to first (a) get the Religious Community record using the `Link to Religious Community` value but transforming it (adding 'RelCommunities\_' in front of the value) so it matches the Archdpdx_Migration_Id\_\_c in Salesforce.

Once acquired, (b) we need to get the value of the `ParentID` field on the Religious Community which is the ID of the Religious Congregation record. That ID is the value we then want to populate in the `mbfc__Religious_Order__c` field.


In [923]:
# get SF Account
get_all_accounts = 'Select Id, Name, RecordTypeId, Type, mbfc__Parish_Code__c, Job_Id__c, Archdpdx_Migration_Id__c, ParentID from Account WHERE Archdpdx_Migration_Id__c != null'

# get list of records, add to dataframe
sf_accounts = sf.query(get_all_accounts)
df_sf_accounts = pd.DataFrame(sf_accounts['records'])
df_sf_accounts = df_sf_accounts.drop(columns = 'attributes')

# create a dict in order to apply later
accounts_id_map = df_sf_accounts.set_index('Archdpdx_Migration_Id__c')['Id'].to_dict()

In [924]:
df_sf_accounts[df_sf_accounts['Archdpdx_Migration_Id__c'].str.contains('RelCommunities', na=False)]

Unnamed: 0,Id,Name,RecordTypeId,Type,mbfc__Parish_Code__c,Job_Id__c,Archdpdx_Migration_Id__c,ParentId
228,0017x000010HvhLAAS,"Colombiere Jesuit Community, Portland (SJ)",012Ps000002fU2gIAE,,,152,RelCommunities_1,0017x0000108353AAA
229,0017x000010HvhRAAS,"Franciscan Sisters of the Eucharist, Bridal Ve...",012Ps000002fU2gIAE,,,152,RelCommunities_10,0017x0000108358AAA
230,0017x000010HvhSAAS,"Apostolic Life Community, Portland (ALCP)",012Ps000002fU2gIAE,,,152,RelCommunities_11,0017x0000108359AAA
231,0017x000010HvhTAAS,"Blessed Stephen Bellesini Community, San Diego...",012Ps000002fU2gIAE,,,152,RelCommunities_12,0017x000010835AAAQ
232,0017x000010HvhUAAS,Canisius Jesuit Community at Jesuit High Schoo...,012Ps000002fU2gIAE,,,152,RelCommunities_13,0017x0000108353AAA
...,...,...,...,...,...,...,...,...
292,0017x000010HvhPAAS,Missionaries of the Holy Spirit Provincial Hou...,012Ps000002fU2gIAE,,,152,RelCommunities_8,0017x0000108356AAA
293,0017x000010HviOAAS,"Community of St. Thomas More, Eugene (OP)",012Ps000002fU2gIAE,,,152,RelCommunities_80,0017x000010835DAAQ
294,0017x000010HviPAAS,"Saint Benedict Lodge, McKenzie Bridge (OP)",012Ps000002fU2gIAE,,,152,RelCommunities_81,0017x000010835DAAQ
295,0017x000010HviQAAS,"St. Sharbel Maronite Church, Portland (MMJMJ)",012Ps000002fU2gIAE,,,152,RelCommunities_82,0017x000010HpdWAAS


In [925]:
# applies a lambda function to each element in the ‘Link_to_Religious_Community’ column, prefixing the value with 'RelCommunities_'
def transform_religious_community_link(df):
    df['Link_to_Religious_Community'] = df['Link_to_Religious_Community'].apply(
        lambda x: None if x == '0' else f'RelCommunities_{x}'
    )
    return df

# This function searches for a record in the sf_accounts DataFrame where the ‘Archdpdx_Migration_Id__c’ column matches the given archdpdx_migration_id
def get_parent_id_from_salesforce(sf_accounts, archdpdx_migration_id):
    print(f"Searching for: {archdpdx_migration_id}")  # Debug print
    matching_record = sf_accounts[sf_accounts['Archdpdx_Migration_Id__c'] == archdpdx_migration_id]
    if not matching_record.empty:
        print(f"Found: {matching_record['ParentId'].values[0]}")  # Debug print
        return matching_record['ParentId'].values[0]
    print("Not found")  # Debug print
    return None

# uses the get_parent_id_from_salesforce function to find the ‘ParentId’ from the sf_accounts DataFrame
def update_religious_order(df, sf_accounts):
    df['mbfc__Religious_Order__c'] = df.apply(
        lambda row: get_parent_id_from_salesforce(sf_accounts, row['Link_to_Religious_Community']) 
        if row['Link_to_Religious_Community'] is not None else None, axis=1
    )
    return df


# run the transform_religious_community_link and update_religious_order functions
df_contact_staging = transform_religious_community_link(df_contact_staging)

df_contact_staging = update_religious_order(df_contact_staging, df_sf_accounts)

Searching for: RelCommunities_60
Found: 0017x0000108353AAA
Searching for: RelCommunities_53
Found: 0017x000010835eAAA
Searching for: RelCommunities_9
Found: 0017x0000108357AAA
Searching for: RelCommunities_4
Found: 0017x0000108355AAA
Searching for: RelCommunities_8
Found: 0017x0000108356AAA
Searching for: RelCommunities_35
Found: 0017x000010835NAAQ
Searching for: RelCommunities_1
Found: 0017x0000108353AAA
Searching for: RelCommunities_23
Found: 0017x000010835GAAQ
Searching for: RelCommunities_56
Found: 0017x000010HpdWAAS
Searching for: RelCommunities_23
Found: 0017x000010835GAAQ
Searching for: RelCommunities_53
Found: 0017x000010835eAAA
Searching for: RelCommunities_60
Found: 0017x0000108353AAA
Searching for: RelCommunities_1
Found: 0017x0000108353AAA
Searching for: RelCommunities_27
Found: 0017x000010835IAAQ
Searching for: RelCommunities_44
Found: 0017x000010835VAAQ
Searching for: RelCommunities_23
Found: 0017x000010835GAAQ
Searching for: RelCommunities_44
Found: 0017x000010835VAAQ
Se

In [926]:
df_contact_staging[df_contact_staging.mbfc__Religious_Order__c.isna() == False]

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c,npe01__Secondary_Address_Type__c,ContactRecordType,candidate_type__c,RecordTypeID,mbfc__Ecclesial_Status__c,mbfc__Religious_Order__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1
671,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,J.,J.K.,K.,Adams,III,,,,,,,,,,,503-975-4744,,Work,jadams@jesuits.org,,,Work,False,False,False,False,0,,False,,,False,,,,,,,,,,,,,,,,,,,,,,,,2010-06-30,,,,,,,,,,,,,,,RelCommunities_60,,,,,,False,False,False,False,671,,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious,0017x0000108353AAA
2430,Religious,,Active,,,Sr.,Delores,,,Adelman,,Beaverton,OR,97078,,Beaverton,OR,97078,,,503-644-9181,503-718-0411,,Work,,,srdeloresa@ssmo.org,Work,True,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,RelCommunities_53,,,,,,False,False,False,False,2430,Sisters of St. Mary of Oregon\n4440 SW 148th Ave,4595 SW 148th Ave,Female,Private,Religious,,012Ps000002fU3GIAU,Professed Female Religious,0017x000010835eAAA
1584,"Priest,Religious",Active,Active,makuti,,Rev.,Macdonald,,,Akuti,,Rockaway,OR,97136,,,,,,,503-355-2661,,424-410-0097,Work,padreakuti@gmail.com,makuti@archdpdx.org,,Work,True,False,False,False,0,,True,1977-08-18,"Vura Bilinyo, Uganda",True,,,Uganda,R1 (Religious Visa),2022-02-14,,,,,,,,,"Apostles of Jesus, Kenya",2019-04-25,,,,2019-05-24,2022-04-21,2020-08-24,2022-04-28,2024-03-14,,,,,,,,,,,,,,,,RelCommunities_9,"St. Mary’s by the Sea Parish, Rockaway",,Parish Ministry,Full Time,Administrator,False,False,False,False,1584,St. Mary by the Sea Parish\nPO Box 390,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious,0017x0000108357AAA
912,"Priest,Religious",Transferred Out,Transferred Out,,,Rt. Rev.,James,,,Albers,,,,,,,,,,,,,,Work,,,,Work,False,False,False,False,0,,False,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,RelCommunities_4,,,,,,False,False,False,False,912,,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious,0017x0000108355AAA
913,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,Jose,,,Alberto,,,,,,,,,,,,,,Work,,,,Work,False,False,False,False,0,,False,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,RelCommunities_8,,,,,,False,False,False,False,913,,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious,0017x0000108356AAA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1434,"Priest,Religious",Deceased,Deceased,,,Rev.,Jerome,,,Young,,,,,,,,,,,,,,Work,,,,Work,False,False,False,False,0,,False,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,2012-12-08,,,,,,,,,,,,RelCommunities_4,,,,,,False,False,False,False,1434,,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious,0017x0000108355AAA
1435,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,Robert,,,Young,,,,,,,,,,,,,,Work,,,,Work,False,False,False,False,0,,False,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,RelCommunities_22,,,,,,False,False,False,False,1435,,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious,0017x000010835FAAQ
787,"Priest,Religious",Senior Status,Retired,nzodrow,,Rt. Rev.,Nathan,,,Zodrow,,Saint Benedict,OR,97373,,,,,,,503-845-3030,503-236-4747,,Do Not Include,,,nathan.zodrow@mtangel.edu,Work,True,False,False,False,0,,False,1952-03-02,USA,False,,,,,,537-48-7637,,,,,,,,Benedectines (OSB),,1974-09-08,,,,,,,,,2010-06-20,,,Spanish,,,,,,,,,,,RelCommunities_4,Mount Angel Abbey,,Curator of Art Collection / Archivist,Full Time,Curator Archivist,False,False,False,False,787,Mount Angel Abbey\n1 Abbey Dr,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious,0017x0000108355AAA
1611,"Priest,Religious,Non-Latin Rite",Unassigned,Active,,,Rev.,Peter,,John,Zogbi,,Portland,OR,97214,,,,,,,503-231-3853,,503-726-9945,Work,,,peter.zogbi@gmail.com,Work,True,False,False,False,0,,False,,,False,,,,,,,,,,,,,,Eparchy of Our Lady of Lebanon of Los Angeles,,2024-05-31,,,,,,,,,,,,,,,,,,,,,,,RelCommunities_82,St. Sharbel Parish,,Pastoral,Full Time,Pastor,False,True,False,False,1611,St. Sharbel Maronite Catholic Church\n1804 SE ...,,Male,Private,Priest,,012Ps000002fU3FIAU,Priest - Religious,0017x000010HpdWAAS


### Registered Parish

In this section we populate the 'Home Parish' target field for Contacts who have a 'Registered Parish' in the source system.

DONE: Check to see if the Registered Parish data is worth importing. Currently, 'Registered Parish' is only populated on 51 rows, and 32 of those rows in the 'Types' field are listed as 'Archive'. In other words, **only 19 of the 51 rows have a 'Registered Parish' value that might be meaningful.**


### Diocese of Incardination


In [927]:
df_contact_staging['Incardinated_Now'].sample(10)

Record Number
3016    Society of Jesus, USA West Province
1435                                    NaN
205                                     NaN
2251                                    NaN
2563                                    NaN
2364                                    NaN
1484                                    NaN
2689                                    NaN
3398    Society of Jesus, USA West Province
2497                                    NaN
Name: Incardinated_Now, dtype: object

In [928]:
# Need to look for, then create a new Account that corresponds to a given 'Diocese of Incardination', then populate with record Id. 

def update_incardinated_accounts(sf, df, column_name, record_type_dev_name, church_type, new_column_name):
    """
    Update the DataFrame by getting or creating Salesforce accounts for the values in the specified column.
    
    For each account name:
      - If the name contains "Diocese" or "Archdiocese":
          record_type_dev_name is set to "Church" and church_type is set to "Diocese".
      - Otherwise:
          record_type_dev_name is set to "Religious" (with no church_type)
          and the account's 'mbfc__Religious_Type__c' field is updated to "Congregation".
    
    Parameters:
      sf (Salesforce): Salesforce connection object.
      df (pd.DataFrame): The DataFrame to update.
      column_name (str): The name of the column containing account names.
      record_type_dev_name (str): (Unused parameter in this updated logic)
      church_type (str): (Unused parameter in this updated logic)
      new_column_name (str): The name of the new column to store the Salesforce account IDs.
    
    Returns:
      pd.DataFrame: The updated DataFrame with the new column containing Salesforce account IDs.
    """
    df[new_column_name] = None

    for index, row in df.iterrows():
        account_name = row[column_name]
        if pd.notna(account_name):
            if "Diocese" in account_name or "Archdiocese" in account_name:
                # If the account name contains "Diocese" or "Archdiocese"
                account_id = get_or_create_account(sf, account_name, 'Church', 'Diocese')
            else:
                # For all other accounts, use "Religious" as the record type and update the Religious Type field.
                account_id = get_or_create_account(sf, account_name, 'Religious', None)
                # Update the account to set 'mbfc__Religious_Type__c' to "Congregation"
                sf.Account.update(account_id, {'mbfc__Religious_Type__c': 'Congregation'})
            
            df.at[index, new_column_name] = account_id
    
    return df

# Example usage
# sf = Salesforce(username='your_username', password='your_password', security_token='your_security_token')
df_contact_staging = update_incardinated_accounts(sf, df_contact_staging, 'Incardinated_Now', 'Church', 'Diocese', 'mbfc__Diocese_of_Incardination__c')

# This cell takes >6m to run

In [929]:
df_contact_staging[['mbfc__Diocese_of_Incardination__c', 'Incardinated_Now']].sample(20)

Unnamed: 0_level_0,mbfc__Diocese_of_Incardination__c,Incardinated_Now
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1
2352,,
828,0017x0000107p0uAAA,Archdiocese of Portland in Oregon
1707,,
2979,0017x00001087iAAAQ,Archdiocese of Los Angeles
1099,,
192,,
2721,,
420,0017x00001088PiAAI,Diocese of Helena
213,0017x00001085wVAAQ,Archdiocese of Anchorage
2826,,


In [930]:
# Drop the 'Incardinated Now' column 
del df_contact_staging['Incardinated_Now']


### Deceased & Date of Death

ADPDX does not have a 'Deceased' boolean other than whether or not the Date of Death column has been populated. The target application functions based on both a 'Deceased' boolean and, optionally, a 'Date of Death.'


In [931]:
# Create a new column 'npsp__Deceased__c' and set its value to True when there is a value in 'mbfc__Date_of_Death__c'
df_contact_staging['npsp__Deceased__c'] = df_contact_staging['mbfc__Date_of_Death__c'].notna()


### Final Dataframe Cleanup


In [932]:
# drop columns that are no longer needed
# del df_contact_staging['Type(s)']  # Commented this out as we want to KEEP the field and migrated to 'ADPDX Contact Type'
del df_contact_staging['ContactRecordType']
del df_contact_staging['Link_to_Religious_Community']

In [933]:
df_contact_staging = df_contact_staging.rename(columns={'Type(s)': 'ADPDX_Contact_Type__c'})

In [934]:
# convert '' to NaN
df_contact_staging.replace("", np.nan, inplace=True)

# convert NaN to None
df_contact_staging = df_contact_staging.where(df_contact_staging.notnull(), None)


In [935]:
df_contact_staging['Languages__c'].sample(20)

Record Number
2804                None
1410                None
2652                None
1568                None
893                 None
1014                None
881                 None
1595     English,Spanish
2509                None
2867                None
2808                None
3181                None
1691                None
1201                None
1057                None
2992                None
676     Spanish, Italian
1512                None
3470                None
2356                None
Name: Languages__c, dtype: object

In [936]:
# df_contact_staging_2 = df_contact_staging.where(df_contact_staging.notnull(), None)

## Load


In [937]:
df_contact_staging['Archdpdx_Job_Id__c'] = curr_job_id

In [938]:
# generate CSV for manual loading
df_contact_staging.to_csv(f'/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/staging/df_contacts_staging.csv', encoding='utf-8-sig')
df_contact_staging.to_csv('staging_files/contacts_staging.csv', encoding='utf-8-sig')


In [939]:
# upsert Contact records into SF using Bulk api

from simple_salesforce.exceptions import SalesforceMalformedRequest

bulk_data = []
for row in df_contact_staging.itertuples(index=False):
    d = row._asdict()
    # del d['Index']
    bulk_data.append(d)

try:
    # Attempt to upsert Contact records into SF using Bulk API
    contact_upsert = sf.bulk.Contact.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=500, use_serial=False)
    contact_upsert_results = pd.DataFrame(contact_upsert)
except SalesforceMalformedRequest as e:
    # If a SalesforceMalformedRequest error occurs, print the error message and response content
    print(f"SalesforceMalformedRequest error: {e}")
    print(f"Response content: {e.content}")

In [940]:
# Print upsert results to local file

keys = contact_upsert[0].keys()
with open('results_files/contact_results.csv', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, keys)
    writer.writeheader()
    writer.writerows(contact_upsert)


# CONTACT > SPOUSES


In [None]:
# Step 1: Query Salesforce to get the mapping of Archdpdx_Migration_Id__c to Salesforce Contact IDs
query = "SELECT Id, Archdpdx_Migration_Id__c, AccountId FROM Contact WHERE Archdpdx_Migration_Id__c != null"
sf_contacts = sf.query_all(query)
sf_contacts_df = pd.DataFrame(sf_contacts['records']).drop(columns='attributes')

# Step 2: Create a dictionary from this mapping
contact_id_mapping = sf_contacts_df.set_index('Archdpdx_Migration_Id__c')['Id'].to_dict()
account_id_mapping = sf_contacts_df.set_index('Id')['AccountId'].to_dict()

# Step 3: Merge df_contact_staging_spouses with sf_contacts_df to get the 'Id' column
df_contact_staging_spouses = df_contact_staging_spouses.merge(sf_contacts_df, on='Archdpdx_Migration_Id__c', how='left')

df_contact_staging_spouses

In [None]:

# Step 4: Filter the DataFrame to only select records where the gender is male
df_contact_staging_spouses = df_contact_staging_spouses[df_contact_staging_spouses['Type(s)'] != 'Wife']

# Step 5: Use this dictionary to update the mbfc__Wife__c field in the df_contact_staging_spouses DataFrame
def update_spouse_id(row):
    spouse_id = row['Spouse']
    if pd.notna(spouse_id) and spouse_id in contact_id_mapping:
        return contact_id_mapping[spouse_id]
    return None

# Update the mbfc__Wife__c field in the df_contact_staging_spouses DataFrame
df_contact_staging_spouses['mbfc__Wife__c'] = df_contact_staging_spouses.apply(update_spouse_id, axis=1)

# Step 6: Prepare the data for the bulk update
update_data = []
for row in df_contact_staging_spouses.itertuples(index=False):
    if pd.notna(row.mbfc__Wife__c):
        update_data.append({
            'Id': row.Id,
            'mbfc__Wife__c': row.mbfc__Wife__c,
            'AccountId': account_id_mapping.get(row.mbfc__Wife__c)
        })

# Step 7: Perform the bulk update
try:
    update_results = sf.bulk.Contact.update(update_data, batch_size=100)
    update_results_df = pd.DataFrame(update_results)
    print(update_results_df)

    # Print upsert results to local file
    keys = update_results[0].keys()
    with open('results_files/spouse_update_results.csv', 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, keys)
        writer.writeheader()
        writer.writerows(update_results)

    # Convert the error messages to a DataFrame for better readability
    error_messages = []
    for result in update_results:
        if not result['success']:
            for error in result['errors']:
                error_messages.append({
                    'Id': result['id'],
                    'StatusCode': error['statusCode'],
                    'Message': error['message']
                })

    error_df = pd.DataFrame(error_messages)
    print(error_df)

    # Verify the updates
    df_contact_staging_spouses[['Spouse', 'mbfc__Wife__c']].sample(10)

except SalesforceMalformedRequest as e:
    print(f"SalesforceMalformedRequest error: {e}")
    print(f"Response content: {e.content}")

# cell takes > 1m to run

# CONTACTS > PHOTOS

#TODO: Contact Photos


# CONTACT > REGISTER ENTRIES


In [None]:
import pandas as pd

# Load CSV
df = (pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/People.csv')
               .rename(columns=lambda x: x.replace(' ', '_')) # Remove whitespace in column names
               .drop(index=0) # Drops the extra row that replicates the labels
)

df

In [None]:
df = df[df["Type(s)"] != 'Wife']

In [None]:
# Import all Contact fields that actually map to Register Entry records

import pandas as pd

# Define the structure of your column sets with correct attribute names
column_sets = [
    {'date': 'Baptism_Date', 'place': 'Place_of_Baptism', 'notation_type': 'Proof of Baptism'},
    {'date': 'Confirmation_Date', 'place': 'Place_of_Confirmation', 'notation_type': 'Notice of Confirmation'},
    {'date': 'Received_Date', 'place': 'Parish_of_Record', 'notation_type': 'Notice of Profession of Faith'},
    {'date': 'Marriage_Date', 'place': 'Place_of_Marriage', 'notation_type': 'Notice of Matrimony', 'spouse': 'Spouse'},
    {'date': 'Diaconal_Ordination_Date', 'place': 'Diaconal_Ordination_Place', 'prelate': 'Diaconate_Ordination_Prelate', 'notation_type': 'Notice of Holy Orders', 'ordination_type': 'Diaconate'},
    {'date': 'Presbyteral_Ordination_Date', 'place': 'Presbyteral_Ordination_Place', 'prelate': 'Presbyteral_Ordination_Prelate', 'notation_type': 'Notice of Holy Orders', 'ordination_type': 'Presbyteral'},
    {'date': 'Episcopal_Ordination_Date', 'place': 'Episcopal_Ordination_Place', 'prelate': 'Episcopal_Ordination_Prelate', 'notation_type': 'Notice of Holy Orders', 'ordination_type': 'Episcopal'},
    {'date': 'Reader_Date', 'place': 'Reader_Place', 'notation_type': 'Notice of Holy Orders', 'ordination_type': 'Minor Order: Reader'},
    {'date': 'Acolyte_Date', 'place': 'Acolyte_Place', 'notation_type': 'Notice of Holy Orders', 'ordination_type': 'Minor Order: Acolyte'}

]

# New DataFrame for entries
register_entries = pd.DataFrame(columns=['RecordNumber', 'mbfc__Register_Entry_Type__c', 'mbfc__Type__c', 'mbfc__Notation_Type__c', 'mbfc__Ordination_Type__c', 'Date', 'Place', 'Prelate', 'mbfc__Matrimonial_Wife__c'])
new_entries = []  # List to store entries before final concatenation

# Processing rows
for row in df.itertuples():
    for column_set in column_sets:
        date_value = getattr(row, column_set['date'], None)
        if pd.notna(date_value):  # Check if date field is not NaN
            entry = {
                'RecordNumber': getattr(row, 'Record_Number', None),
                'Date': date_value,
                'Place': getattr(row, column_set['place'], None)
            }
            # Add Prelate if applicable
            if 'prelate' in column_set:
                entry['Prelate'] = getattr(row, column_set['prelate'], None)

            # Set 'mbfc__Register_Entry_Type__c', and conditionally add 'mbfc__Type__c' or 'mbfc__Notation_Type__c'
            if 'sacrament_type' in column_set:
                entry['mbfc__Type__c'] = column_set['sacrament_type']
                entry['mbfc__Register_Entry_Type__c'] = 'Sacrament'
            if 'notation_type' in column_set:
                entry['mbfc__Notation_Type__c'] = column_set['notation_type']
                entry['mbfc__Register_Entry_Type__c'] = 'Notation'

            # Handle ordination type specific updates
            if 'ordination_type' in column_set:
                entry['mbfc__Ordination_Type__c'] = column_set['ordination_type']

            # Add Spouse if applicable
            if 'spouse' in column_set:
                entry['mbfc__Matrimonial_Wife__c'] = getattr(row, column_set['spouse'], None)

            new_entries.append(entry)
    
    # Add entries for 'Reader Date'
    # reader_date = getattr(row, 'Reader_Date', None)
    # if pd.notna(reader_date):
    #     entry = {
    #         'RecordNumber': getattr(row, 'Record_Number', None),
    #         'Date': reader_date,
    #         'mbfc__Notation_Type__c': 'Notice of Holy Orders',
    #         'mbfc__Ordination_Type__c': 'Minor Order: Reader',
    #         'mbfc__Register_Entry_Type__c': 'Notation'
    #     }
    #     new_entries.append(entry)
    
    # # Add entries for 'Acolyte Date'
    # acolyte_date = getattr(row, 'Acolyte_Date', None)
    # if pd.notna(acolyte_date):
    #     entry = {
    #         'RecordNumber': getattr(row, 'Record_Number', None),
    #         'Date': acolyte_date,
    #         'mbfc__Notation_Type__c': 'Notice of Holy Orders',
    #         'mbfc__Ordination_Type__c': 'Minor Order: Acolyte',
    #         'mbfc__Register_Entry_Type__c': 'Notation'
    #     }
    #     new_entries.append(entry)

# Concatenate all new entries to the DataFrame at once
if new_entries:
    register_entries = pd.concat([register_entries, pd.DataFrame(new_entries)], ignore_index=True)

print(f"Total records added: {len(register_entries)}")

# Optionally, save the new DataFrame to a CSV
register_entries.to_csv('Register_Entries.csv', index=False)

# Display the DataFrame
register_entries.sample(30)


### Populate Lookup for Prelate


In [None]:
from nameparser import HumanName
from nameparser.config import CONSTANTS

# Add dataset-specific Titles and Suffix constants for parsing
CONSTANTS.titles.add('Very', 'Rev.', 'Very Rev.', 'Sr.', 'Most Rev.')
CONSTANTS.suffix_acronyms.add('FRS', 'J.C.L.', 'J.C.L., D.D.', 'D.D.', 'OMI', 'OSA', 'OCD', 'OP', 'OC', 'FSE', 'OMV', 'SDB', 'SM', 'SFX', 'SP', 'OP', 'O.S.M', 'SNJM', 'OSF', 'HMRF', 'DD', 'CSJP', 'SDD', 'BVM', 'BVM - President', 'SJ', 'SL', 'IX', 'SSJ', 'J.C.L.', 'J.C.L', 'OFM', 'MSpS', 'Fco.' )


def parse_name(name):
    if pd.isna(name):  # Checks if the name is NaN or None
        return {
            'Salutation': '',
            'FirstName': '',
            'MiddleName': '',
            'LastName': '',
            'Suffix': ''
        }
    else:
        name = HumanName(name)
        return {
            'Salutation': name.title,
            'FirstName': name.first,
            'MiddleName': name.middle,
            'LastName': name.last,
            'Suffix': name.suffix
        }

# Apply the parsing function only where 'Prelate' exists and is not NaN
for entry in new_entries:
    if 'Prelate' in entry and pd.notna(entry['Prelate']):
        parsed_name = parse_name(entry['Prelate'])
        entry.update(parsed_name)

# Ensure the DataFrame creation from new_entries includes checks for existence of keys:
register_entries = pd.DataFrame(new_entries)
if 'Prelate' in register_entries.columns:
    register_entries['Salutation'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['Salutation'] if pd.notna(x) else '')
    register_entries['FirstName'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['FirstName'] if pd.notna(x) else '')
    register_entries['MiddleName'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['MiddleName'] if pd.notna(x) else '')
    register_entries['LastName'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['LastName'] if pd.notna(x) else '')
    register_entries['Suffix'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['Suffix'] if pd.notna(x) else '')


# Display the DataFrame
print(f"Total records added: {len(register_entries)}")
register_entries.sample(10)



In [None]:
# Query Salesforce for existing contacts and create a dictionary for mapping

from simple_salesforce import Salesforce

query = """
SELECT Id, Archdpdx_Migration_Id__c
FROM Contact
"""
result = sf.query_all(query)
contact_map = {rec['Archdpdx_Migration_Id__c']: rec['Id'] for rec in result['records']}


In [None]:
# Get RecordTypeId for Contact.Priest
priest_contact_recordtype_id = get_recordtype_id(df_sf_recordTypes, 'Priest', 'Contact', 'mbfc')

priest_contact_recordtype_id

In [None]:
# Get RecordID for Prelates by querying for Contacts by FirstName and LastName and, if not found, Create New Contacts

from simple_salesforce import SFType, SalesforceResourceNotFound

contact = SFType('Contact', sf.session_id, sf.sf_instance)
for index, row in register_entries.iterrows():
    first_name, last_name = row.get('FirstName'), row.get('LastName')

    if pd.isna(first_name) or pd.isna(last_name) or first_name.strip() == '' or last_name.strip() == '':
        # If either first name or last name is missing or empty, skip this row or handle as needed
        print(f"Skipping row {index} due to missing name information.")
        continue

    try:
        # Search for contact by First and Last Name
        query = f"SELECT Id FROM Contact WHERE FirstName = '{first_name}' AND LastName = '{last_name}'"
        result = sf.query(query)
        if result['totalSize'] > 0:
            contact_id = result['records'][0]['Id']
        else:
            # Create a new contact if no match found
            new_contact = {
                'Salutation': "Most Rev.",
                'FirstName': first_name,
                'LastName': last_name,
                'Archdpdx_Job_Id__c': curr_job_id,
                'RecordTypeId': priest_contact_recordtype_id,
                'mbfc__Ecclesial_Status__c': "Bishop/Archbishop",
                'mbfc__Ministerial_Status__c': "N/A",
            }
            create_result = contact.create(new_contact)
            contact_id = create_result['id']

        # Update DataFrame with the Salesforce Contact ID
        register_entries.at[index, 'mbfc__Celebrant__c'] = contact_id

    except SalesforceException as e:
        print(f"Error processing row {index}: {e}")

# cell talks > 2m to run

### Prepare to Upsert


In [None]:
# Map Contact IDs to Register Entries

register_entries_2 = register_entries

register_entries_2['mbfc__Contact__c'] = register_entries['RecordNumber'].map(contact_map)


In [None]:
register_entries_2.sample(20)

In [None]:
# Step 0: Query Salesforce to recreate the contact_id_mapping dictionary
contact_records = sf.query_all("SELECT Id, Archdpdx_Migration_Id__c FROM Contact WHERE Archdpdx_Migration_Id__c != NULL")
contact_id_mapping = {record['Archdpdx_Migration_Id__c']: record['Id'] for record in contact_records['records']}

# Step 1: Define a function to update the mbfc__Matrimonial_Wife__c field
def update_matrimonial_wife_id(row):
    wife_id = row['mbfc__Matrimonial_Wife__c']
    if pd.notna(wife_id) and wife_id in contact_id_mapping:
        return contact_id_mapping[wife_id]
    return None

# Step 2: Apply the function to the register_entries_2 DataFrame
register_entries_2['mbfc__Matrimonial_Wife__c'] = register_entries_2.apply(update_matrimonial_wife_id, axis=1)

# Step 3: Prepare the data for the bulk update
update_data = []
for row in register_entries_2.itertuples(index=False):
    if pd.notna(row.mbfc__Matrimonial_Wife__c):
        update_data.append({
            'Id': row.Id if 'Id' in row._fields else None,
            'mbfc__Matrimonial_Wife__c': row.mbfc__Matrimonial_Wife__c
        })

# The `update_data` list is now ready for bulk update operations

In [None]:
print(contact_id_mapping)


In [None]:

# Replace Matrimonial Wife external id with Salesforce Ids
# register_entries_2['mbfc__Matrimonial_Wife__c'] = register_entries['mbfc__Matrimonial_Wife__c'].replace('0', None).map(contact_map)


In [None]:
register_entries_2[register_entries_2['mbfc__Matrimonial_Wife__c'].isna() == False].sample(20)


In [None]:
# Append Job_Id__c
register_entries_2['Archdpdx_Job_Id__c'] = curr_job_id

In [None]:
# Generate an External ID
def create_external_id(row):
    record_number = str(row['RecordNumber']).replace(' ', '').replace('-', '')
    entry_type = str(row['mbfc__Register_Entry_Type__c']).replace(' ', '').replace('-', '')

    # Check whether to use Type or Notation Type based on what's available
    if 'mbfc__Type__c' in row and not pd.isna(row['mbfc__Type__c']):
        type_field = str(row['mbfc__Type__c']).replace(' ', '').replace('-', '')
    elif 'mbfc__Notation_Type__c' in row and not pd.isna(row['mbfc__Notation_Type__c']):
        type_field = str(row['mbfc__Notation_Type__c']).replace(' ', '').replace('-', '') + str(row['mbfc__Ordination_Type__c']).replace(' ', '').replace('-', '')
    else:
        type_field = 'Unknown'

    return f"{record_number}_{entry_type}_{type_field}"

In [None]:
# Assuming your DataFrame is named `register_entries`
register_entries_2['Archdpdx_Migration_Id__c'] = register_entries.apply(create_external_id, axis=1)

if register_entries['Archdpdx_Migration_Id__c'].duplicated().any():
    print("Warning: There are duplicate external IDs.")
    # Optionally, show the duplicates
    duplicates = register_entries[register_entries['external_id'].duplicated(keep=False)]
    print(duplicates)
else:
    print("All external IDs are unique.")


In [None]:
# Drop unnecessary columns:
register_entries_2.drop(['RecordNumber', 'Prelate', 'Salutation', 'FirstName', 'MiddleName', 'LastName', 'Suffix'], axis=1, inplace=True)

In [None]:
register_entries_staging = register_entries_2

In [None]:
# Remove all NaN values:
register_entries_staging.fillna('', inplace=True)

# Rename columns
register_entries_staging = register_entries_staging.rename(columns={
    'Place': 'mbfc__Location_text__c',
    'Date': 'mbfc__Event_Date__c'
})


In [None]:
# generate CSV for manual loading
register_entries_staging.to_csv('staging_files/reg_entry_staging.csv', encoding='utf-8-sig')


In [None]:
# Upsert Register Entry Records

bulk_data = []
for row in register_entries_staging.itertuples(index=False):
    d = row._asdict()
    # del d['Index']
    bulk_data.append(d)

# Keep the batch <100 as I've been getting an exceptionCode: 'InvalidBatch', 'exceptionMessage': 'Records not processed'
reg_entry_upsert = sf.bulk.mbfc__Register_Entry__c.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=100, use_serial=False)
reg_entry_upsert_results = pd.DataFrame(reg_entry_upsert)

In [None]:
# Print upsert results to local file

keys = reg_entry_upsert[0].keys()

with open('results_files/register_entry_results.csv', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, keys)
    writer.writeheader()
    writer.writerows(reg_entry_upsert)

# CONTACT > AFFILIATIONS


In [None]:
# Function to create a unique ID based on Person's Name + completion date or start date + affiliation type
def create_unique_id(row):
    # Get values, handling NaNs
    person_id = str(row.get('mbfc__Person__c', '')).strip()
    
    # Check for completion date, and if it's blank, use the start date
    completion_date = row.get('mbfc__Completion_Date__c', '')
    if pd.isna(completion_date) or completion_date == '':
        completion_date = row.get('mbfc__Start_Date__c', '')
    
    completion_date = str(completion_date).strip()
    affiliation = str(row.get('mbfc__Affiliation__c', '')).strip()
    
    # Concatenate the three fields
    combined = f"{person_id}{completion_date}{affiliation}"
    
    # Remove unwanted characters and convert to lowercase
    clean_id = ''.join(combined.split()).replace('-', '').replace('.', '').lower()
    
    # Limit the string to 50 characters
    return clean_id[:50]

## Education Affiliations

This section takes multiple sets of columns (all related to a person's education) from the Contacts table, and combines them into a single set of columns in a new dataframe for insertion into Salesforce as Affiliation records.


In [None]:
# Parse and stage Education Affiliation records
import pandas as pd
from functools import lru_cache

# Load CSV
df = (pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/People.csv')
               .rename(columns=lambda x: x.replace(' ', '_')) # Remove whitespace in column names
               .drop(index=0) # Drops the extra row that replicates the labels
)


# Define the structure of your column sets with correct attribute names
degree_sets = [
    {'year': 'Bachelor_Degree_Year', 'type': 'Bachelor_Degree_Type', 'institution': 'Bachelor_Degree_Institution'},
    {'year': 'Graduate_1_Degree_Year', 'type': 'Graduate_1_Degree_Type', 'institution': 'Graduate_1_Degree_Institution'},
    {'year': 'Graduate_2_Degree_Year', 'type': 'Graduate_2_Degree_Type', 'institution': 'Graduate_2_Degree_Institution'},
    {'year': 'Graduate_3_Degree_Year', 'type': 'Graduate_3_Degree_Type', 'institution': 'Graduate_3_Degree_Institution'},
    {'year': 'Graduate_4_Degree_Year', 'type': 'Graduate_4_Degree_Type', 'institution': 'Graduate_4_Degree_Institution'}
]

# Query for the Record Type ID for 'Organization'
record_type_result = sf.query("SELECT Id FROM RecordType WHERE SobjectType = 'Account' AND DeveloperName = 'Organization' AND NamespacePrefix = 'mbfc'")
organization_record_type_id = record_type_result['records'][0]['Id'] if record_type_result['records'] else None

# Initialize the DataFrame for the staging table
education_staging = pd.DataFrame()

# Function to check and create institution account
@lru_cache(maxsize=None)
def get_or_create_institution_account(institution_name):
    if pd.isna(institution_name):
        return None  # Return None or handle as appropriate if institution name is NaN

    # Truncate institution_name to 80 characters
    institution_name = institution_name[:80]

    # Query Salesforce to find the institution
    query = f"SELECT Id, Name FROM Account WHERE Name = '{institution_name}' LIMIT 1"
    results = sf.query(query)
    
    # If exists, return the ID
    if results['records']:
        return results['records'][0]['Id']
    else:
        # Ensure no NaN values are sent to Salesforce
        account_data = {
            'Name': institution_name if pd.notna(institution_name) else "Default Name",  # Provide a default if NaN
            'RecordTypeId': organization_record_type_id,
            'mbfc__Organization_Type__c': 'School'
        }
        # Remove keys with None values to avoid JSON serialization issues
        account_data = {k: v for k, v in account_data.items() if v is not None}
        
        new_account = sf.Account.create(account_data)
        return new_account['id']

# Get Contact record ID from Salesforce
@lru_cache(maxsize=None)
def get_contact_id_by_record_number(record_number):
    if pd.isna(record_number):
        return None
    query = f"SELECT Id FROM Contact WHERE Archdpdx_Migration_Id__c = '{record_number}'"
    results = sf.query(query)
    if results['records']:
        return results['records'][0]['Id']
    return None


# Initialize an empty list to collect DataFrames or dictionaries
new_entries = []

recordType_affil_educational = get_recordtype_id(df_sf_recordTypes, 'Education', 'mbfc__Affiliation__c', 'mbfc')
print(recordType_affil_educational)

# Process each row and each degree set
for index, row in df.iterrows():
    for degree_set in degree_sets:
        year = row[degree_set['year']]
        if pd.notna(year):  # Only proceed if the year column is not NaN
            formatted_year = f"{int(year)}-01-01"  # Convert year to YYYY-MM-DD format
            institution_name = row[degree_set['institution']]
            account_id = get_or_create_institution_account(institution_name)
            contact_id = get_contact_id_by_record_number(row['Record_Number'])
            recordtype_id = recordType_affil_educational 
            
            # Create a record for the staging table
            affiliation_record = {
                'mbfc__Person__c': contact_id,
                'mbfc__Completion_Date__c': formatted_year,
                'mbfc__Context__c': account_id,
                'mbfc__Category__c': 'Education/Studies',
                'mbfc__Affiliation__c': row[degree_set['type']],
                'RecordTypeId': recordtype_id
            }
            new_entries.append(affiliation_record)

# Convert all collected records to a DataFrame in one go
education_staging = pd.DataFrame(new_entries)


#FIXME: There are 4 rows where no INSTITUTION is listed. This makes it impossible to import an Affiliation record. Need to figure out how to handle this with Client. 
#FIXME: There are about 15 rows where no DEGREE is listed. This makes it impossible to import an Affiliation record. Need to figure out how to handle this with Client. 

# Cell takes approx. 5m to run

In [None]:
# Apply the function to each row and create a new column with the unique ID
education_staging['Archdpdx_Migration_Id__c'] = education_staging.apply(create_unique_id, axis=1)

# Check the first few rows to verify the new column
education_staging.head()

In [None]:
# Fill any NaN values
education_staging = education_staging.fillna('')

In [None]:
# Save the staging table to CSV
education_staging.to_csv('staging_files/education_staging.csv', index=False)


In [None]:
import pandas as pd
import numpy as np
from simple_salesforce import Salesforce, SalesforceMalformedRequest, SalesforceError
from datetime import datetime, date



# def upsert_to_salesforce(sf, dataframe, object_name, external_id_field):
#     """
#     Upsert records to Salesforce from a pandas DataFrame.

#     Parameters:
#     sf (Salesforce): The Salesforce connection instance.
#     dataframe (pd.DataFrame): The pandas DataFrame containing data to upsert.
#     object_name (str): The Salesforce object name (e.g., 'Contact').
#     external_id_field (str): The external ID field used for upserts.
#     """
#     successful_upserts = 0
#     failed_upserts = 0

#     # Replace placeholder values with None in the DataFrame
#     dataframe.replace({None: pd.NA, ' ': None, '': None}, inplace=True)

#     # Convert DataFrame to a list of dictionaries
#     data_to_upsert = dataframe.to_dict(orient='records')

#     for data in data_to_upsert:
#         try:
#             data = convert_non_serializables(data)
#             external_id = data.pop(external_id_field)

#             # Perform upsert using only the External ID
#             response = getattr(sf, object_name).upsert(f'{external_id_field}/{external_id}', data)
#             successful_upserts += 1
#             print(f"Successfully upserted {object_name} with External ID: {external_id}")
#         except SalesforceMalformedRequest as e:
#             failed_upserts += 1
#             print(f"Malformed request error when upserting {object_name} with External ID: {external_id}. Error: {e.content}")
#         except SalesforceError as e:
#             failed_upserts += 1
#             print(f"Salesforce error when upserting {object_name} with External ID: {external_id}. Error: {e.content}")
#         except Exception as e:
#             failed_upserts += 1
#             print(f"Failed to upsert {object_name} with External ID: {external_id}. Error: {e}")

#     print(f"Upsert completed. Successful upserts: {successful_upserts}, Failed upserts: {failed_upserts}")

# def convert_non_serializables(data):
#     """Convert non-serializable objects to serializable formats."""
#     for key, value in data.items():
#         try:
#             if isinstance(value, (datetime, date)):
#                 data[key] = value.isoformat()
#             elif isinstance(value, float) and np.isnan(value):
#                 data[key] = None
#             elif pd.isna(value):
#                 data[key] = None
#             elif isinstance(value, (int, bool, str)):
#                 data[key] = value
#             else:
#                 data[key] = str(value)  # Convert other types to string
#         except Exception as e:
#             print(f"Error processing key: {key}, value: {value}, error: {e}")
#     return data

# def upsert_to_salesforce_bulk(sf, dataframe, object_name, external_id_field, failed_log_file, batch_size=10000):
#     """
#     Upsert records to Salesforce from a pandas DataFrame using the Bulk API.

#     Parameters:
#     sf (Salesforce): The Salesforce connection instance.
#     dataframe (pd.DataFrame): The pandas DataFrame containing data to upsert.
#     object_name (str): The Salesforce object name (e.g., 'Contact').
#     external_id_field (str): The external ID field used for upserts.
#     failed_log_file (str): The file name where failed upsert records will be logged.
#     batch_size (int): The number of records to include in each batch.
#     """
#     successful_upserts = 0
#     failed_upserts = 0

#     # Replace placeholder values with None in the DataFrame
#     dataframe.replace({None: pd.NA, ' ': None, '': None}, inplace=True)

#     # Convert DataFrame to a list of dictionaries
#     data_to_upsert = dataframe.to_dict(orient='records')

#     with open(failed_log_file, 'a') as log_file:
#         # Process data in batches
#         for i in range(0, len(data_to_upsert), batch_size):
#             batch_data = data_to_upsert[i:i + batch_size]
#             batch_data = [convert_non_serializables(record) for record in batch_data]

#             try:
#                 # Perform bulk upsert
#                 response = sf.bulk.__getattr__(object_name).upsert(batch_data, external_id_field=external_id_field)

#                 for res in response:
#                     if res['success']:
#                         successful_upserts += 1
#                     else:
#                         failed_upserts += 1
#                         log_file.write(f"Failed to upsert record: {res}\n")

#             except SalesforceMalformedRequest as e:
#                 failed_upserts += len(batch_data)
#                 log_file.write(f"Malformed request error when upserting batch. Error: {e.content}\n")
#             except SalesforceError as e:
#                 failed_upserts += len(batch_data)
#                 log_file.write(f"Salesforce error when upserting batch. Error: {e.content}\n")
#             except Exception as e:
#                 failed_upserts += len(batch_data)
#                 log_file.write(f"Failed to upsert batch. Error: {e}\n")

#     print(f"Upsert completed. Successful upserts: {successful_upserts}, Failed upserts: {failed_upserts}")


In [None]:
# Upsert Education Affiliation records

# upsert_to_salesforce(sf, education_staging, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c')
upsert_to_salesforce_bulk(sf, education_staging, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c', 'results_files/affiliations_education_results.csv', batch_size=1000)


## Ecclesial Affiliations

This section handles individual Contact source table FIELDS that map to Affiliation RECORDS in the target system.

As the source data model and target data model are substantially different, this section groups together source columns into what will become individual records in the new system and populates missing information based on or required by the target system.

Example: each affiliation record in the target system requires a Context. In certain cases this data does not exist in the source or it is found in another column:

| Affiliation            | Context                   | Completion Date           |
| ---------------------- | ------------------------- | ------------------------- |
| First Vows             | Religious Order           | Date of First Vows        |
| Final Vows             | Religious Order           | Date of Final Vows        |
| Incardination          | Incardinated from Diocese | Incardinated From Date    |
| Faculties (Type)       | Local Diocese             | Faculties Granted Date    |
| Faculties (Restricted) | Local Diocese             | Faculties Restricted Date |
| Faculties (Withdrawn)  | Local Diocese             | Faculties Withdrawn Date  |
| Excardinated           | Excardinated To Diocese   | Excardinated To Date      |

Other examples of columns that need to be populated:

- RecordTypeId
- Category
- Start Date
- Completion Date

Depending on which column is being migrated, the date value might be considered to be a Start Date or a Completion Date in the target system, and needs to be staged accordingly.


In [None]:
# Generate a staging DF of Ecclesial Affiliations out of a handful of fields in the source data, each of which is to be converted into a new row in the staging DF.

# FIXME: There are a number of rows where a Faculties Granted is missing a date, and conversely, where there is a Faculties Granted Date but no description of the Faculties granted. This is a problem, because the application requires a date for when Faculties were granted.


import pandas as pd
from functools import lru_cache
from simple_salesforce import Salesforce

# Load CSV
df = (pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/People.csv')
               .rename(columns=lambda x: x.replace(' ', '_')) # Remove whitespace in column names
               .drop(index=0) # Drops the extra row that replicates the labels
)

# Define the structure of your column sets with correct attribute names
column_sets = [
    {'year': 'Incardinated_From_Date', 'context': 'Incardinated_From_Diocese'},
    {'year': 'Excardinated_To_Date', 'context': 'Excardinated_To_Diocese'},
    {'year': 'Faculties_Granted_Date', 'affiliation': 'Faculties'},
    {'year': 'Faculties_Restricted_Date'},
    {'year': 'Faculties_Withdrawn_Date'}
    # {'year': 'Reader_Date'},  # Add Reader Date
    # {'year': 'Acolyte_Date'},  # Add Acolyte Date
    # {'year': 'Candidacy_Date'}  # Add Candidate Date
]



In [None]:

# Query for the Record Type IDs of Church, Religious    
record_type_query = "SELECT Id, DeveloperName FROM RecordType WHERE SobjectType = 'Account' AND DeveloperName IN ('Church', 'Religious')"
record_type_result = sf.query(record_type_query)
record_type_ids = {record['DeveloperName']: record['Id'] for record in record_type_result['records']}

church_record_type_id = record_type_ids.get('Church')
religious_record_type_id = record_type_ids.get('Religious')

# Query for the Record Type IDs for 'Ecclesial_Affiliation' and 'Ministerial_Status' for mbfc__Affiliation__c object
record_type_query = "SELECT Id, DeveloperName FROM RecordType WHERE SobjectType = 'mbfc__Affiliation__c' AND DeveloperName IN ('Ecclesial_Affiliation', 'Ministerial_Status')"
record_type_result = sf.query(record_type_query)
record_type_ids = {record['DeveloperName']: record['Id'] for record in record_type_result['records']}

ecclesial_affiliation_record_type_id = record_type_ids.get('Ecclesial_Affiliation')
ministerial_status_record_type_id = record_type_ids.get('Ministerial_Status')

# Check if any of the required Record Types are missing
if not ecclesial_affiliation_record_type_id:
    raise ValueError("No RecordType found for Ecclesial Affiliation on mbfc__Affiliation__c object.")
if not ministerial_status_record_type_id:
    raise ValueError("No RecordType found for Ministerial Status on mbfc__Affiliation__c object.")

In [None]:

# Initialize the DataFrame for the staging table
ecclesial_affiliation_staging = pd.DataFrame()

# Function to check and create institution account
def get_or_create_church_account(context):
    if pd.isna(context):
        return None  # Return None or handle as appropriate if institution name is NaN

    # Query Salesforce to find the institution
    query = f"SELECT Id, Name FROM Account WHERE Name = '{context}' LIMIT 1"
    results = sf.query(query)
    
    # If exists, return the ID
    if results['records']:
        return results['records'][0]['Id']
    else:
        # Ensure no NaN values are sent to Salesforce
        if 'Diocese' in context or 'Archdiocese' in context:
            account_data = {
                'Name': context if pd.notna(context) else "Church Name Missing",  # Provide a default if NaN
                'RecordTypeId': church_record_type_id,
                'mbfc__Church_Type__c': 'Diocese'
            }
        else:
            account_data = {
                'Name': context if pd.notna(context) else "Religious Name Missing",  # Provide a default if NaN
                'RecordTypeId': religious_record_type_id
            }

        # Remove keys with None values to avoid JSON serialization issues
        account_data = {k: v for k, v in account_data.items() if v is not None}
        
        new_account = sf.Account.create(account_data)
        return new_account['id']

# Get Contact record ID from Salesforce
@lru_cache(maxsize=None)
def get_contact_id_by_record_number(record_number):
    if pd.isna(record_number):
        return None
    query = f"SELECT Id FROM Contact WHERE Archdpdx_Migration_Id__c = '{record_number}'"
    results = sf.query(query)
    if results['records']:
        return results['records'][0]['Id']
    return None

# Initialize an empty list to collect DataFrames or dictionaries
new_entries = []

# Process each row and each degree set
for index, row in df.iterrows():
    for col_set in column_sets:
        date = row[col_set['year']]
        if pd.notna(date):  # Only proceed if the year column is not NaN
            context = row.get(col_set.get('context'), None)
            account_id = get_or_create_church_account(context)
            contact_id = get_contact_id_by_record_number(row['Record_Number'])
            
            # Initialize all necessary variables with None
            start_date = None
            completion_date = None
            affiliation = None
            record_type_id = None
            category = None

            # Determine the mbfc__Affiliation__c value
            if 'Incardinated_From_Date' in col_set['year']:
                affiliation = 'Incardinated'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Ecclesial Affiliations'
            elif 'Excardinated_To_Date' in col_set['year']:
                affiliation = 'Excardinated'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Ecclesial Affiliations'
            elif 'Faculties_Granted_Date' in col_set['year']:
                faculties_value = row.get(col_set.get('affiliation', ''))
                if pd.isna(faculties_value):
                    affiliation = 'Faculties'
                else:
                    affiliation = f"Faculties ({faculties_value})"
                account_id = diocesan_account_id  # Override account ID for faculties
                start_date = date
                record_type_id = ministerial_status_record_type_id
                category = 'Faculties'
            elif 'Faculties_Restricted_Date' in col_set['year']:
                affiliation = 'Faculties (Restricted)'
                account_id = diocesan_account_id  # Override account ID for faculties
                completion_date = date
                record_type_id = ministerial_status_record_type_id
                category = 'Faculties'
            elif 'Faculties_Withdrawn_Date' in col_set['year']:
                affiliation = 'Faculties (Withdrawn)'
                account_id = diocesan_account_id  # Override account ID for faculties
                completion_date = date
                record_type_id = ministerial_status_record_type_id
                category = 'Faculties'
            elif 'Date_of_First_Vows' in col_set['year']:
                affiliation = 'First Vows'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Ecclesial Affiliations'
            elif 'Date_of_Final_Vows' in col_set['year']:
                affiliation = 'Final Vows'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Ecclesial Affiliations'
            # elif 'Reader_Date' in col_set['year']:
            #     affiliation = 'Reader Installation'
            #     completion_date = date
            #     record_type_id = ecclesial_affiliation_record_type_id
            #     category = 'Installations'
            #     account_id = diocesan_account_id
            # elif 'Acolyte_Date' in col_set['year']:
            #     affiliation = 'Acolyte Installation'
            #     completion_date = date
            #     record_type_id = ecclesial_affiliation_record_type_id
            #     category = 'Installations'
            #     account_id = diocesan_account_id
            # elif 'Candidacy_Date' in col_set['year']:
            #     affiliation = 'Candidate Installation'
            #     completion_date = date
            #     record_type_id = ecclesial_affiliation_record_type_id
            #     category = 'Installations'
            #     account_id = diocesan_account_id

            else:
                affiliation = row.get(col_set.get('affiliation', ''), None)
            
            # Create a record for the staging table
            affiliation_record = {
                'RecordTypeId': record_type_id,
                'mbfc__Person__c': contact_id,
                'mbfc__Completion_Date__c': completion_date,
                'mbfc__Start_Date__c': start_date,
                'mbfc__Context__c': account_id,
                'mbfc__Category__c': category,
                'mbfc__Affiliation__c': affiliation
            }
            new_entries.append(affiliation_record)

# Convert all collected records to a DataFrame in one go
ecclesial_affiliations_staging = pd.DataFrame(new_entries)

# Takes approx. 1.5 minutes to run

In [None]:
ecclesial_affiliations_staging.sample(20)

In [None]:
# Apply the function to each row and create a new column with the unique ID
ecclesial_affiliations_staging['Archdpdx_Migration_Id__c'] = ecclesial_affiliations_staging.apply(create_unique_id, axis=1)

# Check for duplicates
ecclesial_affiliations_staging['Archdpdx_Migration_Id__c'].duplicated().value_counts()

In [None]:
# Send the new DataFrame to a CSV
ecclesial_affiliations_staging.to_csv('staging_files/Ecclesial_Affiliations_Staging.csv', index=False, encoding='utf-8-sig')

In [None]:
ecclesial_affiliations_staging

In [None]:
# upsert Ecclesial Affiliation records
upsert_to_salesforce_bulk(sf, ecclesial_affiliations_staging, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c', 'results_files/affiliations_ecclesial_results.csv', 100)

#FIXME: There are a number of rows where a Faculties Granted is missing a date, and conversely, where there is a Faculties Granted Date but no description of the Faculties granted. This is a problem, because the application requires a date for when Faculties were granted.

# Takes approx 1.5 minutes to run

## Religious Community Affiliations

In [None]:
acc_religious_staging_2

In [None]:
df_contact_staging

In [None]:
import pandas as pd

# Function to get the Salesforce record for the corresponding religious community records
def get_religious_community_id(religious_community_ID):
    if pd.isna(religious_community_ID) or religious_community_ID == '0':
        return None  # Return None or handle as appropriate if religious community name is NaN or '0'

    # Query Salesforce to find the religious community
    query = f"SELECT Id, Name FROM Account WHERE Archdpdx_Migration_Id__c = 'RelCommunities_{religious_community_ID}' LIMIT 1"
    results = sf.query(query)
    
    # If exists, return the ID
    if results['records']:
        return results['records'][0]['Id']
    else:
        raise ValueError(f"Religious community with ID '{religious_community_ID}' not found in Salesforce.")

# Function to get the Salesforce contact ID by record number
def get_contact_id_by_record_number(record_number):
    if pd.isna(record_number):
        return None
    query = f"SELECT Id FROM Contact WHERE Archdpdx_Migration_Id__c = '{record_number}'"
    results = sf.query(query)
    if results['records']:
        return results['records'][0]['Id']
    return None

# Prepare the data frame with the necessary columns
religious_affiliations_staging = pd.DataFrame(columns=['mbfc__Person__c', 'mbfc__Context__c', 'mbfc__Category__c', 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c'])

# Populate the data frame with the contact IDs and religious community IDs
new_entries = []

for index, row in df_contact_staging_spouses.iterrows():
    religious_community_ID = row.get('Link_to_Religious_Community', None)
    if pd.notna(religious_community_ID) and religious_community_ID != '0':
        try:
            contact_id = get_contact_id_by_record_number(row['Archdpdx_Migration_Id__c'])
            religious_community_id = get_religious_community_id(religious_community_ID)
            
            affiliation_record = {
                'mbfc__Person__c': contact_id,
                'mbfc__Context__c': religious_community_id,
                'mbfc__Category__c': 'Religious Community',
                'mbfc__Affiliation__c': 'Member',
                'Archdpdx_Migration_Id__c': f"{contact_id}_{religious_community_id}_RelComm"
            }
            new_entries.append(affiliation_record)
        except ValueError as e:
            print(f"Error processing row {index}: {e}")
            continue  # Skip to the next iteration

# Convert the list of new entries to a DataFrame
religious_affiliations_staging = pd.DataFrame(new_entries)

# Cell takes >5m to run

In [None]:
# Drop the index and ensure only the required columns are included

# religious_affiliations_staging.reset_index(drop=True, inplace=True)

religious_affiliations_staging = religious_affiliations_staging[['mbfc__Person__c', 'mbfc__Context__c', 'mbfc__Category__c', 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c']]

# Upsert the data frame to Salesforce
upsert_to_salesforce_bulk(sf, religious_affiliations_staging, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c', 'results_files/affiliations_religious_comm_results.csv', 100)

# AFFILIATIONS


In [None]:
# Import Assignments.csv

import pandas as pd


df_affiliations = (
    pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB 2025_03_21/Reports/Assignments.csv')
    .set_index('Record Number', verify_integrity=True)
    .drop(index='recNum', errors='ignore')  # Added errors='ignore' to prevent errors if 'recNum' does not exist
    .drop(columns=['Historic Name'], errors='ignore')  # Added errors='ignore' for the same reason
    .rename(columns=lambda x: x.replace(' ', '_'))  # Remove whitespace in column names
    .assign(Account_Ext_Id=lambda df: df['Organization_Table_Name'] + '_' + df['Organization_Table_Link'])
    # .assign(mbfc__Person__r=lambda df: df['Assigned_Person'].apply(lambda x: {'Archdpdx_Migration_Id__c': x}))
    # .assign(mbfc__Context__r=lambda df: df['Account_Ext_Id'].apply(lambda x: {'Archdpdx_Migration_Id__c': x}))
    # .assign(mbfc__Use_Custom_Title__c= True)
    .assign(mbfc__Category__c= 'Any All')
    # .assign(Archdpdx_Migration_Id__c= df_affiliations.index)
    .drop(columns=[
        # 'Assigned_Person'
        'Organization_Table_Name'
        ,'Organization_Table_Link'
        ,'Projected_Term_End_Date'
        ,'Term_Number'
        ,'Leave_Type' # Leave out 'Leave_Type' until mapped properly
        ])
    .rename(columns={
        'Duty_Load': 'mbfc__Duty_Load__c',
        'Start_Date': 'mbfc__Start_Date__c',
        'End_Date': 'mbfc__Completion_Date__c',
        'Assignment_Title': 'mbfc__Affiliation__c',
        'Archdiocesan_Assignment': 'adpdx_Archdiocesan_Assignment__c',
    })
    .replace({'ADPDX_Archdiocesan_Assignment__c': {'Yes': True, 'No': False, None: False}})
    .fillna('')
)

# Display a sample of the DataFrame to check the new structure
df_affiliations.sample(10)



In [None]:
df_affiliations['adpdx_Archdiocesan_Assignment__c'] = df_affiliations['adpdx_Archdiocesan_Assignment__c'].replace({'Yes': True, 'No': False, '': False}).fillna(False)

In [None]:
# Get SF Record Ids from External Ids

# Get Context Account Ids
add_salesforce_record_ids(sf, df_affiliations, 'Account_Ext_Id', 'Account', 'Archdpdx_Migration_Id__c', 'mbfc__Context__c')

In [None]:
# Get Person Contact Ids
add_salesforce_record_ids(sf, df_affiliations, 'Assigned_Person', 'Contact', 'Archdpdx_Migration_Id__c', 'mbfc__Person__c')

In [None]:
# Set Archdpdx_Migration_Id__c External ID
df_affiliations['Archdpdx_Migration_Id__c'] = df_affiliations.index

# Create Job ID
df_affiliations['Archdpdx_Job_Id__c'] = curr_job_id

df_affiliations


In [None]:
# Final cleanup
df_affiliations.drop(columns=[
    'Account_Ext_Id',
    'Assigned_Person', 
    ], 
    inplace=True)

df_affiliations

#FIXME: INVALID_FIELD: Foreign key external ID: relcommunities_23 not found for field Archdpdx_Migration_Id__c
#FIXME: INVALID_FIELD: Foreign key external ID: offices_0 not found for field Archdpdx_Migration_Id__c
#FIXME: Record #115 > FIELD_INTEGRITY_EXCEPTION: Start Date: invalid date: Tue Aug 01 00:00:00 GMT 1021 [mbfc__Start_Date__c

In [None]:
# TEST ME
# Set Archdpdx_Migration_Id__c External ID
df_affiliations['Archdpdx_Migration_Id__c'] = df_affiliations.index.astype(str).str.replace(' ', '').str.replace('-', '')


In [None]:
df_affiliations.to_csv('staging_files/affiliations_staging.csv', encoding='utf-8', index=False)

In [None]:
upsert_to_salesforce_bulk(sf, df_affiliations, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c', 'results_files/affiliations_general_results.csv', 1000)

# cell takes >4m to run

# Post-Migration Manual Updates

1. Convert 'Offices' that are ADPDX Pastoral Centre offices into record type: 'Groups', and set their parentID to the Diocese (there are just 6 of these accounts).
1. Update the Religous Order records 'Religious Superior' lookup.
1. Set 'organization type' field value for each account in the 'organization' load: Offices, Newman Centres, Schools, Organizations
1. Consolidate education degree titles in 'Affiliation.Affiliation' picklist into the standard value
