# Introduction

The following notebook orchestrates the migration of ADPDX Accounts into Salesforce.


# Order of Loading

1. Vicariates
1. Organizations
1. Religious Parents
1. Religious Communities
1. Religious Superiors
1. Contacts
1. Contact > Register Entries
1. Contact > Education Affiliations
1. Contact > Ecclesial Affiliations
1. Affiliations



# Setup Enviro


In [15]:
# !conda install -y simple-salesforce
# !conda install -y email_validator
# !conda install -y python-dotenv
# !conda install import-ipynb

In [16]:
# enviro setup

import pandas as pd
import numpy as np

from datetime import datetime
now = datetime.now()

from simple_salesforce import Salesforce

In [17]:
# import environment variables (SF login credentials)
from dotenv import load_dotenv
import os

load_dotenv()

True

In [18]:
# Global Variables { run: "auto", vertical-output: true, display-mode: "both" }

target_enviro = "adpdx_devpro" # @param {type:"string"}

# The `run_upserts` variable controls whether or not upserts to Salesforce are executed when the notebook is run.
run_upserts = "True" # @param ["True", "False"]

In [19]:
# ADPDX dev_pro credentials
adpdx_user = os.getenv('ADPDX_UAT_USER')
print(adpdx_user)
adpdx_pass = os.getenv('ADPDX_UAT_PASS')
print( adpdx_pass)
adpdx_token = os.getenv('ADPDX_UAT_TOKEN')
print(adpdx_token)

# instantiate a SF session object
sf = Salesforce(domain='test', username=adpdx_user, password=adpdx_pass, security_token=adpdx_token)

matt+adpdx@meribahflow.com.uat
CSN?QM3e
97zjin7eBEEHmN5uBLIYWX0J


## UDFs


In [20]:
# General notebook UDFs

import json
import csv
from datetime import datetime
from simple_salesforce import Salesforce

# Job ID Incrementer
def update_job_id(file_name):
    # Open the file in read mode and get the current job ID
    with open(file_name, 'r') as file:
        current_job_id = int(file.readline())

    # Increment the job ID
    new_job_id = current_job_id + 1

    # Open the file in write mode and update the job ID
    with open(file_name, 'w') as file:
        file.write(str(new_job_id))

    # Return the new job ID
    return new_job_id


def concat_columns(df, columns, new_column, separator='_'):
    """
    Concatenates the values from specified columns into a single string
    with the specified separator and populates a new column in the DataFrame.

    Args:
    - df: pandas DataFrame
    - columns: list of column names to concatenate
    - new_column: name of the new column to be created
    - separator: separator to use between concatenated values (default is '_')

    Returns:
    - Updated pandas DataFrame with the new column
    """
    df[new_column] = df[columns].astype(str).apply(lambda x: separator.join(x), axis=1)
    return df


def convert_non_serializables(data):
    """Convert non-serializable objects to serializable formats."""
    for key, value in data.items():
        try:
            if isinstance(value, (datetime, date)):
                data[key] = value.isoformat()
            elif isinstance(value, float) and np.isnan(value):
                data[key] = None
            elif pd.isna(value):
                data[key] = None
            elif isinstance(value, (int, bool, str, bool)):
                data[key] = value
            else:
                data[key] = str(value)  # Convert other types to string
        except Exception as e:
            print(f"Error processing key: {key}, value: {value}, error: {e}")
    return data

In [21]:
# Query, merge data with SF data  

import pandas as pd
from simple_salesforce import Salesforce
from simple_salesforce.exceptions import SalesforceMalformedRequest, SalesforceError

def find_salesforce_record_id(sf, df, column_to_search, sf_object_name, sf_field_name, new_column_name, match_behavior='first'):
    """
    Find Salesforce record IDs for a DataFrame column and add a new column with the Salesforce record IDs.

    Parameters:
    sf (Salesforce): The Salesforce connection instance.
    df (pd.DataFrame): The pandas DataFrame containing data.
    column_to_search (str): The column name in the DataFrame to search against Salesforce.
    sf_object_name (str): The Salesforce object name (e.g., 'Contact').
    sf_field_name (str): The field name in Salesforce to match.
    new_column_name (str): The name for the new DataFrame column to hold Salesforce record IDs.
    match_behavior (str): Behavior when multiple matches found ('first' or 'alert').

    Returns:
    pd.DataFrame: The original DataFrame with the new column containing Salesforce record IDs.

    Example usage:
    df_contact_staging = find_salesforce_record_id(sf, df_contact_staging, 'Link_to_Religious_Community', 'Contact', 'Archdpdx_Migration_Id__c', 'New_Column_Name', match_behavior='alert')

    """
    if column_to_search not in df.columns:
        raise ValueError(f"Column '{column_to_search}' not found in DataFrame.")

    df[new_column_name] = None
    multiple_matches_found = False

    unique_values = df[column_to_search].dropna().unique()
    chunk_size = 1000  # Adjust chunk size as needed

    for start in range(0, len(unique_values), chunk_size):
        chunk_values = unique_values[start:start + chunk_size]
        chunk_values_str = ", ".join([f"'{val}'" for val in chunk_values])

        soql_query = f"SELECT Id, {sf_field_name} FROM {sf_object_name} WHERE {sf_field_name} IN ({chunk_values_str})"
        
        try:
            query_result = sf.query_all(soql_query)
        except SalesforceMalformedRequest as e:
            raise ValueError(f"Malformed request error: {e.content}")
        except SalesforceError as e:
            raise ValueError(f"Salesforce error: {e.content}")

        id_mapping = {}
        for record in query_result['records']:
            key = record[sf_field_name]
            if key in id_mapping:
                multiple_matches_found = True
                if match_behavior == 'first':
                    continue  # Skip subsequent matches if 'first' behavior is selected
            id_mapping[key] = record['Id']

        df[new_column_name] = df[column_to_search].map(id_mapping)

    if multiple_matches_found and match_behavior == 'alert':
        print("Alert: Multiple matches found for some records.")

    return df


def get_recordtype_id(df_recordTypes, developer_name, sobject_type, namespace):
    """
    Retrieves the Record Type ID for a specific Developer Name, SObject Type, and Namespace.

    Parameters:
    df_recordTypes (pd.DataFrame): The DataFrame containing Salesforce Record Types.
    developer_name (str): The DeveloperName to filter by.
    sobject_type (str): The SObjectType to filter by.
    namespace (str): The Namespace to filter by.

    Returns:
    str: The Record Type ID if a match is found, otherwise raises an error.

    Example: 
    religious_recordtype_id = get_recordtype_id(df_sf_recordTypes, 'Religious', 'Account', 'mbfc')
    """
    try:
        recordtype_id = df_recordTypes.loc[
            (df_recordTypes['DeveloperName'] == developer_name) & 
            (df_recordTypes['SobjectType'] == sobject_type) &
            (df_recordTypes['NamespacePrefix'] == namespace),
            'Id'
        ].iloc[0]  # Retrieve the first match
        
        return recordtype_id
    except IndexError:
        raise ValueError(f"No record type found for DeveloperName '{developer_name}', SObjectType '{sobject_type}', and Namespace '{namespace}'")


# Add a Salesforce record ID column to a DataFrame based on matching external ID field values
def add_salesforce_record_ids(sf, dataframe, df_column_name, sf_object_name, sf_external_id_field, new_column_name, chunk_size=1000):
    """
    Add a Salesforce record ID column to a DataFrame based on matching external ID field values.

    Parameters:
    sf (Salesforce): The Salesforce connection instance.
    dataframe (pd.DataFrame): The pandas DataFrame containing data to match.
    df_column_name (str): The column name in the DataFrame to match with Salesforce.
    sf_object_name (str): The Salesforce object name (e.g., 'Contact').
    sf_external_id_field (str): The external ID field in Salesforce to match.
    new_column_name (str): The name for the new DataFrame column to hold Salesforce record IDs.
    chunk_size (int): The number of records to include in each chunk for querying Salesforce.

    Returns:
    pd.DataFrame: The original DataFrame with the new column containing Salesforce record IDs.
    """
    # Ensure the dataframe column name exists in the dataframe
    if df_column_name not in dataframe.columns:
        raise ValueError(f"Column '{df_column_name}' not found in DataFrame.")
    
    # Create a set of unique values from the specified DataFrame column
    unique_values = dataframe[df_column_name].dropna().unique()
    
    id_mapping = {}
    
    # Process the unique values in chunks
    for start in range(0, len(unique_values), chunk_size):
        chunk_values = unique_values[start:start + chunk_size]
        chunk_values_str = ", ".join([f"'{val}'" for val in chunk_values])
        
        soql_query = f"SELECT Id, {sf_external_id_field} FROM {sf_object_name} WHERE {sf_external_id_field} IN ({chunk_values_str})"
        
        try:
            query_result = sf.query_all(soql_query)
        except SalesforceMalformedRequest as e:
            raise ValueError(f"Malformed request error: {e.content}")
        except SalesforceError as e:
            raise ValueError(f"Salesforce error: {e.content}")
        
        # Update the id_mapping with results from the current chunk
        id_mapping.update({record[sf_external_id_field]: record['Id'] for record in query_result['records']})
    
    # Map the Salesforce record IDs to the DataFrame
    dataframe[new_column_name] = dataframe[df_column_name].map(id_mapping)
    
    return dataframe

In [22]:
# Upsert to SF

import pandas as pd
import numpy as np
from simple_salesforce import Salesforce, SalesforceMalformedRequest, SalesforceError
from datetime import datetime, date

# Gets or creates a Diocesan account based on the Account Name
def get_or_create_diocesan_account(sf, account_name):
    """
    Searches for an account by name, returns the ID if found,
    otherwise creates the account with RecordType 'Church' and 'mbfc__Church_Type__c' set to 'Diocese',
    and then returns the new ID.

    Parameters:
    sf (Salesforce): Salesforce connection object
    account_name (str): The name of the account to search for or create

    Returns:
    str: The ID of the found or created account
    """

    # Query for the Record Type ID using the Developer Name 'Church'
    record_type_query = "SELECT Id FROM RecordType WHERE SobjectType = 'Account' AND DeveloperName = 'Church' LIMIT 1"
    record_type_result = sf.query(record_type_query)
    if record_type_result['records']:
        record_type_id = record_type_result['records'][0]['Id']
    else:
        raise ValueError("No RecordType found with DeveloperName 'Church'")

    # Search for the Account by name
    account_query = f"SELECT Id FROM Account WHERE Name = '{account_name}' LIMIT 1"
    account_result = sf.query(account_query)
    
    if account_result['records']:
        # Account found, return the ID
        return account_result['records'][0]['Id']
    else:
        # Account not found, create a new Account
        account_data = {
            'Name': account_name,
            'RecordTypeId': record_type_id,
            'mbfc__Church_Type__c': 'Diocese'
        }
        new_account = sf.Account.create(account_data)
        return new_account['id']
    
    from simple_salesforce import Salesforce

# improved version of the get_or_create_diocesan_account function
def get_or_create_account(sf, account_name, record_type_dev_name, church_type):
    """
    Searches for an account by name, returns the ID if found,
    otherwise creates the account with the specified Record Type and Church Type,
    and then returns the new ID.

    Parameters:
    sf (Salesforce): Salesforce connection object
    account_name (str): The name of the account to search for or create
    record_type_dev_name (str): The developer name of the Record Type to use for creating the account
    church_type (str): The Church Type to set for the new account

    Returns:
    str: The ID of the found or created account

    Example usage: 
    sf = Salesforce(username='your_username', password='your_password', security_token='your_security_token')
    account_id = get_or_create_account(sf, 'Diocese of Calgary', 'Church', 'Diocese')
    print(f"Account ID: {account_id}")
    """

    # Query for the Record Type ID using the provided developer name
    record_type_query = f"SELECT Id FROM RecordType WHERE SobjectType = 'Account' AND DeveloperName = '{record_type_dev_name}' LIMIT 1"
    record_type_result = sf.query(record_type_query)
    if record_type_result['records']:
        record_type_id = record_type_result['records'][0]['Id']
    else:
        raise ValueError(f"No RecordType found with DeveloperName '{record_type_dev_name}'")

    # Search for the Account by name
    account_query = f"SELECT Id FROM Account WHERE Name = '{account_name}' LIMIT 1"
    account_result = sf.query(account_query)
    
    if account_result['records']:
        # Account found, return the ID
        return account_result['records'][0]['Id']
    else:
        # Account not found, create a new Account
        account_data = {
            'Name': account_name,
            'RecordTypeId': record_type_id,
            'mbfc__Church_Type__c': church_type
        }
        new_account = sf.Account.create(account_data)
        return new_account['id']


# def upsert_to_salesforce(sf, dataframe, object_name, external_id_field):
#     """
#     Upsert records to Salesforce from a pandas DataFrame.

#     Parameters:
#     sf (Salesforce): The Salesforce connection instance.
#     dataframe (pd.DataFrame): The pandas DataFrame containing data to upsert.
#     object_name (str): The Salesforce object name (e.g., 'Contact').
#     external_id_field (str): The external ID field used for upserts.
#     """
#     successful_upserts = 0
#     failed_upserts = 0

#     # Replace placeholder values with None in the DataFrame
#     dataframe.replace({None: pd.NA, ' ': None, '': None}, inplace=True)

#     # Convert DataFrame to a list of dictionaries
#     data_to_upsert = dataframe.to_dict(orient='records')

#     for data in data_to_upsert:
#         try:
#             data = convert_non_serializables(data)
#             external_id = data.pop(external_id_field)

#             # Perform upsert using only the External ID
#             response = getattr(sf, object_name).upsert(f'{external_id_field}/{external_id}', data)
#             successful_upserts += 1
#             print(f"Successfully upserted {object_name} with External ID: {external_id}")
#         except SalesforceMalformedRequest as e:
#             failed_upserts += 1
#             print(f"Malformed request error when upserting {object_name} with External ID: {external_id}. Error: {e.content}")
#         except SalesforceError as e:
#             failed_upserts += 1
#             print(f"Salesforce error when upserting {object_name} with External ID: {external_id}. Error: {e.content}")
#         except Exception as e:
#             failed_upserts += 1
#             print(f"Failed to upsert {object_name} with External ID: {external_id}. Error: {e}")

#     print(f"Upsert completed. Successful upserts: {successful_upserts}, Failed upserts: {failed_upserts}")


def upsert_to_salesforce_bulk(sf, dataframe, object_name, external_id_field, results_log_file, batch_size=100):
    """
    Upsert records to Salesforce from a pandas DataFrame using the Bulk API.

    Parameters:
    sf (Salesforce): The Salesforce connection instance.
    dataframe (pd.DataFrame): The pandas DataFrame containing data to upsert.
    object_name (str): The Salesforce object name (e.g., 'Contact').
    external_id_field (str): The external ID field used for upserts.
    results_log_file (str): The file name where the full upsert results will be logged.
    batch_size (int): The number of records to include in each batch.
    """
    successful_upserts = 0
    failed_upserts = 0
    batch_number = 0

    # Replace placeholder values with None in the DataFrame
    dataframe.replace({pd.NA: None, ' ': None, '': None}, inplace=True)

    # Convert DataFrame to a list of dictionaries
    data_to_upsert = dataframe.to_dict(orient='records')

    # Open the results log file in 'write' mode to truncate/overwrite existing data
    with open(results_log_file, 'w') as results_log:
        writer = csv.writer(results_log)
        writer.writerow(['Batch Number', 'Record', 'Success', 'Error'])  # Write the headers

        # Process data in batches
        for i in range(0, len(data_to_upsert), batch_size):
            batch_number += 1
            batch_data = data_to_upsert[i:i + batch_size]
            batch_data = [convert_non_serializables(record) for record in batch_data]

            try:
                # Perform bulk upsert
                response = sf.bulk.__getattr__(object_name).upsert(batch_data, external_id_field=external_id_field)

                for index, res in enumerate(response):
                    if res['success']:
                        successful_upserts += 1
                        writer.writerow([batch_number, json.dumps(batch_data[index]), 'True', ''])
                    else:
                        failed_upserts += 1
                        writer.writerow([batch_number, json.dumps(batch_data[index]), 'False', json.dumps(res['errors'])])

            except SalesforceMalformedRequest as e:
                failed_upserts += len(batch_data)
                writer.writerow([batch_number, '', 'False', f"Malformed request: {e.content}"])
                for record in batch_data:
                    writer.writerow([batch_number, json.dumps(record), 'False', f"Failed record due to malformed request"])

            except SalesforceError as e:
                failed_upserts += len(batch_data)
                writer.writerow([batch_number, '', 'False', f"Salesforce error: {e.content}"])
                for record in batch_data:
                    writer.writerow([batch_number, json.dumps(record), 'False', f"Failed record due to Salesforce error"])

            except Exception as e:
                failed_upserts += len(batch_data)
                writer.writerow([batch_number, '', 'False', f"Unexpected error: {str(e)}"])
                for record in batch_data:
                    writer.writerow([batch_number, json.dumps(record), 'False', f"Failed record due to unexpected error"])

            # Progress monitoring
            print(f"Batch {batch_number} processed: {successful_upserts} successful, {failed_upserts} failed.")

    # Final summary message
    total_records = len(data_to_upsert)
    total_batches = batch_number
    print(f"Upsert completed. Total records processed: {total_records}, Batches: {total_batches}, "
          f"Successful upserts: {successful_upserts}, Failed upserts: {failed_upserts}")

## Extract Salesforce xref data

The following cells downloads all records from the target Salesforce enviro for the following objects:

- RecordTypes
- Users
- Accounts
- Contacts


In [23]:
# Get or create the Diocesan Account and get its ID
diocesan_account_id = get_or_create_account(sf, 'Archdiocese of Portland in Oregon', 'Church', 'Diocese')

print(f"Account ID: {diocesan_account_id}")

Account ID: 001Dx00001HwDsgIAF


In [24]:
# get all ACTIVE SF users

sf_users = sf.query('Select Alias, FirstName, LastName, Username, id from User WHERE IsActive = True')
df_sf_users = pd.DataFrame(sf_users['records'])
df_sf_users = df_sf_users.drop(columns = 'attributes')

In [25]:
# get all SF Record Types
get_all_recordTypes = 'Select Id, Name, DeveloperName, sObjecttype, namespaceprefix from RecordType'

# get list of records, add to dataframe
sf_recordTypes = sf.query(get_all_recordTypes)
df_sf_recordTypes = pd.DataFrame(sf_recordTypes['records'])
df_sf_recordTypes = df_sf_recordTypes.drop(columns = 'attributes')

# Drop NPSP's 'Organization' record type  
df_sf_recordTypes = df_sf_recordTypes[
    ~((df_sf_recordTypes['Name'] == 'Organization') & (df_sf_recordTypes['NamespacePrefix'].isna()))
]

# Create a dictionary mapping 'DeveloperName' to 'Id' for faster lookup
record_types_mapping = df_sf_recordTypes.set_index('DeveloperName')['Id'].to_dict()

In [26]:
# get SF Accounts
get_all_accounts = 'Select id, Name, RecordTypeId, Type, mbfc__Parish_Code__c, Job_Id__c, Archdpdx_Migration_Id__c from Account'

# get list of records, add to dataframe
sf_accounts = sf.query(get_all_accounts)
df_sf_accounts = pd.DataFrame(sf_accounts['records'])
df_sf_accounts = df_sf_accounts.drop(columns = 'attributes')
df_sf_accounts.shape

(2000, 7)

In [27]:
# get SF Contacts
get_all_contacts = 'Select id, Name, npe01__Type_of_Account__c, RecordTypeId, Archdpdx_Migration_Id__c, CreatedById from Contact'

# get list of records, add to dataframe
sf_contacts = sf.query(get_all_contacts)
df_sf_contacts = pd.DataFrame(sf_contacts['records'])
df_sf_contacts = df_sf_contacts.drop(columns = 'attributes')
df_sf_contacts.shape

(2000, 6)

# ACCOUNTS


## Extract


### Load ArchdPDX csvs as DataFrames

ADPDX data for organizations is held in 6 tables, all of which will be migrated into Salesforce's Accounts object.


In [28]:
df_offices = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/Offices.csv', skiprows= lambda x: x in [1])
df_offices["src_table"] = 'Offices'
df_offices["AccountRecordType"] = 'Organization'
df_offices.rename({
    "Common Name": "Name",
    "Name": "Formal_Name__c"
    }, axis="columns", inplace=True)

df_offices

Unnamed: 0,Record Number,Name,Formal_Name__c,Archdiocese Assigns Clergy,Locator Description,Mailing Address,Mailing Address 2,Mailing Address City,Mailing Address State,Mailing Address Province,Mailing Address Postal Code,Mailing Address Country,Phone,Fax,Email,Web Site,src_table,AccountRecordType
0,1,Pastoral Center,Pastoral Center,Yes,,2838 E Burnside St,,Portland,OR,,97214,,503-234-5334,503-234-2545,commdir@archdpdx.org,http://www.archdpdx.org/,Offices,Organization
1,3,Catholic Sentinel,Catholic Sentinel,No,,2838 E Burnside St,,Portland,OR,,97214,,503-281-1191,,sentinel@catholicsentinel.org,http://www.sentinel.org/,Offices,Organization
2,4,Catholic Cemeteries,Catholic Cemeteries,No,,333 SW Skyline Blvd,,Portland,OR,,97221,,503-292-6621,,,http://www.ccpdxor.com/,Offices,Organization
3,6,Griffin Center,Griffin Center,No,,11957 SE Fuller Rd,,Milwaukie,OR,,97222,,503-652-7476,,hwycoff@archdpdx.org,http://www.griffincenterportland.org/,Offices,Organization
4,11,Providence Portland Medical Center,Providence Portland Medical Center,Yes,,4805 NE Glisan St,,Portland,OR,,97213,,503-215-6833,,,,Offices,Organization
5,12,Providence St. Vincent Medical Center,Providence St. Vincent Medical Center,Yes,,9205 SW Barnes Rd,,Portland,OR,,97213,,503-216-2261,,,,Offices,Organization
6,14,National Sanctuary of Our Sorrowful Mother (Th...,National Sanctuary of Our Sorrowful Mother (Th...,No,8840 NE Skidmore St,PO Box 20008,,Portland,OR,,97294,,503-254-7371,503-254-7948,office@thegrotto.org,https://thegrotto.org/,Offices,Organization
7,18,Southern Oregon VA Rehabilitation Center,Southern Oregon VA Rehabilitation Center,No,,8495 Crater Lake Hwy,,White City,OR,,97503,,541-826-2111 x3318,,,https://www.southernoregon.va.gov/index.asp,Offices,Organization
8,21,Clergy Office,Clergy Office,Yes,,2838 E Burnside St,,Portland,OR,,97214,,503-233-8368,503-230-1477,clergy@archdpdx.org,http://www.archdpdx.org/vicar-for-clergy,Offices,Organization
9,22,Vocations,Vocations,Yes,,2838 E Burnside St,,Portland,OR,,97214,,503-233-8368,503-230-1477,vocations@archdpdx.org,https://www.archdpdxvocations.org/,Offices,Organization


In [29]:
df_parishes = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/Parishes (3).csv', dtype={'Vicariate': 'object', 'Established': 'str', 'Mission Of': 'str'}, skiprows= lambda x: x in [1])
df_parishes["src_table"] = 'Parishes'
df_parishes["AccountRecordType"] = 'Church'
# df_parishes.rename({"Parish Formal Name": "Account Name"}, axis="columns", inplace=True)
df_parishes.rename({
                    "Parish Formal Name": "Formal_Name__c",
                    "Common Name": "Name",
                    'Mission Of': 'Parent_Parish'
                }, axis="columns", inplace=True)

df_parishes

Unnamed: 0,Record Number,Name,Sort Name,Parish Name,Formal_Name__c,Parish City,Archdiocese Assigns Clergy,Parent_Parish,Established,Vicariate,...,Schedule 4 Head,Schedule 4 Text,Schedule 5 Head,Schedule 5 Text,Schedule 6 Head,Schedule 6 Text,Schedule 7 Head,Schedule 7 Text,src_table,AccountRecordType
0,1,"Our Lady of Perpetual Help, St Mary’s, Albany",our lady of perpetual help st marys albany,,"Our Lady of Perpetual Help, St Mary’s",Albany,Yes,0,1885,1,...,Adoration,Wednesday 6:00 pm – 7:00 pm in the Church<br>D...,,,,,,,Parishes,Church
1,2,"St. Andrew Dũng-Lạc Mission, Aloha",st andrew dunglac aloha,,St. Andrew Dũng-Lạc,Aloha,No,83,0,13,...,,,,,,,,,Parishes,Church
2,3,"St. Elizabeth Ann Seton, Aloha",st elizabeth ann seton aloha,,St. Elizabeth Ann Seton,Aloha,Yes,0,1982,16,...,Adoration/Adoración,Sunday 11:00 am–3:00 pm (chapel)<br>Monday–Wed...,,,,,,,Parishes,Church
3,4,"St. Peter the Fisherman Mission, Arch Cape",st peter the fisherman arch cape,,St. Peter the Fisherman,Arch Cape,Yes,131,0,9,...,,,,,,,,,Parishes,Church
4,5,"Our Lady of the Mountain, Ashland",our lady of the mountain ashland,,Our Lady of the Mountain,Ashland,Yes,0,1887,15,...,Adoration,First Friday 9:00 am–6:00 pm<br>or 24 hours pe...,,,,,,,Parishes,Church
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,152,"St. John in the Woods Mission, Welches",st john in the woods welches,,St. John in the Woods,Welches,Yes,127,0,5,...,,,,,,,,,Parishes,Church
147,153,"St. Cyril, Wilsonville",st cyril wilsonville,,St. Cyril,Wilsonville,Yes,0,1926,17,...,Adoration,"Thursday 12:30 pm–1:00 pm, 6:00–6:30 pm",,,,,,,Parishes,Church
148,154,"St. Luke, Woodburn",st luke woodburn,,St. Luke,Woodburn,Yes,0,1899,6,...,,,,,,,,,Parishes,Church
149,155,"St. John the Evangelist, Yamhill",st john the evangelist yamhill,,St. John the Evangelist,Yamhill,Yes,0,1910,18,...,Adoration,Thursday 9:30–11:00 am,,,,,,,Parishes,Church


In [30]:
df_religious = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/RelCommunities.csv', skiprows= lambda x: x in [1])
df_religious["src_table"] = 'RelCommunities'
df_religious["AccountRecordType"] = 'Religious'
df_religious.rename({
                    "Community Name": "Formal_Name__c",
                    "Common Name": "Name"
                     }, axis="columns", inplace=True)

df_religious.sample(10)

Unnamed: 0,Record Number,Name,Formal_Name__c,Community City,Archdiocese Assigns Clergy,Order Full Name,Order Common Name,Order Letters,Men or Women,Non-Latin Rite,...,Religious Order,Secular Order,Diocesan Order,Pontifical Order,Local Superior,Major Superior Name,Major Superior Phone,Major Superior Email,src_table,AccountRecordType
20,30,"Adrian Dominican Sisters, Adrian, MI (OP)",Adrian Dominican Sisters,"Adrian, MI",No,Dominican Sisters of Adrian,Dominicans,OP,Women,No,...,No,No,No,Yes,0,"Sr. Elise García, OP Chapter Prioress",,,RelCommunities,Religious
32,42,"Sisters of Charity of the Blessed Virgin Mary,...",Sisters of Charity of the Blessed Virgin Mary,Portland,No,Sisters of Charity of the Blessed Virgin Mary,Sisters of Charity of the Blessed Virgin Mary,BVM,Women,No,...,Yes,,,Yes,0,"LaDonna Manternach, BVM – President",563-588-2351,,RelCommunities,Religious
34,44,"Sisters of the Holy Names of Jesus and Mary, M...",Sisters of the Holy Names of Jesus and Mary,Marylhurst,No,Sisters of the Holy Names of Jesus and Mary U....,Holy Names Sisters,SNJM,Women,No,...,Yes,No,No,No,2479,,,,RelCommunities,Religious
31,41,"Sisters For Christian Community, Hillsboro (SFCC)",Sisters For Christian Community,Hillsboro,No,Sisters For Christian Community,Sisters For Christian Community,SFCC,Women,No,...,,,,,0,,,,RelCommunities,Religious
9,13,Canisius Jesuit Community at Jesuit High Schoo...,Canisius Jesuit Community at Jesuit High School,Beaverton,No,Societas Iesu,Jesuits,SJ,Men,No,...,,,,,0,,,,RelCommunities,Religious
39,49,"Sisters of Reparation, Portland (SR)",Sisters of Reparation,Portland,No,Sisters of Reparation of the Sacred Wounds of ...,Sisters of Reparation,SR,Women,No,...,Yes,No,Yes,No,2463,,,,RelCommunities,Religious
19,29,"Adorers of the Holy Cross, Portland (MTG)",Adorers of the Holy Cross,Portland,No,Adorers of the Holy Cross,Adorers of the Holy Cross,MTG,Women,No,...,,,,,2460,,,,RelCommunities,Religious
10,14,"Félix Rougier House of Studies, Mount Angel (M...",Félix Rougier House of Studies,Mount Angel,No,Misioneros del Espíritu Santo,Missionaries of the Holy Spirit,MSpS,Men,No,...,,,,,725,,,,RelCommunities,Religious
12,17,"Carmelite House of Studies, Mount Angel (OCD)",Carmelite House of Studies,Mount Angel,No,Ordo Carmelitarum Discalceatorum,Discalced Carmelite Friars,OCD,Men,No,...,Yes,No,No,No,2412,"Fr. Matthew Williams, O.C.D.",909-793-0424,,RelCommunities,Religious
36,46,"Sisters of Mary of Kakamega, Portland (SMK)",Sisters of Mary of Kakamega,Portland,No,Sisters of Mary of Kakamega,Sisters of Mary of Kakamega,SMK,Women,No,...,,,,,0,Sr. Josephine,,,RelCommunities,Religious


In [31]:
df_schools = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/Schools.csv', skiprows= lambda x: x in [1])
df_schools["src_table"] = 'Schools'
df_schools["AccountRecordType"] = 'Organization'
df_schools.rename({
                    "School Name": "Formal_Name__c",
                    "Common Name": "Name",
                    'Parish Link': 'Parent_Parish'
                    
                    }, axis="columns", inplace=True)

df_schools.sample(10)

Unnamed: 0,Record Number,Name,Formal_Name__c,School City,Archdiocese Assigns Clergy,Parent_Parish,Vicariate Link,Archdiocesan School Code,Grades Provided,Established,...,Mailing Address Zip,Mailing Address Country,Phone,Fax,Email,Web Site,Lat/Long Coordinates Decimal,Google Small Embed URL,src_table,AccountRecordType
46,49,"St. Clare School, Portland",St. Clare School,Portland,Yes,93,0,12-PDXCLAS,K-8,1914.0,...,97219,,503-244-7600,503-293-2076,info@stclarepdx.org,https://www.stclarepdx.org/,"45.46307574143576,-122.69822944820064",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization
12,15,"La Salle Catholic College Preparatory, Milwaukie",La Salle Catholic College Preparatory,Milwaukie,No,0,0,12-MILLASS,9-12,,...,97222,,503-659-4155,503-659-2535,info@lsprep.org,https://www.lsprep.org/,"45.43655634320491,-122.58915834820111",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization
20,23,"Sacred Heart Catholic School, Gervais",Sacred Heart Catholic School,Gervais,Yes,38,0,12-GERSACS,PK-8,,...,97026,,503-792-4541,503-792-3826,shssecretary@shstl.org,https://school.shstl.org/,"45.11004716502553,-122.90336668451253",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization
11,14,"Jesuit High School, Beaverton",Jesuit High School,Beaverton,No,0,0,12-PDXJESS,9-12,,...,97225,,503-292-2663,503-291-5464,,https://www.jesuitportland.org/,"45.48485193998266,-122.77096984820002",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization
29,32,"All Saints School, Portland",All Saints School,Portland,Yes,75,0,12-PDXALLS,PK-8,1936.0,...,97232,,503-236-6205,503-236-0781,office@allsaintsportland.com,https://allsaintsportland.com/,"45.52753313317805,-122.62445829995636",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization
54,57,"Mount Angel Seminary, Mount Angel",Mount Angel Seminary,Mount Angel,No,0,0,,,1889.0,...,97373,,503-845-3951,503-845-3128,seminaryinfo@mtangel.edu,https://www.mountangelabbey.org/seminary/,"45.057377868413475,-122.77894774820723",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization
7,10,"Valley Catholic High School, Beaverton",Valley Catholic High School,Beaverton,No,0,0,12-BEAVALH,9-12,,...,97078,,503-644-3745,503-646-4054,dierardi@valleycatholic.org,https://www.valleycatholic.org/,"45.48728744411335,-122.83094091801642",https://www.google.com/maps/embed?pb=!1m14!1m8...,Schools,Organization
39,42,"St. Thomas More Catholic School, Portland",St. Thomas More Catholic School,Portland,Yes,114,0,12-PDXTHOS,K-8,1948.0,...,97221,,503-222-6105,503-227-5661,stmschool@stmpdx.org,https://www.stmpdxschool.org/,"45.502207238927774,-122.71646438450101",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization
50,53,"Franciscan Montessori Earth School, Portland",Franciscan Montessori Earth School,Portland,No,0,0,12-PDXFMES,Ungraded,1977.0,...,97236,,503-760-8220,800-967-1502,info@fmes.org,https://fmes.org/,"45.501894638845194,-122.51350104843362",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization
40,43,"St. Therese Catholic School, Portland",St. Therese Catholic School,Portland,Yes,113,0,12-PDXTHES,PS-8,,...,97230,,503-253-9400,503-253-9571,elyoussefr@stthereseschool.org,https://www.stthereseschool.org/,"45.531674736959594,-122.52926558450008",https://www.google.com/maps/embed?pb=!1m18!1m1...,Schools,Organization


In [32]:
df_vicariates = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/Vicariates.csv', skiprows= lambda x: x in [1])
df_vicariates["src_table"] = 'Vicariates'
df_vicariates["AccountRecordType"] = 'Deanery'
# As we want to designate the Common Name as what will be the Account Name in Salesforce, we are renaming these columns in a different pattern than prior CSVs.
df_vicariates.rename({"Common Name": "Name"}, axis="columns", inplace=True)

df_vicariates.sample(10)

Unnamed: 0,Record Number,Name,Vicariate Name,Archdiocese Assigns Clergy,src_table,AccountRecordType
6,7,Metropolitan Eugene Vicariate,Metropolitan Eugene,Yes,Vicariates,Deanery
12,13,"South Portland, Suburban Vicariate","South Portland, Suburban",Yes,Vicariates,Deanery
10,11,Santiam Vicariate,Santiam,Yes,Vicariates,Deanery
5,6,Marion County Vicariate,Marion County,Yes,Vicariates,Deanery
15,16,Tualatin Valley Vicariate,Tualatin Valley,Yes,Vicariates,Deanery
13,14,Southeast Portland Vicariate,Southeast Portland,Yes,Vicariates,Deanery
2,3,Columbia County Vicariate,Columbia County,Yes,Vicariates,Deanery
4,5,"East Portland, Suburban Vicariate","East Portland, Suburban",Yes,Vicariates,Deanery
11,12,South Coast Vicariate,South Coast,Yes,Vicariates,Deanery
8,9,North Coast Vicariate,North Coast,Yes,Vicariates,Deanery


In [33]:
df_newman = pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/NewmanCenters.csv', skiprows= lambda x: x in [1])
df_newman["src_table"] = 'NewmanCenters'
df_newman["AccountRecordType"] = 'Organization'
df_newman.rename({
                    "Newman Center Name": "Formal_Name__c",
                    "Common Name": "Name",
                    "Newman Center City": "Mailing Address City2"
                  }, axis="columns", inplace=True)

df_newman

Unnamed: 0,Record Number,Name,Formal_Name__c,Mailing Address City2,Archdiocese Assigns Clergy,Established,Locator Description,Mailing Address,Mailing Address 2,Mailing Address City,...,Schedule 4 Head,Schedule 4 Text,Schedule 5 Head,Schedule 5 Text,Schedule 6 Head,Schedule 6 Text,Schedule 7 Head,Schedule 7 Text,src_table,AccountRecordType
0,1,"OSU Newman Center, Corvallis",OSU Newman Center,Corvallis,No,0,,2127 NW Monroe Ave,,Corvallis,...,,,,,,,,,NewmanCenters,Organization
1,2,"St. Thomas More (UO) Newman Center, Eugene",St. Thomas More (UO) Newman Center,Eugene,No,1915,,1850 Emerald St,,Eugene,...,,,,,,,,,NewmanCenters,Organization
2,3,Walsh Memorial (SOU) Newman Center at Our Lady...,Walsh Memorial (SOU) Newman Center at Our Lady...,Ashland,Yes,0,,987 Hillview Dr,,Ashland,...,,,,,,,,,NewmanCenters,Organization
3,4,"PDX (PSU) Newman Center, Portland",PDX (PSU) Newman Center,Portland,No,0,,424 SW Mill St,,Portland,...,,,,,,,,,NewmanCenters,Organization


Each of the 6 tables has an overlapping but distinct set of columns, making it challenging to conform these tables into a single staging table.

In addition, columns that correspond to the same field in salesforce are named differently in each table (eg. 'Parish City' vs. 'Religious City' vs. 'Newman Center City')


In [34]:
print('TABLE: (ROWS, COLUMNS)\n')

print(f'Offices:    {df_offices.shape}')
print(f'Parishes:   {df_parishes.shape}')
print(f'Religious:  {df_religious.shape}')
print(f'Schools:    {df_schools.shape}')
print(f'Vicariates: {df_vicariates.shape}')
print(f'Newman Ctr: {df_newman.shape}')

TABLE: (ROWS, COLUMNS)

Offices:    (35, 18)
Parishes:   (151, 45)
Religious:  (70, 34)
Schools:    (56, 26)
Vicariates: (18, 6)
Newman Ctr: (4, 37)


### Merge DFs into a single Accounts DF

This step takes 6 different tables and combines them into a single Accounts table for cleaning and staging.


In [35]:
# init list of DataFrames
src_accounts = [df_offices, df_parishes, df_religious, df_schools, df_vicariates, df_newman]

# concats the various Account dataframes into one large table
accounts = pd.concat(src_accounts, ignore_index=True)

## Transform


Time to do some table column renaming and re-organizing!


In [36]:
# renames columns headers to consolidate account names into SF-conformed data model
accounts.rename({"Common Name": "Name, City"}, axis="columns", inplace=True)

accounts.rename(
    columns={
        # 'Account Name': 'Name',
        'Mailing Address': 'BillingStreet1',
        'Mailing Address 2': 'BillingStreet2',
        'Mailing Address City': 'BillingCity',
        'Mailing Address State': 'BillingState',
        'Mailing Address Postal Code': 'BillingPostalCode',
        'Mailing Address Country': 'BillingCountry',
        'Email': 'mbfc__Email__c',
        'Web Site': 'Website',
        'Order Common Name': 'mbfc__Abbreviation__c',
        'Order Letters': 'mbfc__Religious_Suffix__c',
        'Men or Women': 'mbfc__Type_Members__c',
        'Archdiocese Assigns Clergy': 'Archdiocese_Assigns_Clergy__c',
        'Locator Description': 'Locator_Description__c',
        'Established': 'mbfc__Date_Established__c',
        'County': 'County__c',
        'Disabled Access': 'Disabled_Access__c',
        'Sanctuary Capacity': 'Sanctuary_Capacity__c',
        'Miles to Pastoral Centre': 'Miles_to_Pastoral_Centre__c',
        'Archdiocesan School Code': 'Archdiocesan_School_Code__c',
        'Grades Provided': 'Grades_Provided__c'
    },
    inplace=True
)


# reorder column order
col = accounts.pop('Name')
accounts.insert(2, col.name, col)

col = accounts.pop('Parish Name')
accounts.insert(3, col.name, col)

col = accounts.pop('AccountRecordType')
accounts.insert(1, col.name, col)



In [37]:
accounts.columns

Index(['Record Number', 'AccountRecordType', 'Formal_Name__c', 'Name',
       'Parish Name', 'Archdiocese_Assigns_Clergy__c',
       'Locator_Description__c', 'BillingStreet1', 'BillingStreet2',
       'BillingCity', 'BillingState', 'Mailing Address Province',
       'BillingPostalCode', 'BillingCountry', 'Phone', 'Fax', 'mbfc__Email__c',
       'Website', 'src_table', 'Sort Name', 'Parish City', 'Parent_Parish',
       'mbfc__Date_Established__c', 'Vicariate', 'Non-Latin', 'County__c',
       'Disabled_Access__c', 'Sanctuary_Capacity__c',
       'Lat/Long Coordinates Decimal', 'Google Small Embed URL',
       'Miles to Pastoral Center', 'Schedule 1 Head', 'Schedule 1 Text',
       'Schedule 2 Head', 'Schedule 2 Text', 'Schedule 3 Head',
       'Schedule 3 Text', 'Schedule 4 Head', 'Schedule 4 Text',
       'Schedule 5 Head', 'Schedule 5 Text', 'Schedule 6 Head',
       'Schedule 6 Text', 'Schedule 7 Head', 'Schedule 7 Text',
       'Community City', 'Order Full Name', 'mbfc__Abbreviat

In [38]:
accounts[accounts.BillingStreet2.isna() == False]

Unnamed: 0,Record Number,AccountRecordType,Formal_Name__c,Name,Parish Name,Archdiocese_Assigns_Clergy__c,Locator_Description__c,BillingStreet1,BillingStreet2,BillingCity,...,Major Superior Phone,Major Superior Email,School City,Vicariate Link,Archdiocesan_School_Code__c,Grades_Provided__c,Mailing Address 1,Mailing Address Zip,Vicariate Name,Mailing Address City2
14,32,Organization,Diaconate Office,Diaconate Office,,Yes,,Pastoral Center,2838 E Burnside St,Portland,...,,,,,,,,,,
32,58,Organization,Office of Marketing and Communications,Office of Marketing and Communications,,Yes,,Pastoral Center,2838 E Burnside St,Portland,...,,,,,,,,,,
35,1,Church,"Our Lady of Perpetual Help, St Mary’s","Our Lady of Perpetual Help, St Mary’s, Albany",,Yes,SW Ellsworth St between 8th and 9th Streets,"Our Lady of Perpetual Help, St Mary’s Parish",815 Broadalbin St SW,Albany,...,,,,,,,,,,
36,2,Church,St. Andrew Dũng-Lạc,"St. Andrew Dũng-Lạc Mission, Aloha",,No,SW Grabhorn Rd/209th Ave and Farmington Rd,St. Andrew Dũng-Lạc Mission,7390 SW Grabhorn Rd,Aloha,...,,,,,,,,,,
37,3,Church,St. Elizabeth Ann Seton,"St. Elizabeth Ann Seton, Aloha",,Yes,,St. Elizabeth Ann Seton Parish,3145 SW 192nd Ave,Aloha,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236,62,Religious,Work of Jesus the High Priest,"Work of Jesus the High Priest, Gresham (OJSS)",,No,,OJSS Community,451 NW 1st St,Gresham,...,,,,,,,,,,
238,64,Religious,Heralds of the Good News,"Heralds of the Good News, Portland (HGN)",,No,,c/o Chancellor,2838 E Burnside St,Portland,...,+91 80 74 51 02 67,rkappumkal@gmail.com,,,,,,,,
239,65,Religious,Missionary Oblates of Mary Immaculate,"Missionary Oblates of Mary Immaculate, Rome, I...",,No,,Missionary Oblates of Mary Immaculate,Via Aurelia 290,Roma,...,,gensec@omigen.org,,,,,,,,
247,73,Religious,Brothers of Saint John,"Brothers of Saint John, Laredo, TX (CSJ)",,No,,St. John Priory,505 Century Dr S,Laredo,...,,,,,,,,,,


In [39]:
# merge two Non-Latin columns into one 
accounts['Non_Latin__c'] = accounts['Non-Latin'].combine_first(accounts['Non-Latin Rite']) 

# Rename the 'Non_Latin__c' field to 'mbfc__Non_Latin__c'
accounts.rename(columns={'Non_Latin__c': 'mbfc__Non_Latin__c'}, inplace=True)


In [40]:
# export merged tables DESCRIPTION to CSV for mapping
accounts.describe(include='all').transpose().to_csv(f'/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/working/accounts.csv')
accounts.describe(include='all').transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
Record Number,334.0,,,,54.5,41.389801,1.0,21.25,45.0,76.75,173.0
AccountRecordType,334,4,Church,151,,,,,,,
Formal_Name__c,316,273,St. Mary,5,,,,,,,
Name,334,334,Pastoral Center,1,,,,,,,
Parish Name,5,5,St. Anne,1,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
Mailing Address 1,56,55,4420 SW St Marys Dr,2,,,,,,,
Mailing Address Zip,56.0,,,,97222.446429,124.9586,97005.0,97134.75,97217.5,97301.0,97526.0
Vicariate Name,18,18,Albany-Corvallis,1,,,,,,,
Mailing Address City2,4,4,Corvallis,1,,,,,,,


In [41]:
# Create a single BillingAddress field

# Concatenate the two columns with CHAR(10) as separator
accounts['BillingStreet'] = accounts[['BillingStreet1', 'BillingStreet2']].apply(lambda x: '\n'.join(x.dropna()), axis=1)

# Drop the original columns
accounts.drop(columns=['BillingStreet1', 'BillingStreet2'], inplace=True)

In [42]:
# Handle boolean fields

boolean_columns_to_convert = [
    'Archdiocese_Assigns_Clergy__c', 
    'mbfc__Non_Latin__c', 
    'Disabled_Access__c', 
    ]

# Convert 'Yes'/'No' to True/False
accounts[boolean_columns_to_convert] = accounts[boolean_columns_to_convert].replace({'Yes': True, 'No': False, None: False})



In [43]:
accounts[boolean_columns_to_convert].sample(10)

Unnamed: 0,Archdiocese_Assigns_Clergy__c,mbfc__Non_Latin__c,Disabled_Access__c
327,True,False,False
90,True,False,True
273,True,False,False
106,True,False,True
1,False,False,False
306,False,False,False
234,False,False,False
179,True,False,True
292,True,False,False
232,False,True,False


In [44]:
# Religious Order fields > conform to new data model

# Apply logic to create new columns
accounts['Religious_Secular_Order__c'] = accounts.apply(
    lambda x: 'Religious Order' if x['Religious Order'] == 'Yes' else ('Secular Order' if x['Secular Order'] == 'Yes' else None), axis=1
)

accounts['Pontifical_or_Diocesan_Order__c'] = accounts.apply(
    lambda x: 'Diocesan Order' if x['Diocesan Order'] == 'Yes' else ('Pontifical Order' if x['Pontifical Order'] == 'Yes' else None), axis=1
)

accounts.drop(columns=['Religious Order', 'Secular Order', 'Diocesan Order', 'Pontifical Order'], inplace=True)

In [45]:
print(accounts['mbfc__Date_Established__c'].dtype)

object


In [46]:
# Handle Date fields that are only YYYY

# Ensure all values in 'mbfc__Date_Established__c' are strings
accounts['mbfc__Date_Established__c'] = accounts['mbfc__Date_Established__c'].astype(str)

# Define a function to transform valid year values
def transform_year(year):
    if pd.notna(year) and year.replace('.', '', 1).isdigit() and len(year.split('.')[0]) == 4:
        return pd.to_datetime(year.split('.')[0] + '-01-01')
    else:
        return pd.NaT

# Apply the function to the 'mbfc__Date_Established__c' column
accounts['mbfc__Date_Established__c'] = accounts['mbfc__Date_Established__c'].apply(transform_year)


In [47]:
accounts['mbfc__Date_Established__c'].sample(10)

232          NaT
109   1906-01-01
193          NaT
39    1887-01-01
51    1888-01-01
202          NaT
318          NaT
181          NaT
27           NaT
245          NaT
Name: mbfc__Date_Established__c, dtype: datetime64[ns]

In [48]:
accounts[accounts.src_table == 'Schools'].Parent_Parish

256    109
257     11
258    101
259    121
260      9
261      0
262      0
263      0
264      0
265      0
266      0
267      0
268      0
269      0
270      0
271      0
272      0
273      0
274     33
275    149
276     38
277     42
278     46
279     54
280     58
281     59
282     61
283     62
284     73
285     75
286     77
287    154
288    145
289    135
290    126
291    125
292    124
293    123
294    102
295    114
296    113
297    107
298     78
299     79
300     87
301      0
302     93
303     96
304     98
305      0
306      0
307      0
308      0
309      0
310      0
311    147
Name: Parent_Parish, dtype: object

In [49]:
# Format Parent_Parish field

# Remove instances of '0'
accounts.Parent_Parish = accounts.Parent_Parish.apply(lambda x: '' if x == 0 else x)


In [50]:
# Append prefix
accounts['Parent_Parish'] = accounts['Parent_Parish'].apply(lambda x: 'Parishes_' + str(x) if pd.notna(x) and x != '' else x)

In [51]:
# Check final results, in particular the 'Schools' records
accounts.Parent_Parish[(accounts.Parent_Parish.isna() == False) & (accounts["src_table"] == "Schools")].sample(10)

300     Parishes_87
272                
269                
285     Parishes_75
279     Parishes_54
289    Parishes_135
265                
256    Parishes_109
278     Parishes_46
262                
Name: Parent_Parish, dtype: object

In [52]:
# Replace Parent_Parish unique ids with SF records
add_salesforce_record_ids(sf, accounts, "Parent_Parish", "Account", "Archdpdx_Migration_Id__c", "Parent_Parish__c", 10 )

Unnamed: 0,Record Number,AccountRecordType,Formal_Name__c,Name,Parish Name,Archdiocese_Assigns_Clergy__c,Locator_Description__c,BillingCity,BillingState,Mailing Address Province,...,Grades_Provided__c,Mailing Address 1,Mailing Address Zip,Vicariate Name,Mailing Address City2,mbfc__Non_Latin__c,BillingStreet,Religious_Secular_Order__c,Pontifical_or_Diocesan_Order__c,Parent_Parish__c
0,1,Organization,Pastoral Center,Pastoral Center,,True,,Portland,OR,,...,,,,,,False,2838 E Burnside St,,,
1,3,Organization,Catholic Sentinel,Catholic Sentinel,,False,,Portland,OR,,...,,,,,,False,2838 E Burnside St,,,
2,4,Organization,Catholic Cemeteries,Catholic Cemeteries,,False,,Portland,OR,,...,,,,,,False,333 SW Skyline Blvd,,,
3,6,Organization,Griffin Center,Griffin Center,,False,,Milwaukie,OR,,...,,,,,,False,11957 SE Fuller Rd,,,
4,11,Organization,Providence Portland Medical Center,Providence Portland Medical Center,,True,,Portland,OR,,...,,,,,,False,4805 NE Glisan St,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329,18,Deanery,,Yamhill County Vicariate,,True,,,,,...,,,,Yamhill County,,False,,,,
330,1,Organization,OSU Newman Center,"OSU Newman Center, Corvallis",,False,,Corvallis,OR,,...,,,,,Corvallis,False,2127 NW Monroe Ave,,,
331,2,Organization,St. Thomas More (UO) Newman Center,"St. Thomas More (UO) Newman Center, Eugene",,False,,Eugene,OR,,...,,,,,Eugene,False,1850 Emerald St,,,
332,3,Organization,Walsh Memorial (SOU) Newman Center at Our Lady...,Walsh Memorial (SOU) Newman Center at Our Lady...,,True,,Ashland,OR,,...,,,,,Ashland,False,987 Hillview Dr,,,


In [53]:
# ParentID field

accounts['ParentId'] = accounts['Parent_Parish__c']

# Verify results
accounts[accounts.Parent_Parish__c.isna() == False]


Unnamed: 0,Record Number,AccountRecordType,Formal_Name__c,Name,Parish Name,Archdiocese_Assigns_Clergy__c,Locator_Description__c,BillingCity,BillingState,Mailing Address Province,...,Mailing Address 1,Mailing Address Zip,Vicariate Name,Mailing Address City2,mbfc__Non_Latin__c,BillingStreet,Religious_Secular_Order__c,Pontifical_or_Diocesan_Order__c,Parent_Parish__c,ParentId
36,2,Church,St. Andrew Dũng-Lạc,"St. Andrew Dũng-Lạc Mission, Aloha",,False,SW Grabhorn Rd/209th Ave and Farmington Rd,Aloha,OR,,...,,,,,False,St. Andrew Dũng-Lạc Mission\n7390 SW Grabhorn Rd,,,001Dx00001HwDzoIAF,001Dx00001HwDzoIAF
38,4,Church,St. Peter the Fisherman,"St. Peter the Fisherman Mission, Arch Cape",,True,79441 Hwy 101 S,Seaside,OR,,...,,,,,False,St. Peter the Fisherman Mission\nPO Box 29,,,001Dx00001HwE0XIAV,001Dx00001HwE0XIAV
45,13,Church,Holy Trinity,"Holy Trinity Mission, Brownsville",,True,W Blakely Ave and French St,Brownsville,OR,,...,,,,,False,Holy Trinity Mission\n104 W Blakely Ave,,,001Dx00001HwE0jIAF,001Dx00001HwE0jIAF
47,15,Church,St. Patrick of the Forest,"St. Patrick of the Forest Mission, Cave Junction",,True,407 W River St,Cave Junction,OR,,...,,,,,False,St. Patrick of the Forest Mission\n407 W River St,,,001Dx00001HwDzHIAV,001Dx00001HwDzHIAV
49,17,Church,St. John the Baptist,"St. John the Baptist Mission, Clatskanie",,True,100 SW High St,Rainier,OR,,...,,,,,False,St. John the Baptist Mission\nPO Box 340,,,001Dx00001HwE0JIAV,001Dx00001HwE0JIAV
50,18,Church,St. Joseph,"St. Joseph Mission, Cloverdale",,True,34560 Parkway Dr,Cloverdale,OR,,...,,,,,False,St. Joseph Mission\nPO Box 9,,,001Dx00001HwE0lIAF,001Dx00001HwE0lIAF
56,25,Church,St. Philip Benizi,"St. Philip Benizi Mission, Creswell",,True,552 Holbrook Ln,Cottage Grove,OR,,...,,,,,False,St. Philip Benizi Mission\n1025 N 19th St,,,001Dx00001HwDz5IAF,001Dx00001HwDz5IAF
58,27,Church,St. Martin de Porres,"St. Martin de Porres Mission, Dayton",,True,407 Ferry St,Yamhill,OR,,...,,,,,False,St. Martin de Porres Mission\nPO Box 580,,,001Dx00001HwE0tIAF,001Dx00001HwE0tIAF
59,28,Church,St. Henry,"St. Henry Mission, Dexter",,True,38925 Dexter Rd,Dexter,OR,,...,,,,,False,St. Henry Mission\nPO Box 65,,,001Dx00001HwDzeIAF,001Dx00001HwDzeIAF
69,39,Church,Holy Family,"Holy Family Mission, Glendale",,True,243 Marshall St,Myrtle Creek,OR,,...,,,,,False,Holy Family Mission\nPO Box 810,,,001Dx00001HwDzZIAV,001Dx00001HwDzZIAV


### AccountRecordType & ChurchType


In [54]:
#Sets all rows where AccountRecordType is Church as a Parish.
accounts.loc[accounts['AccountRecordType'] == 'Church', 'mbfc__Church_Type__c'] = 'Parish'
accounts[accounts['AccountRecordType'] == 'Church'].head(5)


Unnamed: 0,Record Number,AccountRecordType,Formal_Name__c,Name,Parish Name,Archdiocese_Assigns_Clergy__c,Locator_Description__c,BillingCity,BillingState,Mailing Address Province,...,Mailing Address Zip,Vicariate Name,Mailing Address City2,mbfc__Non_Latin__c,BillingStreet,Religious_Secular_Order__c,Pontifical_or_Diocesan_Order__c,Parent_Parish__c,ParentId,mbfc__Church_Type__c
35,1,Church,"Our Lady of Perpetual Help, St Mary’s","Our Lady of Perpetual Help, St Mary’s, Albany",,True,SW Ellsworth St between 8th and 9th Streets,Albany,OR,,...,,,,False,"Our Lady of Perpetual Help, St Mary’s Parish\n...",,,,,Parish
36,2,Church,St. Andrew Dũng-Lạc,"St. Andrew Dũng-Lạc Mission, Aloha",,False,SW Grabhorn Rd/209th Ave and Farmington Rd,Aloha,OR,,...,,,,False,St. Andrew Dũng-Lạc Mission\n7390 SW Grabhorn Rd,,,001Dx00001HwDzoIAF,001Dx00001HwDzoIAF,Parish
37,3,Church,St. Elizabeth Ann Seton,"St. Elizabeth Ann Seton, Aloha",,True,,Aloha,OR,,...,,,,False,St. Elizabeth Ann Seton Parish\n3145 SW 192nd Ave,,,,,Parish
38,4,Church,St. Peter the Fisherman,"St. Peter the Fisherman Mission, Arch Cape",,True,79441 Hwy 101 S,Seaside,OR,,...,,,,False,St. Peter the Fisherman Mission\nPO Box 29,,,001Dx00001HwE0XIAV,001Dx00001HwE0XIAV,Parish
39,5,Church,Our Lady of the Mountain,"Our Lady of the Mountain, Ashland",,True,,Ashland,OR,,...,,,,False,Our Lady of the Mountain Parish\n987 Hillview Dr,,,,,Parish


In [55]:
# Update 'mbfc__organization_type__c' field for rows where the source table is 'Schools'
accounts.loc[accounts['src_table'] == 'Schools', 'mbfc__Organization_Type__c'] = 'School'

### Generate ExternalId


In [56]:
# Generate an External ID
columns_to_concate = ['src_table', 'Record Number']
accounts = concat_columns(accounts, columns_to_concate, 'Archdpdx_Migration_Id__c', separator='_')

In [57]:
# set Deanery RecordTypeId to the Church RecordTypeId
# map in RecordTypeIds
accounts['RecordTypeId'] = accounts['AccountRecordType'].map(record_types_mapping)
record_types_mapping

{'Church': '012Dx0000003p4xIAA',
 'Deanery': '012Dx0000003p4yIAA',
 'Group': '012Dx0000003p4zIAA',
 'Organization': '012Dx0000003p50IAA',
 'Property': '012Dx0000003p51IAA',
 'Religious': '012Dx0000003p5KIAQ',
 'All_Types': '012Dx0000003p53IAA',
 'Any': '012Dx0000003p54IAA',
 'Assignments_Clergy': '012Dx0000003p55IAA',
 'Chancery_Users': '012Dx0000003p56IAA',
 'Clergy_Religious_Residence': '012Dx0000003p57IAA',
 'Diocean_Users': '012Dx0000003p58IAA',
 'Diocesan_Appointment': '012Dx0000003p59IAA',
 'Ecclesial_Affiliation': '012Dx0000003p5AIAQ',
 'Education': '012Dx0000003p5BIAQ',
 'Lay_Person': '012Dx0000003p5HIAQ',
 'Ministerial_Status': '012Dx0000003p5DIAQ',
 'Parish_Affiliations': '012Dx0000003p5EIAQ',
 'Staff': '012Dx0000003p5FIAQ',
 'Consecrated': '012Dx0000003p5GIAQ',
 'Permanent_Deacon': '012Dx0000003p5IIAQ',
 'Priest': '012Dx0000003p5JIAQ',
 'MajorGift': '012Hu000001pkqBIAQ',
 'Grant': '012Hu000001pkqCIAQ',
 'HH_Account': '012Hu000001pkqDIAQ',
 'Donation': '012Hu000001pkqFIAQ',
 

## Load


### Generate a new Job ID


In [58]:
# increment to the job_id
file_name = '/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/jobs/job_id'
curr_job_id = update_job_id(file_name)
print(f"New job ID: {curr_job_id}")

# add/update account DF with job_id
accounts["Job_Id__c"] = curr_job_id


New job ID: 138


### A) Vicariates


In [59]:
# Get Account Group RecordTypeID
deanery_recordTypeId = df_sf_recordTypes.loc[
    (df_sf_recordTypes['DeveloperName'] == 'Deanery') & (df_sf_recordTypes['SobjectType'] == 'Account'),
    'Id'
    ].iloc[0]  # Use .iloc[0] to get the first item if you're expecting exactly one match


# Insert Vicariates holding account
vicariate_account = sf.Account.upsert('Archdpdx_Migration_Id__c/Vicariates_Holding_Acc',
    {
    "Name": "Vicariates",
    "ParentId": diocesan_account_id,
    "mbfc__Diocese__c": diocesan_account_id,
    "RecordTypeId": deanery_recordTypeId,
    # "mbfc__Group_Type__c": 'Office',
    "Job_Id__c": curr_job_id
    }
)

# Get Vicariate Holding Acc's SF ID (as an upsert doesn't return the actual record ID)
vicariate_account = sf.Account.get_by_custom_id('Archdpdx_Migration_Id__c', 'Vicariates_Holding_Acc')
vicariate_account_id = vicariate_account['Id']

vicariate_account_id

'001Dx00001HwDuDIAV'

In [60]:
# Prepare Vicariates staging DF

vicariates = accounts[accounts['AccountRecordType'] == 'Deanery']


vicariates = vicariates[[
    'Record Number',
    'Name',
    # 'AccountRecordType',
    'Job_Id__c',
    'Archdpdx_Migration_Id__c',
    'RecordTypeId'
    ]]

# add parentid
vicariates["mbfc__Diocese__c"] = diocesan_account_id
vicariates['ParentId'] = vicariate_account_id
# vicariates['mbfc__Church_Type__c'] = 'Deanery'
vicariates['RecordTypeId'] = deanery_recordTypeId

vicariates.rename(columns={
        # 'Name, City': 'Name',
        'External_Id': 'Archdpdx_Migration_Id__c'
    }, inplace=True)

vicariates.reset_index()
vicariates.set_index('Record Number', inplace=True)

vicariates

Unnamed: 0_level_0,Name,Job_Id__c,Archdpdx_Migration_Id__c,RecordTypeId,mbfc__Diocese__c,ParentId
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Albany-Corvallis Vicariate,138,Vicariates_1,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
2,"Beaverton, Suburban Vicariate",138,Vicariates_2,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
3,Columbia County Vicariate,138,Vicariates_3,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
4,Downtown Portland Vicariate,138,Vicariates_4,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
5,"East Portland, Suburban Vicariate",138,Vicariates_5,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
6,Marion County Vicariate,138,Vicariates_6,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
7,Metropolitan Eugene Vicariate,138,Vicariates_7,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
8,Metropolitan Salem Vicariate,138,Vicariates_8,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
9,North Coast Vicariate,138,Vicariates_9,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV
10,Northeast Portland Vicariate,138,Vicariates_10,012Dx0000003p4yIAA,001Dx00001HwDsgIAF,001Dx00001HwDuDIAV


#### Export Vicariates to CSV


In [61]:
# export to CSV
vicariates.to_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/staging/vicariates_staging.csv')


#### Upsert Vicariates


In [62]:
bulk_data = []
for row in vicariates.itertuples(index=False):
    d = row._asdict()
    # del d['Index']
    bulk_data.append(d)

if run_upserts == 'True':
    vicariate_upsert = sf.bulk.Account.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=100, use_serial=False)
    upserts = pd.DataFrame(vicariate_upsert)

    print(upserts)
    

    success  created                  id errors
0      True    False  001Dx00001HwDwnIAF     []
1      True    False  001Dx00001HwDwoIAF     []
2      True    False  001Dx00001HwDwpIAF     []
3      True    False  001Dx00001HwDwqIAF     []
4      True    False  001Dx00001HwDwrIAF     []
5      True    False  001Dx00001HwDwsIAF     []
6      True    False  001Dx00001HwDwtIAF     []
7      True    False  001Dx00001HwDwuIAF     []
8      True    False  001Dx00001HwDwvIAF     []
9      True    False  001Dx00001HwDwwIAF     []
10     True    False  001Dx00001HwDwxIAF     []
11     True    False  001Dx00001HwDwyIAF     []
12     True    False  001Dx00001HwDwzIAF     []
13     True    False  001Dx00001HwDx0IAF     []
14     True    False  001Dx00001HwDx1IAF     []
15     True    False  001Dx00001HwDx2IAF     []
16     True    False  001Dx00001HwDx3IAF     []
17     True    False  001Dx00001HwDx4IAF     []


In [63]:
# Generate an Errors log
import csv

keys = vicariate_upsert[0].keys()

with open('results_files/vicariate_results', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, keys)
    writer.writeheader()
    writer.writerows(vicariate_upsert)

In [64]:
# Get Vicariate records from SF

sf_deaneries = sf.query("SELECT Archdpdx_Migration_Id__c, Id FROM Account WHERE RecordType.DeveloperName = 'Deanery'")

df_sf_deaneries = pd.DataFrame(sf_deaneries['records'])
df_sf_deaneries = df_sf_deaneries.drop(columns = 'attributes')

df_sf_deaneries

# Creates a dict of Vicariate unique ids to the new Salesforce record IDs, so can populate on latter Account records
vicariate_sf_recordids = df_sf_deaneries.set_index('Archdpdx_Migration_Id__c')['Id'].to_dict()
vicariate_sf_recordids

{'Vicariates_Holding_Acc': '001Dx00001HwDuDIAV',
 'Vicariates_1': '001Dx00001HwDwnIAF',
 'Vicariates_2': '001Dx00001HwDwoIAF',
 'Vicariates_3': '001Dx00001HwDwpIAF',
 'Vicariates_4': '001Dx00001HwDwqIAF',
 'Vicariates_5': '001Dx00001HwDwrIAF',
 'Vicariates_6': '001Dx00001HwDwsIAF',
 'Vicariates_7': '001Dx00001HwDwtIAF',
 'Vicariates_8': '001Dx00001HwDwuIAF',
 'Vicariates_9': '001Dx00001HwDwvIAF',
 'Vicariates_10': '001Dx00001HwDwwIAF',
 'Vicariates_11': '001Dx00001HwDwxIAF',
 'Vicariates_12': '001Dx00001HwDwyIAF',
 'Vicariates_13': '001Dx00001HwDwzIAF',
 'Vicariates_14': '001Dx00001HwDx0IAF',
 'Vicariates_15': '001Dx00001HwDx1IAF',
 'Vicariates_16': '001Dx00001HwDx2IAF',
 'Vicariates_17': '001Dx00001HwDx3IAF',
 'Vicariates_18': '001Dx00001HwDx4IAF'}

### B) Parishes, Schools, Organizations


In [65]:
# Create acc_main (accounts excluding Deaneries (already handled) and Religious (to be handled differently, after))
acc_main = accounts[accounts['AccountRecordType'] != 'Deanery']
acc_main = acc_main[acc_main['AccountRecordType'] != 'Religious']

acc_main.loc[acc_main['AccountRecordType'] == 'Church', 'Vicariate_Ext_Id'] = 'Vicariates_' + acc_main['Vicariate']

In [66]:
acc_main.sample(5)

Unnamed: 0,Record Number,AccountRecordType,Formal_Name__c,Name,Parish Name,Archdiocese_Assigns_Clergy__c,Locator_Description__c,BillingCity,BillingState,Mailing Address Province,...,Religious_Secular_Order__c,Pontifical_or_Diocesan_Order__c,Parent_Parish__c,ParentId,mbfc__Church_Type__c,mbfc__Organization_Type__c,Archdpdx_Migration_Id__c,RecordTypeId,Job_Id__c,Vicariate_Ext_Id
305,52,Organization,Regis St. Mary Catholic School,"Regis St. Mary Catholic School, Stayton",,True,St. Mary Campus (K-8),Stayton,OR,,...,,,,,,School,Schools_52,012Dx0000003p50IAA,138,
296,43,Organization,St. Therese Catholic School,"St. Therese Catholic School, Portland",,True,,Portland,OR,,...,,,001Dx00001HwE0HIAV,001Dx00001HwE0HIAV,,School,Schools_43,012Dx0000003p50IAA,138,
126,97,Church,St. Irene the Virgin and Great Martyr,"St. Irene the Virgin and Great Martyr, Portland",,False,,Portland,OR,,...,,,,,Parish,,Parishes_97,012Dx0000003p4xIAA,138,Vicariates_0
150,121,Church,St. Francis of Assisi,"St. Francis of Assisi, Roy",,True,,Banks,OR,,...,,,,,Parish,,Parishes_121,012Dx0000003p4xIAA,138,Vicariates_16
283,30,Organization,St. John the Baptist Catholic School,"St. John the Baptist Catholic School, Milwaukie",,True,,Milwaukie,OR,,...,,,001Dx00001HwDzVIAV,001Dx00001HwDzVIAV,,School,Schools_30,012Dx0000003p50IAA,138,


In [67]:
# map in Deaneries
acc_main['mbfc__Deanery__c'] = acc_main.Vicariate_Ext_Id.map(vicariate_sf_recordids)

acc_main[acc_main['AccountRecordType'] == 'Church']['mbfc__Deanery__c']

35     001Dx00001HwDwnIAF
36     001Dx00001HwDwzIAF
37     001Dx00001HwDx2IAF
38     001Dx00001HwDwvIAF
39     001Dx00001HwDx1IAF
              ...        
181    001Dx00001HwDwrIAF
182    001Dx00001HwDx3IAF
183    001Dx00001HwDwsIAF
184    001Dx00001HwDx4IAF
185    001Dx00001HwDwtIAF
Name: mbfc__Deanery__c, Length: 151, dtype: object

In [68]:
# Clean up NaN values

acc_main.fillna('', inplace=True)

In [69]:
# Generate Schedule text from all Schedule columns

def create_account_schedule(row):
    account_schedule = []
    for i in range(1, 8):
        head_col = f'Schedule {i} Head'
        text_col = f'Schedule {i} Text'
        
        head = row[head_col]
        text = row[text_col]
        
        if pd.notnull(head) or pd.notnull(text):
            if pd.notnull(head):
                account_schedule.append(f"<p><strong>{head}</strong></p>")
            if pd.notnull(text):
                account_schedule.append(f"<p>{text}</p>")
            account_schedule.append("<p><br></p>")
    
    # Join all parts into a single string
    return "".join(account_schedule).strip()

acc_main['mbfc__Mass_Times__c'] = acc_main.apply(create_account_schedule, axis=1)



In [70]:
acc_main['mbfc__Mass_Times__c'].sample(15)

289    <p><strong></strong></p><p></p><p><br></p><p><...
144    <p><strong>Weekend Mass</strong></p><p>Saturda...
285    <p><strong></strong></p><p></p><p><br></p><p><...
119    <p><strong>Weekend Mass</strong></p><p>Saturda...
98     <p><strong>Weekend Mass</strong></p><p>Saturda...
72     <p><strong>Weekend Mass</strong></p><p>Saturda...
89     <p><strong>Weekend Mass</strong></p><p>Saturda...
75     <p><strong>Weekend Mass</strong></p><p>Saturda...
161    <p><strong>Weekend Mass</strong></p><p>Saturda...
66     <p><strong>Weekend Mass</strong></p><p>Saturda...
22     <p><strong></strong></p><p></p><p><br></p><p><...
178    <p><strong>Weekend Mass</strong></p><p>Saturda...
153    <p><strong>Weekend Mass</strong></p><p>Saturda...
52     <p><strong>Weekend Mass</strong></p><p>Saturda...
104    <p><strong>Weekend Mass</strong></p><p>Sunday ...
Name: mbfc__Mass_Times__c, dtype: object

In [71]:
acc_main

Unnamed: 0,Record Number,AccountRecordType,Formal_Name__c,Name,Parish Name,Archdiocese_Assigns_Clergy__c,Locator_Description__c,BillingCity,BillingState,Mailing Address Province,...,Parent_Parish__c,ParentId,mbfc__Church_Type__c,mbfc__Organization_Type__c,Archdpdx_Migration_Id__c,RecordTypeId,Job_Id__c,Vicariate_Ext_Id,mbfc__Deanery__c,mbfc__Mass_Times__c
0,1,Organization,Pastoral Center,Pastoral Center,,True,,Portland,OR,,...,,,,,Offices_1,012Dx0000003p50IAA,138,,,<p><strong></strong></p><p></p><p><br></p><p><...
1,3,Organization,Catholic Sentinel,Catholic Sentinel,,False,,Portland,OR,,...,,,,,Offices_3,012Dx0000003p50IAA,138,,,<p><strong></strong></p><p></p><p><br></p><p><...
2,4,Organization,Catholic Cemeteries,Catholic Cemeteries,,False,,Portland,OR,,...,,,,,Offices_4,012Dx0000003p50IAA,138,,,<p><strong></strong></p><p></p><p><br></p><p><...
3,6,Organization,Griffin Center,Griffin Center,,False,,Milwaukie,OR,,...,,,,,Offices_6,012Dx0000003p50IAA,138,,,<p><strong></strong></p><p></p><p><br></p><p><...
4,11,Organization,Providence Portland Medical Center,Providence Portland Medical Center,,True,,Portland,OR,,...,,,,,Offices_11,012Dx0000003p50IAA,138,,,<p><strong></strong></p><p></p><p><br></p><p><...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311,58,Organization,Resurrection Catholic Parish School,"Resurrection Catholic Parish School, Tualatin",,True,,Tualatin,OR,,...,001Dx00001HwE0mIAF,001Dx00001HwE0mIAF,,School,Schools_58,012Dx0000003p50IAA,138,,,<p><strong></strong></p><p></p><p><br></p><p><...
330,1,Organization,OSU Newman Center,"OSU Newman Center, Corvallis",,False,,Corvallis,OR,,...,,,,,NewmanCenters_1,012Dx0000003p50IAA,138,,,<p><strong>Mass (During Academic Year)</strong...
331,2,Organization,St. Thomas More (UO) Newman Center,"St. Thomas More (UO) Newman Center, Eugene",,False,,Eugene,OR,,...,,,,,NewmanCenters_2,012Dx0000003p50IAA,138,,,<p><strong>Weekend Mass</strong></p><p>Saturda...
332,3,Organization,Walsh Memorial (SOU) Newman Center at Our Lady...,Walsh Memorial (SOU) Newman Center at Our Lady...,,True,,Ashland,OR,,...,,,,,NewmanCenters_3,012Dx0000003p50IAA,138,,,<p><strong>Sunday Mass</strong></p><p>5:00 pm ...


In [72]:
# Create 'account_staging' df (drop extraneous columns)

accounts_staging = acc_main[[
    'Name',
    'Formal_Name__c',
    'RecordTypeId',
    'mbfc__Church_Type__c',
    'mbfc__Deanery__c',
    'BillingStreet',
    'BillingCity',
    'BillingState',
    'BillingPostalCode',
    'BillingCountry',
    'Phone',
    'Fax',
    'mbfc__Email__c',
    'Website',
    'mbfc__Mass_Times__c',
    'mbfc__Abbreviation__c',
    'mbfc__Religious_Suffix__c',
    'mbfc__Type_Members__c',
    'Description',
    'Archdiocese_Assigns_Clergy__c', # Boolean fields
    'mbfc__Non_Latin__c', 
    'Disabled_Access__c', 
    'Locator_Description__c',
    'Parent_Parish__c',
    'mbfc__Date_Established__c',
    'County__c',
    'Sanctuary_Capacity__c',
    # 'Miles_to_Pastoral_Centre__c',
    'Religious_Secular_Order__c',
    'Pontifical_or_Diocesan_Order__c',
    'Archdiocesan_School_Code__c',
    'Grades_Provided__c',
    'Job_Id__c',
    'Archdpdx_Migration_Id__c',
    'mbfc__Organization_Type__c',
    'ParentId'  # Later, check whether or not can upsert using external ID using this field


    ]]

In [73]:
accounts_staging

Unnamed: 0,Name,Formal_Name__c,RecordTypeId,mbfc__Church_Type__c,mbfc__Deanery__c,BillingStreet,BillingCity,BillingState,BillingPostalCode,BillingCountry,...,County__c,Sanctuary_Capacity__c,Religious_Secular_Order__c,Pontifical_or_Diocesan_Order__c,Archdiocesan_School_Code__c,Grades_Provided__c,Job_Id__c,Archdpdx_Migration_Id__c,mbfc__Organization_Type__c,ParentId
0,Pastoral Center,Pastoral Center,012Dx0000003p50IAA,,,2838 E Burnside St,Portland,OR,97214,,...,,,,,,,138,Offices_1,,
1,Catholic Sentinel,Catholic Sentinel,012Dx0000003p50IAA,,,2838 E Burnside St,Portland,OR,97214,,...,,,,,,,138,Offices_3,,
2,Catholic Cemeteries,Catholic Cemeteries,012Dx0000003p50IAA,,,333 SW Skyline Blvd,Portland,OR,97221,,...,,,,,,,138,Offices_4,,
3,Griffin Center,Griffin Center,012Dx0000003p50IAA,,,11957 SE Fuller Rd,Milwaukie,OR,97222,,...,,,,,,,138,Offices_6,,
4,Providence Portland Medical Center,Providence Portland Medical Center,012Dx0000003p50IAA,,,4805 NE Glisan St,Portland,OR,97213,,...,,,,,,,138,Offices_11,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311,"Resurrection Catholic Parish School, Tualatin",Resurrection Catholic Parish School,012Dx0000003p50IAA,,,,Tualatin,OR,,,...,,,,,12-WEESRES,PK-5,138,Schools_58,School,001Dx00001HwE0mIAF
330,"OSU Newman Center, Corvallis",OSU Newman Center,012Dx0000003p50IAA,,,2127 NW Monroe Ave,Corvallis,OR,97330,,...,,,,,,,138,NewmanCenters_1,,
331,"St. Thomas More (UO) Newman Center, Eugene",St. Thomas More (UO) Newman Center,012Dx0000003p50IAA,,,1850 Emerald St,Eugene,OR,97403,,...,,,,,,,138,NewmanCenters_2,,
332,Walsh Memorial (SOU) Newman Center at Our Lady...,Walsh Memorial (SOU) Newman Center at Our Lady...,012Dx0000003p50IAA,,,987 Hillview Dr,Ashland,OR,97520,,...,,,,,,,138,NewmanCenters_3,,


#### Create Parishes Holding Acc for acc heirarchy


In [74]:
# Upsert a Parishes holding account

# Get Account Group RecordTypeID
group_recordTypeId = df_sf_recordTypes.loc[
    (df_sf_recordTypes['DeveloperName'] == 'Group') & (df_sf_recordTypes['SobjectType'] == 'Account'),
    'Id'
    ].iloc[0]  # Use .iloc[0] to get the first item if you're expecting exactly one match


# Insert Vicariates holding account
parish_holding_account = sf.Account.upsert('Archdpdx_Migration_Id__c/Parishes_Holding_Acc',
    {
    "Name": "Parishes",
    "ParentId": diocesan_account_id,
    "RecordTypeId": group_recordTypeId,
    "Job_Id__c": curr_job_id,
    "mbfc__Group_Type__c": "Office"
    }
)

# Get Vicariate Holding Acc's SF ID (as an upsert doesn't return the actual record ID)

parish_holding_account = sf.Account.get_by_custom_id('Archdpdx_Migration_Id__c', 'Parishes_Holding_Acc')

parishes_holding_account_id = parish_holding_account['Id']

parishes_holding_account_id

'001Dx00001HwDxKIAV'

In [75]:
# Set the ParentId for all Parish records

# accounts_staging['ParentId'] = None # Commented this out as (a) the field already exists and it was blanking out pre-existing values.

accounts_staging['ParentId']= accounts_staging.apply(
    lambda row: parishes_holding_account_id if row['mbfc__Church_Type__c'] == 'Parish' else row['ParentId'], axis=1
)

accounts_staging.sample(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  accounts_staging['ParentId']= accounts_staging.apply(


Unnamed: 0,Name,Formal_Name__c,RecordTypeId,mbfc__Church_Type__c,mbfc__Deanery__c,BillingStreet,BillingCity,BillingState,BillingPostalCode,BillingCountry,...,County__c,Sanctuary_Capacity__c,Religious_Secular_Order__c,Pontifical_or_Diocesan_Order__c,Archdiocesan_School_Code__c,Grades_Provided__c,Job_Id__c,Archdpdx_Migration_Id__c,mbfc__Organization_Type__c,ParentId
55,"Our Lady of Perpetual Help, Cottage Grove",Our Lady of Perpetual Help,012Dx0000003p4xIAA,Parish,001Dx00001HwDwtIAF,Our Lady of Perpetual Help Parish\n1025 N 19th St,Cottage Grove,OR,97424.0,,...,Lane,375.0,,,,,138,Parishes_24,,001Dx00001HwDxKIAV
14,Diaconate Office,Diaconate Office,012Dx0000003p50IAA,,,Pastoral Center\n2838 E Burnside St,Portland,OR,97214.0,,...,,,,,,,138,Offices_32,,
66,"St. Mary, Our Lady of the Dunes, Florence","St. Mary, Our Lady of the Dunes",012Dx0000003p4xIAA,Parish,001Dx00001HwDwyIAF,"St. Mary, Our Lady of the Dunes Parish\n85060 ...",Florence,OR,97439.0,,...,Lane,0.0,,,,,138,Parishes_36,,001Dx00001HwDxKIAV
58,"St. Martin de Porres Mission, Dayton",St. Martin de Porres,012Dx0000003p4xIAA,Parish,001Dx00001HwDx4IAF,St. Martin de Porres Mission\nPO Box 580,Yamhill,OR,97148.0,,...,Yamhill,0.0,,,,,138,Parishes_27,,001Dx00001HwDxKIAV
289,"St. Francis Catholic School, Sherwood",St. Francis Catholic School,012Dx0000003p50IAA,,,,Sherwood,OR,,,...,,,,,12-SHWFRAS,PK-8,138,Schools_36,School,001Dx00001HwE0bIAF
3,Griffin Center,Griffin Center,012Dx0000003p50IAA,,,11957 SE Fuller Rd,Milwaukie,OR,97222.0,,...,,,,,,,138,Offices_6,,
291,"St. Joseph Catholic School, Salem",St. Joseph Catholic School,012Dx0000003p50IAA,,,,Salem,OR,,,...,,,,,12-SALJOSS,PS-8,138,Schools_38,School,001Dx00001HwE0SIAV
285,"All Saints School, Portland",All Saints School,012Dx0000003p50IAA,,,,Portland,OR,,,...,,,,,12-PDXALLS,PK-8,138,Schools_32,School,001Dx00001HwDzgIAF
96,"All Souls, Myrtle Creek",All Souls,012Dx0000003p4xIAA,Parish,001Dx00001HwDx1IAF,All Souls Parish\nPO Box 810,Myrtle Creek,OR,97457.0,,...,Douglas,120.0,,,,,138,Parishes_66,,001Dx00001HwDxKIAV
87,"St. Patrick Mission, Lyons",St. Patrick,012Dx0000003p4xIAA,Parish,001Dx00001HwDwxIAF,St. Patrick Mission\n39043 Jordan Road,Scio,OR,97374.0,,...,Linn,0.0,,,,,138,Parishes_57,,001Dx00001HwDxKIAV


#### Upsert Accounts (TBD )


In [76]:
# send accounts_staging to csv
accounts_staging.to_csv('staging_files/accounts_staging.csv', encoding='utf-8-sig')

In [None]:

# # Rename columns apis
# accounts_staging = accounts_staging.rename(columns={'Parent_Parish__c': 'Parent_Parish__r'})  # Later on, attempt to include 'ParentId' (which, as a standard SF field, might not work)

# # Reformat values to match what SF api requires
# accounts_staging['Parent_Parish__r'] = accounts_staging.apply(lambda x: "{'Archdpdx_Migration_Id__c': '" + x['Parent_Parish__r'] + "'}" if pd.notna(x['Parent_Parish__r']) and x['Parent_Parish__r'] != 'None' and x['Parent_Parish__r'] != '' else None, axis=1)




In [78]:
accounts_staging

Unnamed: 0,Name,Formal_Name__c,RecordTypeId,mbfc__Church_Type__c,mbfc__Deanery__c,BillingStreet,BillingCity,BillingState,BillingPostalCode,BillingCountry,...,County__c,Sanctuary_Capacity__c,Religious_Secular_Order__c,Pontifical_or_Diocesan_Order__c,Archdiocesan_School_Code__c,Grades_Provided__c,Job_Id__c,Archdpdx_Migration_Id__c,mbfc__Organization_Type__c,ParentId
0,Pastoral Center,Pastoral Center,012Dx0000003p50IAA,,,2838 E Burnside St,Portland,OR,97214,,...,,,,,,,138,Offices_1,,
1,Catholic Sentinel,Catholic Sentinel,012Dx0000003p50IAA,,,2838 E Burnside St,Portland,OR,97214,,...,,,,,,,138,Offices_3,,
2,Catholic Cemeteries,Catholic Cemeteries,012Dx0000003p50IAA,,,333 SW Skyline Blvd,Portland,OR,97221,,...,,,,,,,138,Offices_4,,
3,Griffin Center,Griffin Center,012Dx0000003p50IAA,,,11957 SE Fuller Rd,Milwaukie,OR,97222,,...,,,,,,,138,Offices_6,,
4,Providence Portland Medical Center,Providence Portland Medical Center,012Dx0000003p50IAA,,,4805 NE Glisan St,Portland,OR,97213,,...,,,,,,,138,Offices_11,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311,"Resurrection Catholic Parish School, Tualatin",Resurrection Catholic Parish School,012Dx0000003p50IAA,,,,Tualatin,OR,,,...,,,,,12-WEESRES,PK-5,138,Schools_58,School,001Dx00001HwE0mIAF
330,"OSU Newman Center, Corvallis",OSU Newman Center,012Dx0000003p50IAA,,,2127 NW Monroe Ave,Corvallis,OR,97330,,...,,,,,,,138,NewmanCenters_1,,
331,"St. Thomas More (UO) Newman Center, Eugene",St. Thomas More (UO) Newman Center,012Dx0000003p50IAA,,,1850 Emerald St,Eugene,OR,97403,,...,,,,,,,138,NewmanCenters_2,,
332,Walsh Memorial (SOU) Newman Center at Our Lady...,Walsh Memorial (SOU) Newman Center at Our Lady...,012Dx0000003p50IAA,,,987 Hillview Dr,Ashland,OR,97520,,...,,,,,,,138,NewmanCenters_3,,


In [79]:
# accounts_staging[accounts_staging.Parent_Parish__r.isnull() == False]["Parent_Parish__r"]

In [80]:
print(accounts_staging['mbfc__Date_Established__c'].dtype)

datetime64[ns]


In [81]:

# Convert datetime to string in the desired format
accounts_staging['mbfc__Date_Established__c'] = accounts_staging['mbfc__Date_Established__c'].dt.strftime('%Y-%m-%d')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  accounts_staging['mbfc__Date_Established__c'] = accounts_staging['mbfc__Date_Established__c'].dt.strftime('%Y-%m-%d')


In [82]:
# Upsert using new function

accounts_upsert2 = upsert_to_salesforce_bulk(sf, accounts_staging, 'Account', 'Archdpdx_Migration_Id__c', 'results_files/accounts_status', batch_size=100)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe.replace({pd.NA: None, ' ': None, '': None}, inplace=True)


Batch 1 processed: 100 successful, 0 failed.
Batch 2 processed: 200 successful, 0 failed.
Batch 3 processed: 246 successful, 0 failed.
Upsert completed. Total records processed: 246, Batches: 3, Successful upserts: 246, Failed upserts: 0


In [83]:
# Extract SF Account records

sf_accounts = sf.query('Select id, Name, RecordTypeId, mbfc__Church_Type__c, Archdpdx_Migration_Id__c, Job_Id__c from Account WHERE Job_Id__c != null')
sf_accounts = pd.DataFrame(sf_accounts['records'])
sf_accounts = sf_accounts.drop(columns = 'attributes')
sf_accounts

Unnamed: 0,Id,Name,RecordTypeId,mbfc__Church_Type__c,Archdpdx_Migration_Id__c,Job_Id__c
0,001Dx00001HwDuDIAV,Vicariates,012Dx0000003p4yIAA,,Vicariates_Holding_Acc,138
1,001Dx00001HwDwnIAF,Albany-Corvallis Vicariate,012Dx0000003p4yIAA,,Vicariates_1,138
2,001Dx00001HwDwoIAF,"Beaverton, Suburban Vicariate",012Dx0000003p4yIAA,,Vicariates_2,138
3,001Dx00001HwDwpIAF,Columbia County Vicariate,012Dx0000003p4yIAA,,Vicariates_3,138
4,001Dx00001HwDwqIAF,Downtown Portland Vicariate,012Dx0000003p4yIAA,,Vicariates_4,138
...,...,...,...,...,...,...
331,001Dx00001HwE5gIAF,"Society of the Divine Saviour, Rome, Italy (SDS)",012Dx0000003p52IAA,,RelCommunities_78,137
332,001Dx00001HwE5hIAF,"Society of Our Lady of the Most Holy Trinity, ...",012Dx0000003p52IAA,,RelCommunities_79,137
333,001Dx00001HwE5iIAF,"Community of St. Thomas More, Eugene (OP)",012Dx0000003p52IAA,,RelCommunities_80,137
334,001Dx00001HwE5jIAF,"Saint Benedict Lodge, McKenzie Bridge (OP)",012Dx0000003p52IAA,,RelCommunities_81,137


### C) Religious Institutes (Parents)


This section prepares and upserts parent Religious Congregation accounts for each row in RelCommunities table.

Dataframes >>
- acc_religious
- acc_religious_2
- acc_religious_parents

In [84]:
"""
- 'acc_religious' DF: create unique_id of religious parents
- create 'acc_religious_orders' DF , upsert into SF
- extract accounts from Salesforce, create dict (external_ID : account_ID)
- map parent ids onto religious child accounts DF in main DF
- 'acc_religious' > staging DF ('acc_religious')
    - drop unnecessary columns
    - upsert create DF of religious children, upsert into SF with
"""

# Create a new DF of all Religious accounts
acc_religious = accounts[accounts['AccountRecordType'] == 'Religious']

# Create a simplified external ID field for Parent Accounts
acc_religious['Archdpdx_Migration_Id__c'] = acc_religious['Order Full Name'].apply(
    lambda x: x.lower().replace(' ', '')[:40]
)

acc_religious_2 = acc_religious

# Create a DF for only parent religious order accounts
acc_religious_parents = acc_religious_2[[
    'Order Full Name', 
    # 'Name', 
    'mbfc__Abbreviation__c', 
    'mbfc__Religious_Suffix__c', 
    'mbfc__Type_Members__c', 
    'Archdpdx_Migration_Id__c',
    'Pontifical_or_Diocesan_Order__c',
    'Religious_Secular_Order__c',
    ]]

# Drop duplicate rows of the same parent Religious Order (becuase there are more than 1 local community of a particular order)
acc_religious_parents.drop_duplicates('Order Full Name', inplace=True)

# Manipulate the 'Name' field to remove any comma and subsequent text
# acc_religious_parents['Name'] = acc_religious_parents['Name'].str.split(',').str[0]

# How many remaining rows after dropping duplicates?
print(acc_religious_parents.shape)

# Rename columns
acc_religious_parents = acc_religious_parents.rename(columns={
    # 'Order Full Name': 'Description',
    'Order Full Name': 'Name'
    })

# Drop NA
acc_religious_parents.fillna('', inplace=True)

acc_religious_parents


(62, 7)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_religious['Archdpdx_Migration_Id__c'] = acc_religious['Order Full Name'].apply(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_religious_parents.drop_duplicates('Order Full Name', inplace=True)


Unnamed: 0,Name,mbfc__Abbreviation__c,mbfc__Religious_Suffix__c,mbfc__Type_Members__c,Archdpdx_Migration_Id__c,Pontifical_or_Diocesan_Order__c,Religious_Secular_Order__c
186,Societas Iesu,Jesuits,SJ,Men,societasiesu,,Religious Order
187,Ordo Cisterciensis Strictioris Observantiae,Trappists,OCSO,Men,ordocisterciensisstrictiorisobservantiae,Pontifical Order,Religious Order
189,Ordo Sancti Benedicti,Benedictines,OSB,Men,ordosanctibenedicti,,Religious Order
190,Misioneros del Espíritu Santo,"Missionaries of the Holy Spirit, Christ the Pr...",MSpS,Men,misionerosdelespíritusanto,,
191,Apostles of Jesus,Apostles of Jesus,AJ,Men,apostlesofjesus,Diocesan Order,Religious Order
...,...,...,...,...,...,...,...
249,Fraternità san Carlo Borromeo,Fraternity of St. Charles,FSCB,Men,fraternitàsancarloborromeo,,
250,"Sons of Mary, Mother of Mercy","Sons of Mary, Mother of Mercy",SMMM,Men,"sonsofmary,motherofmercy",,
251,Society of the Divine Word,Society of the Divine Word,SVD,Men,societyofthedivineword,,
252,Society of the Divine Saviour,Society of the Divine Saviour,SDS,Men,societyofthedivinesaviour,,


In [85]:
acc_religious_parents['mbfc__Religious_Type__c'] = 'Congregation'

In [86]:
# Get Religious RecordTypeId
religious_recordtype_id = get_recordtype_id(df_sf_recordTypes, 'Religious', 'Account', 'mbfc')

religious_recordtype_id

'012Dx0000003p52IAA'

In [87]:
# Set recordType to 'Religious'

religious_recordtype_id = df_sf_recordTypes.loc[
    (df_sf_recordTypes['DeveloperName'] == 'Religious') & (df_sf_recordTypes['SobjectType'] == 'Account'),
    'Id'
    ].iloc[0]  # Use .iloc[0] to get the first item if you're expecting exactly one match

print(religious_recordtype_id)

acc_religious_parents['RecordTypeId'] = religious_recordtype_id

acc_religious_parents.sample(10)

012Dx0000003p52IAA


Unnamed: 0,Name,mbfc__Abbreviation__c,mbfc__Religious_Suffix__c,mbfc__Type_Members__c,Archdpdx_Migration_Id__c,Pontifical_or_Diocesan_Order__c,Religious_Secular_Order__c,mbfc__Religious_Type__c,RecordTypeId
251,Society of the Divine Word,Society of the Divine Word,SVD,Men,societyofthedivineword,,,Congregation,012Dx0000003p52IAA
187,Ordo Cisterciensis Strictioris Observantiae,Trappists,OCSO,Men,ordocisterciensisstrictiorisobservantiae,Pontifical Order,Religious Order,Congregation,012Dx0000003p52IAA
206,Dominican Sisters of Adrian,Dominicans,OP,Women,dominicansistersofadrian,Pontifical Order,,Congregation,012Dx0000003p52IAA
200,Domus Dei Clerical Society of Apostolic Life,Domus Dei,SDD,Men,domusdeiclericalsocietyofapostoliclife,,Religious Order,Congregation,012Dx0000003p52IAA
230,Sociedad de María,Society of Mary,SdM,Women,sociedaddemaría,Diocesan Order,Religious Order,Congregation,012Dx0000003p52IAA
211,Franciscan Missionary Sisters of Our Lady of S...,Franciscans,OSF,Women,franciscanmissionarysistersofourladyofso,Diocesan Order,Religious Order,Congregation,012Dx0000003p52IAA
192,Franciscan Sisters of the Eucharist,Franciscan Sisters of the Eucharist,FSE,Women,franciscansistersoftheeucharist,Pontifical Order,Religious Order,Congregation,012Dx0000003p52IAA
225,Sisters of Reparation of the Sacred Wounds of ...,Sisters of Reparation,SR,Women,sistersofreparationofthesacredwoundsofje,Diocesan Order,Religious Order,Congregation,012Dx0000003p52IAA
250,"Sons of Mary, Mother of Mercy","Sons of Mary, Mother of Mercy",SMMM,Men,"sonsofmary,motherofmercy",,,Congregation,012Dx0000003p52IAA
214,Maryknoll Sisters of St. Dominic,Maryknoll Sisters,MM,Women,maryknollsistersofst.dominic,Pontifical Order,Religious Order,Congregation,012Dx0000003p52IAA


In [88]:
# Send to CSV
acc_religious_parents.to_csv('staging_files/religious_order_staging.csv', encoding='utf-8-sig')

In [89]:
# Upsert to Salesforce
bulk_data = []
for row in acc_religious_parents.itertuples(index=False):
    d = row._asdict()
    # del d['Index']
    bulk_data.append(d)

if run_upserts == 'True':
    religious_order_upsert = sf.bulk.Account.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=100, use_serial=False)
    df_rel_order_upsert = pd.DataFrame(religious_order_upsert)

df_rel_order_upsert

Unnamed: 0,success,created,id,errors
0,True,False,001Dx00001HwE3TIAV,[]
1,True,False,001Dx00001HwE3UIAV,[]
2,True,False,001Dx00001HwE3VIAV,[]
3,True,False,001Dx00001HwE3WIAV,[]
4,True,False,001Dx00001HwE3XIAV,[]
...,...,...,...,...
57,True,False,001Dx00001HwE4OIAV,[]
58,True,False,001Dx00001HwE4PIAV,[]
59,True,False,001Dx00001HwE4QIAV,[]
60,True,False,001Dx00001HwE4RIAV,[]


In [90]:
# Generate an Errors log
import csv

keys = religious_order_upsert[0].keys()

with open('results_files/religious_order_results', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, keys)
    writer.writeheader()
    writer.writerows(religious_order_upsert)

In [91]:
# get SF Accounts
get_all_rel_accounts = f"Select id, Name, RecordTypeId, Type, Archdpdx_Migration_Id__c from Account where RecordTypeID = '{religious_recordtype_id}'"

print(religious_recordtype_id)

# get list of records, add to dataframe
sf_accounts = sf.query(get_all_rel_accounts)
df_sf_accounts = pd.DataFrame(sf_accounts['records'])
df_sf_accounts = df_sf_accounts.drop(columns = 'attributes')

df_sf_accounts.sample(10)

012Dx0000003p52IAA


Unnamed: 0,Id,Name,RecordTypeId,Type,Archdpdx_Migration_Id__c
225,001O300000aKPITIA4,Saint John Society,012Dx0000003p52IAA,,
16,001Dx00001HwE3iIAF,Ordo Servorum Beatae Mariae Virginis,012Dx0000003p52IAA,,ordoservorumbeataemariaevirginis
71,001Dx00001HwE4lIAF,"Blessed Stephen Bellesini Community, c/o Chanc...",012Dx0000003p52IAA,,RelCommunities_12
118,001Dx00001HwE5WIAV,"Paulist Fathers, New York, NY (CSP)",012Dx0000003p52IAA,,RelCommunities_68
98,001Dx00001HwE5CIAV,"Sisters of Mary of Kakamega, Portland (SMK)",012Dx0000003p52IAA,,RelCommunities_46
17,001Dx00001HwE3jIAF,Adorers of the Holy Cross,012Dx0000003p52IAA,,adorersoftheholycross
201,001O300000aK6iuIAC,Society of Jesus,012Dx0000003p52IAA,,
195,001O300000aK42pIAC,Domus Dei Society,012Dx0000003p52IAA,,
194,001O300000aK42oIAC,Institute of the Incarnate Word,012Dx0000003p52IAA,,
106,001Dx00001HwE5KIAV,"Society of Mary, Corvallis (SdM)",012Dx0000003p52IAA,,RelCommunities_54


In [92]:
religious_order_mapping = df_sf_accounts.set_index('Archdpdx_Migration_Id__c')['Id'].to_dict()
# religious_order_mapping

### D) Religious Communities


This section stages the 'relCommunities' tables as Religious Account records. 

Dataframes:
- acc_religious_staging
- acc_religious_staging_2 

In [93]:
acc_religious_staging = (acc_religious
                         .rename(columns={'Archdpdx_Migration_Id__c' : 'Parent_Archdpdx_Migration_Id__c'})
)

acc_religious_staging['ParentId'] = acc_religious_staging['Parent_Archdpdx_Migration_Id__c'].map(religious_order_mapping)

In [94]:
pd.set_option('display.max_columns', None)

In [95]:
# Enrich the data

acc_religious_staging['mbfc__Religious_Type__c'] = 'Local Community'
acc_religious_staging['Archdpdx_Migration_Id__c'] = 'RelCommunities_' + acc_religious_staging['Record Number'].astype('str')
acc_religious_staging['RecordTypeId'] = religious_recordtype_id
# acc_religious_staging.drop(columns='Name', inplace=True)
# acc_religious_staging.rename(columns={
#     'Name, City': 'Name'
# }, inplace=True)

In [96]:
acc_religious_staging.sample(5)

Unnamed: 0,Record Number,AccountRecordType,Formal_Name__c,Name,Parish Name,Archdiocese_Assigns_Clergy__c,Locator_Description__c,BillingCity,BillingState,Mailing Address Province,BillingPostalCode,BillingCountry,Phone,Fax,mbfc__Email__c,Website,src_table,Sort Name,Parish City,Parent_Parish,mbfc__Date_Established__c,Vicariate,Non-Latin,County__c,Disabled_Access__c,Sanctuary_Capacity__c,Lat/Long Coordinates Decimal,Google Small Embed URL,Miles to Pastoral Center,Schedule 1 Head,Schedule 1 Text,Schedule 2 Head,Schedule 2 Text,Schedule 3 Head,Schedule 3 Text,Schedule 4 Head,Schedule 4 Text,Schedule 5 Head,Schedule 5 Text,Schedule 6 Head,Schedule 6 Text,Schedule 7 Head,Schedule 7 Text,Community City,Order Full Name,mbfc__Abbreviation__c,mbfc__Religious_Suffix__c,mbfc__Type_Members__c,Non-Latin Rite,Show Order in Name,Description,Local Superior,Major Superior Name,Major Superior Phone,Major Superior Email,School City,Vicariate Link,Archdiocesan_School_Code__c,Grades_Provided__c,Mailing Address 1,Mailing Address Zip,Vicariate Name,Mailing Address City2,mbfc__Non_Latin__c,BillingStreet,Religious_Secular_Order__c,Pontifical_or_Diocesan_Order__c,Parent_Parish__c,ParentId,mbfc__Church_Type__c,mbfc__Organization_Type__c,Parent_Archdpdx_Migration_Id__c,RecordTypeId,Job_Id__c,mbfc__Religious_Type__c,Archdpdx_Migration_Id__c
200,21,Religious,Society of Domus Dei Holy House Monasteries,"Society of Domus Dei Holy House Monasteries, W...",,False,,Washougal,WA,,98671,,360-835-5358,,,http://nhachua.net/,RelCommunities,,,,NaT,,,,False,,,,,,,,,,,,,,,,,,,"Washougal, WA",Domus Dei Clerical Society of Apostolic Life,Domus Dei,SDD,Men,No,Yes,Serving Our Lady of Lavang Parish; Southeast A...,2740.0,"Very Rev. Joseph Hai Dang Vu, SDD",504-254-9429,domusdeiusa@gmail.com,,,,,,,,,False,462 Hudson Rd,Religious Order,,,001Dx00001HwE3eIAF,,,domusdeiclericalsocietyofapostoliclife,012Dx0000003p52IAA,138,Local Community,RelCommunities_21
238,64,Religious,Heralds of the Good News,"Heralds of the Good News, Portland (HGN)",,False,,Portland,OR,,97214,,503-233-8322,,vschueler@archdpdx.org,,RelCommunities,,,,NaT,,,,False,,,,,,,,,,,,,,,,,,,Portland,Heralds of the Good News,Heralds of the Good News,HGN,Men,No,Yes,,0.0,Fr. Kappumkal Thomas,+91 80 74 51 02 67,rkappumkal@gmail.com,,,,,,,,,False,c/o Chancellor\n2838 E Burnside St,,,,001Dx00001HwE4DIAV,,,heraldsofthegoodnews,012Dx0000003p52IAA,138,Local Community,RelCommunities_64
233,57,Religious,Priory of Our Lady of Consolation,"Priory of Our Lady of Consolation, Amity (OSsS)",,False,,Amity,OR,,97101,,503-835-8080,503-835-9662,monks@brigittine.org,http://www.brigittine.com/,RelCommunities,,,,NaT,,,,False,,,,,,,,,,,,,,,,,,,Amity,"Brigittine Monks, Order of the Most Holy Savior",Brigittines,OSsS,Men,No,Yes,Canonical status of a Priory “Sui Juris”. Brot...,2425.0,,,,,,,,,,,,False,Priory of Our Lady of Consolation\n23300 SW Wa...,,,,001Dx00001HwE49IAF,,,"brigittinemonks,orderofthemostholysavior",012Dx0000003p52IAA,138,Local Community,RelCommunities_57
235,61,Religious,Society of Christ Fathers Province in the Unit...,Society of Christ Fathers Province in the Unit...,,False,,Lombard,IL,,60148,POLAND,630-424-0401,,,https://tchr.us/en,RelCommunities,,,,NaT,,,,False,,,,,,,,,,,,,,,,,,,"Lombard, IL",Towarzystwo Chrystusowe,Society of Christ,SCH,Men,No,Yes,Missionary priests and brothers serving Polish...,0.0,Fr. Jacek Walkiewicz,630-424-0401,schprov@ aol.com,,,,,,,,,False,Society of Christ\n786 West Sunset Ave,Religious Order,Pontifical Order,,001Dx00001HwE4AIAV,,,towarzystwochrystusowe,012Dx0000003p52IAA,138,Local Community,RelCommunities_61
201,22,Religious,Franciscan Friars,"Franciscan Friars, Oakland (OFM)",,False,,Oakland,CA,,94601,,510-536-3722,,info@sbofm.org,sbfranciscans.org,RelCommunities,,,,NaT,,,,False,,,,,,,,,,,,,,,,,,,Oakland,Ordo Fratrum Minorum Province of Saint Barbara,Franciscans,OFM,Men,No,Yes,"Serving St. John the Baptist Parish, Milwaukie...",0.0,"Very Rev. David Gaa, OFM",510-536-3722,,,,,,,,,,False,Provincial Office\n1500 34th Ave,Religious Order,,,001Dx00001HwE3fIAF,,,ordofratrumminorumprovinceofsaintbarbara,012Dx0000003p52IAA,138,Local Community,RelCommunities_22


In [97]:
acc_religious_staging_2 = acc_religious_staging[[
    'Name',
    'RecordTypeId',
    'mbfc__Religious_Type__c',
    'BillingStreet',
    'BillingCity',
    'BillingState',
    'BillingPostalCode',
    'BillingCountry',
    'Phone',
    'Fax',
    'mbfc__Email__c',
    'Website',
    'mbfc__Abbreviation__c',
    'mbfc__Religious_Suffix__c',
    'mbfc__Type_Members__c',
    'Description',
    'Job_Id__c',
    'ParentId',
    'Archdpdx_Migration_Id__c'
    ]]

acc_religious_staging_2.sample(5)

Unnamed: 0,Name,RecordTypeId,mbfc__Religious_Type__c,BillingStreet,BillingCity,BillingState,BillingPostalCode,BillingCountry,Phone,Fax,mbfc__Email__c,Website,mbfc__Abbreviation__c,mbfc__Religious_Suffix__c,mbfc__Type_Members__c,Description,Job_Id__c,ParentId,Archdpdx_Migration_Id__c
200,"Society of Domus Dei Holy House Monasteries, W...",012Dx0000003p52IAA,Local Community,462 Hudson Rd,Washougal,WA,98671,,360-835-5358,,,http://nhachua.net/,Domus Dei,SDD,Men,Serving Our Lady of Lavang Parish; Southeast A...,138,001Dx00001HwE3eIAF,RelCommunities_21
227,"Sisters of St. Francis of Philadelphia, Portla...",012Dx0000003p52IAA,Local Community,609 S Convent Rd,Aston,PA,19014,,610-459-4125,,communications@osfphila.org,https://osfphila.org/,Sisters of St. Francis of Philadelphia,OSF,Women,"Serving Ascension Parish, Cathedral of the Imm...",138,001Dx00001HwE43IAF,RelCommunities_51
228,"Sisters of St. Joseph of Peace, Eugene (CSJP)",012Dx0000003p52IAA,Local Community,CSJP Western Region Office\nPO Box 248,Bellevue,WA,98009,,425-467-5400,425-462-9760,,https://csjp.org/,Sisters of St. Joseph,CSJP,Women,"Serving Sacred Heart RiverBend Medical Center,...",138,001Dx00001HwE44IAF,RelCommunities_52
254,"Community of St. Thomas More, Eugene (OP)",012Dx0000003p52IAA,Local Community,1386 E 18th Ave,Eugene,OR,97403,,541-343-0065,541-686-8028,,uonewman.org,Dominicans,OP,Men,Serving St. Thomas More Parish and Newman Cent...,138,001Dx00001HwE3dIAF,RelCommunities_80
194,"Blessed Stephen Bellesini Community, c/o Chanc...",012Dx0000003p52IAA,Local Community,3180 University Ave Ste 255,San Diego,CA,92104,,619-235-0247,,,https://www.californiaaugustinians.org/,Augustinians,OSA,Men,Serving Providence Medford Medical Center as C...,138,001Dx00001HwE3aIAF,RelCommunities_12


In [98]:
# Final Cleanup

acc_religious_staging_2 = acc_religious_staging_2.fillna('')

In [99]:
acc_religious_staging_2

Unnamed: 0,Name,RecordTypeId,mbfc__Religious_Type__c,BillingStreet,BillingCity,BillingState,BillingPostalCode,BillingCountry,Phone,Fax,mbfc__Email__c,Website,mbfc__Abbreviation__c,mbfc__Religious_Suffix__c,mbfc__Type_Members__c,Description,Job_Id__c,ParentId,Archdpdx_Migration_Id__c
186,"Colombiere Jesuit Community, Portland (SJ)",012Dx0000003p52IAA,Local Community,3220 SE 43rd Ave,Portland,OR,97206,,503-595-1941,,,https://www.jesuitswest.org/,Jesuits,SJ,Men,"Manager: Fr. Paul Cochran, SJ",138,001Dx00001HwE3TIAV,RelCommunities_1
187,"Abbey of Our Lady of Guadalupe, Carlton (OCSO)",012Dx0000003p52IAA,Local Community,Abbey of Our Lady of Guadalupe\n9200 NE Abbey Rd,Carlton,OR,97111,,503-852-7174,503-852-7748,community@trappistabbey.org,http://www.trappistabbey.org/,Trappists,OCSO,Men,The cloistered monastic cistercian community s...,138,001Dx00001HwE3UIAV,RelCommunities_2
188,"JCCU Jesuit Tertianship, Portland (SJ)",012Dx0000003p52IAA,Local Community,3301 SE 45th Ave,Portland,OR,97206,,,,jctertianship@jesuits.org,,Jesuits,SJ,Men,,138,001Dx00001HwE3TIAV,RelCommunities_3
189,"Benedictine Monks of Mount Angel Abbey, Saint ...",012Dx0000003p52IAA,Local Community,1 Abbey Dr,Saint Benedict,OR,97373,,503-845-3030,,info@mtangelabbey.edu,https://www.mountangelabbey.org/,Benedictines,OSB,Men,"Serving Mount Angel Towers, Providence Benedec...",138,001Dx00001HwE3VIAV,RelCommunities_4
190,Missionaries of the Holy Spirit Provincial Hou...,012Dx0000003p52IAA,Local Community,PO Box 22387,Milwaukie,OR,97269,,503-324-2492,503-324-2493,,www.mspscpp.org,"Missionaries of the Holy Spirit, Christ the Pr...",MSpS,Men,,138,001Dx00001HwE3WIAV,RelCommunities_8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,"Society of the Divine Word, Techny, IL (SVD)",012Dx0000003p52IAA,Local Community,"Society of the Divine Word, Chicago Province",,,,,,,,https://www.divineword.org/,Society of the Divine Word,SVD,Men,,138,001Dx00001HwE4QIAV,RelCommunities_77
252,"Society of the Divine Saviour, Rome, Italy (SDS)",012Dx0000003p52IAA,Local Community,"Via della Conciliazione, 51",Roma,,I-00193,ITALY,+39 06 686 291,,,http://www.sds.org/,Society of the Divine Saviour,SDS,Men,,138,001Dx00001HwE4RIAV,RelCommunities_78
253,"Society of Our Lady of the Most Holy Trinity, ...",012Dx0000003p52IAA,Local Community,PO Box 4116,Corpus Christi,TX,78469,,,,,https://solt.net/,Society of Our Lady of the Most Holy Trinity,SOLT,Men,,138,001Dx00001HwE4SIAV,RelCommunities_79
254,"Community of St. Thomas More, Eugene (OP)",012Dx0000003p52IAA,Local Community,1386 E 18th Ave,Eugene,OR,97403,,541-343-0065,541-686-8028,,uonewman.org,Dominicans,OP,Men,Serving St. Thomas More Parish and Newman Cent...,138,001Dx00001HwE3dIAF,RelCommunities_80


In [100]:
# Send to CSV
acc_religious_staging_2.to_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/staging/religious_community_staging.csv', encoding='utf-8-sig')

In [101]:
upsert_to_salesforce_bulk(sf, acc_religious_staging_2, 'Account', 'Archdpdx_Migration_Id__c', 'results_files/religious_comm_results.csv', 100)


Batch 1 processed: 70 successful, 0 failed.
Upsert completed. Total records processed: 70, Batches: 1, Successful upserts: 70, Failed upserts: 0


### E) Religious Superiors


In [102]:
acc_rel_superiors = acc_religious_2[[
    'Name',
    'Major Superior Name',
    'Major Superior Phone',
    'Major Superior Email',
    'Archdpdx_Migration_Id__c']].copy()


acc_rel_superiors['AccountId'] = acc_rel_superiors.Archdpdx_Migration_Id__c.map(religious_order_mapping)

acc_rel_superiors.sample(5)

Unnamed: 0,Name,Major Superior Name,Major Superior Phone,Major Superior Email,Archdpdx_Migration_Id__c,AccountId
225,"Sisters of Reparation, Portland (SR)",,,,sistersofreparationofthesacredwoundsofje,001Dx00001HwE41IAF
250,"Sons of Mary, Mother of Mercy, Umuahia, Nigeri...",,,,"sonsofmary,motherofmercy",001Dx00001HwE4PIAV
196,"Félix Rougier House of Studies, Mount Angel (M...",,,,misionerosdelespíritusanto,001Dx00001HwE3WIAV
240,"Order of Friars Minor, Capuchins, Portland (OF...",,,,"orderoffriarsminor,capuchins",001Dx00001HwE4FIAV
247,"Brothers of Saint John, Laredo, TX (CSJ)",,,,brothersofsaintjohn,001Dx00001HwE4MIAV


In [103]:
def parse_names(df, column_name):
    # Convert all non-string entries to strings (handling NaN and other data types)
    df[column_name] = df[column_name].fillna('').apply(str)

    # Create a new DataFrame to store the name parts
    name_parts = pd.DataFrame()

    # Parse each name in the column
    name_parts['First Name'] = df[column_name].apply(lambda x: HumanName(x).first if x.strip() != '' else '')
    name_parts['Last Name'] = df[column_name].apply(lambda x: HumanName(x).last if x.strip() != '' else '')
    name_parts['Middle Name'] = df[column_name].apply(lambda x: HumanName(x).middle if x.strip() != '' else '')
    name_parts['Title'] = df[column_name].apply(lambda x: HumanName(x).title if x.strip() != '' else '')
    name_parts['Suffix'] = df[column_name].apply(lambda x: HumanName(x).suffix if x.strip() != '' else '')
    name_parts['Nickname'] = df[column_name].apply(lambda x: HumanName(x).nickname if x.strip() != '' else '')

    # Combine the original DataFrame with the name parts DataFrame
    result_df = pd.concat([df, name_parts], axis=1)
    return result_df



In [104]:
!pip install nameparser
from nameparser import HumanName
from nameparser.config import CONSTANTS

# Add dataset-specific Titles and Suffix constants for parsing
CONSTANTS.titles.add('Rev.', 'Very Rev.', 'Very Rev', 'Sr.', 'Sr. ', 'Very', 'Bishop')
CONSTANTS.suffix_acronyms.add('FRS', 'OMI', 'OSA', 'OCD', 'OFM', 'OP', 'OC', 'FSE', 'OMV', 'SDB', 'SM', 'SFX', 'SP', 'OP', 'O.S.M', 'OSM' 'SNJM', 'OSF', 'HMRF', 'DD', 'CSJP', 'SDD', 'BVM', 'BVM - President', 'SJ')





SetManager({'dmin', 'ea', 'ifgict', 'mai', 'cph', 'chmc', 'faspen', 'cccm', 'smieee', 'cgc', 'apn aprn', 'pa-c', 'arm', 'o.s.m', 'fac-c', 'thm', 'csc', 'rhca', 'cpwa', 'asa', 'fieee', 'msa', 'caha', 'ceh', 'cgsp', 'cpsi', 'mph', 'emt-i/85', 'pt', 'dvm', 'bpi', 'lt', 'cap', 'nmd', 'jd', 'cmp', 'cro', 'kchs/dchs', 'osa', 'cpbe', 'els', 'cgma', 'cna', 'cma', 'cbnt', 'kcmg', 'edd', 'rn', 'kcvo', 'crna', 'cbte', 'ccp-c', 'cet', 'rba', 'fsdp', 'mieee', 'mvo', 'pci', 'mirm', 'diplom', 'uscg', 'gm', 'bts', 'agsf', 'lac', 'bca', 'ccc', 'dpm', 'crma', 'cpl', 'aca', 'cscp', 'uxc', 'fca', 'aia', 'che', 'usa', 'sscp', 'psyd', 'sasm', 'fec', 'rmsks', 'chp', 'drmp', 'tbr-ct', 'ccufc', 'pcc', 'kg', 'sj', 'do', 'ipep', 'cpa', 'cem', 'mpse', 'rfp', 'pe', 'git', 'gsmieee', 'ndtr', 'cmas', 'aqp', 'mem', 'cbv', 'ms', 'rdms', 'cpg', 'lp', 'abpp', 'cgb', 'fcela', 'dcm', 'fd', 'cltd', 'kcie', 'cpacc', 'dss', 'ocm', 'lvt', 'vd', 'cpe', 'cnm', '(vet)', 'asp', 'cera', 'lg', 'cia', 'chpe', 'cipm', 'rcp', 'cfre', 

In [105]:
# Parse Complex Names
acc_rel_superiors_parsed = parse_names(acc_rel_superiors, 'Major Superior Name')

In [106]:
acc_rel_superiors_staging = acc_rel_superiors_parsed.fillna('')

acc_rel_superiors_staging['Archdpdx_Migration_Id__c'] = acc_rel_superiors_staging['Major Superior Name'].apply(lambda x: x.replace(' ','').lower())

# Rename columns
acc_rel_superiors_staging = acc_rel_superiors_staging.rename(columns={
    'Major Superior Phone': 'Phone',
    'Major Superior Email': 'Email',
    'Title': 'Salutation',
    'First Name': 'FirstName',
    'Middle Name': 'MiddleName',
    'Last Name': 'LastName'
})

# Add job id
acc_rel_superiors_staging['Archdpdx_Job_Id__c'] = curr_job_id

# Drop columns
acc_rel_superiors_staging = acc_rel_superiors_staging.drop(columns=['Name', 'Major Superior Name', 'Nickname'])

# Drop empty rows
acc_rel_superiors_staging = acc_rel_superiors_staging[acc_rel_superiors_staging['LastName'].str.strip() != '']

acc_rel_superiors_staging.sample(10)

Unnamed: 0,Phone,Email,Archdpdx_Migration_Id__c,AccountId,FirstName,LastName,MiddleName,Salutation,Suffix,Archdpdx_Job_Id__c
214,914-941-7575,maryknoll@mksisters.org,"sisterteresahougnon,mm,president",001Dx00001HwE3qIAF,Teresa,Hougnon,,Sister,"MM, President",138
234,,,"rev.seancarroll,sj",001Dx00001HwE3TIAV,Sean,Carroll,,Rev.,SJ,138
249,+39 06 61571401,pr@sancarlo.org,fr.paolosottopietra,001Dx00001HwE4OIAV,Paolo,Sottopietra,,Fr.,,138
203,,ipertine@socsj.org,rev.ivanpertine,001Dx00001HwE3hIAF,Ivan,Pertine,,Rev.,,138
238,+91 80 74 51 02 67,rkappumkal@gmail.com,fr.kappumkalthomas,001Dx00001HwE4DIAV,Kappumkal,Thomas,,Fr.,,138
254,510-658-8722,provincial@opwest.org,"veryrev.christopherfadok,op,provincial",001Dx00001HwE3dIAF,Christopher,Fadok,,Very Rev.,"OP, Provincial",138
245,617-536-4141,office@omvusa.org,"fr.jimwalther,omv",001Dx00001HwE4KIAV,Jim,Walther,,Fr.,OMV,138
192,203-238-2243,,"mothermiriamseiferman,fse",001Dx00001HwE3YIAV,Miriam,Seiferman,,Mother,FSE,138
222,,,sr.josephine,001Dx00001HwE3yIAF,,Josephine,,Sr.,,138
198,909-793-0424,,"fr.matthewwilliams,o.c.d.",001Dx00001HwE3cIAF,Matthew,Williams,,Fr.,O.C.D.,138


In [107]:
acc_rel_superiors_staging.sample(10)

Unnamed: 0,Phone,Email,Archdpdx_Migration_Id__c,AccountId,FirstName,LastName,MiddleName,Salutation,Suffix,Archdpdx_Job_Id__c
222,,,sr.josephine,001Dx00001HwE3yIAF,,Josephine,,Sr.,,138
243,,,rogériogomes,001Dx00001HwE4IIAV,Rogério,Gomes,,,,138
207,,,"sr.janehibbard,snjmmonasteryadministrator",001Dx00001HwE3VIAV,SNJM,Sr. Jane Hibbard,Monastery Administrator,,,138
206,,,"sr.elisegarcía,opchapterprioress",001Dx00001HwE3kIAF,OP,Sr. Elise García,Chapter Prioress,,,138
224,,,"sisterbarbaraschamber,sp,provinical",001Dx00001HwE40IAF,Barbara,Schamber,,Sister,"SP, Provinical",138
235,630-424-0401,schprov@ aol.com,fr.jacekwalkiewicz,001Dx00001HwE4AIAV,Jacek,Walkiewicz,,Fr.,,138
254,510-658-8722,provincial@opwest.org,"veryrev.christopherfadok,op,provincial",001Dx00001HwE3dIAF,Christopher,Fadok,,Very Rev.,"OP, Provincial",138
228,,,"sisterandreanenzel,csjp",001Dx00001HwE44IAF,Andrea,Nenzel,,Sister,CSJP,138
191,610-502-1732,ajregionalusa@gmail.com,reverendaugustineidra,001Dx00001HwE3XIAV,Augustine,Idra,,Reverend,,138
210,973-403-3331,dominicans@caldwellop.org,"sr.luellaramm,op",001Dx00001HwE3mIAF,Luella,Ramm,,Sr.,OP,138


In [108]:
# Send to CSV
acc_rel_superiors_staging.to_csv('staging_files/religious_superiors_staging.csv', encoding='utf-8-sig')

In [109]:
# Upsert to Salesforce

def find_existing_contact(sf, first_name, last_name):
    query = f"SELECT Id, Archdpdx_Migration_Id__c FROM Contact WHERE FirstName = '{first_name}' AND LastName = '{last_name}'"
    result = sf.query(query)
    return result['records']



bulk_data = []
for row in acc_rel_superiors_staging.itertuples(index=False):
    d = row._asdict()
    existing_contacts = find_existing_contact(sf, d['FirstName'], d['LastName'])
    if existing_contacts:
        # Update existing contact with external ID
        d['Id'] = existing_contacts[0]['Id']
        bulk_data.append(d)
    else:
        bulk_data.append(d)


if run_upserts == 'True':
    religious_superior_upsert = sf.bulk.Contact.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=100, use_serial=False)
    df_rel_superior_upsert = pd.DataFrame(religious_superior_upsert)

df_rel_superior_upsert

Unnamed: 0,success,created,id,errors
0,False,False,,"[{'statusCode': 'DUPLICATE_VALUE', 'message': ..."
1,True,False,003Dx00000nKikgIAC,[]
2,True,False,003Dx00000nKikhIAC,[]
3,True,False,003Dx00000nKikiIAC,[]
4,False,True,,"[{'statusCode': 'INVALID_EMAIL_ADDRESS', 'mess..."
5,True,False,003Dx00000nKikjIAC,[]
6,True,False,003Dx00000nKikkIAC,[]
7,False,False,,"[{'statusCode': 'DUPLICATE_VALUE', 'message': ..."
8,True,False,003Dx00000nKiklIAC,[]
9,True,False,003Dx00000nKikmIAC,[]


# CONTACTS


## Extract


In [111]:
import pandas as pd
df_contacts = (pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/People.csv')
               .set_index('Record Number', verify_integrity=True)
               .drop(index='recNum') # Drops the extra row that replicates the labels
               .rename(columns=lambda x: x.replace(' ', '_')) # Remove whitespace in column names
)

df_contacts.sample(10)


Unnamed: 0_level_0,Common_Name,Sort_Name,Type(s),Clergy_Status,Religious_Status,Login_ID,Password,Password_Must_be_Changed,Access_Permission,Spouse,Title,Salutation,Christian_Name,Nickname,Middle_Name(s),Surname,Suffix,Mailing_Address,Mailing_Address_2,Mailing_Address_City,Mailing_Address_State,Mailing_Address_Province,Mailing_Address_Postal_Code,Mailing_Address_Country,Private_Address,Private_Address_2,Private_Address_City,Private_Address_State,Private_Address_Province,Private_Address_Postal_Code,Private_Address_Country,Preferred_Address,Work_Phone,Home_Phone,Cell_Phone,Preferred_Phone,Work_Email,Archdiocesan_Email,Home_Email,Preferred_Email,Directory_Include,Directory_Include_Middle_Name,Directory_Include_Suffix,Suppress_From_Reports,Seminarian_Student_Debt,Seminarian_Medical_Benefits,Send_Group_Mail_and_Email,Birth_Date,Place_of_Birth,Foreign_Born,Father_Full_Name,Mother_Full_Maiden_Name,Foreign_Citizenship,Immigration_Status,Passport/Visa_Expiration_Date,Social_Security_Account_Number,Baptism_Date,Place_of_Baptism,Confirmation_Date,Place_of_Confirmation,Received_Date,Parish_of_Record,Marriage_Date,Place_of_Marriage,Date_of_First_Vows,Date_of_Final_Vows,Accepted_to_Formation_Date,Reader_Date,Acolyte_Date,Candidacy_Date,Formation_Withdrawn_Date,Formation_Deferred_Date,Formation_Terminated_Date,Terminate_or_Defer_Note,Bachelor_Degree_Year,Bachelor_Degree_Type,Bachelor_Degree_Institution,Graduate_1_Degree_Year,Graduate_1_Degree_Type,Graduate_1_Degree_Institution,Graduate_2_Degree_Year,Graduate_2_Degree_Type,Graduate_2_Degree_Institution,Graduate_3_Degree_Year,Graduate_3_Degree_Type,Graduate_3_Degree_Institution,Graduate_4_Degree_Year,Graduate_4_Degree_Type,Graduate_4_Degree_Institution,CARA_Highest_Ed_Level,Diaconal_Ordination_Date,Diaconal_Ordination_Place,Diaconal_Ordination_Prelate,Presbyteral_Ordination_Date,Presbyteral_Ordination_Place,Presbyteral_Ordination_Prelate,Episcopal_Ordination_Date,Episcopal_Ordination_Place,Episcopal_Ordination_Prelate,Ordination_Diocese,Incardinated_From_Diocese,Incardinated_From_Date,Incardinated_Now,Serving_Now,Excardinated_To_Diocese,Excardinated_To_Date,Letter_of_Good_Standing_Date,Religious_In_Archdiocese_Date,Faculties,Faculties_Granted_Date,Faculties_Restricted_Date,Faculties_Withdrawn_Date,Last_Retreat_Date,Last_Educ_Requirement_Date,Policy_Manual_Acknowledgement_Date,Harassment_Prevention_Course_Date,Standards_of_Conduct_Date,Last_Background_Check_Date,Last_Child_Protection_Training_Date,Out_of_Diocese_Date,Senior_Status_Date,Laicized_Date,Deceased_Date,Languages,Coverage_Availability,Advanced_Directive_Date,End_of_Life_Plan_Date,Will_Date,Will_Note,CIC_489_File,Registered_Parish,CARA_Ethnicity,Seminarian_Status,Other_Diaconal_Ministry,Spiritual_Director_Authorized,Link_to_Religious_Community,Place_of_Work,Volunteer_Place,Type_of_Work,Work_Load,Work_Title
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1
2945,Ms. Tanya Salazar,salazar tanya,Staff,,,,,,,0,Ms.,Ms.,Tanya,,,Salazar,,Our Lady of Victory Parish,PO Box 29,Seaside,OR,,97138.0,,,,,,,,,,503-738-6161,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
3109,Mr. Ron Boyce,boyce ron joseph,Diaconate Inquirer,,,rboyce,1fe97d2547343488ec6876c77b8d4050ae52e281b4fdb8...,Yes,,3110,Mr.,Mr.,Ronald,Ron,Joseph,Boyce,,,,,,,,,PO Box 23093,,Tigard,OR,,97281.0,,,,,503-730-1986,,,,rboyce4539@gmail.com,,,,,,0,,Yes,1967-11-02,,,,,,,,,1989-03-26,"Sacred Heart Parish, Medford, OR",,,,,1994-06-18,"St. Anthony Parish, Tigard, OR",,,,,,,,,,,,,,,,,,,,,,,,,,Some college/Associate degree,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,Caucasian/white,,,,0,,,,,
2690,"Sr. Carole Strawn, SNJM",strawn carole,Religious,,Active,,,,,0,Sr.,Sr.,Carole,,,Strawn,,,,,,,,,227 N Ainsworth St,,Portland,OR,,97217.0,,,,503-285-1790,,,,,clrs1859@gmail.com,,Yes,,,,0,,Yes,,,,,,,,,,,,,,,,,,1966-01-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,44,Mary’s Woods,,SNJM Archivist,Full Time,Vice President of Mission Integration
1039,Rev. Dirk Dunfee,dunfee dirk,Priest,Transferred Out,,,,,,0,Rev.,Fr.,Dirk,,,Dunfee,,,,,,,,,,,,,,,,,,,,,,,,,No,No,No,No,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,0,,,,,0,,,,,
638,Rev. Stephen Fister,fister stephen,Priest,Senior Status,,ffister,bc7030a6b916b8f84f8109ca10bfb209ead1ce9f9ca949...,No,,0,Rev.,Fr.,Stephen,,,Fister,,St. Rita Retreat Center,PO Box 310,Gold Hill,OR,,97525.0,,,,,,,,,,541-855-1333,,,,,,paterretreat@gmail.com,,Yes,No,No,No,0,,Yes,1948-01-20,USA,No,,,,,,TdPwOhjR5+ckMI2YEsjYRw==,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1982-01-10,"Prince of Peace Abbey, Oceanside, CA",Most Rev. Leo T. Maher,,,,Benedictines (OSB),Benedictines (OSB),2006-08-08,Archdiocese of Portland in Oregon,Archdiocese of Portland in Oregon,,,,,General,,,,,,,2023-01-09,2016-06-28,2022-02-08,2022-01-11,,2012-02-07,,,,,,,,,,0,,,,,0,,,,,
1678,Ms. Jenny Lopez-Garcia,lopezgarcia jenny,Staff,,,,,,,0,Ms.,Ms.,Jenny,,,Lopez-Garcia,,St. Alexander Parish,PO Box 644,Cornelius,OR,,97113.0,,,,,,,,,,503-359-0304,,,,reception.sac@comcast.net,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
1465,Ms. Carmen Gaston,gaston carmen,Staff,,,,,,,0,Ms.,Ms.,Carmen,,,Gaston,,Pastoral Center,2838 E Burnside St,Portland,OR,,97214.0,,,,,,,,,,503-233-8332,,,,,cgaston@archdpdx.org,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
2431,"Sr. Angela Mary Aldi, OSF",aldi angela mary,Religious,,Active,,,,,0,Sr.,Sr.,Angela Mary,,,Aldi,,Sisters of Our Lady of Sorrows,3600 SW 170th Ave,Beaverton,OR,,97003.0,,,,,,,,,,503-649-7127,,,,,,,,Yes,Yes,,,0,,Yes,,,,,,,,,,,,,,,,,,1970-08-12,1983-08-15,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,35,Our Lady of Peace Retreat Center,,Various,Full Time,"kitchen, grounds, infimary"
740,Rev. Karl Schray,schray karl,Priest,Active,,kschray,92f43484361a556913fe7362c8e89dca9979e502f0328b...,No,,0,Rev.,Fr.,Karl,,,Schray,,All Souls Parish,PO Box 810,Myrtle Creek,OR,,97457.0,,PO Box 1968,,Rogue River,OR,,97537.0,,Mailing,541-863-3271,541-227-8274,,Work,,,kschray@gmail.com,Home,Yes,No,No,No,0,,Yes,1939-08-09,USA,No,,,,,,nhtRRqLVty93FdfQzNoygw==,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1965-05-22,"Cathedral of the Immaculate Conception, Portla...",Most Rev. Edward D. Howard,,,,Archdiocese of Portland in Oregon,,,Archdiocese of Portland in Oregon,Archdiocese of Portland in Oregon,,,,,General,,,,,,,2023-06-15,2018-08-23,2022-01-24,2023-12-16,,,,,,,,,2016-05-03,,,0,,,,,0,,,,,
1284,"Rev. Anthony Patalano, OP",patalano anthony,"Priest,Religious",Transferred Out,Transferred Out,,,,,0,Rev.,Fr.,Anthony,,,Patalano,,,,,,,,,,,,,,,,,,,,,,,,,No,No,No,No,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,0,,,,,18,,,,,


#### Get Photos


In [112]:
import os
import pandas as pd

# def list_jpeg_files(directory):
#     data = []
#     for filename in os.listdir(directory):
#         if filename.endswith(".jpeg") or filename.endswith(".jpg"):  # Checking for jpeg files
#             full_path = os.path.join(directory, filename)
#             data.append({'Filename': filename, 'Full Path': full_path})
#     return pd.DataFrame(data)

# # Specify your directory
# directory = '/content/drive/Shareddrives/Clients/ADPDX (Portland)/Data/Clergy DB/sql_backup/archdpdx.info backups/public_html/people/graphics/portraits/large'
# jpeg_files_df = list_jpeg_files(directory)


In [113]:
# # Query for the Library
# library_query = "SELECT Id, Name FROM ContentWorkspace WHERE Name = 'ADPDX Person Profile Photos'"
# library_result = sf.query(library_query)

# # Check if the library exists and get its ID
# if library_result['records']:
#     library_id = library_result['records'][0]['Id']
#     print(f"Library ID: {library_id}")

#     # Query for the Folder within the Library
#     folder_query = f"SELECT Id, Name FROM ContentFolder WHERE ParentContentFolderId = '{library_id}'"
#     folder_result = sf.query(folder_query)

#     # Check if the folder exists and get its ID
#     if folder_result['records']:
#         folder_id = folder_result['records'][0]['Id']
#         print(f"Folder ID: {folder_id}")
#     else:
#         print("Folder 'Large JPEGs' not found in the library.")
# else:
#     print("Library 'ADPDX Person Profile Photos' not found.")

## Analysis

Here we check the various columns and their types, count where values exist, count of unique values, sample data, etc.

DF shape:

- 142 columns
- 3017 rows


In [114]:
# Check the original shape of the imported CSV
print(f"Shape of original data set: {df_contacts.shape}")

# export to csv a list of the contact fields with count, unique, top, freq
contacts_describe = df_contacts.describe(include='all').transpose()
contacts_describe.to_csv(f'/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/analysis/contacts_describe.csv')

df_contacts.describe(include='all').transpose()  #initial analysis of the Contacts table

Shape of original data set: (3016, 141)


Unnamed: 0,count,unique,top,freq
Common_Name,3016,3011,Ms. Leslie Jones,2
Sort_Name,3016,3009,nguyen anthony,3
Type(s),3016,29,Staff,1139
Clergy_Status,1138,8,Transferred Out,462
Religious_Status,902,4,Active,456
...,...,...,...,...
Place_of_Work,269,133,Mount Angel Abbey,37
Volunteer_Place,54,47,Mary’s Woods,4
Type_of_Work,276,117,Pastoral Ministry,30
Work_Load,262,2,Full Time,230


In [115]:
#How many spousal links are there? 

spouses_count = df_contacts[(df_contacts['Spouse'].notna()) & (df_contacts['Spouse'] != '0')].shape[0]
spouses_count

240

In [116]:
unique_reg_parishes = df_contacts['Registered_Parish'].value_counts()
unique_reg_parishes


0      2966
62        4
65        3
11        3
139       2
109       2
42        2
55        2
46        2
83        2
70        1
120       1
3         1
14        1
73        1
93        1
103       1
68        1
44        1
40        1
145       1
23        1
30        1
91        1
7         1
122       1
125       1
16        1
33        1
99        1
84        1
135       1
53        1
26        1
124       1
29        1
142       1
54        1
Name: Registered_Parish, dtype: int64

In [117]:
unique_languages = df_contacts['Languages'].unique()
unique_languages

array([nan, 'English,Spanish', 'Igbo', 'English, Spanish',
       'Spanish, Mayaqeqchi', 'Spanish (Mass only)',
       'Latin Mass and written translation. Read French, Italian, Spanish.',
       'Spanish', 'Hindi, Konkani, Tamil',
       'French (fluent), Spanish (beginner), Latin (beginner)',
       'German, Spanish, Italian, French', 'Kiswahili, Kichagga',
       'Spanish (English is second language)',
       'German, Spanish, Italian, Latin Mass',
       'English, Spanish, Italian', 'Spanish, Italian', 'English',
       'Bicolango, Tagalog, Spanish', 'Spanish, Italian, Latin Mass',
       'Italian', 'Tagalog, English, Spanish',
       'French, Italian, Aramaic (modern), Spanish', 'Vietnamese',
       'German, Spanish', 'English,Spanish,Italian',
       'Conversant in Italian and Spanish, some facility with Latin and German',
       'English, Spanish, Latin Mass', 'Italian, Spanish',
       'Konkani, Hindi, Marathi, Spanish',
       'Tagalog, Bicol, Spanish (Mass only)', 'Spanish, E

In [118]:
# import re
# import numpy as np


# def deduplicate_languages(list_languages):
#     # Define a regular expression pattern to match periods and punctuation
#     punctuation_pattern = r'[.,!?;:"]'

#     # Flatten the array and filter out NaN values
#     flattened_languages = [re.sub(punctuation_pattern, '', lang) for sublist in list_languages if pd.notna(sublist) for lang in sublist.split(',')]

#     # Deduplicate the list of languages
#     unique_languages = list(set(flattened_languages))

#     return unique_languages


# # Example usage:
# unique_languages = deduplicate_languages(unique_languages)
# print(unique_languages)


## Transform


In [119]:
# list of columns NOT to be migrated as Contact attributes
misc_columns_to_drop = [
    'Password',
    'Password_Must_be_Changed',
    'Sort_Name'
]

affiliation_columns = [
    'Baptism_Date',
    'Place_of_Baptism',
    'Confirmation_Date',
    'Place_of_Confirmation',
    'Received_Date',
    'Parish_of_Record',
    'Marriage_Date',
    'Place_of_Marriage',
    'Date_of_First_Vows',
    'Date_of_Final_Vows',
    'Reader_Date',
    'Acolyte_Date',
    'Bachelor_Degree_Year',
    'Bachelor_Degree_Type',
    'Bachelor_Degree_Institution',
    'Graduate_1_Degree_Institution',
    'Graduate_1_Degree_Type',
    'Graduate_1_Degree_Year',
    'Graduate_2_Degree_Institution',
    'Graduate_2_Degree_Type',
    'Graduate_2_Degree_Year',
    'Graduate_3_Degree_Institution',
    'Graduate_3_Degree_Type',
    'Graduate_3_Degree_Year',
    'Graduate_4_Degree_Institution',
    'Graduate_4_Degree_Type',
    'Graduate_4_Degree_Year',
    'Diaconal_Ordination_Date',
    'Diaconal_Ordination_Place',
    'Diaconal_Ordination_Prelate',
    'Presbyteral_Ordination_Date',
    'Presbyteral_Ordination_Place',
    'Presbyteral_Ordination_Prelate',
    'Episcopal_Ordination_Date',
    'Episcopal_Ordination_Place',
    'Episcopal_Ordination_Prelate',
    'Incardinated_From_Date',
    'Incardinated_From_Diocese',
    'Excardinated_To_Diocese',
    'Excardinated_To_Date',
    'Faculties',
    'Faculties_Granted_Date',
    'Faculties_Restricted_Date',
    'Faculties_Withdrawn_Date',
]

# These fields need to be KEPT but while building the SF upsert flow these are dropped temporarily until mapping logic is included.
# TODO

fields_not_yet_mapped = [
    'Common_Name', # is a concat, does not need to be mapped
    'Mailing_Address_Province', # non-impactful, can leave as dropped
    'Private_Address_Province', # non-impactful, can leave as dropped
    #'Social_Security_Account_Number__c',  # The data is encrypted
    'Serving_Now', # TBD - not sure if this is a field that needs to be migrated
    'Ordination_Diocese', # TBD... isn't this covered by Reg. Entry records? 
    'Registered_Parish' # decided will not migration on 2/21/2025

]

In [120]:
# UDF to combine multiple Mailing Street Address lines into one
def combine_addresses(row, *columns):
    address_parts = []
    for col in columns:
        value = row[col]
        if pd.notnull(value):  # Check for non-null values
            address_parts.append(str(value))  # Convert to string
    return '\n'.join(address_parts)  # '\n' for line break

In [121]:
df_contact_staging = (df_contacts
                      .drop(columns='Salutation')
                      .rename(columns={
                          'Clergy_Status' : 'ADPDX_Clergy_Status__c',
                          'Religious_Status' : 'ADPDX_Religious_Status__c',
                          'Login_ID' : 'ADPDX_Login_ID__c',
                          'Access_Permission': 'ADPDX_Access_Permission__c',
                          'Title': 'Salutation',
                          'Christian_Name': 'FirstName',
                          'Middle_Name(s)': 'MiddleName',
                          'Surname': 'LastName',
                          'Suffix': 'Suffix',
                          'Preferred_Address': 'Preferred_Address__c',
                          'Mailing_Address_City': 'MailingCity',
                          'Mailing_Address_State': 'MailingState',
                          'Mailing_Address_Postal_Code': 'MailingPostalCode',
                          'Mailing_Address_Country': 'MailingCountry',
                          'Private_Address_City': 'OtherCity',
                          'Private_Address_State': 'OtherState',
                          'Private_Address_Postal_Code': 'OtherPostalCode',
                          'Private_Address_Country': 'OtherCountry',
                          'Work_Phone': 'npe01__WorkPhone__c',
                          'Home_Phone': 'HomePhone',
                          'Cell_Phone': 'MobilePhone',
                          'Preferred_Phone': 'npe01__PreferredPhone__c',
                          # IF Preferred phone contains, 'do not publish'
                          'Work_Email' : 'npe01__WorkEmail__c',
                          'Archdiocesan_Email': 'npe01__AlternateEmail__c',
                          'Home_Email': 'npe01__HomeEmail__c',
                          'Preferred_Email': 'npe01__Preferred_Email__c',
                          # IF Preferred email contains 'do not publish''
                          'Directory_Include': 'Directory_Include__c',
                          'Directory_Include_Middle_Name': 'Directory_Include_Middle_Name__c',
                          'Directory_Include_Suffix': 'Directory_Include_Suffix__c',
                          'Suppress_From_Reports': 'Suppress_From_Reports__c',
                          'Send_Group_Mail_and_Email': 'Send_Group_Mail_and_Email__c',
                          'Birth_Date': 'Birthdate',
                          'Place_of_Birth': 'mbfc__Place_of_Birth__c',
                          'Foreign_Born': 'Foreign_Born__c',
                          'Foreign_Citizenship': 'Foreign_Citizenship__c',
                          'Immigration_Status': 'Immigration_Status__c',
                          'Passport/Visa_Expiration_Date': 'Passport_Visa_Expiration_Date__c',
                          'Social_Security_Account_Number': 'Social_Security_Number__c',
                          'Deceased_Date': 'mbfc__Date_of_Death__c',
                          'Out_of_Diocese_Date': 'mbfc__Date_Left_Diocese__c', 
                          'CARA_Ethnicity': 'adpdx_CARA_Ethnicity__c',
                          'Seminarian_Status': 'adpdx_Seminarian_Status__c',
                          'Other_Diaconal_Ministry': 'adpdx_Other_Diaconal_Ministry__c',
                          'Spiritual_Director_Authorized': 'adpdx_Spiritual_Director_Authorized__c',
                          'Place_of_Work': 'adpdx_Place_of_Work__c',
                          'Volunteer_Place': 'adpdx_Volunteer_Place__c',
                          'Type_of_Work': 'adpdx_Type_of_Work__c',
                          'Work_Load': 'adpdx_Work_Load__c',
                          'Work_Title': 'adpdx_Work_Title__c',
                          'Coverage_Availability': 'adpdx_Coverage_Availability__c', 
                          'Advanced_Directive_Date': 'adpdx_Advanced_Directive_Date__c',
                          'End_of_Life_Plan_Date': 'adpdx_End_of_Life_Plan_Date__c',
                          'Will_Date': 'adpdx_Will_Date__c',
                          'Will_Note': 'adpdx_Will_Note__c',
                          'CIC_489_File': 'adpdx_CIC_489_File__c',
                          'Senior_Status_Date': 'adpdx_Senior_Status_Date__c', 
                          'Laicized_Date': 'adpdx_Laicized_Date__c',
                          'Seminarian_Student_Debt': 'adpdx_Seminarian_Student_Debt__c',
                          'Seminarian_Medical_Benefits': 'adpdx_Seminarian_Medical_Benefits__c',
                          'Candidacy_Date': 'adpdx_Candidacy_Date__c',
                          'Accepted_to_Formation_Date': 'adpdx_Accepted_to_Formation_Date__c',
                          'Formation_Withdrawn_Date': 'adpdx_Formation_Withdrawn_Date__c',
                          'Formation_Deferred_Date': 'adpdx_Formation_Deferred_Date__c',
                          'Formation_Terminated_Date': 'adpdx_Formation_Terminated_Date__c',
                          'Terminate_or_Defer_Note': 'adpdx_Terminate_or_Defer_Note__c',
                          'CARA_Highest_Ed_Level': 'adpdx_CARA_Highest_Ed_Level__c',
                          'Letter_of_Good_Standing_Date': 'adpdx_Letter_of_Good_Standing__c',
                          'Religious_In_Archdiocese_Date': 'mbfc__Date_of_Arrival_in_Diocese__c',
                          'Last_Retreat_Date': 'adpdx_Last_Retreat_Date__c',
                          'Last_Educ_Requirement_Date': 'adpdx_Last_Educ_Requirement_Date__c',
                          'Policy_Manual_Acknowledgement_Date': 'adpdx_Policy_Manual_Acknowledgement_Date__c',
                          'Harassment_Prevention_Course_Date': 'adpdx_Harassment_Prevention_Course_Date__c',
                          'Standards_of_Conduct_Date': 'adpdx_Standards_of_Conduct_Date__c',
                          'Last_Background_Check_Date': 'adpdx_Last_Background_Check_Date__c',
                          'Last_Child_Protection_Training_Date': 'adpdx_Last_Child_Protection_Training__c',
                          'Languages': 'Languages__c',
                          'Nickname': 'adpdx_Preferred_Name__c',
                          'Father_Full_Name': 'Father_Full_Name__c',
                          'Mother_Full_Maiden_Name': 'Mother_Full_Maiden_Name__c'

                          })
                      .assign(Bi_Ritual__c=lambda x: x['Type(s)'].str.contains('Biritual'))
                      .assign(Non_Latin_Rite__c=lambda x: x['Type(s)'].str.contains('Non-Latin Rite'))
                      .assign(adpdx_Discerner_Aspirant_for_Diaconate__c=lambda x: x['Type(s)'].str.contains('Diaconate'))
                      .assign(adpdx_Is_Seminarian__c=lambda x: x['Type(s)'].str.contains('Seminar'))
                      
                      .assign(Archdpdx_Migration_Id__c=lambda x: x.index)
                      .assign(MailingStreet=lambda x: x.apply(lambda row: combine_addresses(row, 'Mailing_Address', 'Mailing_Address_2'), axis=1))
                      .drop(columns=['Mailing_Address', 'Mailing_Address_2'])  # Optional: Drop original columns if not needed
                      .assign(OtherStreet=lambda x: x.apply(lambda row: combine_addresses(row, 'Private_Address', 'Private_Address_2'), axis=1))
                      .drop(columns=['Private_Address', 'Private_Address_2'])  # Optional: Drop original columns if not needed
                      .drop(columns=misc_columns_to_drop)
                      .drop(columns=affiliation_columns)
                      .drop(columns=fields_not_yet_mapped)

        )


In [122]:
# For use in the 'Contacts > Spouses' section
df_contact_staging_spouses = df_contact_staging.copy(deep=True)


In [123]:
df_contact_staging_spouses['Spouse']

Record Number
2766    0
2337    0
3244    0
3295    0
2164    0
       ..
1670    0
2755    0
1962    0
2202    0
1866    0
Name: Spouse, Length: 3016, dtype: object

In [124]:
df_contact_staging.drop(columns=['Spouse'], inplace=True)

In [125]:
df_contact_staging.columns

Index(['Type(s)', 'ADPDX_Clergy_Status__c', 'ADPDX_Religious_Status__c',
       'ADPDX_Login_ID__c', 'ADPDX_Access_Permission__c', 'Salutation',
       'FirstName', 'adpdx_Preferred_Name__c', 'MiddleName', 'LastName',
       'Suffix', 'MailingCity', 'MailingState', 'MailingPostalCode',
       'MailingCountry', 'OtherCity', 'OtherState', 'OtherPostalCode',
       'OtherCountry', 'Preferred_Address__c', 'npe01__WorkPhone__c',
       'HomePhone', 'MobilePhone', 'npe01__PreferredPhone__c',
       'npe01__WorkEmail__c', 'npe01__AlternateEmail__c',
       'npe01__HomeEmail__c', 'npe01__Preferred_Email__c',
       'Directory_Include__c', 'Directory_Include_Middle_Name__c',
       'Directory_Include_Suffix__c', 'Suppress_From_Reports__c',
       'adpdx_Seminarian_Student_Debt__c',
       'adpdx_Seminarian_Medical_Benefits__c', 'Send_Group_Mail_and_Email__c',
       'Birthdate', 'mbfc__Place_of_Birth__c', 'Foreign_Born__c',
       'Father_Full_Name__c', 'Mother_Full_Maiden_Name__c',
       'For

In [126]:
df_contact_staging.MailingStreet.sample(10)

Record Number
80                                                  
179                                                 
236                                                 
2163                 St. Henry Parish\n346 NW 1st St
889                                                 
3185    St. Andre Bessette Parish\n601 W Burnside St
3133               Ascension Parish\n743 SE 76th Ave
2389                St. Luke Parish\n417 Harrison St
1501        Gethsemani Cemetery\n11666 SE Stevens Rd
2328       The Madeleine School\n3240 NE 23rd Avenue
Name: MailingStreet, dtype: object

### Gender

In [127]:
# Function to determine gender based on 'Type(s)' field
def determine_gender(row):
    if any(x in row['Type(s)'] for x in ['Bishop', 'Priest', 'Transitional Deacon', 'Permanent Deacon', 'Diaconate Formation', 'Diaconate Inquirer', 'Seminarian', 'Seminary Applicant']):
        return 'Male'
    elif 'Religious' in row['Type(s)'] and 'Br.' in row['Salutation']:
        return 'Male'
    elif 'Wife' in row['Type(s)'] or ('Religious' in row['Type(s)'] and 'Sr.' in row['Salutation']):
        return 'Female'
    elif any(x in row['Type(s)'] for x in ['Staff', 'Archive']):
        if row['Salutation'] == 'Ms.':
            return 'Female'
        elif row['Salutation'] == 'Mr.':
            return 'Male'
    else:
        return ''

In [128]:
# Apply the function to the DataFrame to create the new column
df_contact_staging['mbfc__gender__c'] = df_contact_staging.apply(determine_gender, axis=1)


In [129]:
df_contact_staging['mbfc__gender__c'].value_counts()

Male      1715
Female    1272
             3
Name: mbfc__gender__c, dtype: int64

### Languages


In [130]:
# # Define a function to clean the 'languages' column

# import re
# def clean_languages(text):
#     if pd.isna(text):
#         return text
#     # Remove text inside parentheses
#     text = re.sub(r'\(.*?\)', '', text)
#     # Replace ' & ' or ' and ' with ';'
#     text = re.sub(r' & | and ', ';', text)
#     # Replace commas with semicolons
#     text = text.replace(',', ';')
#     # Remove spaces before and after semicolons
#     text = re.sub(r'\s*;\s*', ';', text)
#     return text.strip(';')

# # Apply the cleaning function to the 'languages' column
# df_contact_staging['Languages__c'] = df_contact_staging['Languages__c'].apply(clean_languages)


### Private Address Handling


In [131]:
# If 'OtherStreet' is not null, then set Secondary Address Type to 'Private'.  This is because the 'OtherAddress' fields all come from the 'Private' address fields in source system. 
df_contact_staging['npe01__Secondary_Address_Type__c'] = df_contact_staging['OtherStreet'].apply(lambda x: 'Private' if pd.notnull(x) else None)


### Handle Boolean Fields


In [132]:
boolean_columns_to_convert = ['Foreign_Born__c', 'Directory_Include__c', 'Directory_Include_Middle_Name__c', 'Directory_Include_Suffix__c',
       'Suppress_From_Reports__c', 'Send_Group_Mail_and_Email__c', ]

df_contact_staging[boolean_columns_to_convert] = df_contact_staging[boolean_columns_to_convert].replace({'Yes': True, 'No': False})


In [133]:
df_contact_staging[boolean_columns_to_convert] = df_contact_staging[boolean_columns_to_convert].fillna(False)

df_contact_staging[boolean_columns_to_convert].sample(5)

Unnamed: 0_level_0,Foreign_Born__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,Send_Group_Mail_and_Email__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2933,False,False,False,False,False,True
2161,False,False,False,False,False,True
1434,False,False,False,False,False,False
2478,False,True,False,False,False,True
32,False,False,False,False,False,False


### Set Contact Record Type


In [134]:
# Set Record Type

# Go down row by row and check the 'Type(s)' columns, check for certain words that are keys in a dictionary, and
# the that row's 'Type(s)' field contains a string that is in the a key in a dictionary the update another columns
# called 'ContactRecordType' with the paired value.

contact_type_map = {
    'Bishop': 'Priest',
    'Priest': 'Priest',
    'Transitional Deacon': 'Permanent_Deacon',
    'Permanent Deacon': 'Permanent_Deacon',
    'Seminarian': 'Lay_Person',
    'Diaconate Formation': 'Lay_Person',
    'Seminary Applicant': 'Lay_Person',
    'Diaconate Inquirer': 'Lay_Person',
    'Wife': 'Lay_Person',
    'Religious': 'Religious',
    'Staff': 'Lay_Person',
    'Seminary Applicant': 'Lay_Person',
    'Archive': 'Lay_Person'
}

def update_contact_record_type(row):
    for key, value in contact_type_map.items():
        if key in row['Type(s)']:
            return value
    return None

df_contact_staging['ContactRecordType'] = df_contact_staging.apply(update_contact_record_type, axis=1)

In [135]:
# Set Candidate_Type__c field

candidate_types = ['Seminary Applicant', 'Diaconate Inquirer', 'Diaconate Formation', 'Seminarian']

# Function to update the 'candidate_type__c' column based on 'Type(s)' field
def update_candidate_type(row):
    if row['Type(s)'] in candidate_types:
        return row['Type(s)']
    return ''

# Apply the function to the DataFrame to create the new column
df_contact_staging['candidate_type__c'] = df_contact_staging.apply(update_candidate_type, axis=1)


In [136]:
# Map in the RecordTypeIDs
df_contact_staging['RecordTypeID'] = df_contact_staging['ContactRecordType'].map(record_types_mapping)

### Ecclesial Status & Ministerial Status


In [137]:
df_contact_staging

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c,npe01__Secondary_Address_Type__c,ContactRecordType,candidate_type__c,RecordTypeID
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1
2766,Priest,Transferred Out,,sabukaka,,Rev.,Stephen,,Ozovehe,Abaukaka,,Tualatin,OR,97062,,Portland,OR,97202,,Mailing,503-430-7699,,773-733-3772,Work,,,abstoz@yahoo.com,,True,False,False,False,0,,True,1967-06-07,,False,,,,,,,,,,,,,,"Diocese of Lokoja, Nigeria",,,,,,2022-05-30,2021-11-03,2021-11-04,2022-11-24,2023-01-16,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2766,Brighton Hospice Office\n8050 SW Warm Springs ...,5802 SW Milwaukie Ave Apt 4,Male,Private,Priest,,012Dx0000003p5JIAQ
2337,Staff,,,,,Mr.,Rogelio,,,Acevedo,,Portland,OR,97229,,,,,,,503-644-5264,,,,facilities@stpius.org,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2337,St. Pius X Parish\n1280 NW Saltzman Rd,,Male,Private,Lay_Person,,012Dx0000003p5HIAQ
3244,Staff,,,,,Mr.,Sean,,,Ackroyd,,Corvallis,OR,97330,,,,,,,541-757-1988,,,,,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3244,St. Mary Parish\n501 NW 25th St,,Male,Private,Lay_Person,,012Dx0000003p5HIAQ
3295,Staff,,,,,Ms.,Sherril,,,Acton,,Eugene,OR,97401,,,,,,,541-686-2234 x1524,,,,sacton@marisths.org,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3295,Marist Catholic High School\n1900 Kingsley Rd,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ
2164,Staff,,,,,Ms.,Barbara,,,Adams,,Gresham,OR,97030,,,,,,,503-665-9129,,,,adamsby@eou.edu,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2164,St. Henry Parish\n346 NW 1st St,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,Staff,,,,,Ms.,Jenny,,,Zomerdyk,,Central Point,OR,97502,,,,,,,541-664-1050,,,,churchoffice@shepherdcatholic.com,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1670,Shepherd of the Valley Parish\n600 Beebe Rd,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ
2755,Religious,,Active,dzorrilla,,Br.,Daniel,,,Zorrilla,,Saint Benedict,OR,97373,,,,,,,503-845-1181,,,,,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,2021-08-01,,,,,,2019-06-28,2021-10-10,,,,,,,,,,,,,,,,14,,,,,,False,False,False,False,2755,Félix Rougier House of Studies\nPO Box 499,,Male,Private,Religious,,012Dx0000003p5KIAQ
1962,Staff,,,,,Ms.,Kim,,,Zuber,,Sublimity,OR,97385,,,,,,,503-769-5664,,,,boniface@wvi.com,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1962,St. Boniface Parish\n375 SE Church St,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ
2202,Staff,,,,,Ms.,Agnes,,,Zueger,,Lake Oswego,OR,97034,,,,,,,503-636-7687,,,,agnesz@ollparish.com,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2202,Our Lady of the Lake Parish\n650 A Ave,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ


In [None]:
def determine_ecclesial_status(df):
    def ecclesial_status(row):
        if pd.notna(row['ADPDX_Clergy_Status__c']) and 'Laicized' in row['ADPDX_Clergy_Status__c']:
            return 'Laicized'
        # elif pd.notna(row['ADPDX_Clergy_Status__c']) and 'Faculties Withdrawn' in row['ADPDX_Clergy_Status__c']:
        #     return 'Faculties Withdrawn'
        elif pd.notna(row['Type(s)']) and 'Bishop' in row['Type(s)']:
            return 'Bishop/Archbishop'
        elif pd.notna(row['Type(s)']) and 'Priest,Religious' in row['Type(s)']:
            return 'Priest - Religious'
        elif pd.notna(row['Type(s)']) and 'Priest' in row['Type(s)'] and (not pd.isna(row['Foreign_Citizenship__c']) or row['Incardinated_Now'] != 'Archdiocese of Portland in Oregon'):
            return 'Priest - Extern'
        elif pd.notna(row['Type(s)']) and 'Priest' in row['Type(s)'] and (pd.isna(row['Foreign_Citizenship__c']) and row['Incardinated_Now'] == 'Archdiocese of Portland in Oregon'):
            return 'Priest - Diocesan'
        elif pd.notna(row['Type(s)']) and 'Transitional Deacon' in row['Type(s)']:
            return 'Transitional Deacon'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Permanent Deacon':
            return 'Permanent Deacon'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Religious' and row['Salutation'] == 'Br.':
            return 'Professed Male Religious (Non-Priest)'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Religious' and row['Salutation'] == 'Sr.':
            return 'Professed Female Religious'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Religious,Staff' and row['Salutation'] == 'Sr.':
            return 'Professed Female Religious'
        elif pd.notna(row['Type(s)']) and row['Type(s)'] == 'Wife':
            return 'Lay Person'
        elif row['Salutation'] == 'Mr.':
            return 'Lay Person'
        elif row['Salutation'] == 'Ms.':
            return 'Lay Person'
        elif row['Salutation'] == 'Mrs.':
            return 'Lay Person'
        elif row['Salutation'] == 'Rev.':
            return 'Unknown'
        else:
            return None

    df['mbfc__Ecclesial_Status__c'] = df.apply(ecclesial_status, axis=1)
    return df


df_contact_staging = determine_ecclesial_status(df_contact_staging)

In [139]:
df_contact_staging.sample(20)

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c,npe01__Secondary_Address_Type__c,ContactRecordType,candidate_type__c,RecordTypeID,mbfc__Ecclesial_Status__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1
70,Wife,,,bespinoza,,Mrs.,Blanca,,Gladys,Espinoza,,,,,,Beaverton,OR,97008.0,,,,971-998-6619,,,,,espinozacalidonio@hotmail.com,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,70,,6102 SW Valley Ave,Female,Private,Lay_Person,,012Dx0000003p5HIAQ,Lay Person
2720,Religious,,Active,,,Sr.,Bernadette,,,Wilson,,Corvallis,OR,97330.0,,,,,,,541-754-1505,,,,,,sister.bernadette@socmaria.org,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,54,,,,,,False,False,False,False,2720,Society of Mary\n540 NW 9th St,,Female,Private,Religious,,012Dx0000003p5KIAQ,Professed Female Religious
1879,Staff,,,,,Ms.,Cathy,,,Keathley,,Portland,OR,97233.0,,,,,,,503-761-8710 x1301,,,,cjvkeathley@comcast.net,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1879,St. Joseph the Worker Parish\n2310 SE 148th Ave,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ,Lay Person
1655,Staff,,,,,Ms.,Aimee,,,Henley,,Beaverton,OR,97005.0,,,,,,,503-644-2619 x163,,,,ahenley@stceciliachurch.org,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1655,St. Cecilia Parish\n5105 SW Franklin Ave,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ,Lay Person
2749,"Priest,Religious",Transferred Out,Transferred Out,jmejia,,Rev.,Jorge Luis,,,Mejia Ramirez,,,,,,,,,,,,,,,,,georgemej_6@hotmail.com,,False,False,False,False,0,,True,1981-01-03,Achi Bolivar,False,,,,,,,,,,,,,,Congragacion de la Mision Misionaros Vicentinos,,,,,,,,,,2017-05-08,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2749,,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious
3297,Staff,,,,,Ms.,Karla,,,Cardenas,,Hillsboro,OR,97123.0,,,,,,,503-648-2512,,,,kcardenas@smcshillsboro.org,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3297,St. Matthew Catholic School\n221 SE Walnut St,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ,Lay Person
1673,Staff,,,,,Ms.,Mary Anne,,,Sciolto,,Coos Bay,OR,97420.0,,,,,,,541-267-7421,,,,maryanne@saintmonicacoosbay.org,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1673,St. Monica Parish\n357 S 6th St,,Female,Private,Lay_Person,,012Dx0000003p5HIAQ,Lay Person
996,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,Aquinas,,,Costello,,,,,,,,,,,,,,,,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,18,,,,,,False,False,False,False,996,,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious
6,Wife,,,pacramer,,Mrs.,Paula,,Ann,Cramer,,,,,,Rainier,OR,97048.0,,,,503-556-0511,,,,,,,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,6,,PO Box 1057,Female,Private,Lay_Person,,012Dx0000003p5HIAQ,Lay Person
2562,Religious,,Active,,,Sr.,Kathryn,,,Knoll,,,,,,Portland,OR,97211.0,,,,503-282-0028,,,,,katknoll@gmail.com,,True,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,44,Sophia Center,,Artist and Director,Full Time,Artist and Director,False,False,False,False,2562,,4535 NE 13th Ave,Female,Private,Religious,,012Dx0000003p5KIAQ,Professed Female Religious


In [140]:
# This function is no longer used due to ADPDX's custom enhancement in which a Flow automatically updates this status. 

def determine_ministerial_status(df):
    def ministerial_status(row):
        if row['ADPDX_Clergy_Status__c'] == 'Deceased':
            return 'Deceased'
        elif row['ADPDX_Clergy_Status__c'] == 'Active':
            return 'Active in Ministry'
        elif row['ADPDX_Clergy_Status__c'] == 'Inactive':
            return 'Inactive'
        elif row['ADPDX_Clergy_Status__c'] == 'Senior Status':
            return 'Senior Status'
        elif row['ADPDX_Clergy_Status__c'] == 'Faculties Withdrawn':
            return 'Faculties Withdrawn'
        elif row['ADPDX_Clergy_Status__c'] == 'Transferred Out':
            return 'Left Diocese'
        elif row['ADPDX_Clergy_Status__c'] == 'Unassigned':
            return 'Unassigned'
        elif row['ADPDX_Clergy_Status__c'] == 'Laicized':
            return 'Laicized'
        else:
            return 'Unknown'
        
    df['mbfc__Ministerial_Status__c'] = df.apply(ministerial_status, axis=1)
    return df

# df_contact_staging = determine_ministerial_status(df_contact_staging)

### Preferred Phone & Email


In [141]:
df_contact_staging['npe01__PreferredPhone__c'] = df_contact_staging['npe01__PreferredPhone__c'].fillna('Work')
df_contact_staging['npe01__Preferred_Email__c'] = df_contact_staging['npe01__Preferred_Email__c'].fillna('Work')

### Remove Addresses for Lay People


In [142]:
def remove_mailing_address_for_lay_person(row):
    if row['ContactRecordType'] == 'Lay_Person':
        row['MailingStreet'] = ''
        row['MailingCity'] = ''
        row['MailingState'] = ''
        row['MailingPostalCode'] = ''
        row['MailingCountry'] = ''
    return row

df_contact_staging = df_contact_staging.apply(remove_mailing_address_for_lay_person, axis=1)

### Religious Congregation

In this section, for those Contacts who have a value in the `Link to Religious Community` source field we need to populate the `mbfc__Religious_Order__c` target field in Salesforce with the correct Religious Community's parent account - the Religious Congregation.

NOTE: In the source data, there is no differentiation between a child Religious Community and a parent Religious Order, there is only one record for the Religious Comnmunity. In MF360 we represent these Accounts separately so we need to first (a) get the Religious Community record using the `Link to Religious Community` value but transforming it (adding 'RelCommunities\_' in front of the value) so it matches the Archdpdx_Migration_Id\_\_c in Salesforce.

Once acquired, (b) we need to get the value of the `ParentID` field on the Religious Community which is the ID of the Religious Congregation record. That ID is the value we then want to populate in the `mbfc__Religious_Order__c` field.


In [143]:
# get SF Account
get_all_accounts = 'Select Id, Name, RecordTypeId, Type, mbfc__Parish_Code__c, Job_Id__c, Archdpdx_Migration_Id__c, ParentID from Account WHERE Archdpdx_Migration_Id__c != null'

# get list of records, add to dataframe
sf_accounts = sf.query(get_all_accounts)
df_sf_accounts = pd.DataFrame(sf_accounts['records'])
df_sf_accounts = df_sf_accounts.drop(columns = 'attributes')

# create a dict in order to apply later
accounts_id_map = df_sf_accounts.set_index('Archdpdx_Migration_Id__c')['Id'].to_dict()

In [144]:
df_sf_accounts[df_sf_accounts['Archdpdx_Migration_Id__c'].str.contains('RelCommunities', na=False)]

Unnamed: 0,Id,Name,RecordTypeId,Type,mbfc__Parish_Code__c,Job_Id__c,Archdpdx_Migration_Id__c,ParentId
228,001Dx00001HwE4dIAF,"Colombiere Jesuit Community, Portland (SJ)",012Dx0000003p52IAA,,,138,RelCommunities_1,001Dx00001HwE3TIAV
229,001Dx00001HwE4jIAF,"Franciscan Sisters of the Eucharist, Bridal Ve...",012Dx0000003p52IAA,,,138,RelCommunities_10,001Dx00001HwE3YIAV
230,001Dx00001HwE4kIAF,"Apostolic Life Community, Portland (ALCP)",012Dx0000003p52IAA,,,138,RelCommunities_11,001Dx00001HwE3ZIAV
231,001Dx00001HwE4lIAF,"Blessed Stephen Bellesini Community, c/o Chanc...",012Dx0000003p52IAA,,,138,RelCommunities_12,001Dx00001HwE3aIAF
232,001Dx00001HwE4mIAF,Canisius Jesuit Community at Jesuit High Schoo...,012Dx0000003p52IAA,,,138,RelCommunities_13,001Dx00001HwE3TIAV
...,...,...,...,...,...,...,...,...
293,001Dx00001HwE5hIAF,"Society of Our Lady of the Most Holy Trinity, ...",012Dx0000003p52IAA,,,138,RelCommunities_79,001Dx00001HwE4SIAV
294,001Dx00001HwE4hIAF,Missionaries of the Holy Spirit Provincial Hou...,012Dx0000003p52IAA,,,138,RelCommunities_8,001Dx00001HwE3WIAV
295,001Dx00001HwE5iIAF,"Community of St. Thomas More, Eugene (OP)",012Dx0000003p52IAA,,,138,RelCommunities_80,001Dx00001HwE3dIAF
296,001Dx00001HwE5jIAF,"Saint Benedict Lodge, McKenzie Bridge (OP)",012Dx0000003p52IAA,,,138,RelCommunities_81,001Dx00001HwE3dIAF


In [145]:
# applies a lambda function to each element in the ‘Link_to_Religious_Community’ column, prefixing the value with 'RelCommunities_'
def transform_religious_community_link(df):
    df['Link_to_Religious_Community'] = df['Link_to_Religious_Community'].apply(
        lambda x: None if x == '0' else f'RelCommunities_{x}'
    )
    return df

# This function searches for a record in the sf_accounts DataFrame where the ‘Archdpdx_Migration_Id__c’ column matches the given archdpdx_migration_id
def get_parent_id_from_salesforce(sf_accounts, archdpdx_migration_id):
    print(f"Searching for: {archdpdx_migration_id}")  # Debug print
    matching_record = sf_accounts[sf_accounts['Archdpdx_Migration_Id__c'] == archdpdx_migration_id]
    if not matching_record.empty:
        print(f"Found: {matching_record['ParentId'].values[0]}")  # Debug print
        return matching_record['ParentId'].values[0]
    print("Not found")  # Debug print
    return None

# uses the get_parent_id_from_salesforce function to find the ‘ParentId’ from the sf_accounts DataFrame
def update_religious_order(df, sf_accounts):
    df['mbfc__Religious_Order__c'] = df.apply(
        lambda row: get_parent_id_from_salesforce(sf_accounts, row['Link_to_Religious_Community']) 
        if row['Link_to_Religious_Community'] is not None else None, axis=1
    )
    return df


# run the transform_religious_community_link and update_religious_order functions
df_contact_staging = transform_religious_community_link(df_contact_staging)

df_contact_staging = update_religious_order(df_contact_staging, df_sf_accounts)

Searching for: RelCommunities_60
Found: 001Dx00001HwE3TIAV
Searching for: RelCommunities_53
Found: 001Dx00001HwE45IAF
Searching for: RelCommunities_9
Found: 001Dx00001HwE3XIAV
Searching for: RelCommunities_4
Found: 001Dx00001HwE3VIAV
Searching for: RelCommunities_8
Found: 001Dx00001HwE3WIAV
Searching for: RelCommunities_35
Found: 001Dx00001HwE3nIAF
Searching for: RelCommunities_1
Found: 001Dx00001HwE3TIAV
Searching for: RelCommunities_23
Found: 001Dx00001HwE3gIAF
Searching for: RelCommunities_56
Found: 001Dx00001HwE48IAF
Searching for: RelCommunities_23
Found: 001Dx00001HwE3gIAF
Searching for: RelCommunities_53
Found: 001Dx00001HwE45IAF
Searching for: RelCommunities_60
Found: 001Dx00001HwE3TIAV
Searching for: RelCommunities_1
Found: 001Dx00001HwE3TIAV
Searching for: RelCommunities_27
Found: 001Dx00001HwE3iIAF
Searching for: RelCommunities_44
Found: 001Dx00001HwE3wIAF
Searching for: RelCommunities_23
Found: 001Dx00001HwE3gIAF
Searching for: RelCommunities_44
Found: 001Dx00001HwE3wIAF
Se

In [146]:
df_contact_staging[df_contact_staging.mbfc__Religious_Order__c.isna() == False]

Unnamed: 0_level_0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c,npe01__Secondary_Address_Type__c,ContactRecordType,candidate_type__c,RecordTypeID,mbfc__Ecclesial_Status__c,mbfc__Religious_Order__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1
671,"Priest,Religious",Transferred Out,Transferred Out,jadams,,Rev.,J.,J.K.,K.,Adams,III,,,,,,,,,,,503-975-4744,,Work,jadams@jesuits.org,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,2010-06-30,,,,,,,,,,,,,,,RelCommunities_60,,,,,,False,False,False,False,671,,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious,001Dx00001HwE3TIAV
2430,Religious,,Active,,,Sr.,Delores,,,Adelman,,Beaverton,OR,97078,,Beaverton,OR,97078,,,503-644-9181,503-718-0411,,Work,,,srdeloresa@ssmo.org,Work,True,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,RelCommunities_53,,,,,,False,False,False,False,2430,Sisters of St. Mary of Oregon\n4440 SW 148th Ave,4595 SW 148th Ave,Female,Private,Religious,,012Dx0000003p5KIAQ,Professed Female Religious,001Dx00001HwE45IAF
1584,"Priest,Religious",Active,Active,makuti,,Rev.,Macdonald,,,Akuti,,Rockaway,OR,97136,,,,,,,503-355-2661,,424-410-0097,Work,padreakuti@gmail.com,makuti@archdpdx.org,,Work,True,False,False,False,0,,True,1977-08-18,"Vura Bilinyo, Uganda",True,,,Uganda,R1 (Religious Visa),2022-02-14,,,,,,,,,"Apostles of Jesus, Kenya",2019-04-25,,,,2019-05-24,2022-04-21,2020-01-10,2022-04-28,2022-11-23,,,,,,,,,,,,,,,,RelCommunities_9,"St. Mary’s by the Sea Parish, Rockaway",,Parish Ministry,Full Time,Administrator,False,False,False,False,1584,St. Mary by the Sea Parish\nPO Box 390,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious,001Dx00001HwE3XIAV
912,"Priest,Religious",Transferred Out,Transferred Out,,,Rt. Rev.,James,,,Albers,,,,,,,,,,,,,,Work,,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,RelCommunities_4,,,,,,False,False,False,False,912,,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious,001Dx00001HwE3VIAV
913,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,Jose,,,Alberto,,,,,,,,,,,,,,Work,,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,RelCommunities_8,,,,,,False,False,False,False,913,,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious,001Dx00001HwE3WIAV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2884,"Priest,Religious",Transferred Out,Transferred Out,pyoun,,Rev.,Pius,,,Youn,,Eugene,OR,97403,,,,,,,541-343-7021,520-222-8844,907-313-9028,Work,,,pius.youn@gmail.com,Work,True,False,False,False,0,,True,1987-08-31,,False,,,,,,,,,,,,,,,,2022-06-01,,,2022-05-22,2022-10-18,2022-10-10,2022-10-10,2023-01-26,2023-06-05,,,,"Korean, Spanish, Italian, Latin",,,,,,,,,,,RelCommunities_18,,,,,,False,False,False,False,2884,St. Thomas More Newman Center Parish\n1850 Eme...,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious,001Dx00001HwE3dIAF
1434,"Priest,Religious",Deceased,Deceased,,,Rev.,Jerome,,,Young,,,,,,,,,,,,,,Work,,,,Work,False,False,False,False,0,,False,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,2012-12-08,,,,,,,,,,,,RelCommunities_4,,,,,,False,False,False,False,1434,,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious,001Dx00001HwE3VIAV
1435,"Priest,Religious",Transferred Out,Transferred Out,,,Rev.,Robert,,,Young,,,,,,,,,,,,,,Work,,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,1900-01-01,,,,,,,,,,,,,,,RelCommunities_22,,,,,,False,False,False,False,1435,,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious,001Dx00001HwE3fIAF
787,"Priest,Religious",Senior Status,Retired,nzodrow,,Rt. Rev.,Nathan,,,Zodrow,,Saint Benedict,OR,97373,,,,,,,503-845-3030,503-236-4747,,Do Not Include,,,nathan.zodrow@mtangel.edu,Work,True,False,False,False,0,,False,1952-03-02,USA,False,,,,,,myezGrk2uKkxbvB0mxeVWg==,,,,,,,,Benedectines (OSB),,1974-09-08,,,,,,,,,2010-06-20,,,Spanish,,,,,,,,,,,RelCommunities_4,Mount Angel Abbey,,Curator of Art Collection / Archivist,Full Time,Curator Archivist,False,False,False,False,787,Mount Angel Abbey\n1 Abbey Dr,,Male,Private,Priest,,012Dx0000003p5JIAQ,Priest - Religious,001Dx00001HwE3VIAV


### Registered Parish

In this section we populate the 'Home Parish' target field for Contacts who have a 'Registered Parish' in the source system.

DONE: Check to see if the Registered Parish data is worth importing. Currently, 'Registered Parish' is only populated on 51 rows, and 32 of those rows in the 'Types' field are listed as 'Archive'. In other words, **only 19 of the 51 rows have a 'Registered Parish' value that might be meaningful.**


### Diocese of Incardination


In [147]:
df_contact_staging['Incardinated_Now'].sample(10)

Record Number
1102               Society of Jesus, USA West Province
2076                                               NaN
671                                                NaN
2867    Western Dominican Province, Order of Preachers
2063                      Dominicans, Western Province
2930                                               NaN
2524                                               NaN
713                  Archdiocese of Portland in Oregon
2232                                               NaN
835                  Archdiocese of Portland in Oregon
Name: Incardinated_Now, dtype: object

In [148]:
# Need to look for, then create a new Account that corresponds to a given 'Diocese of Incardination', then populate with record Id. 

def update_incardinated_accounts(sf, df, column_name, record_type_dev_name, church_type, new_column_name):
    """
    Update the DataFrame by getting or creating Salesforce accounts for the values in the specified column.
    
    For each account name:
      - If the name contains "Diocese" or "Archdiocese":
          record_type_dev_name is set to "Church" and church_type is set to "Diocese".
      - Otherwise:
          record_type_dev_name is set to "Religious" (with no church_type)
          and the account's 'mbfc__Religious_Type__c' field is updated to "Congregation".
    
    Parameters:
      sf (Salesforce): Salesforce connection object.
      df (pd.DataFrame): The DataFrame to update.
      column_name (str): The name of the column containing account names.
      record_type_dev_name (str): (Unused parameter in this updated logic)
      church_type (str): (Unused parameter in this updated logic)
      new_column_name (str): The name of the new column to store the Salesforce account IDs.
    
    Returns:
      pd.DataFrame: The updated DataFrame with the new column containing Salesforce account IDs.
    """
    df[new_column_name] = None

    for index, row in df.iterrows():
        account_name = row[column_name]
        if pd.notna(account_name):
            if "Diocese" in account_name or "Archdiocese" in account_name:
                # If the account name contains "Diocese" or "Archdiocese"
                account_id = get_or_create_account(sf, account_name, 'Church', 'Diocese')
            else:
                # For all other accounts, use "Religious" as the record type and update the Religious Type field.
                account_id = get_or_create_account(sf, account_name, 'Religious', None)
                # Update the account to set 'mbfc__Religious_Type__c' to "Congregation"
                sf.Account.update(account_id, {'mbfc__Religious_Type__c': 'Congregation'})
            
            df.at[index, new_column_name] = account_id
    
    return df

# Example usage
# sf = Salesforce(username='your_username', password='your_password', security_token='your_security_token')
df_contact_staging = update_incardinated_accounts(sf, df_contact_staging, 'Incardinated_Now', 'Church', 'Diocese', 'mbfc__Diocese_of_Incardination__c')

# This cell takes >6m to run

In [149]:
df_contact_staging[['mbfc__Diocese_of_Incardination__c', 'Incardinated_Now']].sample(20)

Unnamed: 0_level_0,mbfc__Diocese_of_Incardination__c,Incardinated_Now
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1
1464,,
3138,,
2465,,
2232,,
592,001O300000aJylSIAS,Diocese of Oakland
1498,,
140,,
247,001Dx00001HwDsgIAF,Archdiocese of Portland in Oregon
1002,,
1580,001O300000aK3d1IAC,Society of the Missionaries of St. Francis Xav...


In [150]:
# Drop the 'Incardinated Now' column 
del df_contact_staging['Incardinated_Now']


### Deceased & Date of Death

ADPDX does not have a 'Deceased' boolean other than whether or not the Date of Death column has been populated. The target application functions based on both a 'Deceased' boolean and, optionally, a 'Date of Death.'


In [151]:
# Create a new column 'npsp__Deceased__c' and set its value to True when there is a value in 'mbfc__Date_of_Death__c'
df_contact_staging['npsp__Deceased__c'] = df_contact_staging['mbfc__Date_of_Death__c'].notna()


### Final Dataframe Cleanup


In [152]:
# drop columns that are no longer needed
# del df_contact_staging['Type(s)']  # Commented this out as we want to KEEP the field and migrated to 'ADPDX Contact Type'
del df_contact_staging['ContactRecordType']
del df_contact_staging['Link_to_Religious_Community']

In [153]:
df_contact_staging = df_contact_staging.rename(columns={'Type(s)': 'ADPDX_Contact_Type__c'})

In [154]:
# convert '' to NaN
df_contact_staging.replace("", np.nan, inplace=True)

# convert NaN to None
df_contact_staging = df_contact_staging.where(df_contact_staging.notnull(), None)


In [155]:
df_contact_staging['Languages__c'].sample(20)

Record Number
1712                         None
1977                         None
1698                         None
2121                         None
2189                         None
1945                         None
710     English, Spanish, Italian
565                          None
2784                         None
476                          None
951                          None
3128                         None
2402                         None
626                          None
2460                         None
2618                         None
2173                         None
3243                         None
2903                         None
1482                         None
Name: Languages__c, dtype: object

In [156]:
# df_contact_staging_2 = df_contact_staging.where(df_contact_staging.notnull(), None)

## Load


In [157]:
df_contact_staging['Archdpdx_Job_Id__c'] = curr_job_id

In [158]:
# generate CSV for manual loading
df_contact_staging.to_csv(f'/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/staging/df_contacts_staging.csv', encoding='utf-8-sig')
df_contact_staging.to_csv('staging_files/contacts_staging.csv', encoding='utf-8-sig')


In [159]:
# upsert Contact records into SF using Bulk api

from simple_salesforce.exceptions import SalesforceMalformedRequest

bulk_data = []
for row in df_contact_staging.itertuples(index=False):
    d = row._asdict()
    # del d['Index']
    bulk_data.append(d)

try:
    # Attempt to upsert Contact records into SF using Bulk API
    contact_upsert = sf.bulk.Contact.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=500, use_serial=False)
    contact_upsert_results = pd.DataFrame(contact_upsert)
except SalesforceMalformedRequest as e:
    # If a SalesforceMalformedRequest error occurs, print the error message and response content
    print(f"SalesforceMalformedRequest error: {e}")
    print(f"Response content: {e.content}")

In [160]:
# Print upsert results to local file

keys = contact_upsert[0].keys()
with open('results_files/contact_results', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, keys)
    writer.writeheader()
    writer.writerows(contact_upsert)


# CONTACT > SPOUSES


In [161]:
# Step 1: Query Salesforce to get the mapping of Archdpdx_Migration_Id__c to Salesforce Contact IDs
query = "SELECT Id, Archdpdx_Migration_Id__c, AccountId FROM Contact WHERE Archdpdx_Migration_Id__c != null"
sf_contacts = sf.query_all(query)
sf_contacts_df = pd.DataFrame(sf_contacts['records']).drop(columns='attributes')

# Step 2: Create a dictionary from this mapping
contact_id_mapping = sf_contacts_df.set_index('Archdpdx_Migration_Id__c')['Id'].to_dict()
account_id_mapping = sf_contacts_df.set_index('Id')['AccountId'].to_dict()

# Step 3: Merge df_contact_staging_spouses with sf_contacts_df to get the 'Id' column
df_contact_staging_spouses = df_contact_staging_spouses.merge(sf_contacts_df, on='Archdpdx_Migration_Id__c', how='left')

df_contact_staging_spouses

Unnamed: 0,Type(s),ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Spouse,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,Incardinated_Now,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,Link_to_Religious_Community,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,Id,AccountId
0,Priest,Transferred Out,,sabukaka,,0,Rev.,Stephen,,Ozovehe,Abaukaka,,Tualatin,OR,97062,,Portland,OR,97202,,Mailing,503-430-7699,,773-733-3772,Work,,,abstoz@yahoo.com,,Yes,,,No,0,,Yes,1967-06-07,,,,,,,,,,,,,,,,"Diocese of Lokoja, Nigeria",,,,,,2022-05-30,2021-11-03,2021-11-04,2022-11-24,2023-01-16,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2766,Brighton Hospice Office\n8050 SW Warm Springs ...,5802 SW Milwaukie Ave Apt 4,003Dx00000nKiqAIAS,001Dx00001HwE7vIAF
1,Staff,,,,,0,Mr.,Rogelio,,,Acevedo,,Portland,OR,97229,,,,,,,503-644-5264,,,,facilities@stpius.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2337,St. Pius X Parish\n1280 NW Saltzman Rd,,003Dx00000nKiqBIAS,001Dx00001HwE7wIAF
2,Staff,,,,,0,Mr.,Sean,,,Ackroyd,,Corvallis,OR,97330,,,,,,,541-757-1988,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3244,St. Mary Parish\n501 NW 25th St,,003Dx00000nKiqCIAS,001Dx00001HwE7xIAF
3,Staff,,,,,0,Ms.,Sherril,,,Acton,,Eugene,OR,97401,,,,,,,541-686-2234 x1524,,,,sacton@marisths.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,3295,Marist Catholic High School\n1900 Kingsley Rd,,003Dx00000nKiqDIAS,001Dx00001HwE7yIAF
4,Staff,,,,,0,Ms.,Barbara,,,Adams,,Gresham,OR,97030,,,,,,,503-665-9129,,,,adamsby@eou.edu,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2164,St. Henry Parish\n346 NW 1st St,,003Dx00000nKiqEIAS,001Dx00001HwE7zIAF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3011,Staff,,,,,0,Ms.,Jenny,,,Zomerdyk,,Central Point,OR,97502,,,,,,,541-664-1050,,,,churchoffice@shepherdcatholic.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1670,Shepherd of the Valley Parish\n600 Beebe Rd,,003Dx00000nKimsIAC,001Dx00001HwEHhIAN
3012,Religious,,Active,dzorrilla,,0,Br.,Daniel,,,Zorrilla,,Saint Benedict,OR,97373,,,,,,,503-845-1181,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,2021-08-01,,,,,,2019-06-28,2021-10-10,,,,,,,,,,,,,,,,14,,,,,,False,False,False,False,2755,Félix Rougier House of Studies\nPO Box 499,,003Dx00000nKimtIAC,001Dx00001HwEHiIAN
3013,Staff,,,,,0,Ms.,Kim,,,Zuber,,Sublimity,OR,97385,,,,,,,503-769-5664,,,,boniface@wvi.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,1962,St. Boniface Parish\n375 SE Church St,,003Dx00000nKimuIAC,001Dx00001HwEHjIAN
3014,Staff,,,,,0,Ms.,Agnes,,,Zueger,,Lake Oswego,OR,97034,,,,,,,503-636-7687,,,,agnesz@ollparish.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,False,False,False,False,2202,Our Lady of the Lake Parish\n650 A Ave,,003Dx00000nKimvIAC,001Dx00001HwEHkIAN


In [162]:

# Step 4: Filter the DataFrame to only select records where the gender is male
df_contact_staging_spouses = df_contact_staging_spouses[df_contact_staging_spouses['Type(s)'] != 'Wife']

# Step 5: Use this dictionary to update the mbfc__Wife__c field in the df_contact_staging_spouses DataFrame
def update_spouse_id(row):
    spouse_id = row['Spouse']
    if pd.notna(spouse_id) and spouse_id in contact_id_mapping:
        return contact_id_mapping[spouse_id]
    return None

# Update the mbfc__Wife__c field in the df_contact_staging_spouses DataFrame
df_contact_staging_spouses['mbfc__Wife__c'] = df_contact_staging_spouses.apply(update_spouse_id, axis=1)

# Step 6: Prepare the data for the bulk update
update_data = []
for row in df_contact_staging_spouses.itertuples(index=False):
    if pd.notna(row.mbfc__Wife__c):
        update_data.append({
            'Id': row.Id,
            'mbfc__Wife__c': row.mbfc__Wife__c,
            'AccountId': account_id_mapping.get(row.mbfc__Wife__c)
        })

# Step 7: Perform the bulk update
try:
    update_results = sf.bulk.Contact.update(update_data, batch_size=100)
    update_results_df = pd.DataFrame(update_results)
    print(update_results_df)

    # Print upsert results to local file
    keys = update_results[0].keys()
    with open('results_files/spouse_update_results.csv', 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, keys)
        writer.writeheader()
        writer.writerows(update_results)

    # Convert the error messages to a DataFrame for better readability
    error_messages = []
    for result in update_results:
        if not result['success']:
            for error in result['errors']:
                error_messages.append({
                    'Id': result['id'],
                    'StatusCode': error['statusCode'],
                    'Message': error['message']
                })

    error_df = pd.DataFrame(error_messages)
    print(error_df)

    # Verify the updates
    df_contact_staging_spouses[['Spouse', 'mbfc__Wife__c']].sample(10)

except SalesforceMalformedRequest as e:
    print(f"SalesforceMalformedRequest error: {e}")
    print(f"Response content: {e.content}")

# cell takes > 1m to run

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_contact_staging_spouses['mbfc__Wife__c'] = df_contact_staging_spouses.apply(update_spouse_id, axis=1)


     success  created                  id  \
0       True    False  003Dx00000nKiqnIAC   
1       True    False  003Dx00000nKir2IAC   
2       True    False  003Dx00000nKir8IAC   
3       True    False  003Dx00000nKirIIAS   
4      False    False  003Dx00000nKisgIAC   
..       ...      ...                 ...   
119     True    False  003Dx00000nKjOWIA0   
120     True    False  003Dx00000nKjOnIAK   
121     True    False  003Dx00000nKjXdIAK   
122     True    False  003Dx00000nKjY2IAK   
123     True    False  003Dx00000nKjYEIA0   

                                                errors  
0                                                   []  
1                                                   []  
2                                                   []  
3                                                   []  
4    [{'statusCode': 'CANNOT_EXECUTE_FLOW_TRIGGER',...  
..                                                 ...  
119                                                 []  
120

# CONTACTS > PHOTOS

#TODO: Contact Photos


# CONTACT > REGISTER ENTRIES


In [163]:
import pandas as pd

# Load CSV
df = (pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/People.csv')
               .rename(columns=lambda x: x.replace(' ', '_')) # Remove whitespace in column names
               .drop(index=0) # Drops the extra row that replicates the labels
)

df

Unnamed: 0,Record_Number,Common_Name,Sort_Name,Type(s),Clergy_Status,Religious_Status,Login_ID,Password,Password_Must_be_Changed,Access_Permission,Spouse,Title,Salutation,Christian_Name,Nickname,Middle_Name(s),Surname,Suffix,Mailing_Address,Mailing_Address_2,Mailing_Address_City,Mailing_Address_State,Mailing_Address_Province,Mailing_Address_Postal_Code,Mailing_Address_Country,Private_Address,Private_Address_2,Private_Address_City,Private_Address_State,Private_Address_Province,Private_Address_Postal_Code,Private_Address_Country,Preferred_Address,Work_Phone,Home_Phone,Cell_Phone,Preferred_Phone,Work_Email,Archdiocesan_Email,Home_Email,Preferred_Email,Directory_Include,Directory_Include_Middle_Name,Directory_Include_Suffix,Suppress_From_Reports,Seminarian_Student_Debt,Seminarian_Medical_Benefits,Send_Group_Mail_and_Email,Birth_Date,Place_of_Birth,Foreign_Born,Father_Full_Name,Mother_Full_Maiden_Name,Foreign_Citizenship,Immigration_Status,Passport/Visa_Expiration_Date,Social_Security_Account_Number,Baptism_Date,Place_of_Baptism,Confirmation_Date,Place_of_Confirmation,Received_Date,Parish_of_Record,Marriage_Date,Place_of_Marriage,Date_of_First_Vows,Date_of_Final_Vows,Accepted_to_Formation_Date,Reader_Date,Acolyte_Date,Candidacy_Date,Formation_Withdrawn_Date,Formation_Deferred_Date,Formation_Terminated_Date,Terminate_or_Defer_Note,Bachelor_Degree_Year,Bachelor_Degree_Type,Bachelor_Degree_Institution,Graduate_1_Degree_Year,Graduate_1_Degree_Type,Graduate_1_Degree_Institution,Graduate_2_Degree_Year,Graduate_2_Degree_Type,Graduate_2_Degree_Institution,Graduate_3_Degree_Year,Graduate_3_Degree_Type,Graduate_3_Degree_Institution,Graduate_4_Degree_Year,Graduate_4_Degree_Type,Graduate_4_Degree_Institution,CARA_Highest_Ed_Level,Diaconal_Ordination_Date,Diaconal_Ordination_Place,Diaconal_Ordination_Prelate,Presbyteral_Ordination_Date,Presbyteral_Ordination_Place,Presbyteral_Ordination_Prelate,Episcopal_Ordination_Date,Episcopal_Ordination_Place,Episcopal_Ordination_Prelate,Ordination_Diocese,Incardinated_From_Diocese,Incardinated_From_Date,Incardinated_Now,Serving_Now,Excardinated_To_Diocese,Excardinated_To_Date,Letter_of_Good_Standing_Date,Religious_In_Archdiocese_Date,Faculties,Faculties_Granted_Date,Faculties_Restricted_Date,Faculties_Withdrawn_Date,Last_Retreat_Date,Last_Educ_Requirement_Date,Policy_Manual_Acknowledgement_Date,Harassment_Prevention_Course_Date,Standards_of_Conduct_Date,Last_Background_Check_Date,Last_Child_Protection_Training_Date,Out_of_Diocese_Date,Senior_Status_Date,Laicized_Date,Deceased_Date,Languages,Coverage_Availability,Advanced_Directive_Date,End_of_Life_Plan_Date,Will_Date,Will_Note,CIC_489_File,Registered_Parish,CARA_Ethnicity,Seminarian_Status,Other_Diaconal_Ministry,Spiritual_Director_Authorized,Link_to_Religious_Community,Place_of_Work,Volunteer_Place,Type_of_Work,Work_Load,Work_Title
1,2766,Rev. Stephen Abaukaka,abaukaka stephen ozovehe,Priest,Transferred Out,,sabukaka,def2a990be60a7998b1ed7c820101f3bd02d33b8992518...,Yes,,0,Rev.,Fr.,Stephen,,Ozovehe,Abaukaka,,Brighton Hospice Office,8050 SW Warm Springs St Ste 205,Tualatin,OR,,97062,,5802 SW Milwaukie Ave Apt 4,,Portland,OR,,97202,,Mailing,503-430-7699,,773-733-3772,Work,,,abstoz@yahoo.com,,Yes,,,No,0,,Yes,1967-06-07,,,,,,,,,,,,,,,,,,,,,,,,,,,1996,Theology,,2013,MA Pastoral Studies,Chicago Theological Union,,,,,,,,,,,,,,1997-05-03,,"Diocese of Lokoja, Nigeria",,,,"Diocese of Lokoja, Nigeria",,,"Diocese of Lokoja, Nigeria","Diocese of Lokoja, Nigeria",,,,,Confessional,2021-11-02,,,,,,2022-05-30,2021-11-03,2021-11-04,2022-11-24,2023-01-16,,,,,,,,,,,0,,,,,0,,,,,
2,2337,Mr. Rogelio Acevedo,acevedo rogelio,Staff,,,,,,,0,Mr.,Mr.,Rogelio,,,Acevedo,,St. Pius X Parish,1280 NW Saltzman Rd,Portland,OR,,97229,,,,,,,,,,503-644-5264,,,,facilities@stpius.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
3,3244,Mr. Sean Ackroyd,ackroyd sean,Staff,,,,,,,0,Mr.,Mr.,Sean,,,Ackroyd,,St. Mary Parish,501 NW 25th St,Corvallis,OR,,97330,,,,,,,,,,541-757-1988,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
4,3295,Ms. Sherril Acton,acton sherril,Staff,,,,,,,0,Ms.,Ms.,Sherril,,,Acton,,Marist Catholic High School,1900 Kingsley Rd,Eugene,OR,,97401,,,,,,,,,,541-686-2234 x1524,,,,sacton@marisths.org,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
5,2164,Ms. Barbara Adams,adams barbara,Staff,,,,,,,0,Ms.,Ms.,Barbara,,,Adams,,St. Henry Parish,346 NW 1st St,Gresham,OR,,97030,,,,,,,,,,503-665-9129,,,,adamsby@eou.edu,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3012,1670,Ms. Jenny Zomerdyk,zomerdyk jenny,Staff,,,,,,,0,Ms.,Ms.,Jenny,,,Zomerdyk,,Shepherd of the Valley Parish,600 Beebe Rd,Central Point,OR,,97502,,,,,,,,,,541-664-1050,,,,churchoffice@shepherdcatholic.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
3013,2755,"Br. Daniel Zorrilla, MSpS",zorrilla daniel,Religious,,Active,dzorrilla,391eedf7c936f63d3d0a7d9ea7e506a84709662fd31ba9...,Yes,,0,Br.,Br.,Daniel,,,Zorrilla,,Félix Rougier House of Studies,PO Box 499,Saint Benedict,OR,,97373,,,,,,,,,,503-845-1181,,,,,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021-08-01,,,,,,,,,,2019-06-28,2021-10-10,,,,,,,,,,,,0,,,,,14,,,,,
3014,1962,Ms. Kim Zuber,zuber kim,Staff,,,,,,,0,Ms.,Ms.,Kim,,,Zuber,,St. Boniface Parish,375 SE Church St,Sublimity,OR,,97385,,,,,,,,,,503-769-5664,,,,boniface@wvi.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,
3015,2202,Ms. Agnes Zueger,zueger agnes,Staff,,,,,,,0,Ms.,Ms.,Agnes,,,Zueger,,Our Lady of the Lake Parish,650 A Ave,Lake Oswego,OR,,97034,,,,,,,,,,503-636-7687,,,,agnesz@ollparish.com,,,,,,,,0,,Yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,0,,,,,


In [164]:
# Import all Contact fields that actually map to Register Entry records

import pandas as pd

# Define the structure of your column sets with correct attribute names
column_sets = [
    {'date': 'Baptism_Date', 'place': 'Place_of_Baptism', 'notation_type': 'Proof of Baptism'},
    {'date': 'Confirmation_Date', 'place': 'Place_of_Confirmation', 'notation_type': 'Notice of Confirmation'},
    {'date': 'Received_Date', 'place': 'Parish_of_Record', 'notation_type': 'Notice of Profession of Faith'},
    {'date': 'Marriage_Date', 'place': 'Place_of_Marriage', 'notation_type': 'Notice of Matrimony', 'spouse': 'Spouse'},
    {'date': 'Diaconal_Ordination_Date', 'place': 'Diaconal_Ordination_Place', 'prelate': 'Diaconate_Ordination_Prelate', 'notation_type': 'Notice of Holy Orders', 'ordination_type': 'Diaconate'},
    {'date': 'Presbyteral_Ordination_Date', 'place': 'Presbyteral_Ordination_Place', 'prelate': 'Presbyteral_Ordination_Prelate', 'notation_type': 'Notice of Holy Orders', 'ordination_type': 'Presbyteral'},
    {'date': 'Episcopal_Ordination_Date', 'place': 'Episcopal_Ordination_Place', 'prelate': 'Episcopal_Ordination_Prelate', 'notation_type': 'Notice of Holy Orders', 'ordination_type': 'Episcopal'}
]

# New DataFrame for entries
register_entries = pd.DataFrame(columns=['RecordNumber', 'mbfc__Register_Entry_Type__c', 'mbfc__Type__c', 'mbfc__Notation_Type__c', 'mbfc__Ordination_Type__c', 'Date', 'Place', 'Prelate', 'mbfc__Matrimonial_Wife__c'])
new_entries = []  # List to store entries before final concatenation

# Processing rows
for row in df.itertuples():
    for column_set in column_sets:
        date_value = getattr(row, column_set['date'], None)
        if pd.notna(date_value):  # Check if date field is not NaN
            entry = {
                'RecordNumber': getattr(row, 'Record_Number', None),
                'Date': date_value,
                'Place': getattr(row, column_set['place'], None)
            }
            # Add Prelate if applicable
            if 'prelate' in column_set:
                entry['Prelate'] = getattr(row, column_set['prelate'], None)

            # Set 'mbfc__Register_Entry_Type__c', and conditionally add 'mbfc__Type__c' or 'mbfc__Notation_Type__c'
            if 'sacrament_type' in column_set:
                entry['mbfc__Type__c'] = column_set['sacrament_type']
                entry['mbfc__Register_Entry_Type__c'] = 'Sacrament'
            if 'notation_type' in column_set:
                entry['mbfc__Notation_Type__c'] = column_set['notation_type']
                entry['mbfc__Register_Entry_Type__c'] = 'Notation'

            # Handle ordination type specific updates
            if 'ordination_type' in column_set:
                entry['mbfc__Ordination_Type__c'] = column_set['ordination_type']

            # Add Spouse if applicable
            if 'spouse' in column_set:
                entry['mbfc__Matrimonial_Wife__c'] = getattr(row, column_set['spouse'], None)

            new_entries.append(entry)
    
    # Add entries for 'Reader Date'
    # reader_date = getattr(row, 'Reader_Date', None)
    # if pd.notna(reader_date):
    #     entry = {
    #         'RecordNumber': getattr(row, 'Record_Number', None),
    #         'Date': reader_date,
    #         'mbfc__Notation_Type__c': 'Notice of Holy Orders',
    #         'mbfc__Ordination_Type__c': 'Minor Order: Reader',
    #         'mbfc__Register_Entry_Type__c': 'Notation'
    #     }
    #     new_entries.append(entry)
    
    # # Add entries for 'Acolyte Date'
    # acolyte_date = getattr(row, 'Acolyte_Date', None)
    # if pd.notna(acolyte_date):
    #     entry = {
    #         'RecordNumber': getattr(row, 'Record_Number', None),
    #         'Date': acolyte_date,
    #         'mbfc__Notation_Type__c': 'Notice of Holy Orders',
    #         'mbfc__Ordination_Type__c': 'Minor Order: Acolyte',
    #         'mbfc__Register_Entry_Type__c': 'Notation'
    #     }
    #     new_entries.append(entry)

# Concatenate all new entries to the DataFrame at once
if new_entries:
    register_entries = pd.concat([register_entries, pd.DataFrame(new_entries)], ignore_index=True)

print(f"Total records added: {len(register_entries)}")

# Optionally, save the new DataFrame to a CSV
register_entries.to_csv('Register_Entries.csv', index=False)

# Display the DataFrame
register_entries.sample(10)


Total records added: 1534


Unnamed: 0,RecordNumber,mbfc__Register_Entry_Type__c,mbfc__Type__c,mbfc__Notation_Type__c,mbfc__Ordination_Type__c,Date,Place,Prelate,mbfc__Matrimonial_Wife__c
225,3076,Notation,,Proof of Baptism,,1978-02-26,"St. Mary Parish, Farmington, NM",,
1035,2813,Notation,,Notice of Confirmation,,2016-05-25,"St. Mary’s , Eugene, OR",,
6,557,Notation,,Notice of Holy Orders,Diaconate,2015-05-23,"Cathedral of the Immaculate Conception, Portla...",,
152,428,Notation,,Notice of Confirmation,,1982-04-28,,,
666,26,Notation,,Notice of Matrimony,,2004-11-26,,,0.0
154,428,Notation,,Notice of Matrimony,,1992-06-20,,,0.0
637,298,Notation,,Notice of Holy Orders,Diaconate,1997-11-29,"Cathedral of the Immaculate Conception, Portla...",,
490,3007,Notation,,Notice of Holy Orders,Presbyteral,2020-05-02,St. Stephen Byzantine Catholic Cathedral,,
891,223,Notation,,Proof of Baptism,,1944-04-15,"Parroquia de San Miguelito, San Luis Potosí, M...",,
739,255,Notation,,Notice of Matrimony,,1972-06-24,,,55.0


### Populate Lookup for Prelate


In [165]:
from nameparser import HumanName
from nameparser.config import CONSTANTS

# Add dataset-specific Titles and Suffix constants for parsing
CONSTANTS.titles.add('Very', 'Rev.', 'Very Rev.', 'Sr.', 'Most Rev.')
CONSTANTS.suffix_acronyms.add('FRS', 'J.C.L.', 'J.C.L., D.D.', 'D.D.', 'OMI', 'OSA', 'OCD', 'OP', 'OC', 'FSE', 'OMV', 'SDB', 'SM', 'SFX', 'SP', 'OP', 'O.S.M', 'SNJM', 'OSF', 'HMRF', 'DD', 'CSJP', 'SDD', 'BVM', 'BVM - President', 'SJ', 'SL', 'IX', 'SSJ', 'J.C.L.', 'J.C.L', 'OFM', 'MSpS', 'Fco.' )


def parse_name(name):
    if pd.isna(name):  # Checks if the name is NaN or None
        return {
            'Salutation': '',
            'FirstName': '',
            'MiddleName': '',
            'LastName': '',
            'Suffix': ''
        }
    else:
        name = HumanName(name)
        return {
            'Salutation': name.title,
            'FirstName': name.first,
            'MiddleName': name.middle,
            'LastName': name.last,
            'Suffix': name.suffix
        }

# Apply the parsing function only where 'Prelate' exists and is not NaN
for entry in new_entries:
    if 'Prelate' in entry and pd.notna(entry['Prelate']):
        parsed_name = parse_name(entry['Prelate'])
        entry.update(parsed_name)

# Ensure the DataFrame creation from new_entries includes checks for existence of keys:
register_entries = pd.DataFrame(new_entries)
if 'Prelate' in register_entries.columns:
    register_entries['Salutation'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['Salutation'] if pd.notna(x) else '')
    register_entries['FirstName'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['FirstName'] if pd.notna(x) else '')
    register_entries['MiddleName'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['MiddleName'] if pd.notna(x) else '')
    register_entries['LastName'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['LastName'] if pd.notna(x) else '')
    register_entries['Suffix'] = register_entries['Prelate'].apply(lambda x: parse_name(x)['Suffix'] if pd.notna(x) else '')


# Display the DataFrame
print(f"Total records added: {len(register_entries)}")
register_entries.sample(10)



Total records added: 1534


Unnamed: 0,RecordNumber,Date,Place,Prelate,mbfc__Notation_Type__c,mbfc__Register_Entry_Type__c,mbfc__Ordination_Type__c,Salutation,FirstName,MiddleName,LastName,Suffix,mbfc__Matrimonial_Wife__c
966,664,2017-06-03,"Cathedral of the Immaculate Conception, Portla...","Most Rev. Alexander K. Sample, J.C.L., D.D.",Notice of Holy Orders,Notation,Presbyteral,,J.C.L.,,Most Rev. Alexander K. Sample,D.D.,
686,1144,1970-06-13,St. Charles Parish,Most Rev. Robert Dwyer,Notice of Holy Orders,Notation,Presbyteral,Most Rev.,Robert,,Dwyer,,
107,757,1980-05-31,"Santa Maria, CA",Most Rev. Thaddeus Shubsda,Notice of Holy Orders,Notation,Presbyteral,Most Rev.,Thaddeus,,Shubsda,,
566,1532,1995-02-18,,,Notice of Matrimony,Notation,,,,,,,0.0
1067,75,1973-04-21,,,Notice of Matrimony,Notation,,,,,,,315.0
1165,38,1973-12-15,,,Notice of Matrimony,Notation,,,,,,,102.0
404,3316,1979-10-18,,,Proof of Baptism,Notation,,,,,,,
574,697,2013-06-08,"Cathedral of the Immaculate Conception, Portla...","Most Rev. Alexander K. Sample, J.C.L., D.D.",Notice of Holy Orders,Notation,Presbyteral,,J.C.L.,,Most Rev. Alexander K. Sample,D.D.,
422,456,1963-01-20,"St. Mark, Eugene, OR",,Proof of Baptism,Notation,,,,,,,
1442,2866,2003-07-05,"St. Joseph, Big Bear City, CA",,Proof of Baptism,Notation,,,,,,,


In [166]:
# Query Salesforce for existing contacts and create a dictionary for mapping

from simple_salesforce import Salesforce

query = """
SELECT Id, Archdpdx_Migration_Id__c
FROM Contact
"""
result = sf.query_all(query)
contact_map = {rec['Archdpdx_Migration_Id__c']: rec['Id'] for rec in result['records']}


In [167]:
# Get RecordTypeId for Contact.Priest
priest_contact_recordtype_id = get_recordtype_id(df_sf_recordTypes, 'Priest', 'Contact', 'mbfc')

priest_contact_recordtype_id

'012Dx0000003p5JIAQ'

In [168]:
# Get RecordID for Prelates by querying for Contacts by FirstName and LastName and, if not found, Create New Contacts

from simple_salesforce import SFType, SalesforceResourceNotFound

contact = SFType('Contact', sf.session_id, sf.sf_instance)
for index, row in register_entries.iterrows():
    first_name, last_name = row.get('FirstName'), row.get('LastName')

    if pd.isna(first_name) or pd.isna(last_name) or first_name.strip() == '' or last_name.strip() == '':
        # If either first name or last name is missing or empty, skip this row or handle as needed
        print(f"Skipping row {index} due to missing name information.")
        continue

    try:
        # Search for contact by First and Last Name
        query = f"SELECT Id FROM Contact WHERE FirstName = '{first_name}' AND LastName = '{last_name}'"
        result = sf.query(query)
        if result['totalSize'] > 0:
            contact_id = result['records'][0]['Id']
        else:
            # Create a new contact if no match found
            new_contact = {
                'Salutation': "Most Rev.",
                'FirstName': first_name,
                'LastName': last_name,
                'Archdpdx_Job_Id__c': curr_job_id,
                'RecordTypeId': priest_contact_recordtype_id,
                'mbfc__Ecclesial_Status__c': "Bishop/Archbishop"
            }
            create_result = contact.create(new_contact)
            contact_id = create_result['id']

        # Update DataFrame with the Salesforce Contact ID
        register_entries.at[index, 'mbfc__Celebrant__c'] = contact_id

    except SalesforceException as e:
        print(f"Error processing row {index}: {e}")

# cell tak

Skipping row 2 due to missing name information.
Skipping row 3 due to missing name information.
Skipping row 4 due to missing name information.
Skipping row 5 due to missing name information.
Skipping row 6 due to missing name information.
Skipping row 8 due to missing name information.
Skipping row 9 due to missing name information.
Skipping row 10 due to missing name information.
Skipping row 11 due to missing name information.
Skipping row 12 due to missing name information.
Skipping row 13 due to missing name information.
Skipping row 16 due to missing name information.
Skipping row 17 due to missing name information.
Skipping row 18 due to missing name information.
Skipping row 19 due to missing name information.
Skipping row 20 due to missing name information.
Skipping row 21 due to missing name information.
Skipping row 22 due to missing name information.
Skipping row 23 due to missing name information.
Skipping row 24 due to missing name information.
Skipping row 26 due to miss

### Prepare to Upsert


In [169]:
# Map Contact IDs to Register Entries

register_entries_2 = register_entries

register_entries_2['mbfc__Contact__c'] = register_entries['RecordNumber'].map(contact_map)


In [170]:
# Append Job_Id__c
register_entries_2['Archdpdx_Job_Id__c'] = curr_job_id

In [171]:
# Generate an External ID
def create_external_id(row):
    record_number = str(row['RecordNumber']).replace(' ', '').replace('-', '')
    entry_type = str(row['mbfc__Register_Entry_Type__c']).replace(' ', '').replace('-', '')

    # Check whether to use Type or Notation Type based on what's available
    if 'mbfc__Type__c' in row and not pd.isna(row['mbfc__Type__c']):
        type_field = str(row['mbfc__Type__c']).replace(' ', '').replace('-', '')
    elif 'mbfc__Notation_Type__c' in row and not pd.isna(row['mbfc__Notation_Type__c']):
        type_field = str(row['mbfc__Notation_Type__c']).replace(' ', '').replace('-', '') + str(row['mbfc__Ordination_Type__c']).replace(' ', '').replace('-', '')
    else:
        type_field = 'Unknown'

    return f"{record_number}_{entry_type}_{type_field}"

In [172]:
# Assuming your DataFrame is named `register_entries`
register_entries_2['Archdpdx_Migration_Id__c'] = register_entries.apply(create_external_id, axis=1)

if register_entries['Archdpdx_Migration_Id__c'].duplicated().any():
    print("Warning: There are duplicate external IDs.")
    # Optionally, show the duplicates
    duplicates = register_entries[register_entries['external_id'].duplicated(keep=False)]
    print(duplicates)
else:
    print("All external IDs are unique.")


All external IDs are unique.


In [173]:
# Drop unnecessary columns:
register_entries_2.drop(['RecordNumber', 'Prelate', 'Salutation', 'FirstName', 'MiddleName', 'LastName', 'Suffix'], axis=1, inplace=True)

In [174]:
register_entries_staging = register_entries_2

In [175]:
# Remove all NaN values:
register_entries_staging.fillna('', inplace=True)

# Rename columns
register_entries_staging = register_entries_staging.rename(columns={
    'Place': 'mbfc__Location_text__c',
    'Date': 'mbfc__Event_Date__c'
})


In [176]:
# What is this checking for?... Why did I include this?
register_entries_staging[register_entries_staging.mbfc__Contact__c == '003Dx00000m0OtXIAU']


Unnamed: 0,mbfc__Event_Date__c,mbfc__Location_text__c,mbfc__Notation_Type__c,mbfc__Register_Entry_Type__c,mbfc__Ordination_Type__c,mbfc__Matrimonial_Wife__c,mbfc__Celebrant__c,mbfc__Contact__c,Archdpdx_Job_Id__c,Archdpdx_Migration_Id__c


In [177]:
# generate CSV for manual loading
register_entries_staging.to_csv('staging_files/reg_entry_staging.csv', encoding='utf-8-sig')


In [178]:
# Upsert Register Entry Records

bulk_data = []
for row in register_entries_staging.itertuples(index=False):
    d = row._asdict()
    # del d['Index']
    bulk_data.append(d)

# Keep the batch <100 as I've been getting an exceptionCode: 'InvalidBatch', 'exceptionMessage': 'Records not processed'
reg_entry_upsert = sf.bulk.mbfc__Register_Entry__c.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=100, use_serial=False)
reg_entry_upsert_results = pd.DataFrame(reg_entry_upsert)

In [179]:
# Print upsert results to local file

keys = reg_entry_upsert[0].keys()

with open('results_files/register_entry_results', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, keys)
    writer.writeheader()
    writer.writerows(reg_entry_upsert)

# CONTACT > AFFILIATIONS


In [180]:
# Function to create a unique ID based on Person's Name + completion date or start date + affiliation type
def create_unique_id(row):
    # Get values, handling NaNs
    person_id = str(row.get('mbfc__Person__c', '')).strip()
    
    # Check for completion date, and if it's blank, use the start date
    completion_date = row.get('mbfc__Completion_Date__c', '')
    if pd.isna(completion_date) or completion_date == '':
        completion_date = row.get('mbfc__Start_Date__c', '')
    
    completion_date = str(completion_date).strip()
    affiliation = str(row.get('mbfc__Affiliation__c', '')).strip()
    
    # Concatenate the three fields
    combined = f"{person_id}{completion_date}{affiliation}"
    
    # Remove unwanted characters and convert to lowercase
    clean_id = ''.join(combined.split()).replace('-', '').replace('.', '').lower()
    
    # Limit the string to 50 characters
    return clean_id[:50]

## Education Affiliations

This section takes multiple sets of columns (all related to a person's education) from the Contacts table, and combines them into a single set of columns in a new dataframe for insertion into Salesforce as Affiliation records.


In [181]:
# Parse and stage Education Affiliation records
import pandas as pd
from functools import lru_cache

# Load CSV
df = (pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/People.csv')
               .rename(columns=lambda x: x.replace(' ', '_')) # Remove whitespace in column names
               .drop(index=0) # Drops the extra row that replicates the labels
)


# Define the structure of your column sets with correct attribute names
degree_sets = [
    {'year': 'Bachelor_Degree_Year', 'type': 'Bachelor_Degree_Type', 'institution': 'Bachelor_Degree_Institution'},
    {'year': 'Graduate_1_Degree_Year', 'type': 'Graduate_1_Degree_Type', 'institution': 'Graduate_1_Degree_Institution'},
    {'year': 'Graduate_2_Degree_Year', 'type': 'Graduate_2_Degree_Type', 'institution': 'Graduate_2_Degree_Institution'},
    {'year': 'Graduate_3_Degree_Year', 'type': 'Graduate_3_Degree_Type', 'institution': 'Graduate_3_Degree_Institution'},
    {'year': 'Graduate_4_Degree_Year', 'type': 'Graduate_4_Degree_Type', 'institution': 'Graduate_4_Degree_Institution'}
]

# Query for the Record Type ID for 'Organization'
record_type_result = sf.query("SELECT Id FROM RecordType WHERE SobjectType = 'Account' AND DeveloperName = 'Organization' AND NamespacePrefix = 'mbfc'")
organization_record_type_id = record_type_result['records'][0]['Id'] if record_type_result['records'] else None

# Initialize the DataFrame for the staging table
education_staging = pd.DataFrame()

# Function to check and create institution account
@lru_cache(maxsize=None)
def get_or_create_institution_account(institution_name):
    if pd.isna(institution_name):
        return None  # Return None or handle as appropriate if institution name is NaN

    # Query Salesforce to find the institution
    query = f"SELECT Id, Name FROM Account WHERE Name = '{institution_name}' LIMIT 1"
    results = sf.query(query)
    
    # If exists, return the ID
    if results['records']:
        return results['records'][0]['Id']
    else:
        # Ensure no NaN values are sent to Salesforce
        account_data = {
            'Name': institution_name if pd.notna(institution_name) else "Default Name",  # Provide a default if NaN
            'RecordTypeId': organization_record_type_id,
            'mbfc__Organization_Type__c': 'School'
        }
        # Remove keys with None values to avoid JSON serialization issues
        account_data = {k: v for k, v in account_data.items() if v is not None}
        
        new_account = sf.Account.create(account_data)
        return new_account['id']

# Get Contact record ID from Salesforce
@lru_cache(maxsize=None)
def get_contact_id_by_record_number(record_number):
    if pd.isna(record_number):
        return None
    query = f"SELECT Id FROM Contact WHERE Archdpdx_Migration_Id__c = '{record_number}'"
    results = sf.query(query)
    if results['records']:
        return results['records'][0]['Id']
    return None


# Initialize an empty list to collect DataFrames or dictionaries
new_entries = []

# Process each row and each degree set
for index, row in df.iterrows():
    for degree_set in degree_sets:
        year = row[degree_set['year']]
        if pd.notna(year):  # Only proceed if the year column is not NaN
            formatted_year = f"{int(year)}-01-01"  # Convert year to YYYY-MM-DD format
            institution_name = row[degree_set['institution']]
            account_id = get_or_create_institution_account(institution_name)
            contact_id = get_contact_id_by_record_number(row['Record_Number'])
            
            # Create a record for the staging table
            affiliation_record = {
                'mbfc__Person__c': contact_id,
                'mbfc__Completion_Date__c': formatted_year,
                'mbfc__Context__c': account_id,
                'mbfc__Category__c': 'Education/Studies',
                'mbfc__Affiliation__c': row[degree_set['type']]
            }
            new_entries.append(affiliation_record)

# Convert all collected records to a DataFrame in one go
education_staging = pd.DataFrame(new_entries)


#FIXME: There are 4 rows where no INSTITUTION is listed. This makes it impossible to import an Affiliation record. Need to figure out how to handle this with Client. 
#FIXME: There are about 15 rows where no DEGREE is listed. This makes it impossible to import an Affiliation record. Need to figure out how to handle this with Client. 

# Cell takes approx. 2m to run

In [182]:
# Apply the function to each row and create a new column with the unique ID
education_staging['Archdpdx_Migration_Id__c'] = education_staging.apply(create_unique_id, axis=1)

# Check the first few rows to verify the new column
education_staging.head()

Unnamed: 0,mbfc__Person__c,mbfc__Completion_Date__c,mbfc__Context__c,mbfc__Category__c,mbfc__Affiliation__c,Archdpdx_Migration_Id__c
0,003Dx00000nKiqAIAS,1996-01-01,,Education/Studies,Theology,003dx00000nkiqaias19960101theology
1,003Dx00000nKiqAIAS,2013-01-01,001Dx00001HwExyIAF,Education/Studies,MA Pastoral Studies,003dx00000nkiqaias20130101mapastoralstudies
2,003Dx00000nKiqFIAS,1976-01-01,001Dx00001HwF0RIAV,Education/Studies,BA Liberal Arts,003dx00000nkiqfias19760101baliberalarts
3,003Dx00000nKiqFIAS,1980-01-01,001Dx00001HwF0WIAV,Education/Studies,M.Div.,003dx00000nkiqfias19800101mdiv
4,003Dx00000nKiqUIAS,2004-01-01,001Dx00001HwF0bIAF,Education/Studies,"Bachelor, Philosophy","003dx00000nkiquias20040101bachelor,philosophy"


In [183]:
# Fill any NaN values
education_staging = education_staging.fillna('')

In [184]:
# Save the staging table to CSV
education_staging.to_csv('staging_files/education_staging.csv', index=False)


In [185]:
import pandas as pd
import numpy as np
from simple_salesforce import Salesforce, SalesforceMalformedRequest, SalesforceError
from datetime import datetime, date



# def upsert_to_salesforce(sf, dataframe, object_name, external_id_field):
#     """
#     Upsert records to Salesforce from a pandas DataFrame.

#     Parameters:
#     sf (Salesforce): The Salesforce connection instance.
#     dataframe (pd.DataFrame): The pandas DataFrame containing data to upsert.
#     object_name (str): The Salesforce object name (e.g., 'Contact').
#     external_id_field (str): The external ID field used for upserts.
#     """
#     successful_upserts = 0
#     failed_upserts = 0

#     # Replace placeholder values with None in the DataFrame
#     dataframe.replace({None: pd.NA, ' ': None, '': None}, inplace=True)

#     # Convert DataFrame to a list of dictionaries
#     data_to_upsert = dataframe.to_dict(orient='records')

#     for data in data_to_upsert:
#         try:
#             data = convert_non_serializables(data)
#             external_id = data.pop(external_id_field)

#             # Perform upsert using only the External ID
#             response = getattr(sf, object_name).upsert(f'{external_id_field}/{external_id}', data)
#             successful_upserts += 1
#             print(f"Successfully upserted {object_name} with External ID: {external_id}")
#         except SalesforceMalformedRequest as e:
#             failed_upserts += 1
#             print(f"Malformed request error when upserting {object_name} with External ID: {external_id}. Error: {e.content}")
#         except SalesforceError as e:
#             failed_upserts += 1
#             print(f"Salesforce error when upserting {object_name} with External ID: {external_id}. Error: {e.content}")
#         except Exception as e:
#             failed_upserts += 1
#             print(f"Failed to upsert {object_name} with External ID: {external_id}. Error: {e}")

#     print(f"Upsert completed. Successful upserts: {successful_upserts}, Failed upserts: {failed_upserts}")

# def convert_non_serializables(data):
#     """Convert non-serializable objects to serializable formats."""
#     for key, value in data.items():
#         try:
#             if isinstance(value, (datetime, date)):
#                 data[key] = value.isoformat()
#             elif isinstance(value, float) and np.isnan(value):
#                 data[key] = None
#             elif pd.isna(value):
#                 data[key] = None
#             elif isinstance(value, (int, bool, str)):
#                 data[key] = value
#             else:
#                 data[key] = str(value)  # Convert other types to string
#         except Exception as e:
#             print(f"Error processing key: {key}, value: {value}, error: {e}")
#     return data

# def upsert_to_salesforce_bulk(sf, dataframe, object_name, external_id_field, failed_log_file, batch_size=10000):
#     """
#     Upsert records to Salesforce from a pandas DataFrame using the Bulk API.

#     Parameters:
#     sf (Salesforce): The Salesforce connection instance.
#     dataframe (pd.DataFrame): The pandas DataFrame containing data to upsert.
#     object_name (str): The Salesforce object name (e.g., 'Contact').
#     external_id_field (str): The external ID field used for upserts.
#     failed_log_file (str): The file name where failed upsert records will be logged.
#     batch_size (int): The number of records to include in each batch.
#     """
#     successful_upserts = 0
#     failed_upserts = 0

#     # Replace placeholder values with None in the DataFrame
#     dataframe.replace({None: pd.NA, ' ': None, '': None}, inplace=True)

#     # Convert DataFrame to a list of dictionaries
#     data_to_upsert = dataframe.to_dict(orient='records')

#     with open(failed_log_file, 'a') as log_file:
#         # Process data in batches
#         for i in range(0, len(data_to_upsert), batch_size):
#             batch_data = data_to_upsert[i:i + batch_size]
#             batch_data = [convert_non_serializables(record) for record in batch_data]

#             try:
#                 # Perform bulk upsert
#                 response = sf.bulk.__getattr__(object_name).upsert(batch_data, external_id_field=external_id_field)

#                 for res in response:
#                     if res['success']:
#                         successful_upserts += 1
#                     else:
#                         failed_upserts += 1
#                         log_file.write(f"Failed to upsert record: {res}\n")

#             except SalesforceMalformedRequest as e:
#                 failed_upserts += len(batch_data)
#                 log_file.write(f"Malformed request error when upserting batch. Error: {e.content}\n")
#             except SalesforceError as e:
#                 failed_upserts += len(batch_data)
#                 log_file.write(f"Salesforce error when upserting batch. Error: {e.content}\n")
#             except Exception as e:
#                 failed_upserts += len(batch_data)
#                 log_file.write(f"Failed to upsert batch. Error: {e}\n")

#     print(f"Upsert completed. Successful upserts: {successful_upserts}, Failed upserts: {failed_upserts}")


In [186]:
# Upsert Education Affiliation records

# upsert_to_salesforce(sf, education_staging, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c')
upsert_to_salesforce_bulk(sf, education_staging, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c', 'results_files/education_affil', batch_size=1000)


Batch 1 processed: 669 successful, 29 failed.
Upsert completed. Total records processed: 698, Batches: 1, Successful upserts: 669, Failed upserts: 29


In [187]:

#FIXME: A number of Education Affiliation records are missing either an Affiliation title or a Context

In [188]:
# Upsert Education Affiliation records [DEP]

# bulk_data = []
# for row in education_staging.itertuples(index=False):
#     d = row._asdict()
#     # del d['Index']
#     bulk_data.append(d)

# try:
#     # Attempt to upsert Education Affiliation records into SF using Bulk API
    # education_affil_upsert = sf.bulk.mbfc__Affiliation__c.upsert(data=bulk_data, external_id_field='Archdpdx_Migration_Id__c', batch_size=500, use_serial=False)
    

# except SalesforceMalformedRequest as e:
#     # If a SalesforceMalformedRequest error occurs, print the error message and response content
#     print(f"SalesforceMalformedRequest error: {e}")
#     print(f"Response content: {e.content}")

# Send results to CSV
# education_affil_upsert_results = pd.DataFrame(education_affil_upsert)
# education_affil_upsert_results.to_csv('results_files/education_affil_upsert_results')

## Ecclesial Affiliations

This section handles individual Contact source table FIELDS that map to Affiliation RECORDS in the target system.

As the source data model and target data model are substantially different, this section groups together source columns into what will become individual records in the new system and populates missing information based on or required by the target system.

Example: each affiliation record in the target system requires a Context. In certain cases this data does not exist in the source or it is found in another column:

| Affiliation            | Context                   | Completion Date           |
| ---------------------- | ------------------------- | ------------------------- |
| First Vows             | Religious Order           | Date of First Vows        |
| Final Vows             | Religious Order           | Date of Final Vows        |
| Incardination          | Incardinated from Diocese | Incardinated From Date    |
| Faculties (Type)       | Local Diocese             | Faculties Granted Date    |
| Faculties (Restricted) | Local Diocese             | Faculties Restricted Date |
| Faculties (Withdrawn)  | Local Diocese             | Faculties Withdrawn Date  |
| Excardinated           | Excardinated To Diocese   | Excardinated To Date      |

Other examples of columns that need to be populated:

- RecordTypeId
- Category
- Start Date
- Completion Date

Depending on which column is being migrated, the date value might be considered to be a Start Date or a Completion Date in the target system, and needs to be staged accordingly.


In [189]:
# Generate a staging DF of Ecclesial Affiliations out of a handful of fields in the source data, each of which is to be converted into a new row in the staging DF.

# FIXME: There are a number of rows where a Faculties Granted is missing a date, and conversely, where there is a Faculties Granted Date but no description of the Faculties granted. This is a problem, because the application requires a date for when Faculties were granted.


import pandas as pd
from functools import lru_cache
from simple_salesforce import Salesforce

# Load CSV
df = (pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/People.csv')
               .rename(columns=lambda x: x.replace(' ', '_')) # Remove whitespace in column names
               .drop(index=0) # Drops the extra row that replicates the labels
)

# Define the structure of your column sets with correct attribute names
column_sets = [
    {'year': 'Incardinated_From_Date', 'context': 'Incardinated_From_Diocese'},
    {'year': 'Excardinated_To_Date', 'context': 'Excardinated_To_Diocese'},
    {'year': 'Faculties_Granted_Date', 'affiliation': 'Faculties'},
    {'year': 'Faculties_Restricted_Date'},
    {'year': 'Faculties_Withdrawn_Date'},
    {'year': 'Reader_Date'},  # Add Reader Date
    {'year': 'Acolyte_Date'},  # Add Acolyte Date
    {'year': 'Candidacy_Date'}  # Add Candidate Date
]



In [190]:

# Query for the Record Type IDs of Church, Religious    
record_type_query = "SELECT Id, DeveloperName FROM RecordType WHERE SobjectType = 'Account' AND DeveloperName IN ('Church', 'Religious')"
record_type_result = sf.query(record_type_query)
record_type_ids = {record['DeveloperName']: record['Id'] for record in record_type_result['records']}

church_record_type_id = record_type_ids.get('Church')
religious_record_type_id = record_type_ids.get('Religious')

# Query for the Record Type IDs for 'Ecclesial_Affiliation' and 'Ministerial_Status' for mbfc__Affiliation__c object
record_type_query = "SELECT Id, DeveloperName FROM RecordType WHERE SobjectType = 'mbfc__Affiliation__c' AND DeveloperName IN ('Ecclesial_Affiliation', 'Ministerial_Status')"
record_type_result = sf.query(record_type_query)
record_type_ids = {record['DeveloperName']: record['Id'] for record in record_type_result['records']}

ecclesial_affiliation_record_type_id = record_type_ids.get('Ecclesial_Affiliation')
ministerial_status_record_type_id = record_type_ids.get('Ministerial_Status')

# Check if any of the required Record Types are missing
if not ecclesial_affiliation_record_type_id:
    raise ValueError("No RecordType found for Ecclesial Affiliation on mbfc__Affiliation__c object.")
if not ministerial_status_record_type_id:
    raise ValueError("No RecordType found for Ministerial Status on mbfc__Affiliation__c object.")

In [191]:

# Initialize the DataFrame for the staging table
ecclesial_affiliation_staging = pd.DataFrame()

# Function to check and create institution account
@lru_cache(maxsize=None)
def get_or_create_church_account(context):
    if pd.isna(context):
        return None  # Return None or handle as appropriate if institution name is NaN

    # Query Salesforce to find the institution
    query = f"SELECT Id, Name FROM Account WHERE Name = '{context}' LIMIT 1"
    results = sf.query(query)
    
    # If exists, return the ID
    if results['records']:
        return results['records'][0]['Id']
    else:
        # Ensure no NaN values are sent to Salesforce
        if 'Diocese' in context or 'Archdiocese' in context:
            account_data = {
                'Name': context if pd.notna(context) else "Church Name Missing",  # Provide a default if NaN
                'RecordTypeId': church_record_type_id,
                'mbfc__Church_Type__c': 'Diocese'
            }
        else:
            account_data = {
                'Name': context if pd.notna(context) else "Religious Name Missing",  # Provide a default if NaN
                'RecordTypeId': religious_record_type_id
            }

        # Remove keys with None values to avoid JSON serialization issues
        account_data = {k: v for k, v in account_data.items() if v is not None}
        
        new_account = sf.Account.create(account_data)
        return new_account['id']

# Get Contact record ID from Salesforce
@lru_cache(maxsize=None)
def get_contact_id_by_record_number(record_number):
    if pd.isna(record_number):
        return None
    query = f"SELECT Id FROM Contact WHERE Archdpdx_Migration_Id__c = '{record_number}'"
    results = sf.query(query)
    if results['records']:
        return results['records'][0]['Id']
    return None

# Initialize an empty list to collect DataFrames or dictionaries
new_entries = []

# Process each row and each degree set
for index, row in df.iterrows():
    for col_set in column_sets:
        date = row[col_set['year']]
        if pd.notna(date):  # Only proceed if the year column is not NaN
            context = row.get(col_set.get('context'), None)
            account_id = get_or_create_church_account(context)
            contact_id = get_contact_id_by_record_number(row['Record_Number'])
            
            # Initialize all necessary variables with None
            start_date = None
            completion_date = None
            affiliation = None
            record_type_id = None
            category = None

            # Determine the mbfc__Affiliation__c value
            if 'Incardinated_From_Date' in col_set['year']:
                affiliation = 'Incardinated'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Ecclesial Affiliations'
            elif 'Excardinated_To_Date' in col_set['year']:
                affiliation = 'Excardinated'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Ecclesial Affiliations'
            elif 'Faculties_Granted_Date' in col_set['year']:
                faculties_value = row.get(col_set.get('affiliation', ''))
                if pd.isna(faculties_value):
                    affiliation = 'Faculties'
                else:
                    affiliation = f"Faculties ({faculties_value})"
                account_id = diocesan_account_id  # Override account ID for faculties
                start_date = date
                record_type_id = ministerial_status_record_type_id
                category = 'Faculties'
            elif 'Faculties_Restricted_Date' in col_set['year']:
                affiliation = 'Faculties (Restricted)'
                account_id = diocesan_account_id  # Override account ID for faculties
                completion_date = date
                record_type_id = ministerial_status_record_type_id
                category = 'Faculties'
            elif 'Faculties_Withdrawn_Date' in col_set['year']:
                affiliation = 'Faculties (Withdrawn)'
                account_id = diocesan_account_id  # Override account ID for faculties
                completion_date = date
                record_type_id = ministerial_status_record_type_id
                category = 'Faculties'
            elif 'Date_of_First_Vows' in col_set['year']:
                affiliation = 'First Vows'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Ecclesial Affiliations'
            elif 'Date_of_Final_Vows' in col_set['year']:
                affiliation = 'Final Vows'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Ecclesial Affiliations'
            elif 'Reader_Date' in col_set['year']:
                affiliation = 'Reader Installation'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Installations'
                account_id = diocesan_account_id
            elif 'Acolyte_Date' in col_set['year']:
                affiliation = 'Acolyte Installation'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Installations'
                account_id = diocesan_account_id
            elif 'Candidacy_Date' in col_set['year']:
                affiliation = 'Candidate Installation'
                completion_date = date
                record_type_id = ecclesial_affiliation_record_type_id
                category = 'Installations'
                account_id = diocesan_account_id

            else:
                affiliation = row.get(col_set.get('affiliation', ''), None)
            
            # Create a record for the staging table
            affiliation_record = {
                'RecordTypeId': record_type_id,
                'mbfc__Person__c': contact_id,
                'mbfc__Completion_Date__c': completion_date,
                'mbfc__Start_Date__c': start_date,
                'mbfc__Context__c': account_id,
                'mbfc__Category__c': category,
                'mbfc__Affiliation__c': affiliation
            }
            new_entries.append(affiliation_record)

# Convert all collected records to a DataFrame in one go
ecclesial_affiliations_staging = pd.DataFrame(new_entries)

# Takes approx. 1.5 minutes to run

In [192]:
ecclesial_affiliations_staging.sample(20)

Unnamed: 0,RecordTypeId,mbfc__Person__c,mbfc__Completion_Date__c,mbfc__Start_Date__c,mbfc__Context__c,mbfc__Category__c,mbfc__Affiliation__c
897,012Dx0000003p5DIAQ,003Dx00000nKj65IAC,,2005-06-11,001Dx00001HwDsgIAF,Faculties,Faculties (General)
749,012Dx0000003p5DIAQ,003Dx00000nKjJTIA0,,2018-03-19,001Dx00001HwDsgIAF,Faculties,Faculties
759,012Dx0000003p5AIAQ,003Dx00000nKjJkIAK,2020-02-11,,001Dx00001HwDsgIAF,Installations,Acolyte Installation
227,012Dx0000003p5DIAQ,003Dx00000nKiojIAC,,2014-02-28,001Dx00001HwDsgIAF,Faculties,Faculties (Diaconal)
199,012Dx0000003p5AIAQ,003Dx00000nKinaIAC,2021-03-10,,001Dx00001HwDsgIAF,Installations,Reader Installation
469,012Dx0000003p5AIAQ,003Dx00000nKj8gIAC,2008-10-25,,001Dx00001HwDsgIAF,Installations,Acolyte Installation
906,012Dx0000003p5DIAQ,003Dx00000nKjMdIAK,,2014-06-06,001Dx00001HwDsgIAF,Faculties,Faculties (General)
9,012Dx0000003p5DIAQ,003Dx00000nKiqVIAS,,2012-06-09,001Dx00001HwDsgIAF,Faculties,Faculties (General)
85,012Dx0000003p5DIAQ,003Dx00000nKjA7IAK,,2022-07-01,001Dx00001HwDsgIAF,Faculties,Faculties (Confessional)
853,012Dx0000003p5AIAQ,003Dx00000nKj3hIAC,1981-01-23,,001Dx00001HwDsgIAF,Installations,Reader Installation


In [193]:
# Apply the function to each row and create a new column with the unique ID
ecclesial_affiliations_staging['Archdpdx_Migration_Id__c'] = ecclesial_affiliations_staging.apply(create_unique_id, axis=1)

# Check for duplicates
ecclesial_affiliations_staging['Archdpdx_Migration_Id__c'].duplicated().value_counts()

False    985
Name: Archdpdx_Migration_Id__c, dtype: int64

In [194]:
# Send the new DataFrame to a CSV
ecclesial_affiliations_staging.to_csv('staging_files/Ecclesial_Affiliations_Staging.csv', index=False, encoding='utf-8-sig')

In [198]:
ecclesial_affiliations_staging

Unnamed: 0,RecordTypeId,mbfc__Person__c,mbfc__Completion_Date__c,mbfc__Start_Date__c,mbfc__Context__c,mbfc__Category__c,mbfc__Affiliation__c,Archdpdx_Migration_Id__c
0,012Dx0000003p5DIAQ,003Dx00000nKiqAIAS,,2021-11-02,001Dx00001HwDsgIAF,Faculties,Faculties (Confessional),003dx00000nkiqaias20211102faculties(confessional)
1,012Dx0000003p5DIAQ,003Dx00000nKiqFIAS,2010-06-30,,001Dx00001HwDsgIAF,Faculties,Faculties (Withdrawn),003dx00000nkiqfias20100630faculties(withdrawn)
2,012Dx0000003p5DIAQ,003Dx00000nKiqRIAS,,2016-06-04,001Dx00001HwDsgIAF,Faculties,Faculties (General),003dx00000nkiqrias20160604faculties(general)
3,012Dx0000003p5AIAQ,003Dx00000nKiqRIAS,2013-04-10,,001Dx00001HwDsgIAF,Installations,Reader Installation,003dx00000nkiqrias20130410readerinstallation
4,012Dx0000003p5AIAQ,003Dx00000nKiqRIAS,2013-04-10,,001Dx00001HwDsgIAF,Installations,Acolyte Installation,003dx00000nkiqrias20130410acolyteinstallation
...,...,...,...,...,...,...,...,...
980,012Dx0000003p5DIAQ,003Dx00000nKjZ2IAK,,2022-06-01,001Dx00001HwDsgIAF,Faculties,Faculties (General),003dx00000nkjz2iak20220601faculties(general)
981,012Dx0000003p5DIAQ,003Dx00000nKjZ8IAK,,1976-06-05,001Dx00001HwDsgIAF,Faculties,Faculties (General),003dx00000nkjz8iak19760605faculties(general)
982,012Dx0000003p5AIAQ,003Dx00000nKjZ9IAK,2007-11-26,,001Dx00001HwFHDIA3,Ecclesial Affiliations,Incardinated,003dx00000nkjz9iak20071126incardinated
983,012Dx0000003p5DIAQ,003Dx00000nKimjIAC,,1979-10-05,001Dx00001HwDsgIAF,Faculties,Faculties (General),003dx00000nkimjiac19791005faculties(general)


In [195]:
# upsert Ecclesial Affiliation records
upsert_to_salesforce_bulk(sf, ecclesial_affiliations_staging, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c', 'results_files/ecclesial_affil_upsert_results', 100)

#FIXME: There are a number of rows where a Faculties Granted is missing a date, and conversely, where there is a Faculties Granted Date but no description of the Faculties granted. This is a problem, because the application requires a date for when Faculties were granted.

# Takes approx 1.5 minutes to run

Error processing key: RecordTypeId, value: 012Dx0000003p5DIAQ, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Person__c, value: 003Dx00000nKiqAIAS, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Completion_Date__c, value: None, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Start_Date__c, value: 2021-11-02, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Context__c, value: 001Dx00001HwDsgIAF, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Category__c, value: Faculties, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Affiliation__c, value: Faculties (Confessional), error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: Archdpdx_Migration_Id__c, 

## Religious Community Affiliations

In [210]:
acc_religious_staging_2

Unnamed: 0,Name,RecordTypeId,mbfc__Religious_Type__c,BillingStreet,BillingCity,BillingState,BillingPostalCode,BillingCountry,Phone,Fax,mbfc__Email__c,Website,mbfc__Abbreviation__c,mbfc__Religious_Suffix__c,mbfc__Type_Members__c,Description,Job_Id__c,ParentId,Archdpdx_Migration_Id__c
186,"Colombiere Jesuit Community, Portland (SJ)",012Dx0000003p52IAA,Local Community,3220 SE 43rd Ave,Portland,OR,97206,,503-595-1941,,,https://www.jesuitswest.org/,Jesuits,SJ,Men,"Manager: Fr. Paul Cochran, SJ",138,001Dx00001HwE3TIAV,RelCommunities_1
187,"Abbey of Our Lady of Guadalupe, Carlton (OCSO)",012Dx0000003p52IAA,Local Community,Abbey of Our Lady of Guadalupe\n9200 NE Abbey Rd,Carlton,OR,97111,,503-852-7174,503-852-7748,community@trappistabbey.org,http://www.trappistabbey.org/,Trappists,OCSO,Men,The cloistered monastic cistercian community s...,138,001Dx00001HwE3UIAV,RelCommunities_2
188,"JCCU Jesuit Tertianship, Portland (SJ)",012Dx0000003p52IAA,Local Community,3301 SE 45th Ave,Portland,OR,97206,,,,jctertianship@jesuits.org,,Jesuits,SJ,Men,,138,001Dx00001HwE3TIAV,RelCommunities_3
189,"Benedictine Monks of Mount Angel Abbey, Saint ...",012Dx0000003p52IAA,Local Community,1 Abbey Dr,Saint Benedict,OR,97373,,503-845-3030,,info@mtangelabbey.edu,https://www.mountangelabbey.org/,Benedictines,OSB,Men,"Serving Mount Angel Towers, Providence Benedec...",138,001Dx00001HwE3VIAV,RelCommunities_4
190,Missionaries of the Holy Spirit Provincial Hou...,012Dx0000003p52IAA,Local Community,PO Box 22387,Milwaukie,OR,97269,,503-324-2492,503-324-2493,,www.mspscpp.org,"Missionaries of the Holy Spirit, Christ the Pr...",MSpS,Men,,138,001Dx00001HwE3WIAV,RelCommunities_8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,"Society of the Divine Word, Techny, IL (SVD)",012Dx0000003p52IAA,Local Community,"Society of the Divine Word, Chicago Province",,,,,,,,https://www.divineword.org/,Society of the Divine Word,SVD,Men,,138,001Dx00001HwE4QIAV,RelCommunities_77
252,"Society of the Divine Saviour, Rome, Italy (SDS)",012Dx0000003p52IAA,Local Community,"Via della Conciliazione, 51",Roma,,I-00193,ITALY,+39 06 686 291,,,http://www.sds.org/,Society of the Divine Saviour,SDS,Men,,138,001Dx00001HwE4RIAV,RelCommunities_78
253,"Society of Our Lady of the Most Holy Trinity, ...",012Dx0000003p52IAA,Local Community,PO Box 4116,Corpus Christi,TX,78469,,,,,https://solt.net/,Society of Our Lady of the Most Holy Trinity,SOLT,Men,,138,001Dx00001HwE4SIAV,RelCommunities_79
254,"Community of St. Thomas More, Eugene (OP)",012Dx0000003p52IAA,Local Community,1386 E 18th Ave,Eugene,OR,97403,,541-343-0065,541-686-8028,,uonewman.org,Dominicans,OP,Men,Serving St. Thomas More Parish and Newman Cent...,138,001Dx00001HwE3dIAF,RelCommunities_80


In [211]:
df_contact_staging

Unnamed: 0_level_0,ADPDX_Contact_Type__c,ADPDX_Clergy_Status__c,ADPDX_Religious_Status__c,ADPDX_Login_ID__c,ADPDX_Access_Permission__c,Salutation,FirstName,adpdx_Preferred_Name__c,MiddleName,LastName,Suffix,MailingCity,MailingState,MailingPostalCode,MailingCountry,OtherCity,OtherState,OtherPostalCode,OtherCountry,Preferred_Address__c,npe01__WorkPhone__c,HomePhone,MobilePhone,npe01__PreferredPhone__c,npe01__WorkEmail__c,npe01__AlternateEmail__c,npe01__HomeEmail__c,npe01__Preferred_Email__c,Directory_Include__c,Directory_Include_Middle_Name__c,Directory_Include_Suffix__c,Suppress_From_Reports__c,adpdx_Seminarian_Student_Debt__c,adpdx_Seminarian_Medical_Benefits__c,Send_Group_Mail_and_Email__c,Birthdate,mbfc__Place_of_Birth__c,Foreign_Born__c,Father_Full_Name__c,Mother_Full_Maiden_Name__c,Foreign_Citizenship__c,Immigration_Status__c,Passport_Visa_Expiration_Date__c,Social_Security_Number__c,adpdx_Accepted_to_Formation_Date__c,adpdx_Candidacy_Date__c,adpdx_Formation_Withdrawn_Date__c,adpdx_Formation_Deferred_Date__c,adpdx_Formation_Terminated_Date__c,adpdx_Terminate_or_Defer_Note__c,adpdx_CARA_Highest_Ed_Level__c,adpdx_Letter_of_Good_Standing__c,mbfc__Date_of_Arrival_in_Diocese__c,adpdx_Last_Retreat_Date__c,adpdx_Last_Educ_Requirement_Date__c,adpdx_Policy_Manual_Acknowledgement_Date__c,adpdx_Harassment_Prevention_Course_Date__c,adpdx_Standards_of_Conduct_Date__c,adpdx_Last_Background_Check_Date__c,adpdx_Last_Child_Protection_Training__c,mbfc__Date_Left_Diocese__c,adpdx_Senior_Status_Date__c,adpdx_Laicized_Date__c,mbfc__Date_of_Death__c,Languages__c,adpdx_Coverage_Availability__c,adpdx_Advanced_Directive_Date__c,adpdx_End_of_Life_Plan_Date__c,adpdx_Will_Date__c,adpdx_Will_Note__c,adpdx_CIC_489_File__c,adpdx_CARA_Ethnicity__c,adpdx_Seminarian_Status__c,adpdx_Other_Diaconal_Ministry__c,adpdx_Spiritual_Director_Authorized__c,adpdx_Place_of_Work__c,adpdx_Volunteer_Place__c,adpdx_Type_of_Work__c,adpdx_Work_Load__c,adpdx_Work_Title__c,Bi_Ritual__c,Non_Latin_Rite__c,adpdx_Discerner_Aspirant_for_Diaconate__c,adpdx_Is_Seminarian__c,Archdpdx_Migration_Id__c,MailingStreet,OtherStreet,mbfc__gender__c,npe01__Secondary_Address_Type__c,candidate_type__c,RecordTypeID,mbfc__Ecclesial_Status__c,mbfc__Religious_Order__c,mbfc__Diocese_of_Incardination__c,npsp__Deceased__c,Archdpdx_Job_Id__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1
2766,Priest,Transferred Out,,sabukaka,,Rev.,Stephen,,Ozovehe,Abaukaka,,Tualatin,OR,97062,,Portland,OR,97202,,Mailing,503-430-7699,,773-733-3772,Work,,,abstoz@yahoo.com,Work,True,False,False,False,0,,True,1967-06-07,,False,,,,,,,,,,,,,,,,,,,2022-05-30,2021-11-03,2021-11-04,2022-11-24,2023-01-16,,,,,,,,,,,,,,,,,,,,False,False,False,False,2766,Brighton Hospice Office\n8050 SW Warm Springs ...,5802 SW Milwaukie Ave Apt 4,Male,Private,,012Dx0000003p5JIAQ,Priest - Temporary Sojourn (Foreign),,001O300000aK7FGIA0,False,138
2337,Staff,,,,,Mr.,Rogelio,,,Acevedo,,,,,,,,,,,503-644-5264,,,Work,facilities@stpius.org,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,False,False,False,2337,,,Male,Private,,012Dx0000003p5HIAQ,Lay Person,,,False,138
3244,Staff,,,,,Mr.,Sean,,,Ackroyd,,,,,,,,,,,541-757-1988,,,Work,,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,False,False,False,3244,,,Male,Private,,012Dx0000003p5HIAQ,Lay Person,,,False,138
3295,Staff,,,,,Ms.,Sherril,,,Acton,,,,,,,,,,,541-686-2234 x1524,,,Work,sacton@marisths.org,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,False,False,False,3295,,,Female,Private,,012Dx0000003p5HIAQ,Lay Person,,,False,138
2164,Staff,,,,,Ms.,Barbara,,,Adams,,,,,,,,,,,503-665-9129,,,Work,adamsby@eou.edu,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,False,False,False,2164,,,Female,Private,,012Dx0000003p5HIAQ,Lay Person,,,False,138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,Staff,,,,,Ms.,Jenny,,,Zomerdyk,,,,,,,,,,,541-664-1050,,,Work,churchoffice@shepherdcatholic.com,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,False,False,False,1670,,,Female,Private,,012Dx0000003p5HIAQ,Lay Person,,,False,138
2755,Religious,,Active,dzorrilla,,Br.,Daniel,,,Zorrilla,,Saint Benedict,OR,97373,,,,,,,503-845-1181,,,Work,,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,2021-08-01,,,,,,2019-06-28,2021-10-10,,,,,,,,,,,,,,,,,,,,,False,False,False,False,2755,Félix Rougier House of Studies\nPO Box 499,,Male,Private,,012Dx0000003p5KIAQ,Professed Male Religious (Non-Priest),001Dx00001HwE3WIAV,,False,138
1962,Staff,,,,,Ms.,Kim,,,Zuber,,,,,,,,,,,503-769-5664,,,Work,boniface@wvi.com,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,False,False,False,1962,,,Female,Private,,012Dx0000003p5HIAQ,Lay Person,,,False,138
2202,Staff,,,,,Ms.,Agnes,,,Zueger,,,,,,,,,,,503-636-7687,,,Work,agnesz@ollparish.com,,,Work,False,False,False,False,0,,True,,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,False,False,False,False,2202,,,Female,Private,,012Dx0000003p5HIAQ,Lay Person,,,False,138


In [212]:
import pandas as pd

# Function to get the Salesforce record for the corresponding religious community records
def get_religious_community_id(religious_community_ID):
    if pd.isna(religious_community_ID) or religious_community_ID == '0':
        return None  # Return None or handle as appropriate if religious community name is NaN or '0'

    # Query Salesforce to find the religious community
    query = f"SELECT Id, Name FROM Account WHERE Archdpdx_Migration_Id__c = 'RelCommunities_{religious_community_ID}' LIMIT 1"
    results = sf.query(query)
    
    # If exists, return the ID
    if results['records']:
        return results['records'][0]['Id']
    else:
        raise ValueError(f"Religious community with ID '{religious_community_ID}' not found in Salesforce.")

# Function to get the Salesforce contact ID by record number
def get_contact_id_by_record_number(record_number):
    if pd.isna(record_number):
        return None
    query = f"SELECT Id FROM Contact WHERE Archdpdx_Migration_Id__c = '{record_number}'"
    results = sf.query(query)
    if results['records']:
        return results['records'][0]['Id']
    return None

# Prepare the data frame with the necessary columns
religious_affiliations_staging = pd.DataFrame(columns=['mbfc__Person__c', 'mbfc__Context__c', 'mbfc__Category__c', 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c'])

# Populate the data frame with the contact IDs and religious community IDs
new_entries = []

for index, row in df_contact_staging_spouses.iterrows():
    religious_community_ID = row.get('Link_to_Religious_Community', None)
    if pd.notna(religious_community_ID) and religious_community_ID != '0':
        contact_id = get_contact_id_by_record_number(row['Archdpdx_Migration_Id__c'])
        religious_community_id = get_religious_community_id(religious_community_ID)
        
        affiliation_record = {
            'mbfc__Person__c': contact_id,
            'mbfc__Context__c': religious_community_id,
            'mbfc__Category__c': 'Religious Community',
            'mbfc__Affiliation__c': 'Member',
            'Archdpdx_Migration_Id__c': f"{contact_id}_{religious_community_id}_RelComm"
        }
        new_entries.append(affiliation_record)

# Convert the list of new entries to a DataFrame
religious_affiliations_staging = pd.DataFrame(new_entries)

# Cell takes >4m to run

In [214]:
# Drop the index and ensure only the required columns are included

# religious_affiliations_staging.reset_index(drop=True, inplace=True)

religious_affiliations_staging = religious_affiliations_staging[['mbfc__Person__c', 'mbfc__Context__c', 'mbfc__Category__c', 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c']]

# Upsert the data frame to Salesforce
upsert_to_salesforce_bulk(sf, religious_affiliations_staging, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c', 'results_files/religious_affil_upsert_results', 100)

Error processing key: mbfc__Person__c, value: 003Dx00000nKiqHIAS, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Context__c, value: 001Dx00001HwE5OIAV, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Category__c, value: Religious Community, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Affiliation__c, value: Member, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: Archdpdx_Migration_Id__c, value: 003Dx00000nKiqHIAS_001Dx00001HwE5OIAV_RelComm, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Person__c, value: 003Dx00000nKiqLIAS, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error processing key: mbfc__Context__c, value: 001Dx00001HwE5JIAV, error: isinstance() arg 2 must be a type, a tuple of types, or a union
Error p

# AFFILIATIONS


In [572]:
# Import Assignments.csv

import pandas as pd


df_affiliations = (
    pd.read_csv('/Users/matthewmartin/Library/CloudStorage/GoogleDrive-matt@meribahflow.com/Shared drives/Clients/ADPDX (Portland)/Data/Clergy DB/reports from clergypdx/Assignments (1).csv')
    .set_index('Record Number', verify_integrity=True)
    .drop(index='recNum', errors='ignore')  # Added errors='ignore' to prevent errors if 'recNum' does not exist
    .drop(columns=['Historic Name'], errors='ignore')  # Added errors='ignore' for the same reason
    .rename(columns=lambda x: x.replace(' ', '_'))  # Remove whitespace in column names
    .assign(Account_Ext_Id=lambda df: df['Organization_Table_Name'] + '_' + df['Organization_Table_Link'])
    # .assign(mbfc__Person__r=lambda df: df['Assigned_Person'].apply(lambda x: {'Archdpdx_Migration_Id__c': x}))
    # .assign(mbfc__Context__r=lambda df: df['Account_Ext_Id'].apply(lambda x: {'Archdpdx_Migration_Id__c': x}))
    # .assign(mbfc__Use_Custom_Title__c= True)
    .assign(mbfc__Category__c= 'Any All')
    # .assign(Archdpdx_Migration_Id__c= df_affiliations.index)
    .drop(columns=[
        # 'Assigned_Person'
        'Organization_Table_Name'
        ,'Organization_Table_Link'
        ,'Projected_Term_End_Date'
        ,'Term_Number'
        ,'Leave_Type' # Leave out 'Leave_Type' until mapped properly
        ])
    .rename(columns={
        'Duty_Load': 'mbfc__Duty_Load__c',
        'Start_Date': 'mbfc__Start_Date__c',
        'End_Date': 'mbfc__Completion_Date__c',
        'Assignment_Title': 'mbfc__Affiliation__c',
        'Archdiocesan_Assignment': 'adpdx_Archdiocesan_Assignment__c',
    })
    .replace({'ADPDX_Archdiocesan_Assignment__c': {'Yes': True, 'No': False, None: False}})
    .fillna('')
)

# Display a sample of the DataFrame to check the new structure
df_affiliations.sample(10)



Unnamed: 0_level_0,Assigned_Person,mbfc__Affiliation__c,adpdx_Archdiocesan_Assignment__c,mbfc__Duty_Load__c,mbfc__Start_Date__c,mbfc__Completion_Date__c,Account_Ext_Id,mbfc__Category__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2419,1559,Institute for Priestly Formation,,,2021-06-01,2021-08-31,Offices_22,Any All
3172,3000,Administrative Assistant,,,,2023-06-22,Offices_1,Any All
2420,541,Institute for Priestly Formation,Yes,,2021-06-01,2021-08-31,Offices_22,Any All
3190,1598,Pastoral Year,Yes,Full Time,2023-08-01,2023-12-31,Parishes_5,Any All
1346,804,Vicar Forane,Yes,,2019-10-01,,Vicariates_14,Any All
287,779,Administrator,Yes,,2016-07-01,2019-06-30,Parishes_68,Any All
2694,2732,Summer Assignment,,,2022-03-25,2022-07-31,Parishes_58,Any All
514,1505,Special Projects Reporter,,,,1900-01-01,Offices_3,Any All
2678,767,Presbyteral Council: Consultor,No,,2022-03-05,,Offices_21,Any All
1178,1958,Secretary,,,,,Parishes_141,Any All


In [573]:
df_affiliations['adpdx_Archdiocesan_Assignment__c'] = df_affiliations['adpdx_Archdiocesan_Assignment__c'].replace({'Yes': True, 'No': False, '': False}).fillna(False)

In [574]:
# Get SF Record Ids from External Ids

# Get Context Account Ids
add_salesforce_record_ids(sf, df_affiliations, 'Account_Ext_Id', 'Account', 'Archdpdx_Migration_Id__c', 'mbfc__Context__c')

Unnamed: 0_level_0,Assigned_Person,mbfc__Affiliation__c,adpdx_Archdiocesan_Assignment__c,mbfc__Duty_Load__c,mbfc__Start_Date__c,mbfc__Completion_Date__c,Account_Ext_Id,mbfc__Category__c,mbfc__Context__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3515,780,Presbyteral Council Rep,True,Part Time,2024-01-01,,Vicariates_16,Any All,001Dx00001HwDx2IAF
3514,762,Presbyteral Council Rep,True,Part Time,2024-01-01,,Vicariates_3,Any All,001Dx00001HwDwpIAF
3512,3321,Deacon,True,Full Time,2024-02-19,,Parishes_83,Any All,001Dx00001HwDzoIAF
3511,803,Special Assignment,True,Full Time,2024-02-10,,Offices_21,Any All,001Dx00001HwDyQIAV
3510,3317,Development Operations Associate,False,,2024-01-16,,Offices_1,Any All,001Dx00001HwDyIIAV
...,...,...,...,...,...,...,...,...,...
5,511,Vicar Forane,True,,2016-10-01,2023-09-30,Vicariates_10,Any All,001Dx00001HwDwwIAF
4,511,Administrator,True,,2013-07-01,2017-06-30,Parishes_109,Any All,001Dx00001HwE0DIAV
3,511,Pastor,True,Full Time,2017-07-01,,Parishes_109,Any All,001Dx00001HwE0DIAV
2,318,Deacon,True,,2002-12-23,2016-06-30,Parishes_114,Any All,001Dx00001HwE0IIAV


In [575]:
# Get Person Contact Ids
add_salesforce_record_ids(sf, df_affiliations, 'Assigned_Person', 'Contact', 'Archdpdx_Migration_Id__c', 'mbfc__Person__c')

Unnamed: 0_level_0,Assigned_Person,mbfc__Affiliation__c,adpdx_Archdiocesan_Assignment__c,mbfc__Duty_Load__c,mbfc__Start_Date__c,mbfc__Completion_Date__c,Account_Ext_Id,mbfc__Category__c,mbfc__Context__c,mbfc__Person__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3515,780,Presbyteral Council Rep,True,Part Time,2024-01-01,,Vicariates_16,Any All,001Dx00001HwDx2IAF,003Dx00000nKjOcIAK
3514,762,Presbyteral Council Rep,True,Part Time,2024-01-01,,Vicariates_3,Any All,001Dx00001HwDwpIAF,003Dx00000nKjUcIAK
3512,3321,Deacon,True,Full Time,2024-02-19,,Parishes_83,Any All,001Dx00001HwDzoIAF,003Dx00000nKjSuIAK
3511,803,Special Assignment,True,Full Time,2024-02-10,,Offices_21,Any All,001Dx00001HwDyQIAV,003Dx00000nKiw1IAC
3510,3317,Development Operations Associate,False,,2024-01-16,,Offices_1,Any All,001Dx00001HwDyIIAV,003Dx00000nKjDGIA0
...,...,...,...,...,...,...,...,...,...,...
5,511,Vicar Forane,True,,2016-10-01,2023-09-30,Vicariates_10,Any All,001Dx00001HwDwwIAF,003Dx00000nKjQrIAK
4,511,Administrator,True,,2013-07-01,2017-06-30,Parishes_109,Any All,001Dx00001HwE0DIAV,003Dx00000nKjQrIAK
3,511,Pastor,True,Full Time,2017-07-01,,Parishes_109,Any All,001Dx00001HwE0DIAV,003Dx00000nKjQrIAK
2,318,Deacon,True,,2002-12-23,2016-06-30,Parishes_114,Any All,001Dx00001HwE0IIAV,003Dx00000nKipTIAS


In [576]:
# Set Archdpdx_Migration_Id__c External ID
df_affiliations['Archdpdx_Migration_Id__c'] = df_affiliations.index

# Create Job ID
df_affiliations['Archdpdx_Job_Id__c'] = curr_job_id

df_affiliations


Unnamed: 0_level_0,Assigned_Person,mbfc__Affiliation__c,adpdx_Archdiocesan_Assignment__c,mbfc__Duty_Load__c,mbfc__Start_Date__c,mbfc__Completion_Date__c,Account_Ext_Id,mbfc__Category__c,mbfc__Context__c,mbfc__Person__c,Archdpdx_Migration_Id__c,Archdpdx_Job_Id__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3515,780,Presbyteral Council Rep,True,Part Time,2024-01-01,,Vicariates_16,Any All,001Dx00001HwDx2IAF,003Dx00000nKjOcIAK,3515,133
3514,762,Presbyteral Council Rep,True,Part Time,2024-01-01,,Vicariates_3,Any All,001Dx00001HwDwpIAF,003Dx00000nKjUcIAK,3514,133
3512,3321,Deacon,True,Full Time,2024-02-19,,Parishes_83,Any All,001Dx00001HwDzoIAF,003Dx00000nKjSuIAK,3512,133
3511,803,Special Assignment,True,Full Time,2024-02-10,,Offices_21,Any All,001Dx00001HwDyQIAV,003Dx00000nKiw1IAC,3511,133
3510,3317,Development Operations Associate,False,,2024-01-16,,Offices_1,Any All,001Dx00001HwDyIIAV,003Dx00000nKjDGIA0,3510,133
...,...,...,...,...,...,...,...,...,...,...,...,...
5,511,Vicar Forane,True,,2016-10-01,2023-09-30,Vicariates_10,Any All,001Dx00001HwDwwIAF,003Dx00000nKjQrIAK,5,133
4,511,Administrator,True,,2013-07-01,2017-06-30,Parishes_109,Any All,001Dx00001HwE0DIAV,003Dx00000nKjQrIAK,4,133
3,511,Pastor,True,Full Time,2017-07-01,,Parishes_109,Any All,001Dx00001HwE0DIAV,003Dx00000nKjQrIAK,3,133
2,318,Deacon,True,,2002-12-23,2016-06-30,Parishes_114,Any All,001Dx00001HwE0IIAV,003Dx00000nKipTIAS,2,133


In [577]:
# Final cleanup
df_affiliations.drop(columns=[
    'Account_Ext_Id',
    'Assigned_Person', 
    ], 
    inplace=True)

df_affiliations

#FIXME: INVALID_FIELD: Foreign key external ID: relcommunities_23 not found for field Archdpdx_Migration_Id__c
#FIXME: INVALID_FIELD: Foreign key external ID: offices_0 not found for field Archdpdx_Migration_Id__c
#FIXME: Record #115 > FIELD_INTEGRITY_EXCEPTION: Start Date: invalid date: Tue Aug 01 00:00:00 GMT 1021 [mbfc__Start_Date__c

Unnamed: 0_level_0,mbfc__Affiliation__c,adpdx_Archdiocesan_Assignment__c,mbfc__Duty_Load__c,mbfc__Start_Date__c,mbfc__Completion_Date__c,mbfc__Category__c,mbfc__Context__c,mbfc__Person__c,Archdpdx_Migration_Id__c,Archdpdx_Job_Id__c
Record Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3515,Presbyteral Council Rep,True,Part Time,2024-01-01,,Any All,001Dx00001HwDx2IAF,003Dx00000nKjOcIAK,3515,133
3514,Presbyteral Council Rep,True,Part Time,2024-01-01,,Any All,001Dx00001HwDwpIAF,003Dx00000nKjUcIAK,3514,133
3512,Deacon,True,Full Time,2024-02-19,,Any All,001Dx00001HwDzoIAF,003Dx00000nKjSuIAK,3512,133
3511,Special Assignment,True,Full Time,2024-02-10,,Any All,001Dx00001HwDyQIAV,003Dx00000nKiw1IAC,3511,133
3510,Development Operations Associate,False,,2024-01-16,,Any All,001Dx00001HwDyIIAV,003Dx00000nKjDGIA0,3510,133
...,...,...,...,...,...,...,...,...,...,...
5,Vicar Forane,True,,2016-10-01,2023-09-30,Any All,001Dx00001HwDwwIAF,003Dx00000nKjQrIAK,5,133
4,Administrator,True,,2013-07-01,2017-06-30,Any All,001Dx00001HwE0DIAV,003Dx00000nKjQrIAK,4,133
3,Pastor,True,Full Time,2017-07-01,,Any All,001Dx00001HwE0DIAV,003Dx00000nKjQrIAK,3,133
2,Deacon,True,,2002-12-23,2016-06-30,Any All,001Dx00001HwE0IIAV,003Dx00000nKipTIAS,2,133


In [578]:
# TEST ME
# Set Archdpdx_Migration_Id__c External ID
df_affiliations['Archdpdx_Migration_Id__c'] = df_affiliations.index.astype(str).str.replace(' ', '').str.replace('-', '')


In [579]:
df_affiliations.to_csv('staging_files/affiliations_staging.csv', encoding='utf-8', index=False)

In [580]:
upsert_to_salesforce_bulk(sf, df_affiliations, 'mbfc__Affiliation__c', 'Archdpdx_Migration_Id__c', 'results_files/affiliation_upsert_results')

# cell takes >3m to run

Batch 1 processed: 100 successful, 0 failed.
Batch 2 processed: 200 successful, 0 failed.
Batch 3 processed: 300 successful, 0 failed.
Batch 4 processed: 400 successful, 0 failed.
Batch 5 processed: 499 successful, 1 failed.
Batch 6 processed: 599 successful, 1 failed.
Batch 7 processed: 699 successful, 1 failed.
Batch 8 processed: 799 successful, 1 failed.
Batch 9 processed: 899 successful, 1 failed.
Batch 10 processed: 999 successful, 1 failed.
Batch 11 processed: 1099 successful, 1 failed.
Batch 12 processed: 1199 successful, 1 failed.
Batch 13 processed: 1299 successful, 1 failed.
Batch 14 processed: 1399 successful, 1 failed.
Batch 15 processed: 1499 successful, 1 failed.
Batch 16 processed: 1598 successful, 2 failed.
Batch 17 processed: 1698 successful, 2 failed.
Batch 18 processed: 1797 successful, 3 failed.
Batch 19 processed: 1897 successful, 3 failed.
Batch 20 processed: 1997 successful, 3 failed.
Batch 21 processed: 2096 successful, 4 failed.
Batch 22 processed: 2196 success

# Post-Migration Manual Updates

1. Convert 'Offices' that are ADPDX Pastoral Centre offices into record type: 'Groups', and set their parentID to the Diocese (there are just 6 of these accounts).
1. Update the Religous Order records 'Religious Superior' lookup.
1. Set 'organization type' field value for each account in the 'organization' load: Offices, Newman Centres, Schools, Organizations
1. Consolidate education degree titles in 'Affiliation.Affiliation' picklist into the standard value
