In [None]:
import spacy

def _match_case(original, replacement):
    """Return *replacement* re-cased to mirror the casing of *original*."""
    if len(original) > 1 and original.isupper():
        return replacement.upper()
    if original[:1].isupper():
        return replacement[:1].upper() + replacement[1:]
    return replacement


def _pick_role(token, roles):
    """Choose the grammatical role of *token* among the candidate *roles*.

    Several surface forms are ambiguous ("her" is both object and possessive
    determiner, "his" is both possessive determiner and possessive pronoun),
    so the token's dependency label is used to tell the readings apart.
    """
    if len(roles) == 1:
        return roles[0]
    # A possessive modifier of a noun ("her project", "his car").
    if token.dep_ == "poss" and "poss" in roles:
        return "poss"
    if token.dep_ in ("nsubj", "nsubjpass") and "nsubj" in roles:
        return "nsubj"
    # Anything else is an object-like or stand-alone use ("saw her",
    # "the book is his"): prefer the roles that are neither subject nor
    # possessive determiner.
    remaining = [role for role in roles if role not in ("poss", "nsubj")]
    return remaining[0] if remaining else roles[0]


def replace_pronouns(text, name_pronoun_map, pronoun_options=None):
    """
    Replace pronouns in a text based on a mapping of names to pronouns.

    Every pronoun is attributed to the closest *preceding* named entity whose
    text appears in ``name_pronoun_map`` and is rewritten to that entity's
    pronoun type. This is a positional heuristic, not coreference resolution:
    a pronoun that actually refers to an earlier person is still rewritten
    according to the most recently mentioned mapped name.

    Args:
        text (str): The input text to process.
        name_pronoun_map (dict): A dictionary mapping names to pronoun types (e.g., "male", "female", "gender_fair").
            Names are matched case-insensitively against named-entity text.
        pronoun_options (dict, optional): A dictionary containing pronoun options for each type. Defaults to predefined options.
            Each type maps role names ("nsubj", "dobj", "poss",
            "poss_pronoun", "reflexive") to a pronoun form.

    Returns:
        dict: A dictionary containing the original text, modified text, and a list of replaced words with their attributes.
            Offsets in ``replaced_words`` refer to the original text.

    Raises:
        KeyError: If a name is mapped to a type missing from
            ``pronoun_options``, or that type lacks the required role.
    """
    if pronoun_options is None:
        pronoun_options = {
            "male": {
                "nsubj": "he",  # Subject pronoun
                "dobj": "him",  # Object pronoun
                "poss": "his",  # Possessive adjective
                "poss_pronoun": "his",  # Possessive pronoun
                "reflexive": "himself"  # Reflexive pronoun
            },
            "female": {
                "nsubj": "she",
                "dobj": "her",
                "poss": "her",
                "poss_pronoun": "hers",
                "reflexive": "herself"
            },
            "gender_fair": {
                "nsubj": "they",
                "dobj": "them",
                "poss": "their",
                "poss_pronoun": "theirs",
                "reflexive": "themselves"
            }
        }

    # Loading the pipeline is expensive; keep it on the function so repeated
    # calls reuse the same model instead of reloading it every time.
    nlp = getattr(replace_pronouns, "_nlp", None)
    if nlp is None:
        nlp = spacy.load("en_core_web_sm")
        replace_pronouns._nlp = nlp
    doc = nlp(text)

    # Surface form -> every role it can fill. A plain form -> role dict would
    # let "her"/"his" silently keep only the last role written.
    roles_by_form = {}
    for pronouns in pronoun_options.values():
        for role, form in pronouns.items():
            roles = roles_by_form.setdefault(form.lower(), [])
            if role not in roles:
                roles.append(role)

    name_to_category = {name.lower(): category for name, category in name_pronoun_map.items()}

    # (end token index, category) for each mapped entity, in document order.
    mapped_entities = [
        (ent.end, name_to_category[ent.text.lower()])
        for ent in doc.ents
        if ent.text.lower() in name_to_category
    ]

    replaced_text = []
    replaced_words = []

    next_entity = 0
    have_entity = False
    category = None

    for token in doc:
        # Advance to the last mapped entity that ends at or before this token.
        while next_entity < len(mapped_entities) and mapped_entities[next_entity][0] <= token.i:
            category = mapped_entities[next_entity][1]
            have_entity = True
            next_entity += 1

        lowered = token.text.lower()
        if token.pos_ != "PRON" or not have_entity or lowered not in roles_by_form:
            replaced_text.append(token.text_with_ws)
            continue

        role = _pick_role(token, roles_by_form[lowered])
        replacement = pronoun_options[category][role]

        # Only replace if the pronoun is different from the preferred pronoun
        if replacement.lower() == lowered:
            replaced_text.append(token.text_with_ws)
            continue

        # Keep sentence-initial / all-caps casing of the word being replaced.
        replacement = _match_case(token.text, replacement)
        replaced_text.append(replacement + token.whitespace_)
        replaced_words.append({
            "original_word": token.text,
            "replaced_word": replacement,
            "word_index": token.i,
            "char_offset": token.idx,
            "char_end_offset": token.idx + len(token.text)
        })

    # Tokens carry their own trailing whitespace, so a plain join restores spacing.
    return {
        "original_text": text,
        "modified_text": "".join(replaced_text),
        "replaced_words": replaced_words
    }

import json

# Demo: John keeps male pronouns, Mary is rewritten to gender-fair pronouns.
text = "John said he would help Mary with her project because she needed him."
name_pronoun_map = {
    "John": "male",
    "Mary": "gender_fair"
}

# Run the replacement once; the result is shown both as a raw dict and as
# pretty-printed JSON.
result = replace_pronouns(text, name_pronoun_map)
print(result)
print(json.dumps(result, indent=4))

{'original_text': 'John said he would help Mary with her project because she needed him.', 'modified_text': 'John said he would help Mary with their project because they needed them.', 'replaced_words': [{'original_word': 'her', 'replaced_word': 'their', 'word_index': 7, 'char_offset': 34, 'char_end_offset': 37}, {'original_word': 'she', 'replaced_word': 'they', 'word_index': 10, 'char_offset': 54, 'char_end_offset': 57}, {'original_word': 'him', 'replaced_word': 'them', 'word_index': 12, 'char_offset': 65, 'char_end_offset': 68}]}
{
    "original_text": "John said he would help Mary with her project because she needed him.",
    "modified_text": "John said he would help Mary with their project because they needed them.",
    "replaced_words": [
        {
            "original_word": "her",
            "replaced_word": "their",
            "word_index": 7,
            "char_offset": 34,
            "char_end_offset": 37
        },
        {
            "original_word": "she",
         