In [10]:
import spacy
import pandas as pd

nlp = spacy.load("en_core_web_sm")

# Unambiguous one-to-one pronoun swaps. "his" and "her" are deliberately absent:
# each maps to two different words depending on grammatical role and is
# resolved per token inside invert_pronouns().
_MALE_TO_FEMALE = {
    "he": "she",
    "him": "her",
    "himself": "herself",
}
_FEMALE_TO_MALE = {
    "she": "he",
    "hers": "his",
    "herself": "himself",
}


def _is_possessive_determiner(doc, index):
    """Return True when the token at `index` modifies another word ("his book")."""
    # The parser's "poss" label is the primary signal; the POS of the next
    # token is a fallback for cases where the parse label is something else.
    if doc[index].dep_ == "poss":
        return True
    following = index + 1
    return following < len(doc) and doc[following].pos_ in ("NOUN", "ADJ")


def _match_case(replacement, original):
    """Give `replacement` the casing pattern (ALL CAPS / Capitalized) of `original`."""
    if len(original) > 1 and original.isupper():
        return replacement.upper()
    if original[0].isupper():
        return replacement.capitalize()
    return replacement


def invert_pronouns(text, target_gender):
    """Swap gendered third-person pronouns in `text` to `target_gender`.

    Args:
        text: Sentence(s) to rewrite. Parsed with the module-level `nlp` pipeline.
        target_gender: "female" rewrites he/him/his/himself; "male" rewrites
            she/her/hers/herself. Any other value leaves every token untouched.

    Returns:
        The rewritten text with the original inter-token whitespace preserved
        and leading/trailing whitespace stripped.
    """
    doc = nlp(text)
    pieces = []
    for i, token in enumerate(doc):
        word = token.text.lower()
        replaced = None

        if target_gender == "female":
            if word == "his":
                # "his book" -> "her book", but "the book is his" -> "... is hers".
                replaced = "her" if _is_possessive_determiner(doc, i) else "hers"
            else:
                replaced = _MALE_TO_FEMALE.get(word)
        elif target_gender == "male":
            if word == "her":
                # "her book" -> "his book", but "I saw her" -> "I saw him".
                replaced = "his" if _is_possessive_determiner(doc, i) else "him"
            else:
                replaced = _FEMALE_TO_MALE.get(word)

        pieces.append(_match_case(replaced, token.text) if replaced else token.text)
        # whitespace_ is the trailing whitespace of the original token (may be "").
        pieces.append(token.whitespace_)

    return "".join(pieces).strip()




In [9]:

# Location of the labelled test cases.
# NOTE(review): absolute Colab-style path; adjust when running elsewhere.
TESTCASES_CSV = '/content/pronoun_testcases.csv'

df = pd.read_csv(TESTCASES_CSV)

# Each row becomes [input_text, target_gender, expected_output].
test_cases = df[['input_text', 'target_gender', 'expected_output']].values.tolist()

total = len(test_cases)
correct = 0

for input_text, target_gender, expected_output in test_cases:
    output = invert_pronouns(input_text, target_gender)

    # A non-string expectation (presumably NaN from an empty CSV cell) cannot
    # be compared, so the row is reported as N/A instead of pass/fail.
    if isinstance(expected_output, str):
        is_correct = expected_output.strip() == output.strip()
    else:
        is_correct = None

    if is_correct is None:
        verdict = 'N/A'
    elif is_correct:
        verdict = '✅'
    else:
        verdict = '❌'

    print(f"Input:    {input_text}")
    print(f"Target:   {target_gender}")
    print(f"Output:   {output}")
    print(f"Expected: {expected_output}")
    print(f"Match:    {verdict}\n")

    if is_correct:
        correct += 1

# N/A rows still count towards `total`. Guard the division so an empty CSV
# reports 0.00% instead of raising ZeroDivisionError.
accuracy_pct = (correct / total) * 100 if total else 0.0
print(f"Accuracy: {correct}/{total} ({accuracy_pct:.2f}%)")


Input:    He is going to the market.
Target:   female
Output:   She is going to the market.
Expected: She is going to the market.
Match:    ✅

Input:    His book is on the table.
Target:   female
Output:   Her book is on the table.
Expected: Her book is on the table.
Match:    ✅

Input:    I saw him yesterday.
Target:   female
Output:   I saw her yesterday.
Expected: I saw her yesterday.
Match:    ✅

Input:    He hurt himself.
Target:   female
Output:   She hurt herself.
Expected: She hurt herself.
Match:    ✅

Input:    I called him last night.
Target:   female
Output:   I called her last night.
Expected: I called her last night.
Match:    ✅

Input:    That is his car.
Target:   female
Output:   That is her car.
Expected: That is her car.
Match:    ✅

Input:    He told me about his trip.
Target:   female
Output:   She told me about her trip.
Expected: She told me about her trip.
Match:    ✅

Target:   female
Match:    ✅

Input:    He blames himself for the mistake.
Target:   female
Ou