In [11]:
import pandas as pd
import spacy

In [12]:
# Load the pronoun-swap test cases; the path is relative to the notebook's
# working directory.
df = pd.read_csv("pronoun_testcases.csv")

In [13]:
# Plain-text preview of the first five test cases (5 is the head() default).
print(df.head(n=5))


                   input_text target_gender              expected_output
0  He is going to the market.        female  She is going to the market.
1   His book is on the table.        female    Her book is on the table.
2        I saw him yesterday.        female         I saw her yesterday.
3            He hurt himself.        female            She hurt herself.
4    I called him last night.        female     I called her last night.


In [14]:
# Load the spaCy English pipeline once at module level; exchange_pronouns()
# reuses this shared `nlp` object for every sentence it parses.
nlp = spacy.load(name="en_core_web_sm")


# Pronoun swaps that need no syntactic context, keyed by target gender and then
# by the lower-cased source pronoun.  'his' (target female) and 'her' (target
# male) are ambiguous and are resolved in _pick_replacement().
_SIMPLE_SWAPS = {
    'female': {'he': 'she', 'him': 'her', 'himself': 'herself'},
    'male': {'she': 'he', 'hers': 'his', 'herself': 'himself'},
}


def _match_case(original, replacement):
    """Re-case the lower-case `replacement` to follow the casing of `original`."""
    # Fully upper-case words ("HE") stay fully upper-case ("SHE").
    if len(original) > 1 and original.isupper():
        return replacement.upper()
    if original[0].isupper():
        return replacement.capitalize()
    return replacement


def _is_possessive(token):
    """Heuristic on the parse: True when `token` is labelled as a possessive modifier."""
    dep = token.dep_
    return (
        dep == 'poss'
        or (token.pos_ == 'PRON' and dep == 'nmod')
        or (token.head.text.lower() == 'her' and dep == 'nmod')
    )


def _pick_replacement(token, gender):
    """Return the lower-case replacement for `token`, or None to leave it alone."""
    word = token.text.lower()

    simple = _SIMPLE_SWAPS.get(gender, {}).get(word)
    if simple is not None:
        return simple

    if gender == 'male' and word == 'her':
        # "her book" -> "his book", but "saw her" -> "saw him".
        return 'his' if _is_possessive(token) else 'him'

    if gender == 'female' and word == 'his':
        # "his book" -> "her book", but "the book is his" -> "the book is hers".
        # 'hers' is only chosen when the parse does not mark the token as
        # possessive AND its head does not follow it; any other case keeps the
        # plain 'her' mapping.
        # NOTE(review): relies on the parser's labels -- confirm on real data.
        if _is_possessive(token) or token.head.i > token.i:
            return 'her'
        return 'hers'

    return None


def exchange_pronouns(text, target_gender):
    """Rewrite third-person singular gendered pronouns in `text`.

    Args:
        text: Sentence(s) to rewrite; parsed with the module-level `nlp`
            pipeline.
        target_gender: 'female' swaps he/him/his/himself for their feminine
            forms; 'male' swaps she/her/hers/herself for their masculine
            forms.  Matching ignores case and surrounding whitespace.  Any
            other value (including non-strings) leaves `text` unchanged.

    Returns:
        A copy of `text` with the pronouns replaced.  Everything outside the
        replaced tokens is copied verbatim, and each replacement follows the
        casing of the word it replaces (lower, Capitalized or ALL CAPS).

    Note:
        'her' and 'his' are disambiguated using the dependency parse, so the
        result for those two words is only as good as the parse.
    """
    gender = str(target_gender).strip().lower()
    doc = nlp(text)

    # Build the result left to right: copy the untouched text before each
    # replaced token, then the replacement, then whatever is left at the end.
    pieces = []
    cursor = 0
    for token in doc:
        replacement = _pick_replacement(token, gender)
        if replacement is None:
            continue
        pieces.append(text[cursor:token.idx])
        pieces.append(_match_case(token.text, replacement))
        cursor = token.idx + len(token.text)
    pieces.append(text[cursor:])
    return ''.join(pieces)

In [19]:
import pandas as pd
from IPython.display import display, HTML
import random

# Fixed seed for the row picked for the token dump at the end of this cell, so
# re-running the notebook shows the same sample every time.
SAMPLE_SEED = 42

# Run the rewriter over every test case.  Iterating the two columns directly
# avoids building a Series per row and also works when the frame is empty.
df['predicted_output'] = [
    exchange_pronouns(text, gender)
    for text, gender in zip(df['input_text'], df['target_gender'])
]

# Compare predictions with the expected strings once and reuse the mask.
is_match = df['predicted_output'] == df['expected_output']
correct = int(is_match.sum())
total = len(df)
accuracy = correct / total if total else float('nan')
print(f"\nAccuracy: {correct}/{total} = {accuracy:.2%}")


# Show up to five cases where the prediction matched exactly.
successful_cases = df.loc[
    is_match,
    ['input_text', 'target_gender', 'expected_output', 'predicted_output'],
].head(5)
display(successful_cases)


# Dump the tokens of one sampled input to inspect the tags the rewriter sees.
if total:
    # A private Random instance keeps the global `random` state untouched.
    random_idx = random.Random(SAMPLE_SEED).randrange(total)
    sample_row = df.iloc[random_idx]

    print("\nToken Analysis:")
    doc = nlp(sample_row['input_text'])
    for token in doc:
        print(f"Token: '{token.text}', POS: {token.pos_}, Dep: {token.dep_}")
else:
    print("\nToken Analysis: skipped (no test cases)")


Accuracy: 26/26 = 100.00%


Unnamed: 0,input_text,target_gender,expected_output,predicted_output
0,He is going to the market.,female,She is going to the market.,She is going to the market.
1,His book is on the table.,female,Her book is on the table.,Her book is on the table.
2,I saw him yesterday.,female,I saw her yesterday.,I saw her yesterday.
3,He hurt himself.,female,She hurt herself.,She hurt herself.
4,I called him last night.,female,I called her last night.,I called her last night.



Token Analysis:
Token: 'The', POS: DET, Dep: det
Token: 'teacher', POS: NOUN, Dep: nsubj
Token: 'gave', POS: VERB, Dep: ROOT
Token: 'him', POS: PRON, Dep: dative
Token: 'a', POS: DET, Dep: det
Token: '.', POS: PUNCT, Dep: punct
