In [1]:
import pandas as pd
import spacy

In [2]:
# Load the labelled test cases; later cells read the columns
# input_text, target_gender and expected_output.
df = pd.read_csv("pronoun_testcases.csv")

In [3]:
# Quick look at the first five rows to confirm the CSV parsed as expected.
print(df.head(n=5))


                   input_text target_gender              expected_output
0  He is going to the market.        female  She is going to the market.
1   His book is on the table.        female    Her book is on the table.
2        I saw him yesterday.        female         I saw her yesterday.
3            He hurt himself.        female            She hurt herself.
4    I called him last night.        female     I called her last night.


In [4]:
# Load the spaCy pipeline once at module level. exchange_pronouns reads this
# global to tokenise its input and to obtain POS tags and dependency labels.
nlp = spacy.load("en_core_web_sm")

# Unambiguous one-to-one swaps, keyed by the lower-cased token text.
# 'his' and 'her' are deliberately absent: each maps to two different words
# depending on its grammatical role and is resolved inside exchange_pronouns.
_TO_FEMALE = {'he': 'she', 'him': 'her', 'himself': 'herself'}
_TO_MALE = {'she': 'he', 'hers': 'his', 'herself': 'himself'}


def _match_case(original, replacement):
    """Return `replacement` cased like `original` (ALL CAPS, Capitalised or lower)."""
    if len(original) > 1 and original.isupper():
        return replacement.upper()
    if original[0].isupper():
        return replacement.capitalize()
    return replacement


def _is_possessive_determiner(token):
    """Return True when the parse marks `token` as a possessive modifier ("her car")."""
    # Parenthesised explicitly: 'poss' alone is enough, while 'nmod' only
    # counts when the token is also tagged as a pronoun.
    return token.dep_ == 'poss' or (token.pos_ == 'PRON' and token.dep_ == 'nmod')


def exchange_pronouns(text, target_gender):
    """Swap gendered third-person singular pronouns in `text` to `target_gender`.

    The text is parsed with the module-level spaCy pipeline `nlp`; every token
    whose lower-cased text is a masculine (or feminine) pronoun is replaced by
    its counterpart, and the replacement is spliced in at the token's character
    offsets so all other characters of `text` are kept as they are.

    Two words are ambiguous and are resolved with the dependency parse:
      * 'her' -> 'his' when it is a possessive modifier, otherwise 'him'.
      * 'his' -> 'her' when it is a possessive modifier, otherwise 'hers'
        (e.g. "The book is his." -> "The book is hers.").

    Args:
        text: The sentence(s) to rewrite.
        target_gender: 'female' or 'male'. Matching is case-insensitive and
            ignores surrounding whitespace. Any other value (including
            non-string values such as NaN) leaves `text` unchanged.

    Returns:
        The rewritten text. Replacements follow the casing of the token they
        replace: ALL CAPS stays ALL CAPS, a leading capital is kept, anything
        else becomes lower case.
    """
    # Normalise once instead of once per token; str() guards against
    # non-string cells coming out of a DataFrame.
    gender = str(target_gender).strip().lower()
    if gender not in ('female', 'male'):
        # Nothing to swap, so skip the comparatively expensive parse.
        return text

    doc = nlp(text)

    # (start, end, new_text) character spans, collected in document order.
    replacements = []
    for token in doc:
        word = token.text.lower()

        if gender == 'female':
            if word == 'his':
                new_word = 'her' if _is_possessive_determiner(token) else 'hers'
            else:
                new_word = _TO_FEMALE.get(word)
        else:
            if word == 'her':
                new_word = 'his' if _is_possessive_determiner(token) else 'him'
            else:
                new_word = _TO_MALE.get(word)

        if new_word is None:
            continue

        start = token.idx
        replacements.append(
            (start, start + len(token.text), _match_case(token.text, new_word))
        )

    # Stitch the untouched stretches of text and the replacements together in
    # one left-to-right pass; spans are already sorted and never overlap
    # because each comes from a distinct token.
    pieces = []
    cursor = 0
    for start, end, new_text in replacements:
        pieces.append(text[cursor:start])
        pieces.append(new_text)
        cursor = end
    pieces.append(text[cursor:])

    return ''.join(pieces)

In [5]:
# Run exchange_pronouns over every test case. A plain zip over the two input
# columns avoids building a Series per row the way df.apply(axis=1) does.
df['predicted_output'] = [
    exchange_pronouns(input_text, target_gender)
    for input_text, target_gender in zip(df['input_text'], df['target_gender'])
]

# One boolean mask drives the failure list, the accuracy figure and the
# sample of successful cases, so the comparison is only done once.
matches = df['predicted_output'] == df['expected_output']
correct = int(matches.sum())
total = len(df)

# Report failures using the same 1-based numbering as before (index label + 1).
for i in df.index[~matches]:
    print(f"  Test {i+1} Failed")

if total:
    print(f" Accuracy: {correct}/{total} = {correct/total:.2%}")
else:
    # Guard against ZeroDivisionError when the CSV contains no rows.
    print(" Accuracy: 0/0 (no test cases loaded)")

print("\n Some correct transformations")
successful_cases = df[matches].head(5)
for i, row in successful_cases.iterrows():
    print(f"  '{row['input_text']}' : '{row['predicted_output']}'")

# Taking a random sample from the dataset
import random
if total:
    # randint is inclusive on both ends, hence total - 1; skipped entirely for
    # an empty dataset, where randint(0, -1) would raise ValueError.
    random_idx = random.randint(0, total - 1)
    sample_row = df.iloc[random_idx]

    print('\n Random Sample')
    print(f"Input: '{sample_row['input_text']}'")
    print(f"Output: '{sample_row['predicted_output']}'")
    print("\n Token Analysis:")

    # Show what the parser saw for this sentence; these POS and dependency
    # labels are the ones exchange_pronouns uses to disambiguate pronouns.
    doc = nlp(sample_row['input_text'])
    for token in doc:
        print(f"Token: '{token.text}', POS: {token.pos_}, Dep: {token.dep_}")

 Accuracy: 26/26 = 100.00%

 Some correct transformations
  'He is going to the market.' : 'She is going to the market.'
  'His book is on the table.' : 'Her book is on the table.'
  'I saw him yesterday.' : 'I saw her yesterday.'
  'He hurt himself.' : 'She hurt herself.'
  'I called him last night.' : 'I called her last night.'

 Random Sample
Input: 'That is her car.'
Output: 'That is his car.'

 Token Analysis:
Token: 'That', POS: PRON, Dep: nsubj
Token: 'is', POS: AUX, Dep: ROOT
Token: 'her', POS: PRON, Dep: poss
Token: 'car', POS: NOUN, Dep: attr
Token: '.', POS: PUNCT, Dep: punct
