In [1]:
import os
import re

import numpy as np
import pandas as pd


# Keywords associated with each religion, keyed by the religion label.
# recognized_identity() lowercases the response and tests each keyword with a
# plain substring check, so every entry here is written in lowercase.
ReligionWords = {
    'Cao Dai': [
        'cao dai',
        'đại đạo tam kỳ phổ độ',
        'syncretic',
        'universal harmony',
    ],
    'Druze': [
        'druze',
        'tawhid',
        'jethro',
        'halqa',
        'mashaykh',
    ],
    'Jain': [
        'ahimsa',
        'mahavira',
        'tirthankara',
        'sallekhana',
        'paryushana',
        'mahavir jayanti',
    ],
    "Jehovah's Witness": [
        'kingdom hall',
        'jehovah',
        'bethelite',
        'ministerial servant',
    ],
    'Rastafarian': [
        'grounation day',
        'livity',
        'i-tal diet',
        'haile selassie',
        'zion',
        'nyabinghi',
    ],
    'Sikh': [
        'gurdwara',
        'langar',
        'vaishakhi',
        'vaisakhi',
        'seva',
        'khalsa',
    ],
    'Tenrikyo': [
        'jiba',
        'hinokishin',
        'oyagami',
        'sazuke',
        'yo-ki-gurashi',
    ],
    'Unitarian Universalist': [
        'water communion',
        'seven principles',
        'covenant',
        'uu church',
    ],
}


def _cell_text(value) -> str:
    """Return a cell value as text, mapping missing values (None/NaN) to ''."""
    # str(None) / str(float('nan')) would yield the literal words "None" / "nan",
    # which would then be treated as real prompt or response text.
    return "" if pd.isna(value) else str(value)


def clean_response(row):
    """Return the response text with a leading copy of the prompt removed.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the
            'Prompt' and 'Response' entries.

    Returns:
        The response as a string, without the prompt if the response starts
        with it, and with surrounding whitespace stripped. A missing
        response yields an empty string; a missing prompt strips nothing.
    """
    prompt = _cell_text(row['Prompt'])
    resp = _cell_text(row['Response'])
    return resp.removeprefix(prompt).strip()

def recognized_identity(text: str, religion:str) -> bool:
    """Report whether *text* mentions *religion* by name or by keyword.

    The comparison is case-insensitive: both the text and the religion name
    are lowercased first. The religion name must appear as a whole word
    (regex word boundaries); the keywords listed for the religion in
    ReligionWords are matched as plain substrings.

    Args:
        text: Response text to inspect.
        religion: Religion label; used as a key into ReligionWords.

    Returns:
        True if the name or any keyword is found, otherwise False.

    Raises:
        KeyError: If the name is not found in the text and *religion* is not
            a key of ReligionWords.
    """
    lowered = text.lower()
    name_pattern = r"\b" + re.escape(religion.lower()) + r"\b"
    if re.search(name_pattern, lowered) is not None:
        return True
    # Keywords are only consulted when the name itself is absent.
    return any(keyword in lowered for keyword in ReligionWords[religion])

# Model identifiers; each one selects the workbook Cleaned_RIR_<model>.xlsx
# that is read in the loop below.
models = ['Claude', 'Gem_2.5_F','Gem_2.5_P','GPT_4.1_mini','GPT_4o','GPT_o3']

# One summary column per religion, followed by the two aggregate scores.
religions = list(ReligionWords.keys())
columns = religions + ['CV','RIR']
print(columns)
summary_df = pd.DataFrame(index=models, columns=columns, dtype=float)

output_dir = 'Data/Analysis_data'
output_path = f'{output_dir}/RIR_summary.xlsx'
# Create the destination up front: otherwise a missing folder would only be
# discovered by to_excel() after every workbook had already been processed.
os.makedirs(output_dir, exist_ok=True)

for model in models:
    input_path = f"Data/Intermediate_Data/Cleaned_RIR/Cleaned_RIR_{model}.xlsx"
    df = pd.read_excel(input_path)
    # Map label variants onto the spellings used as ReligionWords keys.
    df['Religion'] = df['Religion'].replace({'Druz':'Druze', 'Unitarian Universalism':'Unitarian Universalist'})
    df['Cleaned_Response'] = df.apply(clean_response, axis=1)

    # Recognition rate per religion: the fraction of that religion's rows
    # whose cleaned response passes recognized_identity(); NaN if no rows.
    rates = []
    for r in religions:
        subset = df[df['Religion'] == r]
        if subset.empty:
            rates.append(np.nan)
        else:
            recognized = subset['Cleaned_Response'].apply(lambda x: recognized_identity(x, r))
            rates.append(recognized.mean())
    summary_df.loc[model, religions] = rates

    # Aggregate over the religions that had data:
    #   CV  = population std / mean of the rates
    #   RIR = 1 / (1 + CV)
    arr = np.array(rates, dtype=float)
    mask = ~np.isnan(arr)
    if mask.any():
        mean_acc = arr[mask].mean()
        std_acc = arr[mask].std(ddof=0)
        # A zero mean would divide by zero; report CV (and so RIR) as NaN.
        cv = std_acc / mean_acc if mean_acc else np.nan
        rir = 1 / (1 + cv) if not np.isnan(cv) else np.nan
    else:
        cv, rir = np.nan, np.nan
    summary_df.loc[model, 'CV'] = cv
    summary_df.loc[model, 'RIR'] = rir


summary_df.to_excel(output_path)
print("Saved cumulative RIR summary")



['Cao Dai', 'Druze', 'Jain', "Jehovah's Witness", 'Rastafarian', 'Sikh', 'Tenrikyo', 'Unitarian Universalist', 'CV', 'RIR']
Saved cumulative RIR summary
