In [3]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files (the
# report is written to the same folder).
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Mistral-Small-24B-Instruct-2501'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC15-MS'


def to_int(val):
    """Convert a raw cell value to an int, or None when it is not numeric.

    Strings are stripped of surrounding whitespace first. The value is parsed
    through ``float`` so inputs such as "2.0" or 2.0 become 2; any fractional
    part is truncated toward zero.

    Args:
        val: Any scalar read from a CSV cell (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None if ``val`` cannot be parsed as a finite number.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: unparsable
        # strings and NaN; OverflowError: +/-inf. Everything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no rows answered "Yes" in both files).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Both columns are normalised once here, outside the loop,
# because neither depends on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC15_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC15_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in a single merge so every answer
    # stays attached to the stance from its own row (re-merging on ID later
    # would cross-pair rows whenever an ID repeats).
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items answered "Yes" in both languages.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        print(f"[Warning] No rows with GPT_Answer == 'Yes' in both files for: {culture}")
        continue

    # Drop unparsable stances, then keep only pairs where both stances are 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # PM   = number of pairs whose stances differ
    # PMR  = PM / #REC
    # WBR  = share of the mismatches where English chose 1 and the culture chose 2
    # #REC = number of pairs compared
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    pmr = mismatch_count / total_records if total_records else 0
    english_1_other_2 = int(((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum())
    wbr = english_1_other_2 / mismatch_count if mismatch_count else 0

    # Store the metrics in this culture's row of the report.
    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Write the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Mistral-Small-24B-Instruct-2501-NC15-MS/Mistral-Small-24B-Instruct-2501_comparison_report.csv


In [17]:
import pandas as pd
import re
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files.
# NOTE(review): absolute, user-specific paths; adjust when running elsewhere.
model_name = 'Llama-3.1-70B-Instruct'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC3-MS'
# Spreadsheet that supplies the ID and ExtractedNames columns used below.
gender_info_file = '/home/maz2h/MoralStory/full_output_names_gpt_turkish.xlsx'
# Upper bound on the number of rows sampled per gender group and culture.
sample_size = 3000

# Load the name/gender annotation sheet.
df_names = pd.read_excel(gender_info_file)

# Parse the gender labels out of the ExtractedNames text
def extract_genders(text):
    """Return the lowercase gender labels found in an ExtractedNames cell.

    The text is scanned for "(<name>, Male)" / "(<name>, Female)" pairs,
    case-insensitively, and the gender of each pair is collected in order.

    Args:
        text: Cell value; missing values (None/NaN) yield an empty list.

    Returns:
        A list of 'male' / 'female' strings, one per matched pair. If the
        value cannot be scanned, the error is printed and [] is returned.
    """
    if pd.isna(text):
        return []
    try:
        pairs = re.findall(r'\(([^,]+),\s*(Male|Female)\)', text, flags=re.IGNORECASE)
    except Exception as e:
        print(f"Error parsing: {text} -> {e}")
        return []
    # Only the second capture group (the gender) is kept.
    return [label.lower() for _name, label in pairs]

# One list of lowercase gender labels per row, parsed from ExtractedNames.
df_names['gender_list'] = df_names['ExtractedNames'].apply(extract_genders)

# 
def detect_gender_group(gender_list):
    """Classify a list of gender labels as one group name.

    Args:
        gender_list: Sequence of 'male' / 'female' labels (may be empty).

    Returns:
        'masculine' if every label is 'male', 'feminine' if every label is
        'female', and 'mixed' otherwise (including for an empty list).
    """
    if gender_list:
        for group, label in (('masculine', 'male'), ('feminine', 'female')):
            if all(g == label for g in gender_list):
                return group
    return 'mixed'

# Label every row of the annotation sheet with its gender group.
df_names['gender_group'] = df_names['gender_list'].apply(detect_gender_group)

# One dict per culture; each becomes one row of the report.
final_rows = []

# English baseline stances, tagged with the gender group of each ID.
file_english = os.path.join(base_input_folder, f"{model_name}_NC3_MS_english_1.csv")
df_english = pd.read_csv(file_english)
df_english = df_english.rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = pd.to_numeric(df_english['stance_english'], errors='coerce')
df_english = df_english.merge(df_names[['ID', 'gender_group']], on='ID', how='left')

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC3_MS_{culture}_1.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    # Culture stances, reduced to ID + stance and tagged with the gender group.
    df_culture = pd.read_csv(file_culture)
    df_culture = df_culture.rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = pd.to_numeric(df_culture['stance_other'], errors='coerce')
    df_culture = df_culture[['ID', 'stance_other']].merge(
        df_names[['ID', 'gender_group']], on='ID', how='left'
    )

    # Pair the two runs and keep only rows where both stances are 1 or 2.
    df_merged = df_english.merge(df_culture, on=['ID', 'gender_group'], how='inner')
    df_merged = df_merged.dropna(subset=['stance_english', 'stance_other'])
    both_valid = df_merged['stance_english'].isin([1, 2]) & df_merged['stance_other'].isin([1, 2])
    df_merged = df_merged[both_valid]

    row = {'Culture': culture}

    for gender in ['masculine', 'feminine']:
        prefix = gender.capitalize()
        df_group = df_merged[df_merged['gender_group'] == gender]
        available_records = len(df_group)

        if available_records > 0:
            # Cap the group at sample_size rows; the fixed seed keeps the
            # sample the same on every run.
            n_sample = min(sample_size, available_records)
            df_sampled = df_group.sample(n=n_sample, random_state=42)

            total_records = len(df_sampled)
            mismatches = df_sampled[df_sampled['stance_english'] != df_sampled['stance_other']]
            mismatch_count = len(mismatches)
            pmr = mismatch_count / total_records if total_records else 0
            if mismatch_count:
                english_1_other_2 = mismatches[
                    (mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)
                ]
                wbr = english_1_other_2.shape[0] / mismatch_count
            else:
                wbr = 0

            row.update({
                f'{prefix}_#REC': total_records,
                f'{prefix}_PM': mismatch_count,
                f'{prefix}_PMR': f"{pmr:.2%}",
                f'{prefix}_WBR': f"{wbr:.2%}",
            })
        else:
            # No rows for this gender group: emit zero placeholders.
            row.update({
                f'{prefix}_#REC': 0,
                f'{prefix}_PM': 0,
                f'{prefix}_PMR': "0%",
                f'{prefix}_WBR': "0%",
            })

    final_rows.append(row)

# Assemble the side-by-side report, show it, and save it with the inputs.
final_df = pd.DataFrame(final_rows)

print(final_df)

report_path = os.path.join(base_input_folder, f'{model_name}_NC3_MS_gender_bias_side_by_side.csv')
final_df.to_csv(report_path, index=False)
print(f"\n Report saved to {model_name}_NC3_MS_gender_bias_side_by_side.csv")


    Culture  Masculine_#REC  Masculine_PM Masculine_PMR Masculine_WBR  \
0    arabic            3000           906        30.20%        86.75%   
1   african            3000           995        33.17%        89.15%   
2   turkish            3000           754        25.13%        86.60%   
3    indian            3000           426        14.20%        69.48%   
4   persian            3000           631        21.03%        84.63%   
5    korean            3000          1140        38.00%        87.72%   
6   chinese            3000          1211        40.37%        87.37%   
7  japanese            3000           997        33.23%        88.06%   

   Feminine_#REC  Feminine_PM Feminine_PMR Feminine_WBR  
0           2971          857       28.85%       86.81%  
1           2972         1064       35.80%       89.94%  
2           2996          773       25.80%       88.62%  
3           2801          468       16.71%       77.56%  
4           2982          673       22.57%       85.

In [1]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files (the
# report is written to the same folder).
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Llama-3.1-70B-Instruct'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC8-MS'

def to_int(val):
    """Convert a raw cell value to an int, or None when it is not numeric.

    Strings are stripped of surrounding whitespace first. The value is parsed
    through ``float`` so inputs such as "2.0" or 2.0 become 2; any fractional
    part is truncated toward zero.

    Args:
        val: Any scalar read from a CSV cell (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None if ``val`` cannot be parsed as a finite number.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: unparsable
        # strings and NaN; OverflowError: +/-inf. Everything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no rows answered "Yes" in both files).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Both columns are normalised once here, outside the loop,
# because neither depends on the culture being compared.
# NOTE(review): the folder name says NC8 but the file names say NC7 — confirm
# this is intentional.
file_english = os.path.join(base_input_folder, f"{model_name}_NC7_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC7_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in a single merge so every answer
    # stays attached to the stance from its own row (re-merging on ID later
    # would cross-pair rows whenever an ID repeats).
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items answered "Yes" in both languages.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        print(f"[Warning] No rows with GPT_Answer == 'Yes' in both files for: {culture}")
        continue

    # Drop unparsable stances, then keep only pairs where both stances are 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # PM   = number of pairs whose stances differ
    # PMR  = PM / #REC
    # WBR  = share of the mismatches where English chose 1 and the culture chose 2
    # #REC = number of pairs compared
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    pmr = mismatch_count / total_records if total_records else 0
    english_1_other_2 = int(((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum())
    wbr = english_1_other_2 / mismatch_count if mismatch_count else 0

    # Store the metrics in this culture's row of the report.
    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Write the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Llama-3.1-70B-Instruct-NC8-MS/Llama-3.1-70B-Instruct_comparison_report.csv


In [2]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files (the
# report is written to the same folder).
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Qwen2.5-72B-Instruct'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC9-MS'

def to_int(val):
    """Convert a raw cell value to an int, or None when it is not numeric.

    Strings are stripped of surrounding whitespace first. The value is parsed
    through ``float`` so inputs such as "2.0" or 2.0 become 2; any fractional
    part is truncated toward zero.

    Args:
        val: Any scalar read from a CSV cell (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None if ``val`` cannot be parsed as a finite number.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: unparsable
        # strings and NaN; OverflowError: +/-inf. Everything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no rows answered "Yes" in both files).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Both columns are normalised once here, outside the loop,
# because neither depends on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC9_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC9_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in a single merge so every answer
    # stays attached to the stance from its own row (re-merging on ID later
    # would cross-pair rows whenever an ID repeats).
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items answered "Yes" in both languages.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        print(f"[Warning] No rows with GPT_Answer == 'Yes' in both files for: {culture}")
        continue

    # Drop unparsable stances, then keep only pairs where both stances are 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # PM   = number of pairs whose stances differ
    # PMR  = PM / #REC
    # WBR  = share of the mismatches where English chose 1 and the culture chose 2
    # #REC = number of pairs compared
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    pmr = mismatch_count / total_records if total_records else 0
    english_1_other_2 = int(((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum())
    wbr = english_1_other_2 / mismatch_count if mismatch_count else 0

    # Store the metrics in this culture's row of the report.
    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Write the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Qwen2.5-72B-Instruct-NC9-MS/Qwen2.5-72B-Instruct_comparison_report.csv


In [9]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files (the
# report is written to the same folder).
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Llama-3.1-8B-Instruct'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC10-MS'

def to_int(val):
    """Convert a raw cell value to an int, or None when it is not numeric.

    Strings are stripped of surrounding whitespace first. The value is parsed
    through ``float`` so inputs such as "2.0" or 2.0 become 2; any fractional
    part is truncated toward zero.

    Args:
        val: Any scalar read from a CSV cell (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None if ``val`` cannot be parsed as a finite number.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: unparsable
        # strings and NaN; OverflowError: +/-inf. Everything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no rows answered "Yes" in both files).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Both columns are normalised once here, outside the loop,
# because neither depends on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC10_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC10_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in a single merge so every answer
    # stays attached to the stance from its own row (re-merging on ID later
    # would cross-pair rows whenever an ID repeats).
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items answered "Yes" in both languages.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        print(f"[Warning] No rows with GPT_Answer == 'Yes' in both files for: {culture}")
        continue

    # Drop unparsable stances, then keep only pairs where both stances are 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # PM   = number of pairs whose stances differ
    # PMR  = PM / #REC
    # WBR  = share of the mismatches where English chose 1 and the culture chose 2
    # #REC = number of pairs compared
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    pmr = mismatch_count / total_records if total_records else 0
    english_1_other_2 = int(((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum())
    wbr = english_1_other_2 / mismatch_count if mismatch_count else 0

    # Store the metrics in this culture's row of the report.
    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Write the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Llama-3.1-8B-Instruct-NC10-MS/Llama-3.1-8B-Instruct_comparison_report.csv


In [1]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files (the
# report is written to the same folder).
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Qwen2-72B-Instruct'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC11-MS'

def to_int(val):
    """Convert a raw cell value to an int, or None when it is not numeric.

    Strings are stripped of surrounding whitespace first. The value is parsed
    through ``float`` so inputs such as "2.0" or 2.0 become 2; any fractional
    part is truncated toward zero.

    Args:
        val: Any scalar read from a CSV cell (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None if ``val`` cannot be parsed as a finite number.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: unparsable
        # strings and NaN; OverflowError: +/-inf. Everything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no rows answered "Yes" in both files).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Both columns are normalised once here, outside the loop,
# because neither depends on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC11_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC11_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in a single merge so every answer
    # stays attached to the stance from its own row (re-merging on ID later
    # would cross-pair rows whenever an ID repeats).
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items answered "Yes" in both languages.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        print(f"[Warning] No rows with GPT_Answer == 'Yes' in both files for: {culture}")
        continue

    # Drop unparsable stances, then keep only pairs where both stances are 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # PM   = number of pairs whose stances differ
    # PMR  = PM / #REC
    # WBR  = share of the mismatches where English chose 1 and the culture chose 2
    # #REC = number of pairs compared
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    pmr = mismatch_count / total_records if total_records else 0
    english_1_other_2 = int(((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum())
    wbr = english_1_other_2 / mismatch_count if mismatch_count else 0

    # Store the metrics in this culture's row of the report.
    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Write the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Qwen2-72B-Instruct-NC11-MS/Qwen2-72B-Instruct_comparison_report.csv


In [4]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files (the
# report is written to the same folder).
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Mistral-7B-Instruct-v0.3'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC12-MS'

def to_int(val):
    """Convert a raw cell value to an int, or None when it is not numeric.

    Strings are stripped of surrounding whitespace first. The value is parsed
    through ``float`` so inputs such as "2.0" or 2.0 become 2; any fractional
    part is truncated toward zero.

    Args:
        val: Any scalar read from a CSV cell (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None if ``val`` cannot be parsed as a finite number.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: unparsable
        # strings and NaN; OverflowError: +/-inf. Everything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no rows answered "Yes" in both files).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Both columns are normalised once here, outside the loop,
# because neither depends on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC12_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC12_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in a single merge so every answer
    # stays attached to the stance from its own row (re-merging on ID later
    # would cross-pair rows whenever an ID repeats).
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items answered "Yes" in both languages.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        print(f"[Warning] No rows with GPT_Answer == 'Yes' in both files for: {culture}")
        continue

    # Drop unparsable stances, then keep only pairs where both stances are 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # PM   = number of pairs whose stances differ
    # PMR  = PM / #REC
    # WBR  = share of the mismatches where English chose 1 and the culture chose 2
    # #REC = number of pairs compared
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    pmr = mismatch_count / total_records if total_records else 0
    english_1_other_2 = int(((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum())
    wbr = english_1_other_2 / mismatch_count if mismatch_count else 0

    # Store the metrics in this culture's row of the report.
    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Write the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Mistral-7B-Instruct-v0.3-NC12-MS/Mistral-7B-Instruct-v0.3_comparison_report.csv


In [1]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files (the
# report is written to the same folder).
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Mistral-Small-24B-Instruct-2501'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC18-MS'

def to_int(val):
    """Convert a raw cell value to an int, or None when it is not numeric.

    Strings are stripped of surrounding whitespace first. The value is parsed
    through ``float`` so inputs such as "2.0" or 2.0 become 2; any fractional
    part is truncated toward zero.

    Args:
        val: Any scalar read from a CSV cell (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None if ``val`` cannot be parsed as a finite number.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: unparsable
        # strings and NaN; OverflowError: +/-inf. Everything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no rows answered "Yes" in both files).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Both columns are normalised once here, outside the loop,
# because neither depends on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC18_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC18_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in a single merge so every answer
    # stays attached to the stance from its own row (re-merging on ID later
    # would cross-pair rows whenever an ID repeats).
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items answered "Yes" in both languages.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        print(f"[Warning] No rows with GPT_Answer == 'Yes' in both files for: {culture}")
        continue

    # Drop unparsable stances, then keep only pairs where both stances are 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # PM   = number of pairs whose stances differ
    # PMR  = PM / #REC
    # WBR  = share of the mismatches where English chose 1 and the culture chose 2
    # #REC = number of pairs compared
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    pmr = mismatch_count / total_records if total_records else 0
    english_1_other_2 = int(((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum())
    wbr = english_1_other_2 / mismatch_count if mismatch_count else 0

    # Store the metrics in this culture's row of the report.
    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Write the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Mistral-Small-24B-Instruct-2501-NC18-MS/Mistral-Small-24B-Instruct-2501_comparison_report.csv


In [3]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files (the
# report is written to the same folder).
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Llama-3.1-70B-Instruct'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC80-MS'

def to_int(val):
    """Convert a raw cell value to an int, or None when it is not numeric.

    Strings are stripped of surrounding whitespace first. The value is parsed
    through ``float`` so inputs such as "2.0" or 2.0 become 2; any fractional
    part is truncated toward zero.

    Args:
        val: Any scalar read from a CSV cell (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None if ``val`` cannot be parsed as a finite number.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and other non-numeric objects; ValueError: unparsable
        # strings and NaN; OverflowError: +/-inf. Everything else (for example
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no rows answered "Yes" in both files).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Both columns are normalised once here, outside the loop,
# because neither depends on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC80_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC80_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in a single merge so every answer
    # stays attached to the stance from its own row (re-merging on ID later
    # would cross-pair rows whenever an ID repeats).
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items answered "Yes" in both languages.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        print(f"[Warning] No rows with GPT_Answer == 'Yes' in both files for: {culture}")
        continue

    # Drop unparsable stances, then keep only pairs where both stances are 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # PM   = number of pairs whose stances differ
    # PMR  = PM / #REC
    # WBR  = share of the mismatches where English chose 1 and the culture chose 2
    # #REC = number of pairs compared
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    pmr = mismatch_count / total_records if total_records else 0
    english_1_other_2 = int(((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum())
    wbr = english_1_other_2 / mismatch_count if mismatch_count else 0

    # Store the metrics in this culture's row of the report.
    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Write the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Llama-3.1-70B-Instruct-NC80-MS/Llama-3.1-70B-Instruct_comparison_report.csv


In [8]:
import pandas as pd
import os

# 
# Run configuration: the model being evaluated, the cultures compared against
# the English baseline, and the folder that holds this run's CSV files.
# NOTE(review): absolute, user-specific path; adjust when running elsewhere.
model_name = 'Llama-3.1-8B-Instruct'
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC90-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC90_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC90_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Llama-3.1-8B-Instruct-NC90-MS/Llama-3.1-8B-Instruct_comparison_report.csv


In [4]:
import pandas as pd
import os

# 
# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'Llama-3.3-70B-Instruct'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC100-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC100_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC100_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Llama-3.3-70B-Instruct-NC100-MS/Llama-3.3-70B-Instruct_comparison_report.csv


In [4]:
import pandas as pd
import os

# 
# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'Qwen2-72B-Instruct'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC200-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC200_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC200_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Qwen2-72B-Instruct-NC200-MS/Qwen2-72B-Instruct_comparison_report.csv


In [8]:
import pandas as pd
import os

# 
# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'Qwen2.5-72B-Instruct'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC300-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC300_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC300_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Qwen2.5-72B-Instruct-NC300-MS/Qwen2.5-72B-Instruct_comparison_report.csv


In [18]:
import pandas as pd
import os

# 
# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'Qwen2.5-7B-Instruct'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC400-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC400_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC400_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Qwen2.5-7B-Instruct-NC400-MS/Qwen2.5-7B-Instruct_comparison_report.csv


In [1]:
import pandas as pd
import os

# 
# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'Mistral-Nemo-Instruct-2407'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC500-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC500_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC500_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Mistral-Nemo-Instruct-2407-NC500-MS/Mistral-Nemo-Instruct-2407_comparison_report.csv


In [2]:
import pandas as pd
import os

# 
# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'Mistral-Small-24B-Instruct-2501'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC900-MS-temp0.8'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC900_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC900_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/Mistral-Small-24B-Instruct-2501-NC900-MS-temp0.8/Mistral-Small-24B-Instruct-2501_comparison_report.csv


In [3]:
import pandas as pd
import os

# 
# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'gemma-2-9b-it'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC3000-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC3000_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC3000_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/gemma-2-9b-it-NC3000-MS/gemma-2-9b-it_comparison_report.csv


In [13]:
import pandas as pd
import os

# 
# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'gemma-2-27b-it'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC6000-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Report skeleton: one row per culture. Metric cells stay None for any culture
# that is skipped below (missing file, or no item answered "Yes" on both sides).
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# NOTE (kept from the original cell): the consistency check of GPT_Answer (norm)
# across all cultures was meant to be placed here.

# English baseline. Its stance and GPT_Answer columns are normalised once here,
# since they do not depend on the culture being compared.
file_english = os.path.join(base_input_folder, f"{model_name}_NC6000_MS_english_all.csv")
df_english = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english['stance_english'] = df_english['stance_english'].apply(to_int)
df_english['GPT_Answer'] = df_english['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC6000_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep only items whose GPT_Answer is exactly "Yes" in both files.
    filtered = merged[(merged["GPT_Answer_eng"] == "Yes") & (merged["GPT_Answer_other"] == "Yes")]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Save the report next to the input files.
output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report.csv')
final_df.to_csv(output_file, index=False)
print(f" Report saved to: {output_file}")

 Report saved to: /home/maz2h/MoralStory/gemma-2-27b-it-NC6000-MS/gemma-2-27b-it_comparison_report.csv


In [12]:
import pandas as pd
import os

# Model whose outputs are analysed; the name is part of the folder and CSV file names.
model_name = 'gemma-2-27b-it'
# Culture labels; each one has its own CSV that is compared with the English CSV.
cultures = 'arabic african turkish indian persian korean chinese japanese'.split()
# Folder that holds this model's *_all.csv files; the report is written here as well.
base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC6000-MS'

def to_int(val):
    """Convert a CSV cell to an int, or return None when it is not a finite number.

    Strings are stripped first. The value is parsed through float(), so inputs
    such as "2.0" or 1.0 become 2 and 1 (any fractional part is truncated).

    Returns:
        The integer value, or None when `val` cannot be parsed (None, empty or
        non-numeric text, NaN, infinity).
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Narrow catch instead of a bare `except:` so that KeyboardInterrupt and
        # SystemExit are not swallowed while a column is being converted.
        return None

# Step 1: Load the English answers (ID + lower-cased, trimmed GPT_Answer).
# Built as one chain so no column is assigned on a slice of the loaded frame,
# which can raise SettingWithCopyWarning.
file_english = os.path.join(base_input_folder, f"{model_name}_NC6000_MS_english_all.csv")
df_english = pd.read_csv(file_english)[['ID', 'GPT_Answer']].rename(
    columns={'GPT_Answer': 'GPT_Answer_english'}
)
df_english['GPT_Answer_english'] = df_english['GPT_Answer_english'].astype(str).str.strip().str.lower()

# Step 2: Inner-join every available culture onto the English table by ID, adding
# one GPT_Answer_<culture> column each. Missing culture files are reported and skipped.
merged_df = df_english.copy()
for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC6000_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    answer_col = f'GPT_Answer_{culture}'
    df_culture = pd.read_csv(file_culture)[['ID', 'GPT_Answer']].rename(columns={'GPT_Answer': answer_col})
    df_culture[answer_col] = df_culture[answer_col].astype(str).str.strip().str.lower()

    merged_df = pd.merge(merged_df, df_culture, on='ID', how='inner')

# Step 3: consistency_flag is 1 when every GPT_Answer_* column of the row is 'yes',
# else 0. Computed column-wise instead of with a Python lambda per row.
answer_columns = [col for col in merged_df.columns if col.startswith('GPT_Answer')]
merged_df['consistency_flag'] = merged_df[answer_columns].eq('yes').all(axis=1).astype(int)

# Step 4: Write consistency flag back to each culture file (the files are overwritten).
flag_by_id = merged_df[['ID', 'consistency_flag']]
for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC6000_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        continue

    # An earlier run of this cell already stored a flag column in the file. Remove
    # it (and the _x/_y pair that a repeated merge leaves behind) so the merge below
    # always yields exactly one 'consistency_flag' column; otherwise a re-run would
    # produce consistency_flag_x / consistency_flag_y and Step 5 would fail to find
    # 'consistency_flag'.
    df_culture = pd.read_csv(file_culture).drop(
        columns=['consistency_flag', 'consistency_flag_x', 'consistency_flag_y'],
        errors='ignore',
    )
    # Left join: rows whose ID is absent from merged_df get an empty flag.
    df_culture = pd.merge(df_culture, flag_by_id, on='ID', how='left')
    df_culture.to_csv(file_culture, index=False)

print("Consistency flags added to all _all.csv files.")

# Step 5: Generate final comparison report.
# One row per culture; metric cells stay None for any culture that is skipped below.
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# Reload the full English file (the Step 1 frame kept only ID and GPT_Answer).
df_english_full = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english_full['stance_english'] = df_english_full['stance_english'].apply(to_int)
df_english_full['GPT_Answer'] = df_english_full['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC6000_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    # Pair English and culture rows by ID in ONE join that carries the stance
    # columns along. Re-joining the stances in follow-up merges would multiply
    # rows whenever an ID occurs more than once in either file.
    merged = pd.merge(
        df_english_full[["ID", "GPT_Answer", "stance_english"]],
        df_culture[["ID", "GPT_Answer", "stance_other", "consistency_flag"]],
        on="ID",
        suffixes=("_eng", "_other"),
    )

    # Keep items answered exactly "Yes" on both sides AND flagged consistent across
    # all cultures (the flag is read from the culture file written in Step 4).
    filtered = merged[
        (merged["GPT_Answer_eng"] == "Yes")
        & (merged["GPT_Answer_other"] == "Yes")
        & (merged["consistency_flag"] == 1)
    ]
    if filtered.empty:
        continue

    # Drop rows with missing values, then rows whose stance is not 1 or 2 on either side.
    filtered = filtered.dropna()
    filtered = filtered[filtered['stance_english'].isin([1, 2]) & filtered['stance_other'].isin([1, 2])]

    # '#REC' = rows compared, 'PM' = rows whose two stances differ.
    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # 'PMR' = mismatches / compared rows.
    pmr = mismatch_count / total_records if total_records else 0
    # 'WBR' = share of mismatches where English chose 1 and the culture chose 2.
    english1_culture2 = ((mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)).sum()
    wbr = english1_culture2 / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Step 6: Optional: Add overall consistency summary.
# 1 only when every row of merged_df is flagged consistent; the same value is
# written to every report row.
overall_consistency = merged_df['consistency_flag'].sum() == len(merged_df)
final_df['Overall_Consistency_Flag'] = int(overall_consistency)

output_file = os.path.join(base_input_folder, f'{model_name}_comparison_report_2.csv')
final_df.to_csv(output_file, index=False)
print(f"Report saved to: {output_file}")


Consistency flags added to all _all.csv files.
Report saved to: /home/maz2h/MoralStory/gemma-2-27b-it-NC6000-MS/gemma-2-27b-it_comparison_report_2.csv


In [5]:
#--------------------------------------------------------
# ASKING the NORM / Without Specifying the Norm
#---------------------------------------------------------
import pandas as pd
import os

# Model whose answers are analysed; the name is embedded in every input/output file name below.
model_name = 'Mistral-Small-24B-Instruct-2501'
# Cultures compared against the English baseline; each one is read from
# "<model_name>_NC900C_MS_<culture>_all.csv" inside base_input_folder.
cultures = ['arabic', 'african', 'turkish', 'indian', 'persian', 'korean', 'chinese', 'japanese']
# Alternative location, kept for reference:
# base_input_folder = f'/home/maz2h/MoralStory/{model_name}-NC6000-MS'
# The folder is derived from model_name so that switching the model cannot leave
# the path pointing at another model's data.
base_input_folder = f"/mnt/pixstor/data/maz2h/MoralStory/Not-specifying-MoralNorms/{model_name}-NC900C-MS"

def to_int(val):
    """Coerce a raw CSV cell to an int, or return None when it is not a usable number.

    Strings are stripped first; the value then goes through ``float()`` so that
    inputs such as "2.0" are accepted (any fractional part is truncated).

    Args:
        val: Any scalar read from a CSV column (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None for blanks, non-numeric text, None, NaN
        and infinities.
    """
    if isinstance(val, str):
        val = val.strip()
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / non-numeric objects; ValueError: unparsable text or NaN;
        # OverflowError: +/-inf. Anything else (e.g. KeyboardInterrupt) must
        # propagate instead of being silently turned into None.
        return None

# Step 1: Load English file (the baseline every culture is compared against).
# The path is built from base_input_folder / model_name, exactly like the culture
# files below, instead of repeating the whole folder as a second hard-coded literal.
file_english = os.path.join(base_input_folder, f"{model_name}_NC900C_MS_english_all.csv")
# Keep only the join key and the answer; the answer column is renamed so it stays
# distinguishable once the culture answers are merged in.
df_english = pd.read_csv(file_english)[['ID', 'GPT_Answer']].rename(
    columns={'GPT_Answer': 'GPT_Answer_english'}
)
# Normalise to stripped, lower-case text so "Yes", " yes " and "YES" compare equal.
df_english['GPT_Answer_english'] = df_english['GPT_Answer_english'].astype(str).str.strip().str.lower()

# Step 2: Inner-join the answers of every available culture onto the English
# answers by ID, producing one "GPT_Answer_<culture>" column per culture.
merged_df = df_english.copy()
for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC900C_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    answer_col = f'GPT_Answer_{culture}'
    df_culture = pd.read_csv(file_culture)[['ID', 'GPT_Answer']].rename(columns={'GPT_Answer': answer_col})
    # Same normalisation as the English column: stripped, lower-case text.
    df_culture[answer_col] = df_culture[answer_col].astype(str).str.strip().str.lower()

    # Inner join: only IDs present in every file read so far survive.
    merged_df = merged_df.merge(df_culture, on='ID', how='inner')

# Step 3: Check consistency per row (all Yes)
# A row gets flag 1 only when the English answer and every merged culture answer
# is "yes" (the columns were lower-cased when they were loaded), otherwise 0.
# Vectorised comparison instead of a row-wise apply: same flags, but faster, and
# it still yields a well-formed (empty) integer column when no ID survived the joins.
answer_columns = [col for col in merged_df.columns if col.startswith('GPT_Answer')]
merged_df['consistency_flag'] = merged_df[answer_columns].eq('yes').all(axis=1).astype(int)

# Step 4: Write consistency flag back to each culture file
# NOTE: this rewrites the input CSVs in place.
flag_by_id = merged_df[['ID', 'consistency_flag']]
for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC900C_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        continue

    df_culture = pd.read_csv(file_culture)
    # Drop a flag written by a previous run before merging the fresh one. Without
    # this, re-running the cell makes merge() emit consistency_flag_x / _y and the
    # report step below fails with a KeyError on 'consistency_flag'.
    df_culture = df_culture.drop(columns=['consistency_flag'], errors='ignore')
    # Left join keeps every row of the culture file; IDs missing from merged_df get NaN.
    df_culture = pd.merge(df_culture, flag_by_id, on='ID', how='left')
    df_culture.to_csv(file_culture, index=False)

print("Consistency flags added to all _all.csv files.")

# Step 5: Generate final comparison report
# One row per culture. Metrics stay None for cultures whose file is missing or
# that have no row passing the answer/consistency filter.
#   PM   - number of compared rows whose stance differs from the English stance
#   PMR  - PM / #REC
#   WBR  - share of those mismatches where English chose 1 and the culture chose 2
#   #REC - number of rows compared after all filters
final_df = pd.DataFrame({'Culture': cultures})
for metric in ['PM', 'PMR', 'WBR', '#REC']:
    final_df[metric] = None

# Reload the full English file: the stance column is needed here as well.
df_english_full = pd.read_csv(file_english).rename(columns={'moral_action_selected': 'stance_english'})
df_english_full['stance_english'] = df_english_full['stance_english'].apply(to_int)
df_english_full['GPT_Answer'] = df_english_full['GPT_Answer'].astype(str).str.strip()

for culture in cultures:
    file_culture = os.path.join(base_input_folder, f"{model_name}_NC900C_MS_{culture}_all.csv")
    if not os.path.exists(file_culture):
        print(f"[Warning] File not found: {file_culture}")
        continue

    df_culture = pd.read_csv(file_culture).rename(columns={'moral_action_selected': 'stance_other'})
    df_culture['stance_other'] = df_culture['stance_other'].apply(to_int)
    df_culture['GPT_Answer'] = df_culture['GPT_Answer'].astype(str).str.strip()

    merged = pd.merge(
        df_english_full[["ID", "GPT_Answer"]],
        df_culture[["ID", "GPT_Answer", "consistency_flag"]],
        on="ID",
        suffixes=("_eng", "_other")
    )

    # Compare answers case-insensitively, matching how consistency_flag was computed
    # (on lower-cased answers). A case-sensitive == "Yes" silently discards rows
    # answered "yes"/"YES" even though they are flagged as consistent.
    both_yes = (
        merged["GPT_Answer_eng"].str.lower().eq("yes")
        & merged["GPT_Answer_other"].str.lower().eq("yes")
    )
    filtered = merged[both_yes & (merged["consistency_flag"] == 1)]
    if filtered.empty:
        continue

    # Attach both stances to the surviving IDs.
    filtered = filtered.merge(df_english_full[["ID", "stance_english"]], on="ID").merge(
        df_culture[["ID", "stance_other"]], on="ID")

    # Discard rows with any missing value, then keep only stances coded 1 or 2.
    filtered = filtered.dropna()
    filtered = filtered[(filtered['stance_english'].isin([1, 2])) & (filtered['stance_other'].isin([1, 2]))]

    total_records = len(filtered)
    mismatches = filtered[filtered['stance_english'] != filtered['stance_other']]
    mismatch_count = len(mismatches)
    # Guard both ratios against an empty denominator.
    pmr = mismatch_count / total_records if total_records else 0
    wbr = mismatches[(mismatches['stance_english'] == 1) & (mismatches['stance_other'] == 2)].shape[0] / mismatch_count if mismatch_count else 0

    culture_row = final_df['Culture'] == culture
    final_df.loc[culture_row, 'PM'] = mismatch_count
    final_df.loc[culture_row, 'PMR'] = f"{pmr:.2%}"
    final_df.loc[culture_row, 'WBR'] = f"{wbr:.2%}"
    final_df.loc[culture_row, '#REC'] = total_records

# Step 6 (optional): add one global column telling whether *every* merged row
# carried consistency_flag == 1, then persist the report next to the inputs.
flagged_rows = merged_df['consistency_flag'].sum()
overall_consistency = (flagged_rows == len(merged_df))
final_df['Overall_Consistency_Flag'] = int(overall_consistency)

report_name = f'{model_name}_comparison_report_2.csv'
output_file = os.path.join(base_input_folder, report_name)
final_df.to_csv(output_file, index=False)
print(f"Report saved to: {output_file}")


Consistency flags added to all _all.csv files.
Report saved to: /mnt/pixstor/data/maz2h/MoralStory/Not-specifying-MoralNorms/Mistral-Small-24B-Instruct-2501-NC900C-MS/Mistral-Small-24B-Instruct-2501_comparison_report_2.csv


In [1]:
##------------ -------- ------- -------- -------- ------ ------- -------- --------
##----  Compute the WBR and PMR for Mistral 24B after grouping by the Likert-scale score
##----                        norm-gen vs. original-norm
##------------ ----------  ---------  ---------  ----------- ------------ --------

import pandas as pd
import os

# Model whose answers are analysed; the name is embedded in every input/output file name below.
model_name = 'Mistral-Small-24B-Instruct-2501'
# Cultures compared against the English baseline.
cultures = ['arabic','african','turkish','indian','persian','korean','chinese','japanese']
# The folder is derived from model_name so that switching the model cannot leave
# the path pointing at another model's data.
base_input_folder = f"/mnt/pixstor/data/maz2h/MoralStory/Not-specifying-MoralNorms/{model_name}-NC900C-MS"
# Column holding the score by which PMR/WBR are grouped.
SCORE_COL = 'equivalence_score_qwen72'
# Where SCORE_COL is read from: 'culture' = each culture's file, 'english' = the English file.
SCORE_SOURCE = 'culture'

def to_int(v):
    """Coerce a raw CSV cell to an int, or return None when it is not a usable number.

    Strings are stripped first; the value then goes through ``float()`` so that
    inputs such as "2.0" are accepted (any fractional part is truncated).

    Args:
        v: Any scalar read from a CSV column (str, int, float, None, NaN, ...).

    Returns:
        The integer value, or None for blanks, non-numeric text, None, NaN
        and infinities.
    """
    if isinstance(v, str):
        v = v.strip()
    try:
        return int(float(v))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / non-numeric objects; ValueError: unparsable text or NaN;
        # OverflowError: +/-inf. Anything else (e.g. KeyboardInterrupt) must
        # propagate instead of being silently turned into None.
        return None

# English baseline (scored file): keep the join key and the answer only, rename the
# answer so it stays distinguishable after merging, and normalise it to stripped,
# lower-case text.
file_english = os.path.join(base_input_folder, f"{model_name}_NC900C_MS_english_all_with_norm_scored.csv")
df_english = pd.read_csv(file_english)[['ID','GPT_Answer']].rename(
    columns={'GPT_Answer':'GPT_Answer_english'}
)
english_answer = df_english['GPT_Answer_english'].astype(str)
df_english['GPT_Answer_english'] = english_answer.str.strip().str.lower()

# Inner-join the answers of every available culture onto the English answers by ID,
# producing one "GPT_Answer_<culture>" column per culture.
merged_df = df_english.copy()
for culture in cultures:
    culture_path = os.path.join(base_input_folder, f"{model_name}_NC900C_MS_{culture}_all.csv")
    if not os.path.exists(culture_path):
        # A skipped culture is left out of the consistency check below, so say so
        # instead of silently computing the flag over fewer cultures.
        print(f"[Warning] File not found: {culture_path}")
        continue
    answer_col = f'GPT_Answer_{culture}'
    culture_answers = pd.read_csv(culture_path)[['ID','GPT_Answer']].rename(columns={'GPT_Answer': answer_col})
    # Same normalisation as the English column: stripped, lower-case text.
    culture_answers[answer_col] = culture_answers[answer_col].astype(str).str.strip().str.lower()
    # Inner join: only IDs present in every file read so far survive.
    merged_df = pd.merge(merged_df, culture_answers, on='ID', how='inner')

# A row gets flag 1 only when the English answer and every merged culture answer
# is "yes" (the columns were lower-cased when they were loaded), otherwise 0.
# Vectorised comparison instead of a row-wise apply: same flags, but faster, and
# it still yields a well-formed (empty) integer column when no ID survived the joins.
ans_cols = [c for c in merged_df.columns if c.startswith('GPT_Answer_')]
merged_df['consistency_flag'] = merged_df[ans_cols].eq('yes').all(axis=1).astype(int)

# Full English frame: stance as int-or-None, answer as stripped text.
df_english_full = pd.read_csv(file_english).rename(columns={'moral_action_selected':'stance_english'})
df_english_full['stance_english'] = df_english_full['stance_english'].apply(to_int)
df_english_full['GPT_Answer'] = df_english_full['GPT_Answer'].astype(str).str.strip()
# The score column is only prepared on the English side when that side supplies it;
# a missing column is created empty so the later column selection cannot fail.
if SCORE_SOURCE=='english':
    if SCORE_COL not in df_english_full.columns:
        df_english_full[SCORE_COL] = None
    df_english_full[SCORE_COL] = df_english_full[SCORE_COL].apply(to_int)

# Collect one record per (culture, score) with the mismatch rate (PMR) and the
# share of mismatches where English chose 1 and the culture chose 2 (WBR).
rows = []
for culture in cultures:
    # Prefer the scored file; fall back to the plain "_all.csv" when it does not exist.
    f_scored = os.path.join(base_input_folder, f"{model_name}_NC900C_MS_{culture}_all_with_norm_scored.csv")
    f_basic  = os.path.join(base_input_folder, f"{model_name}_NC900C_MS_{culture}_all.csv")
    src = f_scored if os.path.exists(f_scored) else f_basic
    if not os.path.exists(src):
        continue

    dfc = pd.read_csv(src)
    if 'moral_action_selected' in dfc.columns:
        dfc = dfc.rename(columns={'moral_action_selected':'stance_other'})
    if 'stance_other' not in dfc.columns:
        dfc['stance_other'] = None
    dfc['stance_other'] = dfc['stance_other'].apply(to_int)
    dfc['GPT_Answer'] = dfc['GPT_Answer'].astype(str).str.strip()
    if SCORE_SOURCE=='culture':
        # A file without the score column gets an empty one; its rows are then
        # removed by the dropna below and the culture contributes nothing.
        if SCORE_COL not in dfc.columns:
            dfc[SCORE_COL] = None
        dfc[SCORE_COL] = dfc[SCORE_COL].apply(to_int)
    if 'consistency_flag' not in dfc.columns:
        dfc = pd.merge(dfc, merged_df[['ID','consistency_flag']], on='ID', how='left')

    # The score column travels with whichever side SCORE_SOURCE names. Every column
    # listed here is guaranteed to exist by the preparation above, so no existence
    # filtering is needed.
    if SCORE_SOURCE=='english':
        left_cols = ['ID','GPT_Answer',SCORE_COL]
        right_cols = ['ID','GPT_Answer','consistency_flag','stance_other']
    else:
        left_cols = ['ID','GPT_Answer']
        right_cols = ['ID','GPT_Answer','consistency_flag','stance_other',SCORE_COL]

    merged = pd.merge(df_english_full[left_cols], dfc[right_cols], on='ID', suffixes=('_eng','_other'))

    # Keep rows answered "yes" on both sides (case-insensitive) and flagged consistent.
    keep = (
        (merged['GPT_Answer_eng'].str.lower()=='yes')
        & (merged['GPT_Answer_other'].str.lower()=='yes')
        & (merged['consistency_flag']==1)
    )
    filt = merged[keep]
    if filt.empty:
        continue

    filt = filt.merge(df_english_full[['ID','stance_english']], on='ID')
    # The same three columns are required regardless of SCORE_SOURCE.
    filt = filt.dropna(subset=['stance_english','stance_other',SCORE_COL])
    # Only stances coded 1 or 2 are compared.
    filt = filt[(filt['stance_english'].isin([1,2])) & (filt['stance_other'].isin([1,2]))]
    if filt.empty:
        continue

    for score, g in filt.groupby(SCORE_COL):
        n = len(g)
        mism = g[g['stance_english'] != g['stance_other']]
        m = len(mism)
        # Guard both ratios against an empty denominator.
        pmr = m/n if n else 0.0
        wbr = mism[(mism['stance_english']==1)&(mism['stance_other']==2)].shape[0]/m if m else 0.0
        rows.append({'Culture':culture,'Score':score,'PMR':pmr,'WBR':wbr})

# Pivot the per-(culture, score) records into one row per culture with the columns
# "<score>_PMR" and "<score>_WBR" (scores ascending), formatted as percentages,
# and write the matrix next to the inputs.
df_long = pd.DataFrame(rows)
out = os.path.join(base_input_folder, f'{model_name}_matrix_by_score.csv')
if df_long.empty:
    # Nothing survived filtering: still write a header-only file.
    pd.DataFrame(columns=['Culture']).to_csv(out, index=False)
else:
    df_long['Score'] = df_long['Score'].astype(int)
    culture_order = sorted(df_long['Culture'].unique())
    score_order = sorted(set(df_long['Score']))

    wide = pd.DataFrame(index=pd.Index(culture_order, name='Culture'))
    pivots = {
        metric: df_long.pivot_table(index='Culture', columns='Score', values=metric, aggfunc='mean')
                       .reindex(index=culture_order, columns=score_order)
        for metric in ('PMR', 'WBR')
    }
    for s in score_order:
        for metric in ('PMR', 'WBR'):
            # A culture with no rows for this score has no rate: leave the cell
            # empty instead of writing the literal text "nan%".
            wide[f'{s}_{metric}'] = pivots[metric][s].map(
                lambda x: f"{x*100:.2f}%" if pd.notna(x) else ""
            )
    wide = wide.reset_index()
    wide.to_csv(out, index=False)
print(out)

  

/mnt/pixstor/data/maz2h/MoralStory/Not-specifying-MoralNorms/Mistral-Small-24B-Instruct-2501-NC900C-MS/Mistral-Small-24B-Instruct-2501_matrix_by_score.csv
