In [2]:
import pandas as pd
import os

In [11]:
def get_all_paths_exclude_checkpoints(folder_path):
    """
    Get all file paths from the given folder, excluding paths with `.ipynb_checkpoints`.
    
    Parameters:
        folder_path (str): Path to the folder.
    
    Returns:
        List of file paths excluding `.ipynb_checkpoints`.
    """
    all_paths = []
    for root, dirs, files in os.walk(folder_path):
        if ".ipynb_checkpoints" in root:
            continue  # Skip directories containing `.ipynb_checkpoints`
        for file in files:
            file_path = os.path.join(root, file)
            if ".ipynb_checkpoints" not in file_path:
                all_paths.append(file_path)
                
    all_paths = {path: path.split('/')[-1].split('.')[0] for path in all_paths}

    return all_paths

# Example usage
folder_path = "../ready_for_evaluation"
paths = get_all_paths_exclude_checkpoints(folder_path)
paths = dict(sorted(paths.items()))
# print("Paths:", paths)
paths

{'../ready_for_evaluation/claude-3-opus/deu.csv': 'deu',
 '../ready_for_evaluation/claude-3-opus/fra.csv': 'fra',
 '../ready_for_evaluation/claude-3-opus/it.csv': 'it',
 '../ready_for_evaluation/claude-3-opus/spa.csv': 'spa',
 '../ready_for_evaluation/claude-3.5-haiku/deu.csv': 'deu',
 '../ready_for_evaluation/claude-3.5-haiku/fra.csv': 'fra',
 '../ready_for_evaluation/claude-3.5-haiku/it.csv': 'it',
 '../ready_for_evaluation/claude-3.5-haiku/spa.csv': 'spa',
 '../ready_for_evaluation/claude-3.5-sonnet/deu.csv': 'deu',
 '../ready_for_evaluation/claude-3.5-sonnet/fra.csv': 'fra',
 '../ready_for_evaluation/claude-3.5-sonnet/it.csv': 'it',
 '../ready_for_evaluation/claude-3.5-sonnet/spa.csv': 'spa',
 '../ready_for_evaluation/gemini-1.5-flash/deu.csv': 'deu',
 '../ready_for_evaluation/gemini-1.5-flash/fra.csv': 'fra',
 '../ready_for_evaluation/gemini-1.5-flash/it.csv': 'it',
 '../ready_for_evaluation/gemini-1.5-flash/spa.csv': 'spa',
 '../ready_for_evaluation/gemini-1.5-pro/deu.csv': 'deu'

In [5]:
def recheck_and_reassign(path, lang_prefix):
    """
    Rechecks if lan_m == lan_f in the DataFrame.
    If equal, moves the value to lan_n and clears lan_m and lan_f.
    
    Parameters:
        path (path: Path to the input DataFrame with columns <lang_prefix>_m, <lang_prefix>_f, <lang_prefix>_n.
        lang_prefix (str): Language prefix (e.g., 'deu', 'spa', 'fra', 'it').
    
    Returns:
        pd.DataFrame: Updated DataFrame with reassignments applied.
        
    """
    df = pd.read_csv(path)
    
    # Define column names based on the language prefix
    col_m = f"{lang_prefix}_m"
    col_f = f"{lang_prefix}_f"
    col_n = f"{lang_prefix}_n"

    # Ensure the required columns exist
    if col_m not in df.columns or col_f not in df.columns or col_n not in df.columns:
        raise ValueError(f"Required columns {col_m}, {col_f}, and {col_n} not found in DataFrame.")
    
    # Find rows where masculine and feminine forms are the same
    condition = df[col_m] == df[col_f]
    
    # Update neuter column with the matching values
    df.loc[condition, col_n] = df.loc[condition, col_m]
    
    # Clear masculine and feminine columns where they are the same
    df.loc[condition, col_m] = ""
    df.loc[condition, col_f] = ""
    
    # nan_rows = df[df[[col_m, col_f, col_n]].isna().all(axis=1)]
    
    df.to_csv(path, index=False)
    
    
    # return df, nan_rows


In [8]:
for k, v in paths.items():
    recheck_and_reassign(k, v)

# Accuracy

We will calculate accuracy for `masculine`, `feminine` and `neuter` forms seperately. Then we will check how many outputs provide both gender forms if applicable.

In [31]:
def accuracy(row, lang_prefix: str, gender_prefix: str):
    # Collect true values 
    true_values = {row[f"{lang_prefix}_{gender_prefix}"]}
    true_values = {val.lower() for val in true_values if pd.notna(val)} # Remove NaN
    
    
    # Collect predicted values from extracted
    predicted_values = eval(row['extracted'].lower()) if pd.notna(row['extracted']) else []
    
    # Check if any predicted value matches the true values
    return any(pred in true_values for pred in predicted_values)

In [32]:
gender_prefixes = ['m', 'f', 'n']
for path, lang in paths.items():
    print(path)
    df = pd.read_csv(path)
    print(df.columns.tolist())
    for gender_prefix in gender_prefixes:
        df[f'is_{gender_prefix}'] = df.apply(lambda row: accuracy(row, lang, gender_prefix))
        
    save = 'accuracy/' + '/'.join(test.split('/')[2:])
    directory = os.path.dirname(save)  # Extract the directory path
    os.makedirs(directory, exist_ok=True)  # Create the directories if they don't exist
    
    df.to_csv(save, index=False)

../ready_for_evaluation/claude-3-opus/deu.csv
['eng', 'deu_m', 'deu_f', 'deu_n', 'extracted']
0             I am Afghan.
1            I am Angolan.
2           I am Albanian.
3           I am Andorran.
4            I am Emirati.
              ...         
188           I am Samoan.
189           I am Yemeni.
190    I am South African.
191          I am Zambian.
192       I am Zimbabwean.
Name: eng, Length: 193, dtype: object


KeyError: 'deu_m'