## preprocess

In [None]:
import pandas as pd
import glob
import numpy as np
import re
import os

# Location of the run to process: result files are looked up under
# <folder_path>/<folder_name>. Both values are placeholders to be edited.
folder_path = 'Path/to/inference'
folder_name = "inference_folder"


# Collect every per-index result CSV ("result_index_*.csv") found in that folder.
file_paths = glob.glob(os.path.join(folder_path, folder_name, "result_index_*.csv"))

def extract_index(path):
    """Return the numeric shard index embedded in a result file name.

    The index is the run of digits in ``result_index_<digits>_`` taken from
    the base name of *path*. Files that do not follow that naming scheme get
    the sentinel ``1e9`` so that they sort after every indexed file.
    """
    filename = os.path.basename(path)
    found = re.search(r"result_index_(\d+)_", filename)
    if found is None:
        return 1e9
    return int(found.group(1))
# Order the shard files by their embedded numeric index so the merged rows
# follow shard order rather than glob order.
file_paths = sorted(file_paths, key=extract_index)

# Stack all shards into one frame with a fresh 0..n-1 index.
merged_df = pd.concat([pd.read_csv(fp) for fp in file_paths], ignore_index=True)
print(merged_df.columns)
# Column names used by the later cells.
profile_columns = ['Review_age', 'Review_education',
       'Review_gender', 'Review_income', 'Review_political',
       'Review_religious']
res_columns = 'llm_answer'
rea_columns = 'llm_reason'
user_columns = 'user_choice'
# Build the output path with os.path.join so the merged file lands in the same
# <folder_path>/<folder_name> directory the shards were read from, whether or
# not folder_path ends with a path separator (plain string concatenation
# silently produced "<folder_path><folder_name>/...").
merged_df.to_csv(os.path.join(folder_path, folder_name, "merged_result.csv"), index=False)
merged_df.head(2)

## 提取回答中的答案并转化为0/1

In [None]:
import re
def process_text_result(text):
    """Extract the text that follows the first "选择" marker in an answer.

    The input is stringified and stripped of every character that is not a
    CJK ideograph, an ASCII letter or digit, ``=`` or a (half/full-width)
    parenthesis. Everything after the first "选择" in the cleaned string is
    returned; if the marker is absent, or *text* is ``None``, the result is
    the empty string.
    """
    if text is None:
        return ""

    cleaned = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9=()（）]', '', str(text))
    found = re.search(r'选择(.*?)$', cleaned)
    return str(found.group(1)) if found else ''

# Reload the merged results from <folder_path>/<folder_name>. os.path.join is
# used so the path points at the same run folder the shards were globbed from,
# whether or not folder_path ends with a path separator (plain string
# concatenation dropped the separator between the two parts).
merged_df = pd.read_csv(os.path.join(folder_path, folder_name, "merged_result.csv"))
new_col_name = 'llm_choice_num'
merged_df[new_col_name]= merged_df[res_columns].apply(process_text_result)
# Number of rows for which no choice text could be extracted.
print((merged_df[new_col_name] == '').sum())
merged_df.head(2)

In [None]:
def text_to_binary(text):
    """Map a free-text yes/no answer to 1/0, or NaN when it is neither.

    The check is a case-sensitive substring test on ``str(text)``: any text
    containing "no" yields 0 (even if it also contains "yes"), otherwise text
    containing "yes" yields 1, and everything else yields ``np.nan``.
    """
    label = str(text)
    # "no" takes precedence, so the "yes" branch never needs to re-check it.
    if "no" in label:
        return 0
    return 1 if "yes" in label else np.nan

# Convert both the human choice and the extracted LLM choice to 0/1 (NaN when
# the text is neither "yes" nor "no"); the LLM column is overwritten in place.
merged_df['user_choice_num'] = merged_df[user_columns].apply(text_to_binary)
merged_df[new_col_name] = merged_df[new_col_name].apply(text_to_binary)
# Number of LLM answers that could not be mapped to 0/1.
print(merged_df[new_col_name].isna().sum())
# os.path.join keeps the output inside <folder_path>/<folder_name> whether or
# not folder_path ends with a path separator (plain string concatenation
# dropped the separator between the two parts).
merged_df.to_csv(os.path.join(folder_path, folder_name, "merged_result_num.csv"), index=False)

## Calculate realism metrics (accuracy and RMSE)

In [None]:
import numpy as np 

def calculate_acc(series,human_series):
    """Return the share of positions where *series* equals *human_series*.

    The count of element-wise matches is divided by ``len(series)`` and
    rounded to three decimals. NaN never compares equal, so a missing
    prediction counts as a miss.
    """
    matches = series == human_series
    hit_rate = matches.sum() / len(series)
    return round(hit_rate, 3)
    

def calculate_rmse(series,human_series):
    """Return the root-mean-square error between *series* and *human_series*.

    Missing predictions are first replaced by ``1 - human_series``; for 0/1
    labels that is the opposite label, so a missing prediction is scored as a
    full error.

    Raises:
        ValueError: if the two sequences differ in length.
    """
    if len(series) != len(human_series):
        raise ValueError("两个序列的长度必须相同")

    # Penalise unanswered rows instead of dropping them.
    penalised = series.fillna(1 - human_series)
    errors = penalised - human_series
    return np.sqrt(np.mean(errors ** 2))


def Cal_enr_kl_group(df,groups_column,non_human,human_df):
    """Compute accuracy and RMSE of LLM choices against human choices.

    Args:
        df: frame providing the ``llm_choice_num`` column.
        groups_column: accepted but not used by this function.
        non_human: accepted but not used by this function.
        human_df: frame providing the ``user_choice_num`` column.

    Returns:
        dict with keys ``'acc'`` and ``'rmse'``, both computed over all rows
        of *df* (rows with a missing LLM choice included).

    Side effect: prints the accuracy multiplied by the fraction of rows that
    remain after dropping missing LLM choices and inner-aligning with
    *human_df*.
    NOTE(review): if "accuracy among answered rows" was intended, the ratio
    would need to be inverted - confirm.
    """
    llm_col, user_col = 'llm_choice_num', 'user_choice_num'

    # Rows with a parsed LLM answer, restricted to labels shared with human_df.
    valid_rows = df.dropna(subset=[llm_col])
    valid_rows, _ = valid_rows.align(human_df, join='inner')

    acc = calculate_acc(df[llm_col], human_df[user_col])
    print(acc*valid_rows.shape[0]/df.shape[0])
    rmse = calculate_rmse(df[llm_col], human_df[user_col])

    return {'acc': acc, 'rmse': rmse}


In [None]:
import json
def save_en_kl(model_name,result_dict,groups_column,save_path,non_human):
    """Write the metrics to ``<save_path>/mm_result.json`` and print a summary.

    Args:
        model_name: label shown in the printed summary.
        result_dict: metrics mapping; must contain ``"acc"`` and ``"rmse"``.
        groups_column: accepted but not used by this function.
        save_path: existing directory that receives ``mm_result.json``.
        non_human: accepted but not used by this function.

    Returns:
        *result_dict*, unchanged.
    """
    target = save_path+'/mm_result.json'
    with open(target, "w", encoding="utf-8") as out_file:
        json.dump(result_dict, out_file, ensure_ascii=False, indent=2)

    summary = f'model：{model_name}\n all-acc:{result_dict["acc"]:.3f}\nall-rmse:{result_dict["rmse"]:.3f}'
    print(summary)
    return result_dict


In [None]:
import pandas as pd
import numpy as np
def load_data(result_path):
    """Load ``<result_path>/merged_result_num.csv`` with profile gaps filled.

    In each of the six ``Review_*`` profile columns, values that are missing
    or equal to the string ``'default'`` are replaced by ``-1``; all other
    values and columns are returned as read.
    """
    def _fill_unknown(value):
        # Missing and 'default' entries share the same -1 placeholder.
        if pd.isna(value):
            return -1
        if value == 'default':
            return -1
        return value

    frame = pd.read_csv(result_path+'/merged_result_num.csv')
    review_fields = ['Review_age', 'Review_education',
                     'Review_gender', 'Review_income', 'Review_political',
                     'Review_religious']
    for field in review_fields:
        frame[field] = frame[field].apply(_fill_unknown)
    return frame


# Column names shared with the metric helpers.
profile_columns = ['Review_age', 'Review_education',
       'Review_gender', 'Review_income', 'Review_political',
       'Review_religious']
choice_columns = ['llm_choice_num','user_choice_num']


# Root folder and the per-model sub-folders to evaluate.
OLD_FOLDER_PATH= '/data5/fanbingbing/Behave-Benchmark-RL/Result/Step23_MM/0925tempmmcal/'
OLD_BASE_MODEL_NAME= ['RRE-SFT-v1s2','RRE-SFT-v1s1']


# Iterate over the model names directly: zip() over a single list yields
# 1-tuples, which cannot be concatenated onto the folder path.
for old_base_model in OLD_BASE_MODEL_NAME:
    base_result_path = os.path.join(OLD_FOLDER_PATH, old_base_model)
    base_df = load_data(base_result_path)
    # Both helpers take a groups_column argument that the original calls left
    # out (raising TypeError); keyword arguments make the mapping explicit.
    # The same frame supplies both the LLM and the human choice columns.
    direct_result = Cal_enr_kl_group(
        base_df,
        groups_column=profile_columns,
        non_human=True,
        human_df=base_df,
    )
    result_csv = save_en_kl(
        old_base_model,
        direct_result,
        groups_column=profile_columns,
        save_path=base_result_path,
        non_human=True,
    )