In [47]:
import pandas as pd
from pathlib import Path

# Results file -> name of the column in that file holding the model's response.
# NOTE(review): a later cell in this notebook maps the rf/svm results to
# '.ssv' files with a plain 'Response' column and writes to the same output
# path as this cell -- confirm which mapping is current.
file_model_pairs = {
    '../results/results_chatgpt.csv': 'GPTResponse',
    '../results/results_claude.csv': 'ClaudeResponse',
    '../results/results_llama31.csv': 'LLama31Response',
    '../results/results_llama32.csv': 'LLama32Response',
    '../results/results_qwen.csv': 'QwenResponse',
    '../results/results_phi.csv': 'PhiResponse',
    '../results/results_smol.csv': 'SmollResponse',
    '../results/results_rf.csv': 'RFResponse',
    '../results/results_svm.csv': 'SVMResponse',
}

# The first mapping entry seeds the combined table; its rows are the left side
# of every later join.
first_file = next(iter(file_model_pairs.items()))
combined = pd.read_csv(first_file[0], sep=None, engine='python')
# Model name is the token after the first underscore of the file stem,
# e.g. 'results_chatgpt' -> 'chatgpt'.
model_name = Path(first_file[0]).stem.split('_')[1]
combined = combined[['Prompt', first_file[1]]]
combined = combined.rename(columns={first_file[1]: f"{model_name}_response"})

# Left-join each remaining model's response column onto the table by prompt.
for file, column in list(file_model_pairs.items())[1:]:
    model_name = Path(file).stem.split('_')[1]
    df = pd.read_csv(file, sep=None, engine='python').rename(
        columns={column: f"{model_name}_response"}
    )
    combined = combined.merge(
        df[['Prompt', f"{model_name}_response"]],
        on='Prompt',
        how='left',
    )

# Attach the per-prompt metadata column.
meta_df = pd.read_csv('../results/combined_model_responses_meta.ssv', sep=None, engine='python')
combined = combined.merge(meta_df[['Prompt', 'Metadata']], on='Prompt', how='left')

# Write the merged table as a semicolon-separated file, creating the folder if needed.
output_path = Path('../results/final/final_dataset2.ssv')
output_path.parent.mkdir(parents=True, exist_ok=True)
combined.to_csv(output_path, sep=";", index=False)

In [2]:
import pandas as pd
from pathlib import Path
import re

def extract_transaction_context(prompt):
    """Extract transaction context from prompt using regex pattern matching.

    Args:
        prompt: Prompt text expected to contain a sentence of the form
            "Transaction Context: A <age>-year-old <gender> who works as a
            <occupation> has made a purchase of $<amount> at <merchant>".
            Non-string values (for example the float NaN that pandas uses for
            missing cells) are treated as "no match" instead of raising.

    Returns:
        A dict with keys 'age' (int), 'gender' (str), 'occupation' (str),
        'amount' (float) and 'merchant' (str), or None when the prompt is not
        a string, the pattern is not found, or the captured amount is not a
        valid number.
    """
    # re.search raises TypeError on non-str input; a missing prompt simply has
    # no transaction context.
    if not isinstance(prompt, str):
        return None

    # Every captured field is a single whitespace-free token, so the merchant
    # capture keeps any punctuation glued to it.
    # NOTE(review): the occupation group rejects a first character of 'h'
    # ([^h]); the intent is not evident from the code -- confirm.
    pattern = r"Transaction Context:\s*A (\d+)-year-old ([^\s]+) who works as a ([^h][^\s]+) has made a purchase of \$([0-9.]+) at ([^\s]+)"
    match = re.search(pattern, prompt)
    if match is None:
        return None

    age, gender, occupation, amount, merchant = match.groups()
    try:
        amount_value = float(amount)
    except ValueError:
        # [0-9.]+ also accepts text such as "1.2.3" or "." that is not a number.
        return None

    return {
        'age': int(age),
        'gender': gender,
        'occupation': occupation,
        'amount': amount_value,
        'merchant': merchant
    }

def _load_model_responses(file, column):
    """Read one results file and return 'Prompt' plus the renamed response column."""
    # Model name is the token after the first underscore of the file stem,
    # e.g. 'results_claude.csv' -> 'claude'.
    model_name = Path(file).stem.split('_')[1]
    response_column = f"{model_name}_response"
    # sep=None with the python engine lets pandas detect the delimiter, so
    # comma- and semicolon-separated inputs are both accepted.
    responses = pd.read_csv(file, sep=None, engine='python')
    responses = responses.rename(columns={column: response_column})
    return responses[['Prompt', response_column]]


def process_files(file_model_pairs, meta_path='../results/combined_model_responses_meta.ssv'):
    """Process multiple result files and combine them with metadata.

    Args:
        file_model_pairs: Mapping of results-file path -> name of the column
            in that file holding the model's response. The first entry seeds
            the combined table; every later file is left-joined onto it on
            'Prompt', in mapping order.
        meta_path: Path of the delimited file providing the 'Prompt' and
            'Metadata' columns. Defaults to the location that was previously
            hard-coded in this function.

    Returns:
        DataFrame with 'Prompt', one '<model>_response' column per input file,
        a 'transaction_data' column (the result of
        extract_transaction_context for each prompt) and 'Metadata'.

    Raises:
        ValueError: If file_model_pairs is empty.
    """
    if not file_model_pairs:
        raise ValueError("file_model_pairs must contain at least one results file")

    combined = None
    for file, column in file_model_pairs.items():
        responses = _load_model_responses(file, column)
        if combined is None:
            # First file: its rows are the left side of every later join.
            combined = responses
        else:
            # Left join keeps every row already in `combined`; a prompt that
            # appears more than once in `responses` yields one row per match.
            combined = pd.merge(combined, responses, on='Prompt', how='left')

    # Extract transaction contexts
    combined['transaction_data'] = combined['Prompt'].apply(extract_transaction_context)

    # Add metadata
    meta_df = pd.read_csv(meta_path, sep=None, engine='python')
    combined = pd.merge(
        combined,
        meta_df[['Prompt', 'Metadata']],
        on='Prompt',
        how='left'
    )

    return combined

def save_results(combined_df, output_path='../results/final/final_dataset2.ssv'):
    """Write the combined DataFrame to a semicolon-separated file.

    The parent directory of `output_path` is created if it does not exist, and
    the DataFrame index is not written.
    """
    destination = Path(output_path)
    target_dir = destination.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    combined_df.to_csv(destination, sep=";", index=False)

# Results file -> name of the column in that file holding the model's response.
file_model_pairs = {
    '../results/results_chatgpt.csv': 'GPTResponse',
    '../results/results_claude.csv': 'ClaudeResponse',
    '../results/results_llama31.csv': 'LLama31Response',
    '../results/results_llama32.csv': 'LLama32Response',
    '../results/results_qwen.csv': 'QwenResponse',
    '../results/results_phi.csv': 'PhiResponse',
    '../results/results_smol.csv': 'SmollResponse',
    '../results/results_rf.ssv': 'Response',
    '../results/results_svm.ssv': 'Response',
}

# Merge every model's responses, then write the table as a semicolon-separated file.
combined_data = process_files(file_model_pairs)
save_results(combined_data, '../results/final/final_dataset2.ssv')