In [None]:
import os
import pandas as pd
import json
from pathlib import Path

def get_unique_model_responses(id: int, context_type: str, with_misleading: bool = False) -> list:
    """
    Load model test result JSON files and return the unique responses with their scores.

    Results are read from the directory
    ``results/graph/llama-2-7b-80k_id_{id}_{context_type}[_misleading]`` (relative to the
    current working directory). Every file whose name ends in ``.json`` is parsed and must
    contain the keys ``model_response`` and ``score``.

    Args:
        id: Identifier embedded in the results directory name.
        context_type: Context label embedded in the results directory name.
        with_misleading: When True, read from the directory carrying the
            ``_misleading`` suffix.

    Returns:
        A list of ``{'model_response': ..., 'score': ...}`` dicts, one per distinct
        ``model_response``, ordered by descending score. When the same response appears
        in several files, the entry from the first file in sorted file-name order is
        kept. Entries with equal scores keep that same file-name order. An empty list is
        returned when the directory contains no JSON result files.

    Raises:
        FileNotFoundError: If the results directory does not exist.
        KeyError: If a result file lacks ``model_response`` or ``score``.
    """
    model_name = 'llama-2-7b-80k'
    base_dir = Path('results/graph')

    suffix = '_misleading' if with_misleading else ''
    directory = base_dir / f'{model_name}_id_{id}_{context_type}{suffix}'

    results = []

    # Directory listing order is platform dependent; iterating in sorted order makes the
    # choice of which duplicate survives (and the order of tied scores) reproducible.
    for filepath in sorted(directory.iterdir(), key=lambda p: p.name):
        if not filepath.name.endswith('.json'):
            continue
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        results.append({
            'model_response': data['model_response'],
            'score': data['score']
        })

    # A DataFrame built from an empty list has no columns, so the column-based
    # de-duplication below would raise KeyError; short-circuit instead.
    if not results:
        return []

    df = pd.DataFrame(results)
    unique_sorted = (
        df.drop_duplicates(subset=['model_response'])
        # A stable sort keeps tied scores in their original (file-name) order.
        .sort_values(by='score', ascending=False, kind='stable')
        .reset_index(drop=True)
    )

    # Explicitly convert sorted DataFrame to list of dicts
    return unique_sorted.to_dict(orient='records')


In [20]:
def save_all_contexts_for_id(id: int) -> None:
    """
    Collect the unique responses for every context variant of ``id`` and write them
    to one JSON file.

    The output maps each of the four variant names ('relevant', 'relevant_misleading',
    'irrelevant', 'irrelevant_misleading') to the list returned by
    get_unique_model_responses for that variant.

    Output location: unique_responses/{id}.json (the directory is created if missing).
    """
    out_dir = Path('unique_responses')
    out_dir.mkdir(parents=True, exist_ok=True)

    # (output key, context_type, with_misleading) -- order fixes both the load order
    # and the key order in the written JSON.
    variants = (
        ('relevant', 'relevant', False),
        ('relevant_misleading', 'relevant', True),
        ('irrelevant', 'irrelevant', False),
        ('irrelevant_misleading', 'irrelevant', True),
    )
    collected = {}
    for key, context_type, misleading in variants:
        collected[key] = get_unique_model_responses(id, context_type, with_misleading=misleading)

    target = out_dir / f'{id}.json'
    with target.open('w', encoding='utf-8') as handle:
        json.dump(collected, handle, ensure_ascii=False, indent=2)

    print(f"Saved all context responses for ID {id} to {target}")

In [21]:
# Export the unique responses of all four context variants for ID 160 into a single JSON file.
save_all_contexts_for_id(160)

Saved all context responses for ID 160 to unique_responses/160.json
