In [1]:
import matplotlib.pyplot as plt
import numpy as np
import json
import os

def load_json(file_path):
    """Read a JSON document from ``file_path``.

    Returns the parsed content, or an empty dict when the path does not
    exist or the file cannot be decoded as JSON (a message is printed in
    the latter case).
    """
    # Nothing on disk: hand back an empty dict.
    if not os.path.exists(file_path):
        return {}

    with open(file_path, "r", encoding="utf-8") as handle:
        try:
            parsed = json.load(handle)
        except json.JSONDecodeError:
            print("JSON decode error — file is empty or malformed.")
            return {}
    return parsed


def update_json(data, key, value):
    """Store ``value`` under ``key`` in ``data`` and return that same object.

    The container is modified in place; no copy is made.
    """
    data[key] = value
    return data

def save_json(file_path, data):
    """Serialize ``data`` as JSON into ``file_path``.

    The file is opened in write mode with UTF-8 encoding, so existing
    content is replaced. Output is indented by four spaces.
    """
    with open(file_path, "w", encoding="utf-8") as out_file:
        json.dump(data, out_file, indent=4)

In [23]:
# Creates a chart of the interpretations selected by Chat GPT, Llama, and Rwp minimization.
# Includes synthesis, interpretation info, balance score, Rwp, and likelihoods for each LLM.

def find_all_choices(json_file):
    """Pick, for every sample, the interpretation preferred by each method.

    For each sample key in ``json_file`` the interpretations (every entry
    except ``"Synth_Conditions"``) are scanned. An interpretation counts
    only if its ``"unnormalized_posterior_llama"``,
    ``"unnormalized_posterior"`` and ``"rwp"`` values are all ``int`` or
    ``float``. An interpretation missing one of those keys is reported
    with ``print`` and skipped.

    Returns a dict mapping each sample to
    ``{"llama_choice", "chat_choice", "rwp_choice"}``: the interpretation
    names with the highest Llama score, the highest chat score and the
    lowest Rwp. Each is ``None`` when the sample has no usable
    interpretation. Ties go to the interpretation seen first.
    """
    picks = {}
    for sample in json_file:
        picks[sample] = {}
        # One record per usable interpretation: (name, llama, chat, rwp).
        candidates = []
        for interpret, info in json_file[sample].items():
            if interpret == "Synth_Conditions":
                continue
            try:
                scores = (
                    info["unnormalized_posterior_llama"],
                    info["unnormalized_posterior"],
                    info["rwp"],
                )
            except KeyError as e:
                print(f"Missing key in sample '{sample}', interpretation '{interpret}': {e}")
                continue
            if all(isinstance(score, (int, float)) for score in scores):
                candidates.append((interpret,) + scores)

        if candidates:
            best_llama = max(candidates, key=lambda rec: rec[1])[0]
            best_chat = max(candidates, key=lambda rec: rec[2])[0]
            best_rwp = min(candidates, key=lambda rec: rec[3])[0]
        else:
            best_llama = best_chat = best_rwp = None

        picks[sample]["llama_choice"] = best_llama
        picks[sample]["chat_choice"] = best_chat
        picks[sample]["rwp_choice"] = best_rwp

    return picks
# Load the interpretation results and display the per-sample selections.
# NOTE(review): load_json returns {} when the file is missing or cannot be decoded,
# so an empty result here may only mean this relative path did not resolve — confirm.
file_path = "Data/prompt3/interpretations_llm_v1_llama1.json"
json_file = load_json(file_path)
find_all_choices(json_file)

{}

In [19]:
import pandas as pd
# Reload the interpretation file and choose which sample to tabulate.
# `sample_id` is later used as a key into both `json_file` and the per-sample
# choices, so it must be present in the loaded data or those lookups raise KeyError.
file_path = "Data/prompt3/interpretations_llm_v1_llama1.json"
json_file = load_json(file_path)
sample_id = "TRI_105"  # sample name

def format_predicted_phases(json_file, sample_id, chat_choice):
    """Render one interpretation's phases as ``"phase wf, phase wf, ..."``.

    Reads ``"phases"`` and ``"weight_fraction"`` from
    ``json_file[sample_id][chat_choice]`` and pairs them by position.
    Returns an empty string when there are no phases.
    """
    entry_phases = json_file[sample_id][chat_choice]["phases"]
    entry_wfs = json_file[sample_id][chat_choice]["weight_fraction"]
    # Index by position over the phases so a too-short weight list still raises.
    return ", ".join(
        f"{entry_phases[idx]} {entry_wfs[idx]}" for idx in range(len(entry_phases))
    )

# Build the comparison-table rows (one per selection method) for a single sample.
def assemble_df(sample_id, sample_choice, json_file,
                llama_likelihood_key="LLM_interpretation_likelihood"):
    """Assemble one table row per selection method for ``sample_id``.

    Args:
        sample_id: Key of the sample in both ``sample_choice`` and ``json_file``.
        sample_choice: Mapping of sample -> ``{"chat_choice", "llama_choice",
            "rwp_choice"}``, each an interpretation name or ``None``.
        json_file: Mapping of sample -> interpretation name -> info dict. The
            info dict is read for ``"balance_score"``, ``"rwp"`` and the
            likelihood keys (and by ``format_predicted_phases``).
        llama_likelihood_key: Info-dict key used for the "Llama Likelihood"
            column. NOTE(review): the default is the same key that feeds
            "Chat GPT Likelihood", so both columns show identical values.
            This looks like a copy-paste slip — confirm the Llama-specific
            key and pass it here.

    Returns:
        A list of three dicts, in the order Chat GPT, Llama, Lowest Rwp, each
        with the keys "Selection Method", "Choice", "Predicted Phases",
        "Balance Score", "Rwp", "Chat GPT Likelihood" and "Llama Likelihood".
        When a method has no choice (``None``) its remaining fields are ``None``.

    Raises:
        KeyError: if ``sample_id`` is not in ``sample_choice``, or a chosen
            interpretation or one of its expected fields is missing.
    """
    if sample_id not in sample_choice:
        # load_json yields {} for a missing or undecodable file, which surfaces here.
        raise KeyError(
            f"sample {sample_id!r} not found in sample_choice "
            "(was the JSON file found and non-empty?)"
        )
    choices = sample_choice[sample_id]

    # (table label, key in the per-sample choice dict); order is the row order.
    methods = (
        ("Chat GPT", "chat_choice"),
        ("Llama", "llama_choice"),
        ("Lowest Rwp", "rwp_choice"),
    )

    rows = []
    for label, choice_key in methods:
        choice = choices[choice_key]
        row = {"Selection Method": label, "Choice": choice}
        if choice is None:
            # No usable interpretation for this sample: keep the row, blank the details.
            row["Predicted Phases"] = None
            row["Balance Score"] = None
            row["Rwp"] = None
            row["Chat GPT Likelihood"] = None
            row["Llama Likelihood"] = None
        else:
            info = json_file[sample_id][choice]
            row["Predicted Phases"] = format_predicted_phases(json_file, sample_id, choice)
            row["Balance Score"] = info["balance_score"]
            row["Rwp"] = info["rwp"]
            row["Chat GPT Likelihood"] = info["LLM_interpretation_likelihood"]
            row["Llama Likelihood"] = info[llama_likelihood_key]
        rows.append(row)

    # The original built the three dicts but never collected or returned them.
    return rows

# Pick each method's preferred interpretation, then collect the rows for `sample_id`.
sample_choice = find_all_choices(json_file)
rows = assemble_df(sample_id, sample_choice, json_file)

# Create DataFrame with an explicit column order.
# The previous selection list lacked the comma after "Selection Method"; Python joins
# adjacent string literals, so it asked for a column named "Selection MethodChoice".
df = pd.DataFrame(
    rows,
    columns=[
        "Selection Method",
        "Choice",
        "Predicted Phases",
        "Balance Score",
        "Rwp",
        "Chat GPT Likelihood",
        "Llama Likelihood",
    ],
)

print(df)

KeyError: 'TRI_105'