In [18]:
import numpy as np
import json
import os
from benchmark.run import args
from libem.prepare.datasets import abt_buy
from libem.cascade.function import online
from util import sensitivity_analysis, generate_stats, confidence_cost_plot, confidence_f1_plot, plot_result, save_results

In [19]:
# Result dumps of one run: judging by the file names, the cascade itself plus
# the two single-model runs (gpt-4o-mini and gpt-4o).
cascade_file = "/Users/charlesxu/libem/examples/cascade/output/2024-10-18-12-40-37-beer-cascade.json"
prematch_file = "/Users/charlesxu/libem/examples/cascade/output/2024-10-18-12-40-37-beer-gpt-4o-mini.json"
match_file = "/Users/charlesxu/libem/examples/cascade/output/2024-10-18-12-40-37-beer-gpt-4o.json"


def _read_json(path):
    """Open `path` for reading and return its parsed JSON content."""
    with open(path, 'r') as handle:
        return json.load(handle)


cascade_stats = _read_json(cascade_file)
prematch_single = _read_json(prematch_file)
match_single = _read_json(match_file)

In [22]:
# Keep only the list stored under the "results" key of each single-model run.
prematch_results, match_results = (
    run['results'] for run in (prematch_single, match_single)
)

In [23]:
def _pair_id(pair):
    """Build the "<left beer_name>-<right beer_name>" identifier of a pair."""
    # NOTE(review): the id is derived from the two names only, so two different
    # pairs that share both names (or whose names re-split around "-") map to
    # the same id and the later one wins in `match_dict` — confirm names are
    # unique enough for this dataset.
    return f"{pair['left']['beer_name']}-{pair['right']['beer_name']}"


def _contradicts_label(pair):
    """Return True when the pair's yes/no prediction disagrees with its label.

    The prediction is lower-cased before the substring checks so that
    "Yes"/"NO" are treated like "yes"/"no"; this is the same case-insensitive
    comparison the later statistics cell applies to predictions.
    """
    answer = pair['pred'].lower()
    label = pair['label']
    return (label == 1 and "no" in answer) or (label == 0 and "yes" in answer)


# Pairs the prematch run answered incorrectly.
prematch_mismatches = [pair for pair in prematch_results if _contradicts_label(pair)]

# Index the match run by pair id for direct lookup.
match_dict = {_pair_id(pair): pair for pair in match_results}

# For every prematch mistake, record what both runs said about that pair.
combined_mismatches = []

for prematch in prematch_mismatches:
    pair_id = _pair_id(prematch)
    match = match_dict.get(pair_id)
    # Explicit None test: only a genuinely absent pair is skipped.
    if match is not None:
        combined_mismatches.append({
            "pair_id": pair_id,
            "prematch": {
                "pred": prematch['pred'],
                "confidence": prematch['confidence'],
                "label": prematch['label']
            },
            "match": {
                "pred": match['pred'],
                "confidence": match['confidence'],
                "label": match['label']
            }
        })

with open("mismatches.json", "w") as file:
    json.dump(combined_mismatches, file, indent=4)

In [18]:
# NOTE(review): `prematch_all`, `prematch_match` and `final` are not assigned by
# any cell shown above this one; they have to exist in the kernel already.

# Fields copied from a prematch-stage record into the merged entry.
_PREMATCH_FIELDS = (
    "pred", "confidence", "calibrated_confidence", "explanation",
    "model_output", "tool_outputs", "latency", "tokens",
)
# Fields copied from a match-stage record. "calibrated_confidence" is left out
# here, as it was in the earlier version of this cell (presumably match-stage
# records do not carry it — TODO confirm).
_MATCH_FIELDS = (
    "pred", "confidence", "explanation",
    "model_output", "tool_outputs", "latency", "tokens",
)


def _find_same_pair(records, wanted):
    """Return the first record whose left/right equal `wanted`'s, else None."""
    return next(
        (
            record for record in records
            if record['left'] == wanted['left'] and record['right'] == wanted['right']
        ),
        None,
    )


output_data = []
pairs_without_final = 0

# Merge, per prematch record, the cascade's final prediction and (when the pair
# reached it) the match-stage record.
for prematch_result in prematch_all:
    final_result = _find_same_pair(final, prematch_result)
    if final_result is None:
        # No cascade record for this pair: store None instead of carrying over
        # the prediction of whichever pair was processed before it.
        final_pred = None
        pairs_without_final += 1
    else:
        final_pred = final_result['pred']

    match_result = _find_same_pair(prematch_match, prematch_result)

    entry = {
        "left": prematch_result['left'],
        "right": prematch_result['right'],
        "label": prematch_result['label'],
        "final_pred": final_pred,  # cascade prediction for this pair, or None
        "prematch_results": {
            field: prematch_result[field] for field in _PREMATCH_FIELDS
        },
        # Stays empty when the pair has no match-stage record.
        "match_results": (
            {} if match_result is None
            else {field: match_result[field] for field in _MATCH_FIELDS}
        ),
    }

    output_data.append(entry)

if pairs_without_final:
    print(f"Warning: {pairs_without_final} pair(s) have no cascade result; final_pred set to null")

# Output to a JSON file
output_file = "output_results.json"
with open(output_file, "w") as f:
    json.dump(output_data, f, indent=4)

print(f"Results saved to {output_file}")


Results saved to output_results.json


In [28]:
# Read the merged per-pair records back from disk.
result_path = "/Users/charlesxu/libem/examples/cascade/output_results.json"
with open(result_path, 'r') as results_fh:
    result = json.load(results_fh)

In [29]:
# Sort every pair in `result` by how the cascade handled it: pairs with an
# empty "match_results" were decided by the prematch stage alone; the others
# are bucketed by whether the match stage changed the prematch answer (flip)
# and whether the outcome agrees with the label (correct).
total_pairs = len(result)

prematch_only_pairs = []
num_prematch_only_correct = 0

match_correct_flip = []
match_incorrect_flip = []
match_correct_no_flip = []
match_incorrect_no_flip = []

# Each row: (label, token looked for in the prematch pred, token looked for in
# the match pred, destination bucket). Rows are tried top to bottom and the
# first one that applies wins, so the order encodes the bucket priority.
_FLIP_RULES = (
    (0, "yes", "no", match_correct_flip),
    (1, "no", "yes", match_correct_flip),
    (0, "no", "yes", match_incorrect_flip),
    (1, "yes", "no", match_incorrect_flip),
    (0, "no", "no", match_correct_no_flip),
    (1, "yes", "yes", match_correct_no_flip),
    (1, "no", "no", match_incorrect_no_flip),
    (0, "yes", "yes", match_incorrect_no_flip),
)

for entry in result:
    if not entry["match_results"]:
        # Prematch-only pair: correctness is judged on the final prediction.
        prematch_only_pairs.append(entry)
        label = entry["label"]
        final_pred = entry["final_pred"]
        if (label == 0 and "no" in final_pred.lower()) or (label == 1 and "yes" in final_pred.lower()):
            num_prematch_only_correct += 1
        continue

    label = entry["label"]
    final_pred = entry["final_pred"]
    prematch_pred = entry["prematch_results"]["pred"]
    match_pred = entry["match_results"]["pred"]

    for rule_label, prematch_token, match_token, bucket in _FLIP_RULES:
        if (
            label == rule_label
            and prematch_token in prematch_pred.lower()
            and match_token in match_pred.lower()
        ):
            bucket.append(entry)
            break
    else:
        # No rule applied (unexpected label or prediction text).
        print("Invalid entry")

# Every counter equals the size of the list it accompanies.
num_prematch_only = len(prematch_only_pairs)
num_correct_flip = len(match_correct_flip)
num_incorrect_flip = len(match_incorrect_flip)
num_correct_no_flip = len(match_correct_no_flip)
num_incorrect_no_flip = len(match_incorrect_no_flip)

stat = {
    "total_pairs": total_pairs,
    "prematch_final": {
        "num": num_prematch_only,
        "num_correct": num_prematch_only_correct,
    },
    "prematch_match_final": {
        "num_correct_flip": num_correct_flip,
        "num_incorrect_flip": num_incorrect_flip,
        "num_correct_no_flip": num_correct_no_flip,
        "num_incorrect_no_flip": num_incorrect_no_flip,
    },
}

# NOTE: `result` is rebound here from the loaded list to the summary dict, so
# this cell needs the list reloaded before it can be run a second time.
result = {
    "stat": stat,
    "result": {
        "prematch_final": prematch_only_pairs,
        "match_correct_flip": match_correct_flip,
        "match_incorrect_flip": match_incorrect_flip,
        "match_correct_no_flip": match_correct_no_flip,
        "match_incorrect_no_flip": match_incorrect_no_flip,
    },
}

In [33]:
import os

# Destination is examples/cascade/output/results-analysis.json, resolved
# against the current working directory.
output_path = os.path.join("examples", "cascade", "output")
output_filename = "results-analysis.json"
output_filepath = os.path.join(output_path, output_filename)

# The file sits directly inside output_path, so that is the directory to create.
os.makedirs(output_path, exist_ok=True)

with open(output_filepath, "w") as json_file:
    json.dump(result, json_file, indent=4)

print(f"result saved to {output_filepath}")

result saved to examples/cascade/output/results-analysis.json


In [2]:
import json

# Load the per-pair, per-stage records written by an earlier run.
result_path = "/Users/charlesxu/libem/examples/cascade/output/results-by-stages-2024-11-07-20-24-37.json"
with open(result_path, 'r') as stages_fh:
    result = json.load(stages_fh)


def convert_back_to_original(result):
    """Split merged per-pair entries back into three per-stage lists.

    Args:
        result: iterable of dicts, each with "left", "right", "label",
            "final_pred", a "prematch_results" dict and a "match_results"
            dict (empty/falsy when the pair has no match-stage record).

    Returns:
        A dict with three lists:
        "prematch_results" — one record per entry (pair, label and the
        prematch fields), "match_results" — one record per entry whose
        "match_results" is non-empty (pair and the match fields, no label),
        "cascade_result" — one record per entry (pair and "pred" taken from
        "final_pred").
    """
    prematch_fields = (
        "pred", "confidence", "calibrated_confidence", "explanation",
        "model_output", "tool_outputs", "latency", "tokens",
    )
    match_fields = (
        "pred", "confidence", "explanation",
        "model_output", "tool_outputs", "latency", "tokens",
    )

    prematch_all, prematch_match, final = [], [], []

    for entry in result:
        # Both sides of the pair lead every reconstructed record.
        pair = {"left": entry["left"], "right": entry["right"]}

        # Prematch record: pair, label, then the prematch fields in order.
        prematch_entry = dict(pair, label=entry["label"])
        stage_one = entry["prematch_results"]
        prematch_entry.update((field, stage_one[field]) for field in prematch_fields)
        prematch_all.append(prematch_entry)

        # Match record only for pairs that have match-stage data.
        stage_two = entry["match_results"]
        if stage_two:
            match_entry = dict(pair)
            match_entry.update((field, stage_two[field]) for field in match_fields)
            prematch_match.append(match_entry)

        # Cascade record: the pair plus the final prediction.
        final.append(dict(pair, pred=entry["final_pred"]))

    return {
        "prematch_results": prematch_all,
        "match_results": prematch_match,
        "cascade_result": final
    }


# Rebuild the three per-stage lists from the loaded entries and persist them.
result_data = convert_back_to_original(result)

with open('result_abt_buy.json', 'w') as abt_buy_fh:
    json.dump(result_data, abt_buy_fh, indent=4)

print("Data has been saved to result_abt_buy.json")

Data has been saved to result_abt_buy.json


In [11]:
# Reload the per-stage lists and report how many cascade records there are.
result_path = "result_abt_buy.json"
with open(result_path, 'r') as abt_buy_fh:
    result = json.load(abt_buy_fh)

# A missing section comes back as None, exactly as with individual .get calls.
prematch, match, cascade = (
    result.get(section)
    for section in ('prematch_results', 'match_results', 'cascade_result')
)
print(len(cascade))

1911
