In [7]:
import os
import sys
import json
from pathlib import Path
from tqdm import tqdm

# Setup paths
# The project root is taken to be the parent of the current working directory;
# the input and output files are resolved relative to the working directory.
project_root = Path.cwd().parent
model_path = project_root.joinpath("pipelines", "lm_model_classes")
input_path = Path("results/syntactic_results.json")
output_path = Path("results/formatter_per_variant_output.json")
# Create the results directory up front so the later save cannot fail on it.
output_path.parent.mkdir(parents=True, exist_ok=True)

# Add model path
# Must happen before the two imports below, which resolve the model class
# modules by bare name from this directory.
sys.path.append(os.fspath(model_path))

# Import model classes
from GPT4oAPI import GPT4oAPI
from DeepSeekChatAPI import DeepSeekChatAPI

# Load data
with input_path.open(encoding="utf-8") as f:
    syntactic_data = json.load(f)

# Initialize models
gpt4o = GPT4oAPI()
deepseek = DeepSeekChatAPI()

# Variants to process
# Each name is a field looked up on every entry by process_entry().
variant_keys = [
    "gpt4o_gpt4o",
    "gpt4o_deepseek",
    "deepseek_gpt4o",
    "deepseek_deepseek",
]

# Run formatters on each variant
def process_entry(entry):
    """Apply both formatter models to every variant text stored in ``entry``.

    For each name in the module-level ``variant_keys``, the value at
    ``entry[name]`` is passed to ``format_summarization`` on ``gpt4o`` and then
    on ``deepseek``. Each result is written back into ``entry`` under
    ``"<model>_formatter_on_<name>"``. If a formatter call raises, the
    exception is caught and a bracketed error string is stored instead, so one
    failing call does not stop the remaining ones.

    Args:
        entry: Mapping with one value per variant key. It is updated in place.

    Returns:
        The same ``entry`` object that was passed in.

    Raises:
        KeyError: If ``entry`` is a dict missing one of the variant keys; that
            lookup happens outside the error handling.
    """
    for variant in variant_keys:
        source_text = entry[variant]

        # (output-key prefix, error tag, model), in the order results are written.
        for prefix, error_tag, model in (
            ("gpt4o", "GPT", gpt4o),
            ("deepseek", "DeepSeek", deepseek),
        ):
            target_key = f"{prefix}_formatter_on_{variant}"
            try:
                entry[target_key] = model.format_summarization(source_text)
            except Exception as exc:
                entry[target_key] = f"[{error_tag} ERROR] {exc!s}"

    return entry

# Run with progress bar
# Every entry triggers eight formatter calls (4 variants x 2 models), so a full
# pass is slow. Until the final save runs, `results` exists only in kernel
# memory; to avoid losing the whole batch on an interruption, the processed
# entries are periodically written to a sibling "*.partial.json" checkpoint.
# The checkpoint is a convenience copy only: the contents of `results` and the
# final output file are unaffected by it.
CHECKPOINT_EVERY = 10  # entries between checkpoint writes
checkpoint_path = output_path.with_name(f"{output_path.stem}.partial.json")

results = []
for entry in tqdm(syntactic_data, desc="Formatting all 4 variants with both models"):
    processed = process_entry(entry)
    results.append(processed)

    # Rewrite the whole checkpoint each time so it is always a complete,
    # loadable JSON list of the entries finished so far.
    if len(results) % CHECKPOINT_EVERY == 0:
        with open(checkpoint_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

Formatting all 4 variants with both models: 100%|██████████| 100/100 [2:28:07<00:00, 88.88s/it]  


In [9]:

# Save output
# Serialize first, then write in one call; non-ASCII characters are kept
# as-is (ensure_ascii=False) and the file is encoded as UTF-8.
serialized = json.dumps(results, indent=2, ensure_ascii=False)
output_path.write_text(serialized, encoding="utf-8")

saved_to = output_path.resolve()
print(f"\n✅ Finished formatting per variant. Saved to: {saved_to}")


✅ Finished formatting per variant. Saved to: /Users/rolf/Desktop/GitHub Repos/MedEase-PoC-Eval/evaluation/results/formatter_per_variant_output.json
