# In [15]:
#!pip install fpdf

import json
from fpdf import FPDF
 
import re
import unicodedata
import unicodedata

def sanitize_text(text):
    """Return *text* reduced to characters that Latin-1 can represent.

    The text is NFKD-normalized, so compatibility and composed characters
    are broken into their base parts, and anything that still has no
    Latin-1 encoding is dropped silently.

    Any failure along the way (for example a non-string argument) yields a
    fixed placeholder message instead of raising.
    """
    fallback = "[Context could not be rendered due to encoding issues.]"
    try:
        decomposed = unicodedata.normalize("NFKD", text)
        latin1_bytes = decomposed.encode("latin-1", errors="ignore")
        rendered = latin1_bytes.decode("latin-1")
    except Exception:
        # Best-effort helper: never let one bad value abort the caller.
        return fallback
    return rendered


# Typographic punctuation that has no NFKD decomposition and no Latin-1
# encoding, mapped to the ASCII character that should stand in for it.
_PDF_PUNCTUATION = str.maketrans({
    "\u2022": "-",   # bullet
    "\u2013": "-",   # en dash
    "\u2014": "-",   # em dash
    "\u201c": '"',   # left double quotation mark
    "\u201d": '"',   # right double quotation mark
    "\u2018": "'",   # left single quotation mark
    "\u2019": "'",   # right single quotation mark
})


def clean_for_pdf(text):
    """Return *text* reduced to Latin-1-encodable characters for the PDF.

    Bullets, dashes and curly quotes are first replaced by ASCII
    stand-ins; the result is then NFKD-normalized and every character that
    still cannot be encoded as Latin-1 is dropped.

    Args:
        text: The string to clean. ``None`` is treated as empty text.

    Returns:
        The cleaned string (``""`` when *text* is ``None``).
    """
    if text is None:
        return ""

    # The substitutions must run BEFORE the Latin-1 round trip: these
    # characters are not encodable, so encoding with "ignore" would strip
    # them and leave nothing to replace afterwards.
    text = text.translate(_PDF_PUNCTUATION)

    # Decompose compatibility/composed characters into their base parts.
    text = unicodedata.normalize("NFKD", text)

    # Encode to latin-1, ignoring unsupported characters, then decode back.
    return text.encode("latin-1", "ignore").decode("latin-1")

def generate_chat_pdf(json_file, output_pdf, add_context=False):
    """Render a JSON chat history as a colour-coded PDF transcript.

    The JSON file must contain a list of messages. For each message the
    function reads ``message["type"]`` and ``message["data"]["content"]``:
    ``"human"`` messages are printed in blue with a ``Human:`` prefix and
    ``"ai"`` messages in dark green with an ``AI:`` prefix. Messages of any
    other type print no text, only the small gap that follows every message.

    Args:
        json_file: Path of the JSON chat history to read.
        output_pdf: Path the PDF is written to.
        add_context: When true, every AI message that carries a non-empty
            ``data["additional_kwargs"]["retrieved_context"]`` is followed
            by a grey ``> Context used: ...`` paragraph.

    Raises:
        KeyError: If a message lacks ``"type"``, ``"data"`` or ``"content"``.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(json_file, 'r', encoding="utf-8") as f:
        chat_data = json.load(f)

    # Create PDF
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.set_title("Chat History Transcript")

    for message in chat_data:
        role = message["type"]
        data = message["data"]
        content = data["content"]

        if role == "human":
            pdf.set_text_color(0, 0, 128)  # Blue for human
            pdf.multi_cell(0, 10, f"Human: {clean_for_pdf(content)}", border=0)

        elif role == "ai":
            pdf.set_text_color(0, 100, 0)  # Dark green for AI
            pdf.multi_cell(0, 10, f"AI: {clean_for_pdf(content)}", border=0)

            if add_context:
                # Look the context up only when it is wanted, and clean it
                # only when it exists: AI messages without retrieved
                # context yield None here, which must not reach the
                # text-cleaning helpers.
                extras = data.get("additional_kwargs") or {}
                context = extras.get("retrieved_context")
                if context:
                    safe_context = sanitize_text(clean_for_pdf(context))
                    pdf.set_text_color(105, 105, 105)  # Grey for context
                    pdf.multi_cell(0, 10, f"> Context used: {safe_context}", border=0)

        pdf.ln(2)

    # Save PDF
    pdf.output(output_pdf)


# Usage: write the "_doctor" transcript (retrieved context included), echo
# the source path, then write the "_patient" transcript (context omitted).
f_hist = "../meta_data/output/chat_history.json"

generate_chat_pdf(
    json_file=f_hist,
    output_pdf=f_hist.replace(".json", "_doctor.pdf"),
    add_context=True,
)
print(f_hist)
generate_chat_pdf(
    json_file=f_hist,
    output_pdf=f_hist.replace(".json", "_patient.pdf"),
    add_context=False,
)



# Output: ../meta_data/output/chat_history.json
