### Export each patient case (user query), together with the base-LLM and RAG-enhanced-LLM diagnoses, to its own PDF file

In [None]:
import os
import pandas as pd
from fpdf import FPDF

# Load the CSV file; the export loop further down reads its "p_prompt",
# "llm_response" and "llm_rag_response" columns
df = pd.read_csv("./rag_gemini.csv")
# Define the output folder for the generated PDFs and create it if needed
# (exist_ok=True makes re-running this cell harmless)
output_folder = "./rag_gemini_pdfs"
os.makedirs(output_folder, exist_ok=True)

# Create PDF class
class PDF(FPDF):
    """FPDF subclass with helpers for writing titled sections of text."""

    @staticmethod
    def _latin1_safe(text):
        """Return *text* with every non-Latin-1 character replaced by '?'."""
        encoded = text.encode('latin-1', 'replace')
        return encoded.decode('latin-1')

    def header(self):
        """Page-header hook: only selects bold 12 pt Arial, draws nothing."""
        self.set_font("Arial", "B", 12)

    def chapter_title(self, title):
        """Write *title* as a left-aligned bold 14 pt line, then add a gap."""
        safe_title = self._latin1_safe(title)
        self.set_font("Arial", "B", 14)
        self.cell(0, 10, safe_title, ln=True, align="L")
        self.ln(5)

    def chapter_content(self, content):
        """Write *content* as wrapped regular 10 pt text, then add a gap."""
        safe_content = self._latin1_safe(content)
        self.set_font("Arial", "", 10)
        self.multi_cell(0, 10, safe_content)
        self.ln(5)

# Define the row range to process (zero-based positions, both ends inclusive)
start_row = 100  # Adjust as needed
end_row = 149  # Adjust as needed (inclusive)


def _base_llm_first(pdf_number):
    """Return True when the base-LLM answer is shown as "LLM Response 1".

    The order in which the two answers appear depends on the PDF number:

        pdfs   1-50  -> LLM Response 1 = Base LLM
        pdfs  51-100 -> LLM Response 1 = RAG LLM
        pdfs 101-150 -> LLM Response 1 = Base LLM
        pdfs 151-214 -> LLM Response 1 = RAG LLM

    Encoding the scheme here means the loop below no longer has to be edited
    by hand whenever ``start_row`` / ``end_row`` are changed.
    """
    return pdf_number <= 50 or 101 <= pdf_number <= 150


# Write one PDF per CSV row in the selected range
for index, row in df.iloc[start_row:end_row + 1].iterrows():
    # PDFs are numbered from 1. NOTE(review): this assumes the DataFrame keeps
    # its default 0-based index, so the label equals the row position.
    pdf_number = index + 1

    base_response = str(row["llm_response"])
    rag_response = str(row["llm_rag_response"])
    if _base_llm_first(pdf_number):
        first_response, second_response = base_response, rag_response
    else:
        first_response, second_response = rag_response, base_response

    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.chapter_title("User Query")
    # "Õ" is swapped for an apostrophe (presumably a mis-decoded character in
    # the CSV export - confirm against the source data).
    pdf.chapter_content(str(row["p_prompt"]).replace("Õ", "'"))
    pdf.chapter_title("LLM Response 1")
    pdf.chapter_content(first_response)
    pdf.chapter_title("LLM Response 2")
    pdf.chapter_content(second_response)

    # Save each PDF file separately
    output_filename = os.path.join(output_folder, f"rag_gemini_row_{pdf_number}.pdf")
    pdf.output(output_filename)
    print(f"PDF generated successfully and saved as '{output_filename}'")


### Update the names of the files to avoid bias during evaluation

In [9]:
import os

def rename_files_in_folder(folder_path, old_str, new_str):
    """Rename the files directly inside *folder_path* by substring replacement.

    Every occurrence of *old_str* in a file name is replaced with *new_str*.
    Sub-directories are left untouched and are not searched. Progress and
    problems are reported with ``print``; the function returns ``None``.

    Args:
        folder_path: Folder whose files are renamed.
        old_str: Text to look for in each file name.
        new_str: Text that replaces ``old_str``.
    """
    # Check if the folder path exists
    if not os.path.exists(folder_path):
        print(f"The folder '{folder_path}' does not exist.")
        return

    # A plain file would pass the check above and then crash os.listdir()
    if not os.path.isdir(folder_path):
        print(f"'{folder_path}' is not a folder.")
        return

    # str.replace("", x) inserts x between every character of the name, which
    # would mangle every file in the folder
    if not old_str:
        print("Nothing to rename: the text to replace is empty.")
        return

    # Iterate over all entries in the folder (sorted for a stable order)
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)

        # Only regular files are renamed, never directories
        if not os.path.isfile(file_path):
            continue

        new_filename = filename.replace(old_str, new_str)
        if new_filename == filename:
            continue

        new_file_path = os.path.join(folder_path, new_filename)

        # os.rename() can silently overwrite an existing file, so skip the
        # rename instead of destroying data. samefile() keeps case-only
        # renames working on case-insensitive file systems.
        if os.path.exists(new_file_path) and not os.path.samefile(file_path, new_file_path):
            print(f"Skipped: {filename} -> {new_filename} (target already exists)")
            continue

        os.rename(file_path, new_file_path)
        print(f"Renamed: {filename} -> {new_filename}")

# Example usage: replace the model name in the PDF file names with a neutral
# label
folder_path = "./rag_gemini_pdfs"  # Replace with your folder path
old_str = "gemini"  # text to look for in each file name
new_str = "llmA"  # text that replaces it

rename_files_in_folder(folder_path, old_str, new_str)

Renamed: rag_gpt3-5_row_1.pdf -> rag_llmB_row_1.pdf
Renamed: rag_gpt3-5_row_10.pdf -> rag_llmB_row_10.pdf
Renamed: rag_gpt3-5_row_100.pdf -> rag_llmB_row_100.pdf
Renamed: rag_gpt3-5_row_101.pdf -> rag_llmB_row_101.pdf
Renamed: rag_gpt3-5_row_102.pdf -> rag_llmB_row_102.pdf
Renamed: rag_gpt3-5_row_103.pdf -> rag_llmB_row_103.pdf
Renamed: rag_gpt3-5_row_104.pdf -> rag_llmB_row_104.pdf
Renamed: rag_gpt3-5_row_105.pdf -> rag_llmB_row_105.pdf
Renamed: rag_gpt3-5_row_106.pdf -> rag_llmB_row_106.pdf
Renamed: rag_gpt3-5_row_107.pdf -> rag_llmB_row_107.pdf
Renamed: rag_gpt3-5_row_108.pdf -> rag_llmB_row_108.pdf
Renamed: rag_gpt3-5_row_109.pdf -> rag_llmB_row_109.pdf
Renamed: rag_gpt3-5_row_11.pdf -> rag_llmB_row_11.pdf
Renamed: rag_gpt3-5_row_110.pdf -> rag_llmB_row_110.pdf
Renamed: rag_gpt3-5_row_111.pdf -> rag_llmB_row_111.pdf
Renamed: rag_gpt3-5_row_112.pdf -> rag_llmB_row_112.pdf
Renamed: rag_gpt3-5_row_113.pdf -> rag_llmB_row_113.pdf
Renamed: rag_gpt3-5_row_114.pdf -> rag_llmB_row_114.pdf
