# Prompts 2 Table

Welcome! This workbook walks through some examples of using the Prompts 2 Table code base to extract information from reports with LLMs.

In [None]:
# PFClient is the PromptFlow client object; every helper call in this workbook receives it.
# The three helpers imported from app.helper_functions are used in the cells that follow:
#   - pf_batch_run_wrapper: launches a batch run of the flow for one schema item
#   - flatten_outputs: turns a run's outputs into a flat dataframe (saved as CSV below)
#   - get_json_outputs: collects a run's full JSON outputs (saved as JSON below)
from promptflow.client import PFClient
from app.helper_functions.run_pf_wrapper import pf_batch_run_wrapper
from app.helper_functions.flat_results import flatten_outputs
from app.helper_functions.get_json_outputs import get_json_outputs

# Create a single client instance that is shared by all of the cells below
pf_client = PFClient()


In [None]:

# Diagnosis extraction. Diagnosis is a "feature report" item, i.e. one diagnosis per report.
# To look at the intermediate data, set flush_intermediate_data to False
# (NOTE(review): presumably a keyword argument of pf_batch_run_wrapper -- confirm in its signature).
diagnosis_run_kwargs = {
    "data_path": "example_data/input/example_jsonl_data.jsonl",
    "schema_path": "app/schemas/pathology_rcc_schema_v12.json",
    "item_name": "diagnosis",
    "connection_name": "qwen",
    "pf_worker_count": 2,
}
flow_result_diagnosis = pf_batch_run_wrapper(pf_client, **diagnosis_run_kwargs)

# Full results of the run, as returned by promptflow
df_diag = pf_client.get_details(flow_result_diagnosis)

# Flattened, tidy view of the outputs; written to CSV without the index column
flat_df_diag = flatten_outputs(flow_result=flow_result_diagnosis, pf_client=pf_client)
diagnosis_csv_path = "example_data/output_flat/diagnosis.csv"
flat_df_diag.to_csv(diagnosis_csv_path, index=False)

# Complete JSON outputs (useful for reading the reasoning sections), saved as a JSON file
json_outs_diag = get_json_outputs(flow_result=flow_result_diagnosis, pf_client=pf_client)
diagnosis_json_path = "example_data/output_json/diagnosis.json"
json_outs_diag.to_json(diagnosis_json_path, orient="index")

In [None]:

# Same diagnosis run, but restricted to a subset of the data: a second CSV of report IDs
# is passed as csv_to_filter. This is handy for keeping all of your reports in one place
# while only running the flow on the reports you actually want.
filtered_run_kwargs = {
    "data_path": "example_data/input/example_jsonl_data.jsonl",
    "schema_path": "app/schemas/pathology_rcc_schema_v12.json",
    "item_name": "diagnosis",
    "connection_name": "qwen",
    "pf_worker_count": 2,
    "csv_to_filter": "example_data/input/example_filter_by.csv",
}
flow_result_diagnosis = pf_batch_run_wrapper(pf_client, **filtered_run_kwargs)

# Full results of the run, as returned by promptflow
df_diag = pf_client.get_details(flow_result_diagnosis)

# Flattened, tidy view of the outputs; written to CSV without the index column
flat_df_diag = flatten_outputs(flow_result=flow_result_diagnosis, pf_client=pf_client)
filtered_csv_path = "example_data/output_flat/diagnosis_filtered.csv"
flat_df_diag.to_csv(filtered_csv_path, index=False)

# Complete JSON outputs (useful for reading the reasoning sections), saved as a JSON file
json_outs_diag = get_json_outputs(flow_result=flow_result_diagnosis, pf_client=pf_client)
filtered_json_path = "example_data/output_json/diagnosis_filtered.json"
json_outs_diag.to_json(filtered_json_path, orient="index")

In [None]:
# Histology extraction. Histology is a "feature specimen" item, so the flow returns
# the histology for each specimen in the report. This run reads its input from a CSV file.
histology_run_kwargs = {
    "data_path": "example_data/input/example_csv_data.csv",
    "schema_path": "app/schemas/pathology_rcc_schema_v12.json",
    "item_name": "histology",
    "connection_name": "kidney_west",
}
flow_result_histology = pf_batch_run_wrapper(pf_client, **histology_run_kwargs)

# Full results of the run, as returned by promptflow
df_hist = pf_client.get_details(flow_result_histology)

# Flattened, tidy view of the outputs; written to CSV without the index column
flat_df_hist = flatten_outputs(flow_result=flow_result_histology, pf_client=pf_client)
histology_csv_path = "example_data/output_flat/histology.csv"
flat_df_hist.to_csv(histology_csv_path, index=False)

# Complete JSON outputs (useful for reading the reasoning sections), saved as a JSON file
json_outs_hist = get_json_outputs(flow_result=flow_result_histology, pf_client=pf_client)
histology_json_path = "example_data/output_json/histology.json"
json_outs_hist.to_json(histology_json_path, orient="index")



In [None]:
# Immunohistochemistry (IHC) extraction. IHC is a "panel specimen" item, so the flow
# returns all assays organised by specimen and block.
ihc_run_kwargs = {
    "data_path": "example_data/input/example_jsonl_data.jsonl",
    "schema_path": "app/schemas/pathology_rcc_schema_v12.json",
    "item_name": "immunohistochemistry",
    "connection_name": "kidney_west",
}
flow_result_ihc = pf_batch_run_wrapper(pf_client, **ihc_run_kwargs)

# Full results of the run, as returned by promptflow
df_ihc = pf_client.get_details(flow_result_ihc)

# Flattened, tidy view of the outputs; written to CSV without the index column
flat_df_ihc = flatten_outputs(flow_result=flow_result_ihc, pf_client=pf_client)
ihc_csv_path = "example_data/output_flat/ihc.csv"
flat_df_ihc.to_csv(ihc_csv_path, index=False)

# Complete JSON outputs (useful for reading the reasoning sections), saved as a JSON file
json_outs_ihc = get_json_outputs(flow_result=flow_result_ihc, pf_client=pf_client)
ihc_json_path = "example_data/output_json/ihc.json"
json_outs_ihc.to_json(ihc_json_path, orient="index")