In [16]:
from graphviz import Digraph

# Directed graph of the simplification pipeline, rendered as PNG.
dot = Digraph(comment='Simplification Pipeline Graph (No Dynamic Layer)', format='png')
dot.attr(rankdir='LR', fontname="LibreSans")  # Left-to-right layout
# A graph-level fontname only styles the graph's own label, so the font is
# also registered as the node default. This has to happen before the first
# node is declared, because defaults apply only to nodes created afterwards.
dot.attr('node', fontname="LibreSans")

# Pipeline endpoints
dot.node('Input', 'Input\n(Medical Report)', shape='oval')
dot.node('Output', 'Output\n(Simplified Report)', shape='oval')

# Candidate models for each pipeline stage, keyed by the graph node id.

# Lexical stage.
# L1 (gpt-o1) is intentionally excluded: it produced high-precision benchmark
# results and clinical release-ready output, but it is far more expensive.
lexical_models = {
    # Compared with o1: roughly 95% of the quality at about 17% of the cost.
    "L2": "gpt-4o",
    "L3": "deepseek-chat",
    "L4": "MedLlama",
}

# Syntactic stage.
# S2 (gpt-4o-mini) is intentionally excluded: cheaper than gpt-4o but slightly
# worse, in the same tier as deepseek-chat.
syntactic_models = {
    "S1": "gpt-4o",
    "S3": "deepseek-chat",
    "S4": "t5-small",
    "S5": "bart-large-cnn",
    "S6": "MedLlama",
}

# Formatting stage.
format_models = {
    "F1": "gpt-4o-mini",
    "F2": "t5-small",
    "F3": "bart-large-cnn",
    "F4": "deepseek-chat",
    "F5": "MedLlama",
}

# One rank='same' subgraph per stage: all nodes of a stage share a rank, which
# in the left-to-right layout stacks them in a single vertical column.
for stage_title, stage_models in (
    ("Lexical", lexical_models),
    ("Syntactic", syntactic_models),
    ("Format", format_models),
):
    with dot.subgraph() as stage:
        stage.attr(rank='same')
        for node_id, model_name in stage_models.items():
            # Node label: stage name on the first line, model name below it.
            stage.node(node_id, f"{stage_title}\n({model_name})")

# Connections

# The input report feeds every lexical model.
for lex_id in lexical_models:
    dot.edge('Input', lex_id)

# Each stage is fully connected to the next one (lexical -> syntactic,
# then syntactic -> format), so every model combination is a path.
for upstream, downstream in (
    (lexical_models, syntactic_models),
    (syntactic_models, format_models),
):
    for src_id in upstream:
        for dst_id in downstream:
            dot.edge(src_id, dst_id)

# Every format model leads to the final output.
for fmt_id in format_models:
    dot.edge(fmt_id, 'Output')

# Render the graph to simplification_pipeline.png; cleanup=True deletes the
# intermediate DOT source file once rendering succeeds.
dot.render(
    filename='simplification_pipeline',
    format='png',
    cleanup=True,
)


'simplification_pipeline.png'