In [1]:
!pip install diagrams

Collecting diagrams
  Downloading diagrams-0.23.4-py3-none-any.whl (24.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
Collecting typed-ast<2.0.0,>=1.5.4 (from diagrams)
  Downloading typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (824 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m824.7/824.7 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: typed-ast, diagrams
Successfully installed diagrams-0.23.4 typed-ast-1.5.5


In [None]:
import requests
from PIL import Image
from io import BytesIO
from diagrams import Diagram, Node, Cluster, Edge
from diagrams.generic.storage import Storage #input documents
from diagrams.elastic.beats import Filebeat #Text chunks
from diagrams.gcp.database import SQL #vector database
from diagrams.custom import Custom
from diagrams.aws.general import User

def download_image(url, filepath, timeout=30):
    """Download ``url`` and write the raw response body to ``filepath``.

    Args:
        url: Address of the file to fetch.
        filepath: Destination path; it is opened in binary write mode, so an
            existing file is overwritten.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an error page under an image file name.
    response.raise_for_status()
    with open(filepath, 'wb') as f:
        f.write(response.content)

# Fetch the icons for the custom nodes before the diagram context is opened,
# so a failed download stops the cell before any rendering is attempted.
encode_icon_path = "encoding.png"
download_image("https://th.bing.com/th/id/OIP.Bp78Z0Zjsq2r6QgI8pRzugHaHa?rs=1&pid=ImgDetMain", encode_icon_path)

qn_icon_path = "qn.png"
download_image("https://cdn4.iconfinder.com/data/icons/cloud-computing-27/24/cdn-remote-cloud-question-512.png", qn_icon_path)

llm_icon_path = "llm.png"
download_image("https://th.bing.com/th/id/OIP.qdyf-4g5JyOfIpTJ4JIGaAAAAA?pid=ImgDet&w=206&h=206&c=7&dpr=1.3", llm_icon_path)

ans_icon_path = "ans.png"
download_image("https://cdn0.iconfinder.com/data/icons/operating-system-7/500/yul997_10_translate_operating_system-512.png", ans_icon_path)

with Diagram("Structured Database and Text Extraction System", show=False, direction="LR", outformat="png") as diag:
    # A node joins whichever cluster is active when it is constructed, so every
    # node is instantiated inside the cluster it belongs to; binding an existing
    # node to a new name inside a ``with Cluster`` block would not move it.

    # Indexing side: documents are chunked, embedded and stored.
    with Cluster("Document Storage"):
        ipdoc = Storage("Input Documents")
        txtchunks = Filebeat("Text Chunks")
        doc_encoder = Custom("\nEncoder Model", encode_icon_path, fontsize="12")
        database = SQL("Vector Database")

    # Query side: the question is embedded, matching chunks are retrieved and
    # handed to the LLM together with the question.
    with Cluster("LLM Prompting"):
        qn = Custom("\nQuestion", qn_icon_path, fontsize="12")
        query_encoder = Custom("\nEncoder Model", encode_icon_path, fontsize="12")
        llm = Custom("\nLLM", llm_icon_path, fontsize="12")
        relevant_chunks = Filebeat("Relevant Chunks")
        ans = Custom("\nAnswer", ans_icon_path, fontsize="12")

    # Edges are declared at diagram level because they cross cluster boundaries.
    ipdoc >> txtchunks
    txtchunks >> doc_encoder
    doc_encoder >> Edge(label="embedding") >> database
    database >> Edge(label="Similarity search") >> relevant_chunks
    qn >> query_encoder
    query_encoder >> Edge(label="embedding") >> database
    qn >> Edge(label="Generator LLM Model") >> llm
    llm >> ans
    relevant_chunks >> llm

# Last expression of the cell: shows the rendered diagram inline.
diag
