In [6]:
from diagrams import Diagram

In [8]:
from diagrams import Diagram, Cluster, Edge
from diagrams.aws.storage import S3
from diagrams.onprem.compute import Server
from diagrams.onprem.client import Users
from diagrams.programming.language import Python
from diagrams.onprem.container import Docker
from diagrams.saas.cdn import Cloudflare
from diagrams.onprem.iac import Terraform
from diagrams.custom import Custom

# First draft of the pipeline diagram: scrapers -> markdown converters -> S3,
# with a Streamlit/FastAPI front end. Statement order is kept deliberately,
# since Graphviz layout is sensitive to the order nodes and edges are declared.
# show=False is passed to Diagram (per the diagrams docs this should stop the
# rendered file from being opened automatically).
with Diagram("AI Application Data Pipeline", show=False):
    user = Users("User")

    # --- Ingestion: one open-source and one enterprise tool per source type ---
    with Cluster("PDF Scraping"):
        pdf_parser = Custom("PyMuPDF", "./pymupdf.png")
        enterprise_pdf = Custom("Azure AI Document Intelligence", "./azure.png")

    with Cluster("Web Scraping"):
        web_scraper = Custom("Selenium", "./selenium.png")
        enterprise_web = Custom("ScrapingBee", "./scrapingbee.png")

    # --- Conversion: every scraper feeds both markdown converters ---
    with Cluster("Markdown Conversion"):
        markdown_converter = [
            Custom(tool_label, tool_icon)
            for tool_label, tool_icon in (
                ("Docling", "./docling.png"),
                ("MarkItDown", "./markitdown.png"),
            )
        ]
        # `node >> [list]` draws one edge to each element of the list.
        for scraper in (pdf_parser, web_scraper, enterprise_pdf, enterprise_web):
            scraper >> markdown_converter

    # --- Storage: both converters write to the bucket ---
    with Cluster("Cloud Storage & Organization"):
        s3_bucket = S3("AWS S3")
        markdown_converter >> s3_bucket

    # --- Serving: user -> Streamlit -> FastAPI -> S3 ---
    with Cluster("API & Deployment"):
        fastapi = Python("FastAPI")
        streamlit_ui = Custom("Streamlit", "./streamlit.png")
        fastapi >> s3_bucket
        user >> streamlit_ui
        streamlit_ui >> fastapi
    
    


In [4]:
from diagrams import Diagram, Cluster, Edge
from diagrams.aws.storage import S3
from diagrams.onprem.compute import Server
from diagrams.onprem.client import Users
from diagrams.programming.language import Python
from diagrams.onprem.container import Docker
from diagrams.custom import Custom

# Second layout attempt: left-to-right flow, with the two scraping clusters
# nested under a shared "Data Ingestion" cluster. All nodes are declared first
# and the edges are wired afterwards, in a fixed order.
with Diagram("AI Application Data Pipeline", show=False, direction="LR"):
    user = Users("User")

    with Cluster("Data Ingestion"):
        with Cluster("PDF Scraping"):
            pdf_parser = Custom("PyMuPDF", "./pymupdf.png")
            enterprise_pdf = Custom("Azure AI Document Intelligence", "./azure.png")

        with Cluster("Web Scraping"):
            web_scraper = Custom("Selenium", "./selenium.png")
            enterprise_web = Custom("ScrapingBee", "./scrapingbee.png")

    with Cluster("Processing & Conversion"):
        markdown_converter = [
            Custom(tool_label, tool_icon)
            for tool_label, tool_icon in (
                ("Docling", "./docling.png"),
                ("MarkItDown", "./markitdown.png"),
            )
        ]

    with Cluster("Cloud Storage & Organization"):
        s3_bucket = S3("AWS S3")

    with Cluster("API & Deployment"):
        fastapi = Python("FastAPI")
        streamlit_ui = Custom("Streamlit", "./streamlit.png")

    # Front-end path: user -> Streamlit -> FastAPI.
    user >> streamlit_ui
    streamlit_ui >> fastapi

    # Every ingestion tool fans out to both converters.
    for ingest_node in (pdf_parser, enterprise_pdf, web_scraper, enterprise_web):
        ingest_node >> markdown_converter

    # Converters and the API both connect to the bucket.
    markdown_converter >> s3_bucket
    fastapi >> s3_bucket

In [22]:
from diagrams import Diagram, Cluster, Edge
from diagrams.aws.storage import S3
from diagrams.onprem.client import Users
from diagrams.programming.language import Python
from diagrams.custom import Custom
from diagrams.generic.blank import Blank

# Third layout attempt: top-to-bottom, with the API driving the scrapers.
# The runs of "\n" inside the cluster labels and the Blank(" ") nodes look like
# manual padding/spacing tweaks for the rendered image - they are reproduced
# verbatim because they are part of the rendered output.
with Diagram("AI Application Data Pipeline", show=False, direction="TB"):
    user = Users("User")

    with Cluster("API & User Interface\n\n\n\n\n\n"):
        fastapi = Python("FastAPI")
        streamlit_ui = Custom("Streamlit", "./streamlit.png")

    with Cluster("Data Processing & Storage\n\n\n\n\n\n"):
        with Cluster("Data Ingestion - PDF Scraping\n\n\n\n\n\n"):
            pdf_parser = Custom("PyMuPDF", "./pymupdf.png")
            spacer_pdf = Blank(" ")
            enterprise_pdf = Custom("Azure AI Document Intelligence", "./azure.png")

        with Cluster("\n\n\n\n\n\nData Ingestion - Web Scraping\n\n\n\n\n\n"):
            web_scraper = Custom("Selenium", "./selenium.png")
            spacer_web = Blank(" ")
            enterprise_web = Custom("ScrapingBee", "./scrapingbee.png")

        with Cluster("\n\n\n\n\n\nProcessing\n\n\n\n\n\n"):
            docling = Custom("Docling", "./docling.png")
            spacer_processing = Blank(" ")
            markitdown = Custom("MarkItDown", "./markitdown.png")

    with Cluster("\n\n\nStorage\n\n\n"):
        s3_bucket = S3("AWS S3")

    # The only edge drawn with default attributes.
    user >> streamlit_ui

    # (tail, head, minlen) for every remaining edge, in drawing order.
    # minlen is handed to Edge() unchanged as a string.
    spaced_links = (
        (streamlit_ui, fastapi, "3"),
        (fastapi, web_scraper, "5"),
        (fastapi, enterprise_web, "5"),
        (fastapi, enterprise_pdf, "5"),
        (fastapi, pdf_parser, "5"),
        (fastapi, s3_bucket, "3"),
        (pdf_parser, docling, "5"),
        (enterprise_pdf, markitdown, "5"),
        (web_scraper, docling, "5"),
        (enterprise_web, markitdown, "5"),
        (docling, s3_bucket, "5"),
        (markitdown, s3_bucket, "5"),
    )
    for tail_node, head_node, min_span in spaced_links:
        # A fresh Edge per link, exactly as when each was written out by hand.
        tail_node >> Edge(minlen=min_span) >> head_node
    

In [43]:
from diagrams import Diagram, Cluster, Edge
from diagrams.aws.storage import S3
from diagrams.onprem.client import Users
from diagrams.programming.language import Python
from diagrams.custom import Custom
from diagrams.generic.blank import Blank

# Final layout: top-to-bottom pipeline in which the API triggers the scrapers,
# the scrapers feed the markdown converters, and the converters write to S3,
# which the API also reads back from.
#
# The "\n" / "\t" runs inside the cluster labels and the Blank(" ") nodes look
# like manual padding/spacing tweaks for the rendered image; the label strings
# are kept exactly as they were because they are part of the rendered output.
with Diagram("AI Application Data Pipeline", show=False, direction="TB"):
    user = Users("User")

    with Cluster("API & User Interface\n\n\n\n\n\n"):
        fastapi = Python("FastAPI")
        streamlit_ui = Custom("Streamlit", "./streamlit.png")

    with Cluster("\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tData Processing & Storage\n\n\n\n\n\n"):
        with Cluster("\n\t\t\t\t\tData Ingestion - PDF Scraping\n\n\n\n"):
            pdf_parser = Custom("PyMuPDF", "./pymupdf.png")
            # One distinct name per spacer, instead of rebinding
            # `spacer_processing` three times.
            spacer_pdf = Blank(" ")
            enterprise_pdf = Custom("Azure AI Document Intelligence", "./azure.png")

        with Cluster("\n\t\t\t\t\tData Ingestion - Web Scraping\n\n\n\n"):
            web_scraper = Custom("Selenium", "./selenium.png")
            spacer_web = Blank(" ")
            enterprise_web = Custom("ScrapingBee", "./scrapingbee.png")

        with Cluster("\n\t\t\t\t\t\t\tProcessing\n\n\n"):
            docling = Custom("Docling", "./docling.png")
            spacer_processing = Blank(" ")
            markitdown = Custom("MarkItDown", "./markitdown.png")

    with Cluster("\n\tStorage\n\n\n"):
        s3_bucket = S3("AWS S3")

    # Front end: user -> Streamlit -> FastAPI.
    user >> streamlit_ui
    streamlit_ui >> Edge(minlen="3") >> fastapi

    # FastAPI drives each of the four ingestion tools.
    fastapi >> Edge(minlen="5") >> web_scraper
    fastapi >> Edge(minlen="5") >> enterprise_web
    fastapi >> Edge(minlen="5") >> enterprise_pdf
    fastapi >> Edge(minlen="5") >> pdf_parser

    # FastAPI -> S3. This edge used to carry
    # lhead="cluster_API & User Interface". Graphviz only applies lhead when
    # the graph enables compound=true and the named cluster contains the
    # edge's head; no graph attributes are passed to Diagram here, and the
    # head (s3_bucket) sits in the Storage cluster, so the attribute had no
    # effect and has been removed.
    fastapi >> Edge(minlen="3") >> s3_bucket

    # Ingestion -> conversion.
    pdf_parser >> Edge(minlen="5") >> docling
    enterprise_pdf >> Edge(minlen="5") >> markitdown
    web_scraper >> Edge(minlen="5") >> docling
    enterprise_web >> Edge(minlen="5") >> markitdown

    # Conversion -> storage, and storage back to the API.
    docling >> Edge(minlen="5") >> s3_bucket
    markitdown >> Edge(minlen="5") >> s3_bucket
    s3_bucket >> Edge(minlen="5") >> fastapi