In [3]:
from diagrams import Diagram, Edge, Cluster
from diagrams.aws.storage import S3
from diagrams.custom import Custom
from diagrams.onprem.workflow import Airflow
from diagrams.generic.compute import Rack
from diagrams.azure.database import SQLDatabases

# Icon image files handed to the Custom(...) nodes of the architecture diagram
# in this cell. They are bare file names, i.e. relative paths.
azuresql_logo = "azuresql_logo.png"
fastapi_logo = "fastapi_logo.png"
openai_api_logo = "openai_api_logo.png"
streamlit_logo = "streamlit_logo.png"
user_logo = "user_logo.png"

# Architecture diagram of the GAIA OpenAI evaluation tool.
def _flow(source, label, target):
    """Connect ``source`` to ``target`` with a forward edge carrying ``label``.

    Returns the value of the ``>>`` chain so callers can keep a handle on it.
    """
    return source >> Edge(label=label) >> target


# The diagram is written under the base name "Structural Diagram" with a
# top-to-bottom ("TB") direction; show=True is kept exactly as configured.
with Diagram(
    name="GAIA OpenAI Evaluation Tool Architecture",
    filename="Structural Diagram",
    direction="TB",
    show=True,
):
    # The person using the tool.
    user = Custom("User", icon_path=user_logo)

    # Frontend: the Streamlit app the user works with.
    with Cluster("Frontend"):
        streamlit = Custom("Streamlit App", icon_path=streamlit_logo)
        _flow(user, "Interacts", streamlit)

    # Preprocessing: one Airflow node fanning out to two extraction DAGs,
    # each drawn with its own S3 output node.
    with Cluster("Airflow Preprocessing Pipeline"):
        airflow = Airflow("Airflow")
        pymupdf_dag = _flow(
            airflow,
            "DAG 1: PyMuPDF Extraction",
            S3("Extracted PDFs - PyMuPDF"),
        )
        textract_dag = _flow(
            airflow,
            "DAG 2: Amazon Textract Extraction",
            S3("Extracted PDFs - Textract"),
        )

    # Shared S3 storage node that Airflow uploads to.
    with Cluster("AWS"):
        s3_storage = S3("S3 Storage")
        _flow(airflow, "Uploads PDF Text", s3_storage)

    # Backend: FastAPI plus the services it is connected to.
    with Cluster("Backend - FastAPI"):
        fastapi = Custom("FastAPI", icon_path=fastapi_logo)

        # Results database, drawn as a nested cluster.
        with Cluster("Azure SQL Database"):
            azuresql_db = Custom("Azure SQL", icon_path=azuresql_logo)
            _flow(fastapi, "Stores Results", azuresql_db)

        # FastAPI -> S3 storage.
        _flow(fastapi, "Fetches PDF Text", s3_storage)

        # FastAPI -> OpenAI API.
        openai_api = Custom("OpenAI API", icon_path=openai_api_logo)
        _flow(fastapi, "Sends Questions & Text", openai_api)

    # Request/response pair between the frontend and the backend.
    _flow(streamlit, "Sends User Inputs", fastapi)
    _flow(fastapi, "Returns Results", streamlit)

    # Return flows: S3 -> Airflow and OpenAI API -> FastAPI.
    _flow(s3_storage, "PDFs", airflow)
    _flow(openai_api, "ChatGPT Responses", fastapi)


In [17]:
from diagrams import Diagram, Cluster, Edge
from diagrams.custom import Custom
from diagrams.aws.storage import S3
from diagrams.onprem.workflow import Airflow
from diagrams.azure.database import SQLDatabases
from diagrams.programming.flowchart import StartEnd
from diagrams.programming.language import Python

# Icon image files handed to the Custom(...) nodes of the backend workflow
# diagram in this cell. They are bare file names, i.e. relative paths.
azuresql_logo = "azuresql_logo.png"
fastapi_logo = "fastapi_logo.png"
hf_logo = "hf_logo.png"
streamlit_logo = "streamlit_logo.png"
user_logo = "user_logo.png"

# Backend workflow diagram: ingestion -> ETL scripts -> Airflow preprocessing
# -> FastAPI -> Streamlit. Written under the base name
# "Backend Workflow Diagram"; show=True is kept exactly as configured.
with Diagram(
    name="Application Backend Workflow",
    filename="Backend Workflow Diagram",
    show=True,
):
    # A single node represents both users and admins.
    user = Custom("User / Admin", icon_path=user_logo)

    # Source dataset and the bronze S3 node.
    with Cluster("Data Ingestion"):
        huggingface = Custom("Hugging Face Dataset", icon_path=hf_logo)
        bronze_s3 = S3("AWS S3 (Bronze)")

    # ETL scripts and the Azure SQL node they are connected to.
    with Cluster("ETL Scripts"):
        dataset_script = Python("process_dataset.py")
        azure_sql = Custom("Azure SQL", icon_path=azuresql_logo)
        setup_db_script = Python("setup_database.py")

        # process_dataset.py takes the Hugging Face dataset and splits it
        # between S3 (unstructured) and Azure SQL (structured).
        (
            huggingface
            >> Edge(label="Processes Data")
            >> dataset_script
        )
        unstructured_edge = Edge(label="Stores Unstructured Data")
        dataset_script >> unstructured_edge >> bronze_s3
        structured_edge = Edge(label="Stores Structured Data")
        dataset_script >> structured_edge >> azure_sql
        (
            setup_db_script
            >> Edge(label="Sets up DB")
            >> azure_sql
        )

    # Bronze -> silver movement through Airflow (PDFs only), one path per DAG.
    with Cluster("Preprocessing with Airflow"):
        airflow = Airflow("Airflow DAGs")
        dag1 = Python("pymupdf_extraction_dag.py")
        dag2 = Python("textract_extraction_dag.py")
        silver_s3 = S3("AWS S3 (Silver)")

        # PyMuPDF path: bronze -> Airflow -> dag1 -> silver.
        pymupdf_edge = Edge(label="PDF Processing Only using pymupdf")
        bronze_s3 >> pymupdf_edge >> airflow
        airflow >> dag1
        dag1 >> silver_s3

        # Textract path: bronze -> Airflow -> dag2 -> silver. The
        # bronze -> Airflow edge is drawn a second time with its own label.
        textract_edge = Edge(label="PDF Processing Only using textract")
        bronze_s3 >> textract_edge >> airflow
        airflow >> dag2
        dag2 >> silver_s3

    # FastAPI is fed from silver S3 and writes to Azure SQL.
    with Cluster("Backend Processing"):
        fastapi = Custom("FastAPI", icon_path=fastapi_logo)
        # Alias for the Azure SQL node created above; no new node is created.
        db = azure_sql
        pdf_context_edge = Edge(
            label="When question has associated pdf then we use extracted information"
        )
        silver_s3 >> pdf_context_edge >> fastapi
        results_edge = Edge(label="Stores and Update results for an user")
        fastapi >> results_edge >> db

    # Streamlit receives edges from FastAPI and from the user node.
    with Cluster("Frontend Interaction"):
        streamlit = Custom("Streamlit Interface", icon_path=streamlit_logo)
        (
            fastapi
            >> Edge(label="Data Retrieval")
            >> streamlit
        )
        (
            user
            >> Edge(label="Access App")
            >> streamlit
        )
