# In [26]:
# from diagrams import Diagram, Cluster, Node, Edge
# from diagrams.onprem.database import MySQL
# from diagrams.aws.storage import S3
# from diagrams.onprem.compute import Server
# from diagrams.custom import Custom

# huggingface_icon = "hf.png"
# openai_icon = "openai.png" 
# streamlit_icon = "streamlit.png" 

# # Diagram creation
# with Diagram("Data Flow: Assignment 1", show=True, outformat="png", filename="test"):
    
#     # Define the source of data
#     with Cluster("Huggingface Dataset"):
#         huggingface = Custom("GAIA Benchmark Dataset", huggingface_icon)
    
#     # Define the Python server processing the data
#     server = Server("Python Script")
    
#     # Storage block that contains both MySQL and AWS S3
#     with Cluster("Storage Block"):
#         mysql = MySQL("MySQL \n(Metadata Storage)")
#         s3 = S3("AWS S3 \n(Unstructured \nData Storage)")

#     with Cluster("Streamlit Interface", direction="TB"):
        
#         # First Streamlit app in the box
#         with Cluster("First Page: Streamlit UI (Submit Data)"):
#             streamlit_app_1 = Custom("Streamlit User Interface\nSubmit Data", streamlit_icon)
        
#         # Second Streamlit app in the box
#         with Cluster("Second Page: Streamlit UI (Correct Answer)", direction="TB"):
#             streamlit_app_2 = Custom("Streamlit User Interface\nCorrect Answer", streamlit_icon)

#     with Cluster("OpenAI Validation"):
#         openai = Custom("OpenAI (Validation)", openai_icon)

    
#     # Data flow connections
#     huggingface >> Edge(label="Token Access") >> server >> [mysql, s3]

#     # Fetch data from storage to first Streamlit app
#     [mysql, s3] >> Edge(label="Data Fetch") >> streamlit_app_1

#     # User submits data for validation in the first Streamlit app
#     streamlit_app_1 >> Edge(label="Submit Data") >> openai

#     # OpenAI generates an answer based on the input from the first Streamlit app
#     openai >> Edge(label="Generate Answer") >> streamlit_app_1

#     # If the answer is different, user is directed to the second Streamlit app
#     streamlit_app_1 >> Edge(label="Answer Different") >> streamlit_app_2

#     # Fetch data for validation in the second Streamlit app
#     streamlit_app_2 << Edge(label="Fetch Data") >> mysql

#     # OpenAI regenerates the correct answer in the second Streamlit app
#     streamlit_app_2 >> Edge(label="Regenerate Answer") >> openai

#     # Final correct answer output to second Streamlit
#     openai >> Edge(label="Correct Answer") >> streamlit_app_2

# In [16]:
from diagrams import Diagram, Cluster, Node, Edge
from diagrams.onprem.database import MySQL
from diagrams.aws.storage import S3
from diagrams.onprem.compute import Server
from diagrams.custom import Custom

# Icon image files (relative paths) used by the Custom nodes below.
huggingface_icon, openai_icon, streamlit_icon = "hf.png", "openai.png", "streamlit.png"

# Render the data-flow diagram to "updated_diagram" as a PNG and open it.
# Nodes, clusters and edges are declared in a fixed order on purpose:
# declaration order is what the layout engine sees.
with Diagram(
    "Data Flow with Storage Block",
    filename="updated_diagram",
    outformat="png",
    show=True,
):

    # Data source, boxed in its own cluster.
    with Cluster("Huggingface Gaia Dataset"):
        huggingface = Custom("Huggingface Gaia Dataset\n(Data Source)", huggingface_icon)

    # Python script that processes the data.
    server = Server("Python Script")

    # Storage cluster holding both the MySQL and the S3 node.
    # Note: `mysql` is drawn but intentionally has no edges attached below.
    with Cluster("Storage Block"):
        mysql = MySQL("MySQL\n(Metadata Database)")
        s3 = S3("AWS S3\n(Data Storage)")

    # OpenAI cluster.
    with Cluster("AI"):
        openai = Custom("OpenAI\n(Answer Validation)", openai_icon)

    # Streamlit application: one outer cluster with a nested box per page.
    _pages = []
    with Cluster("Streamlit Application"):
        for _box_title, _caption in (
            ("Page 1", "Submit Input Questions"),
            ("Page 2", "Generate Answers"),
            ("Page 3", "Provide Correct Answer"),
        ):
            with Cluster(_box_title):
                _pages.append(Custom(_caption, streamlit_icon))
    streamlit_1, streamlit_2, streamlit_3 = _pages

    # Data-flow edges as (source, target, color, label), drawn in this order.
    _flows = (
        (huggingface, server, "blue", "Token Access"),
        (server, s3, "green", "Fetch Data"),
        # Single arrow from the storage block into the Streamlit app.
        (s3, streamlit_1, "green", "Fetch Data"),
        (streamlit_1, streamlit_2, "orange", "Generate Answers"),
        (streamlit_2, openai, "purple", "Validate with OpenAI"),
        (openai, streamlit_3, "red", "Correct Answer"),
        (streamlit_3, openai, "red", "Regenerate Answer Steps"),
    )
    for _src, _dst, _color, _label in _flows:
        _src >> Edge(color=_color, label=_label) >> _dst
