In [2]:
pip install diagrams

Collecting diagrams
  Downloading diagrams-0.23.4-py3-none-any.whl.metadata (7.0 kB)
Collecting typed-ast<2.0.0,>=1.5.4 (from diagrams)
  Downloading typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)
Downloading diagrams-0.23.4-py3-none-any.whl (24.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (824 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m824.7/824.7 kB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: typed-ast, diagrams
Successfully installed diagrams-0.23.4 typed-ast-1.5.5


In [28]:
# Attach Google Drive to the Colab runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'  # where Drive appears inside the runtime
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [35]:
# Import required modules from diagrams
from diagrams import Diagram, Cluster, Edge
from diagrams.gcp.database import SQL
from diagrams.gcp.storage import GCS
from diagrams.onprem.workflow import Airflow
from diagrams.onprem.container import Docker
from diagrams.custom import Custom
from diagrams.programming.language import Python

In [38]:
# Build the architecture diagram and render it inline in the notebook.
#
# Layout:
#   Data Ingestion : Docker[ GAIA Dataset -> Airflow -> GCP Bucket ], Metadata -> GCP SQL
#   API & Client   : Docker[ FastAPI <-> OpenAI ], Docker[ Streamlit ]
#   Cross-section  : FastAPI <-> Airflow, Streamlit <-> FastAPI

# Directory holding the custom PNG icons; adjust if the images live elsewhere
# (e.g. under the mounted Drive).
ICON_DIR = "/content"

# Every container box is displayed as "Docker". diagrams derives the Graphviz
# subgraph name from the Cluster label ("cluster_<label>"), so several clusters
# sharing one label can collide and be merged by Graphviz. Each container
# therefore gets a unique cluster name, and the visible label is overridden
# through graph_attr so the picture still reads "Docker".
DOCKER_LABEL = {"label": "Docker"}

# show=False: do not try to launch an external image viewer (there is none on
# a headless notebook runtime). The PNG is still written to disk on exit, and
# the bound Diagram object is displayed inline at the end of the cell.
with Diagram("Architecture Diagram", show=False) as architecture_diagram:
    # --- Data Ingestion ------------------------------------------------------
    with Cluster("Data Ingestion"):
        # Containerised part of ingestion: dataset, orchestrator and bucket.
        with Cluster("docker_ingestion", graph_attr=DOCKER_LABEL):
            docker_icon_ingestion = Docker("Docker")
            gaia_dataset = Custom("GAIA Dataset", f"{ICON_DIR}/gaia.png")
            airflow = Airflow("Airflow")
            gcp_bucket = GCS("GCP Bucket")

            # Data flows dataset -> Airflow -> bucket.
            gaia_dataset >> airflow >> gcp_bucket
            # Undirected, labelled edge tying the Docker icon to the pipeline.
            docker_icon_ingestion - Edge(label="runs on") - gaia_dataset

        # Metadata and GCP SQL sit outside the Docker container.
        metadata = Custom("Metadata", f"{ICON_DIR}/json.png")
        gcp_sql = SQL("GCP SQL")
        metadata >> gcp_sql

    # --- API & Client --------------------------------------------------------
    with Cluster("API & Client"):
        # FastAPI and OpenAI share one Docker container.
        with Cluster("docker_api", graph_attr=DOCKER_LABEL):
            docker_icon_api = Docker("Docker")
            fastapi = Custom("FastAPI", f"{ICON_DIR}/fastapi.png")
            openai = Custom("OpenAI", f"{ICON_DIR}/openai.png")

            # Two separate directed edges, one in each direction.
            fastapi >> openai
            openai >> fastapi

            docker_icon_api - Edge(label="runs on") - fastapi

        # Streamlit runs in its own Docker container. The Docker icon is placed
        # inside the container box, consistent with the other two containers.
        with Cluster("docker_streamlit", graph_attr=DOCKER_LABEL):
            docker_icon_streamlit = Docker("Docker")
            streamlit = Python("Streamlit")
            docker_icon_streamlit - Edge(label="runs on") - streamlit

            # `node << Edge(...) >> node` yields a single bidirectional edge:
            # `<<` turns on the reverse arrowhead and `>>` the forward one, so
            # no explicit forward/reverse argument is needed on the Edge.
            streamlit << Edge(label="communicates with") >> fastapi

    # Bidirectional link between the API and the ingestion side, using Airflow
    # as the representative node of the Data Ingestion section.
    fastapi << Edge(label="connects to Data Ingestion") >> airflow

# Last expression of the cell: Jupyter renders the diagram inline.
architecture_diagram