## IMPORT LIBRARIES

In [1]:
from diagrams import Diagram, Cluster
from diagrams.programming.language import Python
from diagrams.custom import Custom
from diagrams.aws.storage import S3
from diagrams.generic.database import SQL
from diagrams.saas.analytics import Snowflake

In [15]:
# Graph-level Graphviz attributes handed to the diagram below; tweak or extend
# this mapping to change diagram size, node spacing, etc.
graph_attr = dict(fontsize="10")

# Value passed as Diagram(filename=...) when the flow diagram is rendered.
filename = "flow_diagram"

## FLOW DIAGRAM CREATION

In [16]:
def create_flow_diagram():
    """Build the "Flow Diagram" pipeline diagram with the `diagrams` library.

    The diagram is laid out left-to-right and wires together:

    * two input nodes (a website and a local-storage node),
    * a "Data Extraction" cluster (web scraping, PDF text extraction),
    * an intermediate local-storage node,
    * a "Data Loading" cluster (CSV upload, all-files upload),
    * two destination nodes (Snowflake and an S3 bucket).

    The output name and graph attributes come from the module-level
    ``filename`` and ``graph_attr`` values. ``show=False`` is passed to
    ``Diagram``.

    Returns:
        None. Any exception raised while building the diagram is caught and
        printed to stdout; it is not re-raised.
    """
    try:
        with Diagram(
            "Flow Diagram",
            filename=filename,
            show=False,
            direction="LR",
            graph_attr=graph_attr,
        ):
            # Nodes are created in a fixed order on purpose: declaration
            # order is part of what gets emitted to Graphviz.
            site = Custom("Website", "./input_icons/website.png")
            raw_store = Custom("Local Storage", "./input_icons/local_storage.png")

            with Cluster("Data Extraction"):
                scraper = Python("Web Scraping")
                pdf_extractor = Python("PDF Text Extraction\n using PyPDF & Grobid")

            staged_store = Custom("Local Storage", "./input_icons/local_storage.png")

            with Cluster("Data Loading"):
                csv_loader = Python("CSV Data upload\n to Snowflake")
                file_loader = Python(
                    "All files upload to S3\n and grobid metadata\n to Snowflake"
                )

            warehouse = Snowflake("Snowflake")
            bucket = S3("S3 Bucket")

            # (source, target) pairs, listed in the order the edges are added.
            edges = (
                # inputs -> extraction
                (site, scraper),
                (raw_store, pdf_extractor),
                # extraction -> intermediate storage
                (scraper, staged_store),
                (pdf_extractor, staged_store),
                # intermediate storage -> loading
                (staged_store, csv_loader),
                (staged_store, file_loader),
                # loading -> destinations
                (csv_loader, warehouse),
                (file_loader, warehouse),
                (file_loader, bucket),
            )
            for source, target in edges:
                source >> target
    except Exception as err:
        # Best-effort: report the failure instead of propagating it.
        print("Exception: ", err)
    

In [17]:
# Build the flow diagram now; any error is printed by the function itself
# rather than raised.
create_flow_diagram()