## IMPORT LIBRARIES

In [4]:
from diagrams import Diagram, Cluster, Edge
from diagrams.programming.language import Python
from diagrams.custom import Custom
from diagrams.aws.storage import S3
from diagrams.aws.network import ELB
from diagrams.aws.compute import EC2
from diagrams.saas.analytics import Snowflake
from diagrams.onprem.workflow import Airflow
from diagrams.programming.framework import FastAPI
from diagrams.digitalocean.compute import Docker
from diagrams.onprem.database import Mongodb

In [5]:
# Graphviz graph-level attributes forwarded to Diagram(graph_attr=...).
# Add entries here to change diagram size, node spacing, etc.
graph_attr = {"fontsize": "14"}

# Name forwarded to Diagram(filename=...) when the diagram is rendered.
filename = "flow_diagram"

## FLOW DIAGRAM CREATION

In [14]:
def create_flow_diagram() -> None:
    """Render the "Flow Diagram" with the ``diagrams`` library.

    The diagram is laid out left-to-right and contains four groups of nodes:

    * "Hourly pipeline" / "Hourly vectorize": LinkedIn and Indeed feed a job
      scrape -> data validation -> jobs load chain into Snowflake, whose
      output is embedded and sent to Pinecone.
    * "Daily job update pipeline": jobs fetched from Snowflake are batch
      processed, their status is updated, and the result goes back to
      Snowflake.
    * User-facing path: User -> UI -> Fast API -> S3 staging, with the UI
      returning the top 10 jobs to the user.
    * "Pipeline": resume text extraction, embedding and top-10 job matching,
      connected to S3, Pinecone, Mongo and OpenAi.

    Reads the module-level ``filename`` and ``graph_attr`` values and the icon
    images under ``./input_icons/``. ``show=False`` is passed to ``Diagram``.

    Any exception raised while building the diagram is printed to stdout and
    not re-raised, so the function always returns ``None``.
    """
    # Statement order is kept stable on purpose: nodes and edges are emitted to
    # Graphviz in creation order, which can influence the resulting layout.
    try:
        with Diagram(
            "Flow Diagram",
            filename=filename,
            show=False,
            direction="LR",
            graph_attr=graph_attr,
        ):
            # --- Job sources -------------------------------------------------
            linkedin = Custom("LinkedIn", "./input_icons/linkedin.png")
            indeed = Custom("Indeed", "./input_icons/indeed.png")

            # --- Hourly ingestion and vectorization --------------------------
            with Cluster("Hourly pipeline"):
                job_scrape = Python("Job scrape")
                data_validation = Python("Data Validation")
                job_load = Python("Jobs load")

            with Cluster("Hourly vectorize"):
                vectorize = Python("Embedding")

            pinecone = Custom("Pinecone", "./input_icons/pinecone.png")
            snowflake = Snowflake("Snowflake")

            linkedin >> job_scrape
            indeed >> job_scrape
            job_scrape >> data_validation >> job_load >> snowflake
            snowflake >> vectorize >> pinecone

            # --- Daily job status refresh ------------------------------------
            with Cluster("Daily job update pipeline"):
                batch = Python("Data Batch\n processing")
                status_update = Python("Status update")

            snowflake >> Edge(label="Jobs fetch") >> batch
            batch >> Edge(label="URL pass\n") >> status_update
            status_update >> Edge(label="Updated status") >> snowflake

            # --- User-facing application -------------------------------------
            user = Custom("User", "./input_icons/user.png")
            streamlit = Custom("UI", "./input_icons/streamlit.png")
            fastapi = FastAPI("Fast API")
            s3 = S3("Staging")

            user >> Edge(label="Uploads resume") >> streamlit >> fastapi >> s3
            # Leading newlines push the label away from the edge above it.
            streamlit >> Edge(label="\n\nTop 10 jobs") >> user

            # --- Resume matching pipeline ------------------------------------
            with Cluster("Pipeline"):
                pdf_extract = Python("\nResume text\n extract")
                vector = Python("embedding")
                similar_jobs = Python("Top 10 jobs\nmatch")

            mongo = Mongodb("Mongo")
            openai = Custom("OpenAi", "./input_icons/openai.png")

            (
                fastapi
                >> Edge(label="S3 path")
                >> pdf_extract
                >> vector
                >> Edge(label="\npinecone metadata")
                >> similar_jobs
            )
            pdf_extract >> Edge(label="User portfolio\n(text format)") >> s3
            vector >> pinecone
            s3 >> Edge(label="fetch user portfolio") >> vector
            # NOTE(review): `a >> Edge(...) << b` is expected to draw a
            # two-way arrow between a and b -- confirm in the rendered output.
            similar_jobs >> Edge(label="Resume mapping") << mongo
            fastapi >> Edge(label="User authentication") << mongo
            vector >> Edge(label="Embedding ") << openai
            pinecone >> similar_jobs
            similar_jobs >> openai

    except Exception as e:
        # Best-effort rendering: report the failure instead of raising so the
        # caller (e.g. a notebook cell) keeps running.
        print("Exception: ", e)
    

In [15]:
# Build the flow diagram; failures are printed by the function, not raised.
create_flow_diagram()