In [1]:
import os
import json
import base64

from openai import OpenAI
from dotenv import load_dotenv
from pydantic import BaseModel
from prompts import *

# Load variables from a local .env file into the process environment so that
# OPENAI_API_KEY is available when the client is created below.
load_dotenv()

True

In [2]:
def encode_image(image_path: str) -> str:
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode, base64-encoded, and the encoded bytes
    are decoded as UTF-8 so the result can be embedded in a string
    (for example a ``data:`` URL).
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

In [3]:
# API client used by every request in this notebook. The key is looked up in the
# process environment (None when OPENAI_API_KEY is unset).
# Optional endpoint override left disabled by the author:
#   base_url="https://api.together.xyz/v1"
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [4]:
# Schema for a single block extracted from the diagram. Instances are produced by
# the structured-output request that uses BlockWrapper as its response format.
# Documented with `#` comments on purpose: a class docstring on a pydantic model
# is copied into its JSON schema description and would alter the schema.
class Block(BaseModel):
    block_name: str  # block title, e.g. "Storage" in the sample output
    description: str  # free-text summary of what the block does
    components: list[str]  # items inside the block, e.g. ["DynamoDB"]
    number_in_diagram: int | None  # None in the sample output; presumably the block's printed number — confirm against block_system_prompt
    position_in_diagram: str  # free-text location, e.g. "Far left"

# Top-level response object for the block-extraction request; wraps the list so the
# parsed reply exposes it as `.blocks`.
# `#` comments are used instead of a docstring to leave the JSON schema unchanged.
class BlockWrapper(BaseModel):
    blocks: list[Block]  # the extracted blocks, in the order the model returned them

In [6]:
# Ask the model to list the diagram's blocks as structured output (BlockWrapper).
# The diagram is sent inline as a base64 data URL in a single user message;
# the system prompt comes from the `prompts` module.
res = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": block_system_prompt},
        {"role": "user", "content": [
            {
                "type": "image_url",
                "image_url": {
                    # The file is a PNG, so the data URL must declare image/png
                    # (it previously claimed image/jpeg).
                    "url": f"data:image/png;base64,{encode_image('../dgms/athena_forecasting.png')}"
                }
            }
        ]}
    ],
    response_format=BlockWrapper,
)

In [7]:
# Inspect the raw JSON text of the first choice as plain Python data
# (a list of dicts under the "blocks" key).
json.loads(res.choices[0].message.content)["blocks"]

[{'block_name': 'IoT Data Source',
  'description': 'Raw data collection from scooters.',
  'components': ['IoT data from scooters'],
  'number_in_diagram': None,
  'position_in_diagram': 'Far left'},
 {'block_name': 'Storage',
  'description': 'Data is stored for further processing and retrieval.',
  'components': ['DynamoDB'],
  'number_in_diagram': None,
  'position_in_diagram': 'Left, next to IoT data'},
 {'block_name': 'Processing',
  'description': 'Executes backend processes and manipulations on data.',
  'components': ['Lambda function'],
  'number_in_diagram': None,
  'position_in_diagram': 'Middle, next to DynamoDB'},
 {'block_name': 'Analysis',
  'description': 'Performs interactive queries on data.',
  'components': ['Athena'],
  'number_in_diagram': None,
  'position_in_diagram': 'Middle, next to Lambda function'},
 {'block_name': 'Data Storage',
  'description': 'Stores data for analysis and machine learning.',
  'components': ['Amazon S3'],
  'number_in_diagram': None,
 

In [8]:
# The same data as typed Block instances, taken from the SDK-parsed response.
# NOTE(review): `parsed` may be None if the model refuses to answer — confirm
# whether a guard is wanted before accessing `.blocks`.
blocks = res.choices[0].message.parsed.blocks

In [13]:
# Pretty-print the first block as indented JSON; print() renders the newlines
# instead of showing an escaped string repr.
print(blocks[0].model_dump_json(indent=4))

{
    "block_name": "IoT Data Source",
    "description": "Raw data collection from scooters.",
    "components": [
        "IoT data from scooters"
    ],
    "number_in_diagram": null,
    "position_in_diagram": "Far left"
}


In [14]:
# One endpoint of a connection, identified by block name and component name.
# Both fields are nullable. `#` comments are used instead of a docstring so the
# JSON schema generated from this model stays unchanged.
class ConnInfo(BaseModel):
    block_name: str | None  # compared with Block.block_name when the master prompt is assembled
    component_name: str | None  # e.g. "DynamoDB" in the sample output

# A link between two endpoints of the diagram. Every field is nullable, so code
# that reads a Connection must be prepared for None (e.g. a missing source).
# `#` comments are used instead of a docstring to leave the JSON schema unchanged.
class Connection(BaseModel):
    source: ConnInfo | None  # endpoint the link starts from
    target: ConnInfo | None  # endpoint the link ends at
    direction: str | None  # e.g. "one-way" in the sample output
    description: str | None  # free-text explanation of the link

# Top-level response object for the connection-extraction request; wraps the list
# so the parsed reply exposes it as `.connections`.
# `#` comments are used instead of a docstring to leave the JSON schema unchanged.
class ConnectionWrapper(BaseModel):
    connections: list[Connection]  # the extracted connections, in the order the model returned them

In [15]:
# Shows the JSON form of one block, i.e. the format that is joined into the
# connection prompt in the next cell.
# NOTE(review): duplicates the earlier first-block print — candidate for removal.
print(blocks[0].model_dump_json(indent=4))


{
    "block_name": "IoT Data Source",
    "description": "Raw data collection from scooters.",
    "components": [
        "IoT data from scooters"
    ],
    "number_in_diagram": null,
    "position_in_diagram": "Far left"
}


In [16]:
# Serialise every block to indented JSON and join the documents with a newline;
# the result is interpolated into the connection system prompt.
blocks_str = "\n".join(
    [blk.model_dump_json(indent=4) for blk in blocks]
)

In [17]:
# Ask the model for the connections between the previously extracted blocks,
# returned as structured output (ConnectionWrapper).
# The block JSON is injected into the system prompt via its `blocks_info`
# placeholder, and the same diagram image is attached as a base64 data URL.
conns = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": connection_system_prompt.format(blocks_info=blocks_str)},
        {
            "role": "user", "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        # The file is a PNG, so the data URL must declare image/png
                        # (it previously claimed image/jpeg).
                        "url": f"data:image/png;base64,{encode_image('../dgms/athena_forecasting.png')}"
                    }
                }
            ]
        }
    ],
    response_format=ConnectionWrapper,
)

In [18]:
# Pretty-print the first parsed connection as indented JSON to check its shape.
print(conns.choices[0].message.parsed.connections[0].model_dump_json(indent=4))

{
    "source": {
        "block_name": "IoT Data Source",
        "component_name": "IoT data from scooters"
    },
    "target": {
        "block_name": "Storage",
        "component_name": "DynamoDB"
    },
    "direction": "one-way",
    "description": "Data received from scooters is stored in DynamoDB."
}


In [19]:
# Display the full list of parsed Connection objects (repr form).
conns.choices[0].message.parsed.connections

[Connection(source=ConnInfo(block_name='IoT Data Source', component_name='IoT data from scooters'), target=ConnInfo(block_name='Storage', component_name='DynamoDB'), direction='one-way', description='Data received from scooters is stored in DynamoDB.'),
 Connection(source=ConnInfo(block_name='Storage', component_name='DynamoDB'), target=ConnInfo(block_name='Processing', component_name='Lambda function'), direction='one-way', description='Lambda function processes data stored in DynamoDB.'),
 Connection(source=ConnInfo(block_name='Processing', component_name='Lambda function'), target=ConnInfo(block_name='Analysis', component_name='Athena'), direction='one-way', description='Processed data is queried using Athena.'),
 Connection(source=ConnInfo(block_name='Data Storage', component_name='Amazon S3'), target=ConnInfo(block_name='Analysis', component_name='Athena'), direction='one-way', description='Athena accesses data stored in Amazon S3 for query processing.'),
 Connection(source=ConnIn

In [20]:
# Start from an empty prompt; the next cell appends block and connection JSON to it.
# Re-run this cell before re-running the next one, otherwise content is appended twice.
master_prompt = ""

In [21]:
# Assemble the master prompt: each block's JSON, followed by the JSON of every
# connection whose source is that block. Every document is terminated by a blank line.
#
# The prompt is rebuilt from scratch rather than appended to, so re-running this
# cell yields the same text instead of a duplicated one.
connections = conns.choices[0].message.parsed.connections
prompt_parts = []
for block in blocks:
    prompt_parts.append(block.model_dump_json(indent=4))
    for conn in connections:
        # `source` is declared `ConnInfo | None`; skip source-less connections
        # instead of failing with AttributeError on `.block_name`.
        if conn.source is not None and conn.source.block_name == block.block_name:
            prompt_parts.append(conn.model_dump_json(indent=4))
master_prompt = "".join(part + "\n\n" for part in prompt_parts)

In [22]:
# Show the assembled prompt: block JSON documents interleaved with their outgoing connections.
print(master_prompt)

{
    "block_name": "IoT Data Source",
    "description": "Raw data collection from scooters.",
    "components": [
        "IoT data from scooters"
    ],
    "number_in_diagram": null,
    "position_in_diagram": "Far left"
}

{
    "source": {
        "block_name": "IoT Data Source",
        "component_name": "IoT data from scooters"
    },
    "target": {
        "block_name": "Storage",
        "component_name": "DynamoDB"
    },
    "direction": "one-way",
    "description": "Data received from scooters is stored in DynamoDB."
}

{
    "block_name": "Storage",
    "description": "Data is stored for further processing and retrieval.",
    "components": [
        "DynamoDB"
    ],
    "number_in_diagram": null,
    "position_in_diagram": "Left, next to IoT data"
}

{
    "source": {
        "block_name": "Storage",
        "component_name": "DynamoDB"
    },
    "target": {
        "block_name": "Processing",
        "component_name": "Lambda function"
    },
    "direction": "one-w