In [3]:
from dotenv import load_dotenv

# Load variables from the local ".env" file into the process environment.
# override=True lets values from the file replace variables that are already set.
load_dotenv(dotenv_path=".env", override=True)

True

In [5]:
import os
from CameraDocument import CameraDocument
from glob import glob
from openai import OpenAI

# Camera models whose parsed manual pages should be embedded.
camera_model_list = ["gfx100ii", "x-e4", "x-s20", "x-t5", "x100v"]
# Root directory holding one sub-directory per camera model.
json_dir = "./data/json"


# The API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY")
)
# Embedding model name; also used as the name of the per-camera output directory.
MODEL = "text-embedding-3-small"

# For every camera model:
#   1. load each parsed page from <json_dir>/<camera_model>/LlamaParseMultimodal/processed_data/*.json
#   2. request an embedding for the page's parsing result
#   3. store the vector plus token usage / model name on the document
#   4. save the enriched document to <json_dir>/<camera_model>/<MODEL>/<model>_page<page>.json
for camera_model in camera_model_list:
    detail_json_dir = os.path.join(json_dir, camera_model, "LlamaParseMultimodal", "processed_data")
    json_path = os.path.join(detail_json_dir, "*.json")
    # glob() returns matches in arbitrary order; sort so runs and logs are reproducible.
    json_list = sorted(glob(json_path))

    documents = []
    for path in json_list:
        document = CameraDocument()
        document.load_json(path)
        documents.append(document)

    # The output directory is identical for every page of this camera model,
    # so build (and create) it once instead of once per document.
    output_dir = os.path.join(json_dir, camera_model, MODEL)
    if documents:
        os.makedirs(output_dir, exist_ok=True)

    for document in documents:
        # The embeddings endpoint rejects empty input; skip blank pages so a
        # single empty page does not abort the whole run.
        # NOTE(review): assumes parsing_result is a str (or a sequence) — confirm
        # against CameraDocument.
        if not document.parsing_result:
            print(f"model: {document.metadata['model']} skipped empty page: {document.metadata['page']}")
            continue

        res = client.embeddings.create(
            input=document.parsing_result,
            model=MODEL
        )
        # Only the first returned embedding is kept.
        document.embedding_result = res.data[0].embedding
        document.metadata['token'] = res.usage.total_tokens
        document.metadata['embedding_model'] = res.model
        print(f"model: {document.metadata['model']} complete page: {document.metadata['page']}")

        output_path = os.path.join(output_dir, f"{document.metadata['model']}_page{document.metadata['page']}.json")
        document.save_json(output_path)

        

model: gfx100ii complete page: 128
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page128.json complete
model: gfx100ii complete page: 12
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page12.json complete
model: gfx100ii complete page: 36
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page36.json complete
model: gfx100ii complete page: 291
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page291.json complete
model: gfx100ii complete page: 242
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page242.json complete
model: gfx100ii complete page: 366
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page366.json complete
model: gfx100ii complete page: 383
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page383.json complete
model: gfx100ii complete page: 288
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page288.json complete
model: gfx100ii complete page: 198
./data/json/gfx100ii/text-embedding-3-small/gfx100ii_page198.json complete
model: gfx100i