In [3]:
import logging
import os
from datetime import datetime
from typing import Literal

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field
from tqdm import tqdm

client = OpenAI()

logging.basicConfig(level=logging.WARNING)
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
df = pd.read_excel("Wedding Values.xlsx")
df.columns

Index(['City', 'Zip Code ', 'State', 'Country ', 'Email', 'Phone Number',
       'Price', 'Price Breakdown', 'Menu Breakdown', 'Bar Breakdown',
       'Groom and Bridal Set-Up', 'Ceremony Cost ', 'Guest Capacity ',
       'Outside Food', 'Outside Alcohol', 'Outside Dessert ',
       'Outside Wedding Coordinator', 'Outside Photographer ',
       'Package Approach', 'Pricing Transparency ', 'Reception or Ceremony',
       'Style', 'Indoor/Outdoor', 'Deposit and Payment Plans ', 'Privacy',
       'Accommodations ', 'Photography Score ', 'Environmental ',
       'What Time Does the Party Need to Stop', 'Late Night Eats ',
       'General Vibe', 'Top Choices ', 'Menu Choices '],
      dtype='object')

In [5]:
from wedding_venue_models import (
    WeddingContactInfo,
    FoodBreakdown,
    WeddingFoodInfo,
    BarBreakdown,
    WeddingVenuePricingSummary,
    WeddingVenueStyle,
    WeddingVenueOther,
    create_system_prompt,
)  # noqa: F403


In [6]:
models = [
    WeddingContactInfo,
    FoodBreakdown,
    WeddingFoodInfo,
    BarBreakdown,
    WeddingVenuePricingSummary,
    WeddingVenueStyle,
    WeddingVenueOther,
]

In [None]:
from itertools import chain

from openpyxl.styles import Font, PatternFill


def assert_keys_in_readable_columns(
    models: list[type[BaseModel]], readable_columns: dict[str, str]
) -> None:
    keys = list(
        chain.from_iterable(
            [model.__name__ + "_" + x for x in model.model_fields.keys()]
            for model in models
        )
    )
    keys = [
        str(key).replace("_tiers", "_summary").replace("_options", "_summary")
        for key in keys
    ]
    keys
    assert set(keys) - set(readable_columns.keys()) == set(), (
        f"missing keys in readable_columns: {set(keys) - set(readable_columns.keys())}"
    )


readable_columns = {
    "venue": "wedding venue",
    "WeddingVenuePricingSummary_price": "price per guest",
    "WeddingVenuePricingSummary_base_prices": "price breakdown",
    "WeddingVenuePricingSummary_taxes_and_fees": "price breakdown taxes and fees",
    "WeddingVenuePricingSummary_flexibility": "venue customization flexibility",
    # "WeddingPriceInfo_option": "options",
    "WeddingContactInfo_city": "city",
    "WeddingContactInfo_state": "state",
    "WeddingContactInfo_country": "country",
    "WeddingContactInfo_zip_code": "zip code",
    "WeddingContactInfo_email": "email",
    "WeddingContactInfo_website": "website",
    "WeddingContactInfo_phone": "phone",
    "WeddingContactInfo_facebook": "facebook",
    "WeddingContactInfo_instagram": "instagram",
    # "WeddingVenuePricingSummary_summary": "venue pricing summary",
    "FoodBreakdown_summary": "food menu breakdown",
    "FoodBreakdown_flexibility": "food menu flexibility",
    "BarBreakdown_summary": "bar menu breakdown",
    "BarBreakdown_flexibility": "bar menu flexibility",
    "WeddingVenuePricingSummary_pricing_transparency": "pricing transparency",
    "WeddingVenuePricingSummary_deposit_and_payment_plans": "deposit and payment plans",
    "WeddingVenueStyle_style": "style",
    "WeddingVenueStyle_indoor_outdoor": "indoor/outdoor seating",
    "WeddingVenueStyle_privacy": "privacy",
    "WeddingVenueStyle_accommodations": "accommodations",
    "WeddingVenueStyle_environmental": "environmental",
    "WeddingVenueStyle_general_vibe": "general vibe",
    "WeddingFoodInfo_east_asian_food": "serves east asian food",
    "WeddingFoodInfo_gluten_free_food": "serves gluten free food",
    "WeddingFoodInfo_halal_food": "serves halal food",
    "WeddingFoodInfo_indian_food": "serves indian food",
    "WeddingFoodInfo_kosher_food": "serves kosher food",
    "WeddingFoodInfo_late_night_food": "serves late night food",
    "WeddingFoodInfo_other_ethnic_food_style": "serves other ethnic food",
    "WeddingFoodInfo_outside_alcohol_allowed": "allows outside alcohol",
    "WeddingFoodInfo_outside_dessert_allowed": "allows outside dessert",
    "WeddingFoodInfo_outside_food_allowed": "allows outside food",
    "WeddingVenueOther_guest_capacity": "guest capacity",
    "WeddingVenueOther_what_time_does_the_party_need_to_stop": "what time does the party need to stop",
    "WeddingVenueOther_outside_photographer": "allows outside photographer",
    "WeddingVenueOther_package_approach": "package approach",
    "WeddingVenueOther_outside_wedding_coordinator": "allows outside wedding coordinator",
    "WeddingVenueOther_reception_or_ceremony": "reception or ceremony",
    "WeddingVenueOther_top_choices": "top choices",
}


assert_keys_in_readable_columns(models, readable_columns)


def flatten_dict(d: dict, parent_key: str = "", sep: str = "_") -> dict:
    """Flatten a nested dictionary by concatenating nested keys with a separator.

    Parameters
    ----------
    d : dict
        The dictionary to flatten
    parent_key : str, optional
        The parent key for nested dictionaries, by default ""
    sep : str, optional
        The separator to use between nested keys, by default "_"

    Returns
    -------
    dict
        A flattened dictionary with concatenated keys

    Examples
    --------
    >>> d = {"a": 1, "b": {"c": 2, "d": {"e": 3}}}
    >>> flatten_dict(d)
    {'a': 1, 'b_c': 2, 'b_d_e': 3}
    """
    items: list = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep).items())
        else:
            items.append((new_key, v))
    return dict(items)


class WeddingVenue:
    def __init__(self, venue_name: str, raw: list[BaseModel]):
        item_dict = {"venue": venue_name}
        for item in raw:
            obj_dict = item.model_dump()
            if "tiers" in obj_dict:
                obj_dict.pop("tiers")
                obj_dict["summary"] = item.to_string()

            if "options" in obj_dict:
                obj_dict.pop("options")
                obj_dict["summary"] = item.to_string()

            item_dict[item.__class__.__name__] = obj_dict

        self.df = pd.DataFrame()
        self.update(item_dict)

    def add_price_breakdown(self) -> None:
        self.df["price breakdown"] = self.df[
            [
                "price breakdown",
                "price breakdown taxes and fees",
            ]
        ].apply(
            lambda x: f"""
                base prices: {x.iloc[0]}
                taxes and fees: {x.iloc[1]}
                """,
            axis=1,
        )
        del self.df["price breakdown taxes and fees"]

    def update(self, d: dict) -> None:
        self.df = pd.DataFrame(flatten_dict(d), index=[0])
        self.rename_columns()
        self.add_price_breakdown()
        # self.add_bar_flexibility()
        # self.add_indoor_outdoor_seating()

    def _repr_html_(self) -> str:
        return self.df._repr_html_()

    def rename_columns(self) -> None:
        """Rename and reorder columns based on readable_columns dictionary."""
        self.df.rename(columns=readable_columns, inplace=True)
        self.df.set_index("wedding venue", inplace=True)
        ordered_columns = [
            col for col in readable_columns.values() if col != "wedding venue"
        ]
        self.df = self.df.reindex(columns=ordered_columns)

    def to_excel(self, name: str = "wedding_venue.xlsx"):
        if not name.endswith(".xlsx"):
            name = f"{name}.xlsx"
        if os.path.exists(name):
            name = f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
        print("saving to: ", name)
        with pd.ExcelWriter(name, engine="openpyxl") as writer:
            self.df.to_excel(writer, sheet_name="Venue Options")

            worksheet = writer.sheets["Venue Options"]

            header_fill = PatternFill(
                start_color="B3E5FC", end_color="B3E5FC", fill_type="solid"
            )
            header_font = Font(bold=True)

            for col in range(1, len(df.columns) + 1):
                cell = worksheet.cell(row=1, column=col)
                cell.fill = header_fill
                cell.font = header_font

            for col in worksheet.columns:
                max_length = 0
                column = col[0].column_letter
                for cell in col:
                    if cell.value:
                        max_length = max(max_length, len(str(cell.value)))
                adjusted_width = max_length + 2
                worksheet.column_dimensions[column].width = min(adjusted_width, 50)

            worksheet.auto_filter.ref = worksheet.dimensions

        self.df.to_excel(writer, sheet_name="Venue Options")
        return self

    def __add__(self, other: "WeddingVenue") -> "WeddingVenue":
        self.df = pd.concat([self.df, other.df])
        return self

In [13]:
from openai import OpenAI
from pydantic import BaseModel
from wedding_venues.image import resize_image

In [14]:
from openai import OpenAI
from pydantic import BaseModel, Field
from wedding_venues.image import local_image_to_data_url


class WeddingText(BaseModel):
    text: str | None = Field(
        description="""
        Extracted text from the image in markdown format. Returns None if text
        is unreadable or gibberish
        """
    )
    is_photo: bool = Field(
        description="""Indicates if the image is a meaningful photograph (food,
                    decor, venue, or wedding related) rather than a logo,
                    background, or decorative element"""
    )
    description: str = Field(
        description="""Brief description of the image content, particularly
                    useful for categorizing photos of food, decor, or
                    wedding-related scenes"""
    )


def image_to_wedding_text(image_path: str) -> WeddingText:
    client = OpenAI()

    prompt = """
        You are tasked with summarizing the description of the images about
        wedding venues. Give a concise summary of the images provided to you.
        Focus on the style and wedding theme. If decorative elements (e.g.,
        illustrations such as leaves and flowers, templates) are present, do not
        confuse them with real settings.The output should not be more than 30
        words. If there is text present, please transcribe. If the image is not
        related to a wedding venue, please ignore."""

    data_url = local_image_to_data_url(image_path)

    response = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
        response_format=WeddingText,
    )
    return response.choices[0].message.parsed


In [15]:
import os
from datetime import datetime
import json
import zipfile
import pandas as pd  # type: ignore
import re
import glob


def get_dict_xlsx(outputzipextract, xlsx_file):
    """
    Function to read excel output from adobe API
    """
    # Read excel
    df = pd.read_excel(
        os.path.join(outputzipextract, xlsx_file),
        sheet_name="Sheet1",
        engine="openpyxl",
    )

    # Clean df
    df.columns = [re.sub(r"_x([0-9a-fA-F]{4})_", "", col) for col in df.columns]
    df = df.replace({r"_x([0-9a-fA-F]{4})_": ""}, regex=True)

    # Convert df to string
    data_dict = df.to_dict(orient="records")

    return data_dict


def extract_text_from_file_adobe(venue, output_zipextract_folder):
    """
    Function to extract text and table from adobe output zip file
    """
    output_zip_path = list(glob.glob(f"{output_zipextract_folder}/*.zip"))[0]
    # if not venue.endswith(".zip"):
    #     output_zip_path = f"test_output/{venue}"
    # else:
    #     output_zip_path = venue
    print(f"output zipextract folder: {output_zipextract_folder}")
    print(f"output zip path: {output_zip_path}")

    json_file_path = os.path.join(output_zipextract_folder, "structuredData.json")
    # check if json file exist:
    if os.path.exists(json_file_path):
        print(
            f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} JSON file already exists. Skipping extraction."
        )
    else:
        try:
            print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} unzip file")
            # Open the ZIP file
            with zipfile.ZipFile(output_zip_path, "r") as zip_ref:
                # Extract all the contents of the ZIP file to the current working directory
                zip_ref.extractall(path=output_zipextract_folder)
        except Exception as e:
            print("----Error: cannot unzip file")
            print(e)

    try:
        print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} open json file")
        # Opening JSON file
        with open(
            os.path.join(output_zipextract_folder, "structuredData.json")
        ) as json_file:
            data = json.load(json_file)
    except Exception as e:
        print("----Error: cannot open json file")
        print(e)
        return pd.DataFrame()

    print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} extract text")
    dfs = pd.DataFrame()
    page = ""

    figures_folder = os.path.join(output_zipextract_folder, "figures")

    try:  # Loop through elements in the document
        for ele in data["elements"]:
            df = pd.DataFrame()
            # Get element page
            if "Page" in ele.keys():
                page = ele["Page"]

            # Append table
            if any(x in ele["Path"] for x in ["Table"]):
                if "filePaths" in ele:
                    if [s for s in ele["filePaths"] if "xlsx" in s]:
                        # Read excel table
                        data_dict = get_dict_xlsx(
                            output_zipextract_folder, ele["filePaths"][0]
                        )
                        json_string = json.dumps(data_dict)
                        df = pd.DataFrame({"text": json_string}, index=[0])

            # Append text
            elif ("Text" in ele.keys()) and ("Figure" not in ele["Path"]):
                df = pd.DataFrame({"text": ele["Text"]}, index=[0])

            # Process figures and extract text from images
            elif (
                "Figure" in ele.get("Path", "")
                and "filePaths" in ele
                and ele["filePaths"]
            ):
                figure_filename = os.path.basename(ele["filePaths"][0])
                full_path = None

                # Find matching figure file in figures folder
                for filepath in glob.glob(os.path.join(figures_folder, "*")):
                    if figure_filename in os.path.basename(filepath):
                        full_path = filepath
                        break

                if full_path:
                    try:
                        # Resize and process the image with GPT-4o
                        resized_path = resize_image(full_path, 1024)
                        wedding_text = image_to_wedding_text(resized_path)

                        # Format the extracted text
                        image_content = ""
                        if wedding_text.text is not None:
                            image_content += wedding_text.text + "\n"
                        if wedding_text.is_photo:
                            image_content += (
                                f"\n[Image Description: {wedding_text.description}]\n"
                            )

                        if image_content.strip():
                            df = pd.DataFrame({"text": image_content}, index=[0])
                    except Exception as e:
                        print(f"Error processing image {full_path}: {e}")

            # Add page number and concat to main dataframe
            if not df.empty:
                df["page_number"] = page
                dfs = pd.concat([dfs, df], axis=0)

    except Exception as e:
        print("----Error: processing elements in JSON")
        print(e)

    dfs = dfs.reset_index(drop=True)

    # Groupby page
    dfs = dfs.dropna()
    if "text" not in dfs.columns:
        print(f"no text found in document {venue}.pdf")
        return ""

    dfs = dfs.groupby("page_number")["text"].apply(lambda x: "\n".join(x)).reset_index()
    text_content = "\n".join(dfs["text"].values)

    return text_content

In [16]:
# # Setup
# from pathlib import Path

# import openai

# load_dotenv()
# api_key = os.getenv("OPENAI_API_KEY")
# client = OpenAI(api_key=api_key)

# venue_data = []
# md_path = Path("test_md")
# if not md_path.exists():
#     raise FileNotFoundError("Directory 'test_md' not found")

# md_files = list(md_path.glob("*.md"))[-5:]

# venue_data = None
# for file in tqdm(md_files, desc="Processing venues", unit="file"):
#     tqdm.write(f"Processing: {file.name}")
#     with open(file, "r", encoding="utf-8") as f:
#         md_content = f.read()

#     raw = []
#     venue_name = file.stem
#     venue_dict = {"name": venue_name}

#     for model_class in models:
#         system_prompt = create_system_prompt(model_class)
#         # try:
#         if model_class == WeddingVenuePricingSummary:
#             ai_model = "o3-mini"
#             temperature = openai.NOT_GIVEN
#         else:
#             ai_model = "gpt-4o-mini"
#             temperature = 0
#         completion = client.beta.chat.completions.parse(
#             model=ai_model,
#             messages=[
#                 {"role": "system", "content": system_prompt},
#                 {
#                     "role": "user",
#                     "content": f"Extract venue information from this text about '{venue_name}':\n\n{md_content}",
#                 },
#             ],
#             response_format=model_class,
#             temperature=temperature,
#         )
#         obj = completion.choices[0].message.parsed
#         raw.append(obj)
#         if hasattr(obj, "to_string"):
#             string_summary = obj.to_string()
#             venue_dict[f"{model_class.__name__}_summary"] = string_summary
#         else:
#             venue_dict[f"{model_class.__name__}_summary"] = obj.model_dump()

#         tqdm.write(f"✓ Processed {model_class.__name__} for: {venue_name}")

#         # except Exception as e:
#         #     tqdm.write(f"✗ Error with {model_class.__name__} for {venue_name}: {e}")
#         #     venue_dict[f"{model_class.__name__}_summary"] = None
#     if venue_data is None:
#         venue_data = WeddingVenue(venue_name, raw)
#     else:
#         try:
#             venue_data += WeddingVenue(venue_name, raw)
#         except Exception as e:
#             print(f"✗ Error adding {venue_name}: {e}")
#             # venue_data = None
# now = datetime.now().strftime("%Y%m%d%")
# if venue_data is not None:
#     venue_data.to_excel(f"all_info_{now}.xlsx")
# else:
#     print("⚠️ No venue data processed.")

In [17]:
from wedding_venues.adobe import pdf2zip

for venue in glob.glob("test_pdf/*.pdf"):
    if os.path.exists(f"test_output/{os.path.basename(venue)}"):
        continue  # skip if already processed
    pdf2zip(venue, f"test_output/{os.path.basename(venue)}")
    print(f"{venue} ready")


ERROR:root:Exception encountered while executing operation: description =ERROR - Unable to extract content. Internal error; requestTrackingId=895429f8-ed56-44ca-8358-12a632fce41e; statusCode=500; errorCode=ERROR
Traceback (most recent call last):
  File "/Users/mac-robertsocolewicz/Documents/private/playground_tables/src/wedding_venues/adobe.py", line 101, in pdf2zip
    pdf_services_response = pdf_services.get_job_result(location, ExtractPDFResult)
                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mac-robertsocolewicz/Documents/private/playground_tables/.venv/lib/python3.11/site-packages/adobe/pdfservices/operation/internal/util/enforce_types.py", line 37, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mac-robertsocolewicz/Documents/private/playground_tables/.venv/lib/python3.11/site-packages/adobe/pdfservices/operation/pdf_services.py", line 101, in get_job_result
    return PDFServicesHelpe

test_pdf/Avenue of the Arts.pdf ready


In [2]:
from wedding_venues import extract_text_from_file_docling

pdf_path = "/Users/mac-robertsocolewicz/Documents/private/playground_tables/test_pdf/94th Aero Squadron Restaurant_.pdf"
output_dir = "test_pdf/test"

extract_text_from_file_docling(
    pdf_path,
    output_dir,
    force_extract=False,
)


INFO:root:Skipping /Users/mac-robertsocolewicz/Documents/private/playground_tables/test_pdf/94th Aero Squadron Restaurant_.pdf because it already exists


In [6]:
import glob
import os
from pathlib import Path
from tqdm import tqdm
from wedding_venues import (
    extract_text_from_file_docling,
    figure_to_md,
    replace_images_with_markdown,
)

OUTPUT_DIR = "test_output2"

progress_bar = tqdm(glob.glob("test_pdf/*.pdf"), desc="Processing venues", unit="venue")
for pdf_file_path in progress_bar:
    pdf_file_name = os.path.basename(pdf_file_path)
    venue_name = os.path.splitext(pdf_file_name)[0]
    progress_bar.set_postfix(file=pdf_file_path)
    output_dir_venue = Path(OUTPUT_DIR) / venue_name

    extract_text_from_file_docling(pdf_file_path, output_dir_venue, force_extract=False)
    figures = filter(
        lambda fig: fig.suffix in [".png", ".jpg", ".jpeg"],
        output_dir_venue.glob("**/*"),
    )
    for figure in tqdm(sorted(figures)):
        figure_to_md(figure, force_extract=False, delete_if_false_image=True)

    try:
        markdown_path = list(output_dir_venue.glob("*.md"))[0]
    except IndexError:
        print(f"No markdown file found for {venue_name}")
        continue
    new_md = replace_images_with_markdown(
        markdown_path,
        inplace=True,
    )


Processing venues:   0%|          | 0/12 [00:00<?, ?venue/s, file=test_pdf/Alcazar Palm Springs.pdf]INFO:root:Skipping test_pdf/Alcazar Palm Springs.pdf because it already exists
0it [00:00, ?it/s]
Processing venues:   0%|          | 0/12 [00:00<?, ?venue/s, file=test_pdf/Aliso Viejo Country Club.pdf]

INFO:root:Skipping test_pdf/Aliso Viejo Country Club.pdf because it already exists
100%|██████████| 15/15 [00:00<00:00, 5973.09it/s]
Processing venues:   0%|          | 0/12 [00:00<?, ?venue/s, file=test_pdf/94th Aero Squadron Restaurant_.pdf]INFO:root:Skipping test_pdf/94th Aero Squadron Restaurant_.pdf because it already exists
100%|██████████| 26/26 [00:00<00:00, 9538.35it/s]
Processing venues:   0%|          | 0/12 [00:00<?, ?venue/s, file=test_pdf/Agua Hedionda Nature Center.pdf]   INFO:root:Skipping test_pdf/Agua Hedionda Nature Center.pdf because it already exists
0it [00:00, ?it/s]
Processing venues:   0%|          | 0/12 [00:00<?, ?venue/s, file=test_pdf/RIVERHOUSEBBQ.COM.pdf]          INFO:root:Skipping test_pdf/RIVERHOUSEBBQ.COM.pdf because it already exists
100%|██████████| 15/15 [00:00<00:00, 12468.20it/s]
Processing venues:   0%|          | 0/12 [00:00<?, ?venue/s, file=test_pdf/Aliso Viejo Wedgewood.pdf]INFO:root:Skipping test_pdf/Aliso Viejo Wedgewood.pdf because it alr

In [18]:
# Setup
from wedding_venues import markdown_to_structured_output
from dotenv import load_dotenv
from datetime import datetime

load_dotenv(override=True)
from pathlib import Path

output_dir = Path(OUTPUT_DIR)
venue_data = None
try:
    for venue_dir in output_dir.glob("*"):
        markdown_path = list(venue_dir.glob("*.md"))
        if not markdown_path:
            continue
        markdown_path = markdown_path[0]
        row = markdown_to_structured_output(markdown_path)
        if venue_data is None:
            venue_data = row
        else:
            try:
                venue_data += row
            except Exception as e:
                print(f"✗ Error adding {venue}: {e}")
finally:
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    venue_data.to_excel(f"all_info_{now}.xlsx")


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Shell_s Loft


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Shell_s Loft


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Shell_s Loft


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Shell_s Loft


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Shell_s Loft


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Shell_s Loft


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Shell_s Loft


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Ace Hotel Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Ace Hotel Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Ace Hotel Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Ace Hotel Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Ace Hotel Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Ace Hotel Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Ace Hotel Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Aliso Viejo Country Club


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Aliso Viejo Country Club


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Aliso Viejo Country Club


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Aliso Viejo Country Club


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Aliso Viejo Country Club


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Aliso Viejo Country Club


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Aliso Viejo Country Club


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Rustic Pearl Weddings & Events


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Rustic Pearl Weddings & Events


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Rustic Pearl Weddings & Events


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Rustic Pearl Weddings & Events


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Rustic Pearl Weddings & Events


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Rustic Pearl Weddings & Events


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Rustic Pearl Weddings & Events


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Aliso Viejo Wedgewood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Aliso Viejo Wedgewood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Aliso Viejo Wedgewood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Aliso Viejo Wedgewood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Aliso Viejo Wedgewood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Aliso Viejo Wedgewood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Aliso Viejo Wedgewood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Avenue of the Arts


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Avenue of the Arts


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Avenue of the Arts


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Avenue of the Arts


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Avenue of the Arts


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Avenue of the Arts


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Avenue of the Arts


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: 94th Aero Squadron Restaurant_


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: 94th Aero Squadron Restaurant_


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: 94th Aero Squadron Restaurant_


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: 94th Aero Squadron Restaurant_


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: 94th Aero Squadron Restaurant_


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: 94th Aero Squadron Restaurant_


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: 94th Aero Squadron Restaurant_


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Alcazar Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Alcazar Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Alcazar Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Alcazar Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Alcazar Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Alcazar Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Alcazar Palm Springs


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Agua Hedionda Nature Center


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Agua Hedionda Nature Center


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Agua Hedionda Nature Center


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Agua Hedionda Nature Center


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Agua Hedionda Nature Center


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Agua Hedionda Nature Center


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Agua Hedionda Nature Center


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: a.o.c. Brentwood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: a.o.c. Brentwood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: a.o.c. Brentwood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: a.o.c. Brentwood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: a.o.c. Brentwood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: a.o.c. Brentwood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: a.o.c. Brentwood


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: Almansor Court


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: Almansor Court


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: Almansor Court


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: Almansor Court


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: Almansor Court


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: Almansor Court


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: Almansor Court


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingContactInfo for: RIVERHOUSEBBQ.COM


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed FoodBreakdown for: RIVERHOUSEBBQ.COM


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingFoodInfo for: RIVERHOUSEBBQ.COM


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed BarBreakdown for: RIVERHOUSEBBQ.COM


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenuePricingSummary for: RIVERHOUSEBBQ.COM


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueStyle for: RIVERHOUSEBBQ.COM


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


✓ Processed WeddingVenueOther for: RIVERHOUSEBBQ.COM
saving to:  all_info_20250419231909.xlsx


In [17]:
venue_dir.glob("*.txt").__next__()

StopIteration: 

# old

In [21]:
# Setup
import os
import openai
from dotenv import load_dotenv
from openai import OpenAI
from google import genai


class Response:
    def __init__(self, ai: Literal["openai", "google"]):
        self.ai = ai
        if ai == "openai":
            self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
            self.response = self._response_openai
        if ai == "google":
            self.client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
            self.response = self._response_google

    def _response_openai(
        self, model, system_prompt, user_prompt, response_format, temperature
    ):
        completion = self.client.beta.chat.completions.parse(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": user_prompt,
                },
            ],
            response_format=response_format,
            temperature=temperature,
        )
        return completion.choices[0].message.parsed

    def _response_google(
        self, model, system_prompt, user_prompt, response_format, temperature
    ):
        response = self.client.models.generate_content(
            model=model,
            contents=f"{system_prompt}\n{user_prompt}",
            config={
                "response_mime_type": "application/json",
                "response_schema": response_format,
            },
        )
        return response.parsed


load_dotenv(override=True)
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# List of venues to process
venues = os.listdir("test_pdf")

venue_data = None

# openAI
# response = Response(ai="openai")
# gemini
response = Response(ai="google")


# Process each venue
for venue in tqdm(venues, desc="Processing venues", unit="venue"):
    tqdm.write(f"Processing venue: {venue}")

    output_zip_path = list(glob.glob(f"test_output/{venue}/*.zip"))[0]
    if len(list(glob.glob(f"test_output/{venue}/*.zip"))) == 0:
        output_zip_path = f"test_output/{os.path.basename(venue)}"
        print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} Run Adobe")
        os.makedirs(os.path.dirname(output_zip_path), exist_ok=True)
        pdf_path = f"test_pdf/{venue}"
        if os.path.exists(pdf_path):
            pdf2zip(pdf_path, output_zip_path)
        else:
            print(f"Error: PDF file not found at {pdf_path}")
            continue

    # Extract output folder path
    output_zipextract_folder = f"test_output/{venue}/"

    # Extract text from Adobe output file
    # text_content = extract_text_from_file_adobe(venue, output_zipextract_folder)
    text_content = extract_text_from_file_docling(venue, output_zipextract_folder)

    if not text_content:
        tqdm.write(f"⚠️ No text content found for venue: {venue}")
        continue

    raw = []
    venue_dict = {"name": venue}

    for model_class in models:
        try:
            system_prompt = create_system_prompt(model_class)
            if response.ai == "openai":
                if model_class == WeddingVenuePricingSummary:
                    ai_model = "o3-mini"
                    temperature = openai.NOT_GIVEN
                else:
                    ai_model = "gpt-4o-mini"
                    temperature = 0
                obj = response.response(
                    model=ai_model,
                    system_prompt=system_prompt,
                    user_prompt=f"Extract venue information from this text about '{venue}':\n\n{text_content}",
                    response_format=model_class,
                    temperature=temperature,
                )
            elif response.ai == "google":
                temperature = 0
                ai_model = "gemini-2.0-flash-001"
                obj = response.response(
                    model=ai_model,
                    system_prompt=system_prompt,
                    user_prompt=f"Extract venue information from this text about '{venue}':\n\n{text_content}",
                    response_format=model_class,
                    temperature=temperature,
                )
            raw.append(obj)
            if hasattr(obj, "to_string"):
                string_summary = obj.to_string()
                venue_dict[f"{model_class.__name__}_summary"] = string_summary
            else:
                venue_dict[f"{model_class.__name__}_summary"] = obj.model_dump()

            tqdm.write(f"✓ Processed {model_class.__name__} for: {venue}")

        except Exception as e:
            tqdm.write(f"✗ Error with {model_class.__name__} for {venue}: {e}")
            venue_dict[f"{model_class.__name__}_summary"] = None

    # Add venue data to the collection
    if venue_data is None:
        venue_data = WeddingVenue(venue, raw)
    else:
        try:
            venue_data += WeddingVenue(venue, raw)
        except Exception as e:
            print(f"✗ Error adding {venue}: {e}")

# Save the processed data
now = datetime.now().strftime("%Y%m%d%H%M%S")
if venue_data is not None:
    venue_data.to_excel(f"all_info_{now}.xlsx")
else:
    print("⚠️ No venue data processed.")

Processing venues:   0%|          | 0/12 [00:00<?, ?venue/s]

Processing venue: Alcazar Palm Springs.pdf
output zipextract folder: test_output/Alcazar Palm Springs.pdf/
output zip path: test_output/Alcazar Palm Springs.pdf/extract_2025-04-13T02-11-34.zip
2025-04-13 15:22:14 JSON file already exists. Skipping extraction.
2025-04-13 15:22:14 open json file
2025-04-13 15:22:14 extract text


Processing venues:   0%|          | 0/12 [00:01<?, ?venue/s]

✓ Processed WeddingContactInfo for: Alcazar Palm Springs.pdf


Processing venues:   0%|          | 0/12 [00:02<?, ?venue/s]

✓ Processed FoodBreakdown for: Alcazar Palm Springs.pdf


Processing venues:   0%|          | 0/12 [00:03<?, ?venue/s]

✓ Processed WeddingFoodInfo for: Alcazar Palm Springs.pdf


Processing venues:   0%|          | 0/12 [00:04<?, ?venue/s]

✓ Processed BarBreakdown for: Alcazar Palm Springs.pdf


Processing venues:   0%|          | 0/12 [00:06<?, ?venue/s]

✓ Processed WeddingVenuePricingSummary for: Alcazar Palm Springs.pdf


Processing venues:   0%|          | 0/12 [00:07<?, ?venue/s]

✓ Processed WeddingVenueStyle for: Alcazar Palm Springs.pdf


Processing venues:   8%|▊         | 1/12 [00:08<01:38,  8.99s/venue]

✓ Processed WeddingVenueOther for: Alcazar Palm Springs.pdf
Processing venue: Aliso Viejo Country Club.pdf
output zipextract folder: test_output/Aliso Viejo Country Club.pdf/
output zip path: test_output/Aliso Viejo Country Club.pdf/extract_2025-04-13T14-38-23.zip
2025-04-13 15:22:23 JSON file already exists. Skipping extraction.
2025-04-13 15:22:23 open json file
2025-04-13 15:22:23 extract text


Processing venues:   8%|▊         | 1/12 [01:37<01:38,  8.99s/venue]

✓ Processed WeddingContactInfo for: Aliso Viejo Country Club.pdf


Processing venues:   8%|▊         | 1/12 [01:39<01:38,  8.99s/venue]

✓ Processed FoodBreakdown for: Aliso Viejo Country Club.pdf


Processing venues:   8%|▊         | 1/12 [01:41<01:38,  8.99s/venue]

✓ Processed WeddingFoodInfo for: Aliso Viejo Country Club.pdf


Processing venues:   8%|▊         | 1/12 [01:42<01:38,  8.99s/venue]

✓ Processed BarBreakdown for: Aliso Viejo Country Club.pdf


Processing venues:   8%|▊         | 1/12 [01:44<01:38,  8.99s/venue]

✓ Processed WeddingVenuePricingSummary for: Aliso Viejo Country Club.pdf


Processing venues:   8%|▊         | 1/12 [01:46<01:38,  8.99s/venue]

✓ Processed WeddingVenueStyle for: Aliso Viejo Country Club.pdf


Processing venues:  17%|█▋        | 2/12 [01:47<10:16, 61.68s/venue]

✓ Processed WeddingVenueOther for: Aliso Viejo Country Club.pdf
Processing venue: 94th Aero Squadron Restaurant_.pdf
output zipextract folder: test_output/94th Aero Squadron Restaurant_.pdf/
output zip path: test_output/94th Aero Squadron Restaurant_.pdf/extract_2025-04-13T14-39-03.zip
2025-04-13 15:24:02 JSON file already exists. Skipping extraction.
2025-04-13 15:24:02 open json file
2025-04-13 15:24:02 extract text


Processing venues:  17%|█▋        | 2/12 [04:19<10:16, 61.68s/venue]

✓ Processed WeddingContactInfo for: 94th Aero Squadron Restaurant_.pdf


Processing venues:  17%|█▋        | 2/12 [04:23<10:16, 61.68s/venue]

✓ Processed FoodBreakdown for: 94th Aero Squadron Restaurant_.pdf


Processing venues:  17%|█▋        | 2/12 [04:24<10:16, 61.68s/venue]

✓ Processed WeddingFoodInfo for: 94th Aero Squadron Restaurant_.pdf


Processing venues:  17%|█▋        | 2/12 [04:25<10:16, 61.68s/venue]

✓ Processed BarBreakdown for: 94th Aero Squadron Restaurant_.pdf


Processing venues:  17%|█▋        | 2/12 [04:27<10:16, 61.68s/venue]

✓ Processed WeddingVenuePricingSummary for: 94th Aero Squadron Restaurant_.pdf


Processing venues:  17%|█▋        | 2/12 [04:29<10:16, 61.68s/venue]

✓ Processed WeddingVenueStyle for: 94th Aero Squadron Restaurant_.pdf


Processing venues:  25%|██▌       | 3/12 [04:30<16:11, 107.93s/venue]

✓ Processed WeddingVenueOther for: 94th Aero Squadron Restaurant_.pdf
Processing venue: Agua Hedionda Nature Center.pdf
output zipextract folder: test_output/Agua Hedionda Nature Center.pdf/
output zip path: test_output/Agua Hedionda Nature Center.pdf/extract_2025-04-13T14-39-13.zip
2025-04-13 15:26:45 JSON file already exists. Skipping extraction.
2025-04-13 15:26:45 open json file
2025-04-13 15:26:45 extract text


Processing venues:  25%|██▌       | 3/12 [04:31<16:11, 107.93s/venue]

✓ Processed WeddingContactInfo for: Agua Hedionda Nature Center.pdf


Processing venues:  25%|██▌       | 3/12 [04:32<16:11, 107.93s/venue]

✓ Processed FoodBreakdown for: Agua Hedionda Nature Center.pdf


Processing venues:  25%|██▌       | 3/12 [04:33<16:11, 107.93s/venue]

✓ Processed WeddingFoodInfo for: Agua Hedionda Nature Center.pdf


Processing venues:  25%|██▌       | 3/12 [04:35<16:11, 107.93s/venue]

✓ Processed BarBreakdown for: Agua Hedionda Nature Center.pdf


Processing venues:  25%|██▌       | 3/12 [04:37<16:11, 107.93s/venue]

✓ Processed WeddingVenuePricingSummary for: Agua Hedionda Nature Center.pdf


Processing venues:  25%|██▌       | 3/12 [04:38<16:11, 107.93s/venue]

✓ Processed WeddingVenueStyle for: Agua Hedionda Nature Center.pdf


Processing venues:  33%|███▎      | 4/12 [04:40<09:13, 69.16s/venue] 

✓ Processed WeddingVenueOther for: Agua Hedionda Nature Center.pdf
Processing venue: RIVERHOUSEBBQ.COM.pdf
output zipextract folder: test_output/RIVERHOUSEBBQ.COM.pdf/
output zip path: test_output/RIVERHOUSEBBQ.COM.pdf/extract_2025-04-13T14-39-40.zip
2025-04-13 15:26:55 JSON file already exists. Skipping extraction.
2025-04-13 15:26:55 open json file
2025-04-13 15:26:55 extract text


Processing venues:  33%|███▎      | 4/12 [06:35<09:13, 69.16s/venue]

✓ Processed WeddingContactInfo for: RIVERHOUSEBBQ.COM.pdf


Processing venues:  33%|███▎      | 4/12 [06:37<09:13, 69.16s/venue]

✓ Processed FoodBreakdown for: RIVERHOUSEBBQ.COM.pdf


Processing venues:  33%|███▎      | 4/12 [06:38<09:13, 69.16s/venue]

✓ Processed WeddingFoodInfo for: RIVERHOUSEBBQ.COM.pdf


Processing venues:  33%|███▎      | 4/12 [06:40<09:13, 69.16s/venue]

✓ Processed BarBreakdown for: RIVERHOUSEBBQ.COM.pdf


Processing venues:  33%|███▎      | 4/12 [06:43<09:13, 69.16s/venue]

✓ Processed WeddingVenuePricingSummary for: RIVERHOUSEBBQ.COM.pdf


Processing venues:  33%|███▎      | 4/12 [06:44<09:13, 69.16s/venue]

✓ Processed WeddingVenueStyle for: RIVERHOUSEBBQ.COM.pdf


Processing venues:  42%|████▏     | 5/12 [06:45<10:25, 89.38s/venue]

✓ Processed WeddingVenueOther for: RIVERHOUSEBBQ.COM.pdf
Processing venue: Aliso Viejo Wedgewood.pdf
output zipextract folder: test_output/Aliso Viejo Wedgewood.pdf/
output zip path: test_output/Aliso Viejo Wedgewood.pdf/extract_2025-04-13T14-40-01.zip
2025-04-13 15:29:00 JSON file already exists. Skipping extraction.
2025-04-13 15:29:00 open json file
2025-04-13 15:29:00 extract text


Processing venues:  42%|████▏     | 5/12 [07:37<10:25, 89.38s/venue]

✓ Processed WeddingContactInfo for: Aliso Viejo Wedgewood.pdf


Processing venues:  42%|████▏     | 5/12 [07:39<10:25, 89.38s/venue]

✓ Processed FoodBreakdown for: Aliso Viejo Wedgewood.pdf


Processing venues:  42%|████▏     | 5/12 [07:41<10:25, 89.38s/venue]

✓ Processed WeddingFoodInfo for: Aliso Viejo Wedgewood.pdf


Processing venues:  42%|████▏     | 5/12 [07:42<10:25, 89.38s/venue]

✓ Processed BarBreakdown for: Aliso Viejo Wedgewood.pdf


Processing venues:  42%|████▏     | 5/12 [07:44<10:25, 89.38s/venue]

✓ Processed WeddingVenuePricingSummary for: Aliso Viejo Wedgewood.pdf


Processing venues:  42%|████▏     | 5/12 [07:45<10:25, 89.38s/venue]

✓ Processed WeddingVenueStyle for: Aliso Viejo Wedgewood.pdf


Processing venues:  50%|█████     | 6/12 [07:47<07:59, 79.94s/venue]

✓ Processed WeddingVenueOther for: Aliso Viejo Wedgewood.pdf
Processing venue: Ace Hotel Palm Springs.pdf
output zipextract folder: test_output/Ace Hotel Palm Springs.pdf/
output zip path: test_output/Ace Hotel Palm Springs.pdf/extract_2025-04-13T02-14-12.zip
2025-04-13 15:30:02 JSON file already exists. Skipping extraction.
2025-04-13 15:30:02 open json file
2025-04-13 15:30:02 extract text


Processing venues:  50%|█████     | 6/12 [11:41<07:59, 79.94s/venue]

✓ Processed WeddingContactInfo for: Ace Hotel Palm Springs.pdf


Processing venues:  50%|█████     | 6/12 [11:44<07:59, 79.94s/venue]

✓ Processed FoodBreakdown for: Ace Hotel Palm Springs.pdf


Processing venues:  50%|█████     | 6/12 [11:46<07:59, 79.94s/venue]

✓ Processed WeddingFoodInfo for: Ace Hotel Palm Springs.pdf


Processing venues:  50%|█████     | 6/12 [11:47<07:59, 79.94s/venue]

✓ Processed BarBreakdown for: Ace Hotel Palm Springs.pdf


Processing venues:  50%|█████     | 6/12 [11:50<07:59, 79.94s/venue]

✓ Processed WeddingVenuePricingSummary for: Ace Hotel Palm Springs.pdf


Processing venues:  50%|█████     | 6/12 [11:52<07:59, 79.94s/venue]

✓ Processed WeddingVenueStyle for: Ace Hotel Palm Springs.pdf


Processing venues:  58%|█████▊    | 7/12 [11:53<11:11, 134.38s/venue]

✓ Processed WeddingVenueOther for: Ace Hotel Palm Springs.pdf
Processing venue: Rustic Pearl Weddings & Events.pdf
output zipextract folder: test_output/Rustic Pearl Weddings & Events.pdf/
output zip path: test_output/Rustic Pearl Weddings & Events.pdf/extract_2025-04-13T14-44-52.zip
2025-04-13 15:34:08 JSON file already exists. Skipping extraction.
2025-04-13 15:34:08 open json file
2025-04-13 15:34:08 extract text


Processing venues:  58%|█████▊    | 7/12 [12:16<11:11, 134.38s/venue]

✓ Processed WeddingContactInfo for: Rustic Pearl Weddings & Events.pdf


Processing venues:  58%|█████▊    | 7/12 [12:17<11:11, 134.38s/venue]

✓ Processed FoodBreakdown for: Rustic Pearl Weddings & Events.pdf


Processing venues:  58%|█████▊    | 7/12 [12:18<11:11, 134.38s/venue]

✓ Processed WeddingFoodInfo for: Rustic Pearl Weddings & Events.pdf


Processing venues:  58%|█████▊    | 7/12 [12:20<11:11, 134.38s/venue]

✓ Processed BarBreakdown for: Rustic Pearl Weddings & Events.pdf


Processing venues:  58%|█████▊    | 7/12 [12:21<11:11, 134.38s/venue]

✓ Processed WeddingVenuePricingSummary for: Rustic Pearl Weddings & Events.pdf


Processing venues:  58%|█████▊    | 7/12 [12:23<11:11, 134.38s/venue]

✓ Processed WeddingVenueStyle for: Rustic Pearl Weddings & Events.pdf


Processing venues:  67%|██████▋   | 8/12 [12:25<06:46, 101.62s/venue]

✓ Processed WeddingVenueOther for: Rustic Pearl Weddings & Events.pdf
Processing venue: Almansor Court.pdf
output zipextract folder: test_output/Almansor Court.pdf/
output zip path: test_output/Almansor Court.pdf/extract_2025-04-13T02-14-44.zip
2025-04-13 15:34:39 JSON file already exists. Skipping extraction.
2025-04-13 15:34:39 open json file
2025-04-13 15:34:39 extract text


Processing venues:  67%|██████▋   | 8/12 [15:24<06:46, 101.62s/venue]

✓ Processed WeddingContactInfo for: Almansor Court.pdf


Processing venues:  67%|██████▋   | 8/12 [15:27<06:46, 101.62s/venue]

✓ Processed FoodBreakdown for: Almansor Court.pdf


Processing venues:  67%|██████▋   | 8/12 [15:28<06:46, 101.62s/venue]

✓ Processed WeddingFoodInfo for: Almansor Court.pdf


Processing venues:  67%|██████▋   | 8/12 [15:30<06:46, 101.62s/venue]

✓ Processed BarBreakdown for: Almansor Court.pdf


Processing venues:  67%|██████▋   | 8/12 [15:32<06:46, 101.62s/venue]

✓ Processed WeddingVenuePricingSummary for: Almansor Court.pdf


Processing venues:  67%|██████▋   | 8/12 [15:33<06:46, 101.62s/venue]

✓ Processed WeddingVenueStyle for: Almansor Court.pdf


Processing venues:  75%|███████▌  | 9/12 [15:35<06:28, 129.39s/venue]

✓ Processed WeddingVenueOther for: Almansor Court.pdf
Processing venue: Shell_s Loft.pdf
output zipextract folder: test_output/Shell_s Loft.pdf/
output zip path: test_output/Shell_s Loft.pdf/extract_2025-04-13T14-45-50.zip
2025-04-13 15:37:50 JSON file already exists. Skipping extraction.
2025-04-13 15:37:50 open json file
2025-04-13 15:37:50 extract text


Processing venues:  75%|███████▌  | 9/12 [19:26<06:28, 129.39s/venue]

✓ Processed WeddingContactInfo for: Shell_s Loft.pdf


Processing venues:  75%|███████▌  | 9/12 [19:28<06:28, 129.39s/venue]

✓ Processed FoodBreakdown for: Shell_s Loft.pdf


Processing venues:  75%|███████▌  | 9/12 [19:29<06:28, 129.39s/venue]

✓ Processed WeddingFoodInfo for: Shell_s Loft.pdf


Processing venues:  75%|███████▌  | 9/12 [19:30<06:28, 129.39s/venue]

✓ Processed BarBreakdown for: Shell_s Loft.pdf


Processing venues:  75%|███████▌  | 9/12 [19:33<06:28, 129.39s/venue]

✓ Processed WeddingVenuePricingSummary for: Shell_s Loft.pdf


Processing venues:  75%|███████▌  | 9/12 [19:34<06:28, 129.39s/venue]

✓ Processed WeddingVenueStyle for: Shell_s Loft.pdf


Processing venues:  83%|████████▎ | 10/12 [19:36<05:27, 163.74s/venue]

✓ Processed WeddingVenueOther for: Shell_s Loft.pdf
Processing venue: a.o.c. Brentwood.pdf
output zipextract folder: test_output/a.o.c. Brentwood.pdf/
output zip path: test_output/a.o.c. Brentwood.pdf/extract_2025-04-13T02-15-11.zip
2025-04-13 15:41:51 JSON file already exists. Skipping extraction.
2025-04-13 15:41:51 open json file
2025-04-13 15:41:51 extract text


Processing venues:  83%|████████▎ | 10/12 [20:41<05:27, 163.74s/venue]

✓ Processed WeddingContactInfo for: a.o.c. Brentwood.pdf


Processing venues:  83%|████████▎ | 10/12 [20:45<05:27, 163.74s/venue]

✓ Processed FoodBreakdown for: a.o.c. Brentwood.pdf


Processing venues:  83%|████████▎ | 10/12 [20:46<05:27, 163.74s/venue]

✓ Processed WeddingFoodInfo for: a.o.c. Brentwood.pdf


Processing venues:  83%|████████▎ | 10/12 [20:48<05:27, 163.74s/venue]

✓ Processed BarBreakdown for: a.o.c. Brentwood.pdf


Processing venues:  83%|████████▎ | 10/12 [20:50<05:27, 163.74s/venue]

✓ Processed WeddingVenuePricingSummary for: a.o.c. Brentwood.pdf


Processing venues:  83%|████████▎ | 10/12 [20:52<05:27, 163.74s/venue]

✓ Processed WeddingVenueStyle for: a.o.c. Brentwood.pdf


Processing venues:  92%|█████████▏| 11/12 [20:53<01:53, 113.99s/venue]

✓ Processed WeddingVenueOther for: a.o.c. Brentwood.pdf
Processing venue: Avenue of the Arts.pdf





IndexError: list index out of range

In [22]:
# Save the processed data
now = datetime.now().strftime("%Y%m%d%H%M%S")
if venue_data is not None:
    venue_data.to_excel(f"all_info_gemini_{now}.xlsx")
else:
    print("⚠️ No venue data processed.")

saving to:  all_info_gemini_20250413160430.xlsx


In [None]:
venue = WeddingVenue(venue_name, raw)
venue.to_excel()

In [48]:
print(text_content)

Thank you for this information. For something like this, it would need to be a full restaurant buyout. 
With a full restaurant buyout, you will have access to three beautiful, spacious, Nickey Kehoe designed dining rooms: the San Vicente Room, the Green Room, and the Main Room for up to 150 total as a standing, cocktail style reception. The price is $20,000 (Sun) or $26,000 (Fri/Sat) for up to 80 guests and $150 per person thereafter. This price includes private use of the restaurant, a specialty menu (food stations with tray passed hors d'oeuvres are recommended), and beverages (sommelier select wine, specialty cocktails, and select-spirit open bar). All prices are before tax, fees, and gratuity. 
Please note, because you are dancing, we will need to have you bring in a dance floor to protect the floors, as well as additional insurance for your guests. 
Attached please find the following: 
● 
Private Events Brochure 
● 
Pricing 
● 
More information on an a.o.c. brentwood buyout 
● 
Cu

In [20]:
obj

WeddingVenueOther(outside_wedding_coordinator='True', outside_photographer='True', package_approach='1: This venue offers fixed packages, with a few extras or options', reception_or_ceremony='3: Space for both reception and the ceremony', what_time_does_the_party_need_to_stop='X: Not enough information', top_choices='The venue offers a stylish and elegant space with various room options, including a full restaurant buyout for larger events. The shared-plate menus and curated wine selection provide a unique dining experience.', guest_capacity='150')