In [6]:
from wedding_venues import pdf2zip

# Run the sample venue PDF through `pdf2zip`. The second argument ("tmp") is
# passed through unchanged — presumably the output directory; TODO confirm.
aero_pdf_path = "/Users/mac-robertsocolewicz/Documents/private/playground_tables/test_pdf/94th Aero Squadron Restaurant_.pdf"
pdf2zip(aero_pdf_path, "tmp")

INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started uploading asset
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished uploading asset
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started submitting EXTRACT_PDF job
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started getting job result
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished polling for status
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished getting job result
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started getting content
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished getting content


In [2]:
import zipfile

# Unpack every member of the extraction archive into the relative "tmp" folder.
zip_path = "/Users/mac-robertsocolewicz/Documents/private/playground_tables/tmp/extract_2025-04-12T21-05-01.zip"
with zipfile.ZipFile(zip_path, mode="r") as zip_ref:
    zip_ref.extractall(path="tmp")

In [16]:
from openai import OpenAI
from pydantic import BaseModel, Field
from wedding_venues.image import local_image_to_data_url


# Structured-output schema: this class is passed as `response_format` to the
# chat-completions `parse` call, and each Field description below is runtime
# text (part of the model definition), not a code comment.
# NOTE(review): documented with `#` comments rather than a class docstring
# because a docstring on a pydantic model presumably ends up in the generated
# JSON schema — confirm before adding one.
class WeddingText(BaseModel):
    # Markdown transcription of the image's text; None when it is unreadable.
    text: str | None = Field(
        description="""
        Extracted text from the image in markdown format. Returns None if text
        is unreadable or gibberish
        """
    )
    # True for meaningful photographs, False for logos/backgrounds/decoration.
    is_photo: bool = Field(
        description="""Indicates if the image is a meaningful photograph (food,
                    decor, venue, or wedding related) rather than a logo,
                    background, or decorative element"""
    )
    # Short free-text description of what the image shows.
    description: str = Field(
        description="""Brief description of the image content, particularly
                    useful for categorizing photos of food, decor, or
                    wedding-related scenes"""
    )


def image_to_wedding_text(
    image_path: str,
    client: OpenAI | None = None,
    model: str = "gpt-4o",
) -> WeddingText:
    """Ask a vision-capable chat model to describe and transcribe one image.

    Args:
        image_path: Path of a local image; it is converted to a data URL with
            ``local_image_to_data_url`` before being sent.
        client: Optional pre-built ``OpenAI`` client. When omitted, a new
            client is created for this call (the previous behaviour); pass
            one in to reuse it across many images.
        model: Name of the chat model to query. Defaults to ``"gpt-4o"``.

    Returns:
        The model's answer parsed into a ``WeddingText`` instance.

    Raises:
        ValueError: If the response contains no parsed payload (for example
            when the model refuses), instead of silently returning ``None``
            and failing later on attribute access.
    """
    if client is None:
        client = OpenAI()

    # The prompt is runtime text sent to the model; keep it byte-for-byte.
    prompt = """
        You are tasked with summarizing the description of the images about
        wedding venues. Give a concise summary of the images provided to you.
        Focus on the style and wedding theme. If decorative elements (e.g.,
        illustrations such as leaves and flowers, templates) are present, do not
        confuse them with real settings.The output should not be more than 30
        words. If there is text present, please transcribe. If the image is not
        related to a wedding venue, please ignore."""

    data_url = local_image_to_data_url(image_path)

    response = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
        response_format=WeddingText,
    )

    message = response.choices[0].message
    if message.parsed is None:
        # `refusal` is read defensively in case the attribute is absent.
        refusal = getattr(message, "refusal", None)
        raise ValueError(
            f"No structured output returned for {image_path!r}"
            + (f": {refusal}" if refusal else "")
        )
    return message.parsed


In [13]:
from wedding_venues import image_properties
import glob
import math

from wedding_venues.image import resize_image


def downsize_image(image_path, max_size=1024):
    """Resize an image with ``resize_image`` and return it as a data URL.

    ``image_path`` and ``max_size`` are forwarded unchanged to
    ``resize_image``; its result is handed to ``local_image_to_data_url``.
    """
    return local_image_to_data_url(resize_image(image_path, max_size))


def get_cost(
    properties,
    usd_per_million_tokens=2.5,
    base_tokens=85,
    tokens_per_tile=170,
    tile_size=512,
):
    """Estimate the dollar cost of sending one image to the model.

    The image is counted in ``tile_size`` x ``tile_size`` tiles (partial tiles
    round up); each tile costs ``tokens_per_tile`` tokens on top of a fixed
    ``base_tokens``. The token total is then priced at
    ``usd_per_million_tokens``.

    NOTE(review): the defaults appear to mirror OpenAI's published
    high-detail image token formula and a 2.5 USD / 1M input-token price —
    confirm against current pricing. No rescaling step is applied here; the
    width/height are used exactly as given.

    Args:
        properties: Mapping with integer-like ``"width"`` and ``"height"``
            entries (pixels).
        usd_per_million_tokens: Price of one million tokens, in dollars.
        base_tokens: Fixed token overhead per image.
        tokens_per_tile: Tokens charged for each tile.
        tile_size: Edge length of a tile in pixels.

    Returns:
        Estimated cost in dollars as a float.
    """
    width, height = properties["width"], properties["height"]
    tiles_wide = math.ceil(width / tile_size)
    tiles_high = math.ceil(height / tile_size)
    tokens = base_tokens + tiles_wide * tiles_high * tokens_per_tile
    return tokens * usd_per_million_tokens / 1_000_000


# Dry run: print the estimated cost of every figure that would be sent to the
# model (those flagged as a photo or as containing text), then the total.
figures = glob.glob(
    "/Users/mac-robertsocolewicz/Documents/private/playground_tables/tmp/figures/*"
)
total_cost = 0
for figure in sorted(figures):
    # Properties are measured on the resized (max 1024) copy of the figure.
    properties = image_properties(resize_image(figure, 1024))
    is_photo = properties["is_photo"]
    has_text = properties["has_text"]
    size = f"{properties['width']}x{properties['height']}"
    if not (has_text or is_photo):
        continue
    cost = get_cost(properties)
    print(size, cost)
    total_cost += cost
print(total_cost)


750x1024 0.0019125
1024x957 0.0019125
1024x821 0.0019125
526x363 0.0010625
365x269 0.0006375
1024x781 0.0019125
1024x503 0.0010625
1024x507 0.0010625
483x293 0.0006375
791x1024 0.0019125
764x1024 0.0019125
1024x444 0.0010625
1024x451 0.0010625
863x1024 0.0019125
1024x722 0.0019125
667x238 0.0010625
1024x663 0.0019125
1024x1022 0.0019125
1024x1020 0.0019125
766x1023 0.0019125
1024x464 0.0010625
0.0316625


In [18]:
from tqdm import tqdm

# Send every relevant figure (photo or text-bearing) to the model and collect
# the parsed results in `wedding_texts`.
figures = glob.glob(
    "/Users/mac-robertsocolewicz/Documents/private/playground_tables/tmp/figures/*"
)
# NOTE(review): total_cost is reset here but never updated in this cell.
total_cost = 0
wedding_texts = []
for figure in tqdm(sorted(figures)):
    # From here on `figure` is whatever resize_image returned for the original.
    figure = resize_image(figure, 1024)
    properties = image_properties(figure)
    is_photo = properties["is_photo"]
    has_text = properties["has_text"]

    if not (has_text or is_photo):
        continue
    wedding_texts.append(image_to_wedding_text(figure))

  0%|          | 0/32 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  3%|▎         | 1/32 [00:16<08:26, 16.35s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  9%|▉         | 3/32 [00:23<03:24,  7.06s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 12%|█▎        | 4/32 [00:27<02:44,  5.87s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 19%|█▉        | 6/32 [00:30<01:27,  3.37s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 22%|██▏       | 7/32 [00:32<01:15,  3.01s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 28%|██▊       | 9/32 [00:39<01:05,  2.86s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 31%|███▏      | 10/32 [00:53<02:17,  6.25s/it]IN

In [21]:
# Flatten the per-image results into one string: photo descriptions are
# wrapped in marker lines, transcribed text is appended verbatim after them.
chunks = []
for wedding_text in wedding_texts:
    if wedding_text.is_photo:
        # Same bytes as the original indented triple-quoted f-string.
        chunks.append(
            "\n"
            "            ==== image description ====\n"
            f"            {wedding_text.description}\n"
            "            ==== end of image description ====\n"
            "            "
        )
    if wedding_text.text is not None:
        chunks.append(wedding_text.text)
text = "".join(chunks)
print(text)

94TH AERO SQUADRON RESTAURANT  
HORS D’ OEUVRES & COCKTAIL RECEPTION PACKAGES

COLD SELECTIONS  
PRICED PER PIECE - MINIMUM OF 50 PIECES IS REQUIRED  
APPETIZERS ARE BUFFET STYLE - IF ATTENDANT IS REQUIRED, A FEE OF $100++ WILL APPLY.

JUMBO SHRIMP SHOOTER  11  
SMOKED SALMON ROSE – DILL CREAM CHEESE  8  
CAPRESE SALAD SKEWERS – MOZZARELLA, TOMATO, & BASIL  5  
ROAST BEEF PINWHEEL – HORSERADISH CREAM  8  
SPICED SEARED AHI – SIRACHA AIOLI  7

HOT SELECTIONS  
PRICED PER PIECE – MINIMUM OF 50 PIECES IS REQUIRED  
APPETIZERS ARE BUFFET STYLE – IF ATTENDANT IS REQUIRED, A FEE OF $100++ WILL APPLY.

STUFFED POTATO SKIN – BACON, CHEESE, & PICO DE GALLO  5  
VEGETABLE SPRING ROLL – SWEET & SOUR SAUCE  3  
BEEF OR CHICKEN QUESADILLA TRIANGLE – SALSA ROJA  6  
CHICKEN FINGER – RANCH & BBQ SAUCE  6  
CHICKEN WING – BUFFALO SAUCE  4  
ARTICHOKE PARM FRITTER  6  
BUFFALO CHICKEN RAGOON  5

COLD DISPLAYS  
SEASONAL FRESH FRUIT – SERVES 50  135  
IMPORTED & DOMESTIC CHEESE – SERVES 50  195  
FRESH 

In [19]:
# Show the raw list of parsed WeddingText results as the cell's output.
wedding_texts

[WeddingText(text='94TH AERO SQUADRON RESTAURANT  \nHORS D’ OEUVRES & COCKTAIL RECEPTION PACKAGES\n\nCOLD SELECTIONS  \nPRICED PER PIECE - MINIMUM OF 50 PIECES IS REQUIRED  \nAPPETIZERS ARE BUFFET STYLE - IF ATTENDANT IS REQUIRED, A FEE OF $100++ WILL APPLY.\n\nJUMBO SHRIMP SHOOTER  11  \nSMOKED SALMON ROSE – DILL CREAM CHEESE  8  \nCAPRESE SALAD SKEWERS – MOZZARELLA, TOMATO, & BASIL  5  \nROAST BEEF PINWHEEL – HORSERADISH CREAM  8  \nSPICED SEARED AHI – SIRACHA AIOLI  7\n\nHOT SELECTIONS  \nPRICED PER PIECE – MINIMUM OF 50 PIECES IS REQUIRED  \nAPPETIZERS ARE BUFFET STYLE – IF ATTENDANT IS REQUIRED, A FEE OF $100++ WILL APPLY.\n\nSTUFFED POTATO SKIN – BACON, CHEESE, & PICO DE GALLO  5  \nVEGETABLE SPRING ROLL – SWEET & SOUR SAUCE  3  \nBEEF OR CHICKEN QUESADILLA TRIANGLE – SALSA ROJA  6  \nCHICKEN FINGER – RANCH & BBQ SAUCE  6  \nCHICKEN WING – BUFFALO SAUCE  4  \nARTICHOKE PARM FRITTER  6  \nBUFFALO CHICKEN RAGOON  5\n\nCOLD DISPLAYS  \nSEASONAL FRESH FRUIT – SERVES 50  135  \nIMPORT

In [2]:
# Inspect the computed properties of the first entry of `figures`.
# NOTE(review): this cell's execution count (In [2]) precedes the cells that
# build `figures` here, so it presumably ran against an earlier glob — confirm
# which directory `figures` pointed at.
image_properties(figures[0])

{'width': 1757,
 'height': 1153,
 'total_pixels': 2025821,
 'text_density': 0.00495107909336511,
 'color_std': np.float64(59.85857381633642),
 'edge_density': 0.09011309488844275,
 'is_photo': True,
 'has_text': True,
 'extracted_text': 'Green Room'}

In [17]:
# Print the transcribed text of a parsed chat-completion response.
# NOTE(review): `response` is not assigned at notebook scope in any visible
# cell (it is only a local inside image_to_wedding_text), so this presumably
# relies on leftover kernel state and would fail on a fresh run — confirm.
print(response.choices[0].message.parsed.text)

94TH AERO SQUADRON RESTAURANT
HORS D’ OEUVRES & COCKTAIL RECEPTION PACKAGES

COLD SELECTIONS

PRICED PER PIECE - MINIMUM OF 50 PIECES IS REQUIRED
APPETIZERS ARE BUFFET STYLE - IF ATTENDANT IS REQUIRED, A FEE OF $100++ WILL APPLY.

JUMBO SHRIMP SHOOTER 11
SMOKED SALMON ROSE - DILL CREAM CHEESE 8
CAPRESE SALAD SKEWERS - MOZZARELLA, TOMATO, & BASIL 5
ROAST BEEF PINWHEEL - HORSERADISH CREAM 8
SPICED SEARED AHI - SIRACHA AIOLI 7

HOT SELECTIONS

PRICED PER PIECE - MINIMUM OF 50 PIECES IS REQUIRED
APPETIZERS ARE BUFFET STYLE - IF ATTENDANT IS REQUIRED, A FEE OF $100++ WILL APPLY.

STUFFED POTATO SKIN - BACON, CHEESE, & PICO DE GALLO 5
VEGETABLE SPRING ROLL - SWEET & SOUR SAUCE 3
BEEF OR CHICKEN QUESADILLA TRIANGLE - SALSA ROJA 6
CHICKEN FINGER - RANCH & BBQ SAUCE 6
CHICKEN WING - BUFFALO SAUCE 4
ARTICHOKE PARM FRITTER 6
BUFFALO CHICKEN RAGOON 5

COLD DISPLAYS

SEASONAL FRESH FRUIT - SERVES 50 135
IMPORTED & DOMESTIC CHEESE - SERVES 50 195
FRESH VEGETABLE CRUDITÉ - SERVES 50 120

DESSERT SELE

In [3]:
from wedding_venues import zip2md


# Convert an earlier extraction archive with `zip2md`; the second argument is
# the sibling ".md" path — presumably the markdown output file; TODO confirm.
extract_zip_path = "/Users/mac-robertsocolewicz/Documents/private/playground_tables/test_pdf/extracted/extract_2025-04-12T14-35-45.zip"
extract_md_path = "/Users/mac-robertsocolewicz/Documents/private/playground_tables/test_pdf/extracted/extract_2025-04-12T14-35-45.md"
zip2md(extract_zip_path, extract_md_path)

output zip path: /Users/mac-robertsocolewicz/Documents/private/playground_tables/test_pdf/extracted/extract_2025-04-12T14-35-45.zip
2025-04-12 17:02:36 unzip file
2025-04-12 17:02:36 open json file
2025-04-12 17:02:36 extract content


'Thank you for this information. For something like this, it would need to be a full restaurant buyout. \nWith a full restaurant buyout, you will have access to three beautiful, spacious, Nickey Kehoe designed dining rooms: the San Vicente Room, the Green Room, and the Main Room for up to 150 total as a standing, cocktail style reception. The price is $20,000 (Sun) or $26,000 (Fri/Sat) for up to 80 guests and $150 per person thereafter. This price includes private use of the restaurant, a specialty menu (food stations with tray passed hors d\'oeuvres are recommended), and beverages (sommelier select wine, specialty cocktails, and select-spirit open bar). All prices are before tax, fees, and gratuity. \nPlease note, because you are dancing, we will need to have you bring in a dance floor to protect the floors, as well as additional insurance for your guests. \nAttached please find the following: \n● \nPrivate Events Brochure \n● \nPricing \n● \nMore information on an a.o.c. brentwood bu

In [3]:
import glob
from wedding_venues import image_properties
import os

# For each extracted figure, print its file name, the photo flag and the
# extracted text reported by image_properties.
figure_pattern = "/Users/mac-robertsocolewicz/Documents/private/playground_tables/test_pdf/extracted/a.o.c. Brentwood/figures/*"
for image_path in glob.glob(figure_pattern):
    properties = image_properties(image_path)
    file_name = os.path.basename(image_path)
    print(file_name, properties["is_photo"], properties["extracted_text"])


fileoutpart22.png True Green Room
fileoutpart23.png True a
fileoutpart21.png False 
fileoutpart20.png True 
fileoutpart18.png True hi il Wi rr Lilli i ————
fileoutpart19.png True 
fileoutpart17.png True 
fileoutpart16.png False 
fileoutpart14.png True a Ba GS eam Am A gM oi GAN pl AM AN AN gD Ng gM A AN gD AD gD AND gh ABD AD A Ay Mg Ag SS ee ee ee a —- o_o Ne eee ee eee SS —————— OO Se ee ee
fileoutpart15.png True 
fileoutpart4.png True 
fileoutpart5.png True 
fileoutpart1.png False 
fileoutpart0.png False 
fileoutpart2.png False 
fileoutpart3.png False 


In [5]:
# Same check for a single image: one summary line, then the extracted text
# again on a line of its own.
image_path = "image.png"
properties = image_properties(image_path)
summary = (
    os.path.basename(image_path),
    properties["is_photo"],
    properties["extracted_text"],
)
print(*summary)
print(summary[-1])

image.png False 94TH AERO SQUADRON RESTAURANT HORS D’ OEUVRES & COCKTAIL RECEPTION PACKAGES — vtfa COLD SELECTIONS PRICED PER PIECE - MINIMUM OF 50 PIECES IS REQUIRED APPETIZERS ARE BUFFET STYLE - IF ATTENDANT IS REQUIRED, A FEE OF $100++ WILL APPLY. JUMBO SHRIMP SHOOTER SMOKED SALMON ROSE - DILL CREAM CHEESE CAPRESE SALAD SKEWERS - MOZZARELLA, TOMATO, & BASIL ROAST BEEF PINWHEEL - HORSERADISH CREAM SPICED SEARED AHI - SIRACHA AIOLI = = J 0 Ui OO HOT SELECTIONS PRICED PER PIECE - MINIMUM OF 50 PIECES IS REQUIRED APPETIZERS ARE BUFFET STYLE - IF ATTENDANT IS REQUIRED, A FEE OF $100++ WILL APPLY. STUFFED POTATO SKIN - BACON, CHEESE, & PICO DE GALLO 5 VEGETABLE SPRING ROLL - SWEET & SOUR SAUCE 3 BEEF OR CHICKEN QUESADILLA TRIANGLE - SALSA ROJA 6 CHICKEN FINGER - RANCH & BBQ SAUCE 6 CHICKEN WING - BUFFALO SAUCE 4 ARTICHOKE PARM FRITTER 6 BUFFALO CHICKEN RAGOON 5 COLD DISPLAYS SEASONAL FRESH FRUIT - SERVES 50 135 IMPORTED & DOMESTIC CHEESE - SERVES 50 195 FRESH VEGETABLE CRUDITE - SERVES 50

In [1]:
from wedding_venues import PDFExtractionConverter

# Build a converter for the a.o.c. Brentwood PDF and write its markdown to
# "custom_output.md".
aoc_pdf_path = "/Users/mac-robertsocolewicz/Documents/private/playground_tables/test_pdf/a.o.c. Brentwood.pdf"
converter = PDFExtractionConverter(aoc_pdf_path)

converter.save_markdown("custom_output.md")

INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started uploading asset
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished uploading asset
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started submitting EXTRACT_PDF job
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started getting job result
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished polling for status
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished getting job result
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started getting content
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished getting content
