In [1]:
import os
import base64
import requests
import json

In [2]:
# OpenAI API key, read from the OPENAI_API_KEY environment variable
# (None when the variable is not set)
api_key = os.environ.get('OPENAI_API_KEY')

# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [5]:
# Function that sends an image to the OpenAI API to generate tags and returns the parsed JSON response
# Prompt sent with every image. The leading newline and the indentation are
# part of the string that is sent, so the literal is kept exactly as written.
_TAG_PROMPT = """
                                As an image tags generator, your task is to provide concise and accurate tags of images. Follow these guidelines to ensure your tags are both informative and succinct:
                                
                                1. List any people or prominent subjects, noting their positions and actions. If the image is crowded, prioritize the most central or striking figures.
                                2. Give tags significant background elements. Choose details that frame the context of the image effectively.
                                3. Give tags for any clear text, symbols, or logos, which are crucial for understanding the image's context or intention.
                                4. Only give tags on the mood or themes like happiness, freedom, success.....etc., if these are directly observable from expressions or actions.
                                5. Include only what is clearly visible. Do not infer or assume details not explicitly shown in the image.
                                6. Maintain a neutral tone, avoiding subjective interpretations or emotional language unless directly relevant to the described actions or expressions.
                                Most importantly your tags should balance brevity with informativeness, capturing the essence of the image while adhering to the condition that tags must be separated by a space(“ ”) and for each tag include a few similar words that can be used inplace of the oriignal tag
                                """

# MIME type declared in the data URL, keyed by lower-case file extension.
_IMAGE_MIME_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}


def process_image(image_path, timeout=60, model="gpt-4-vision-preview"):
    """Send one image to the chat-completions endpoint and return the reply.

    The image is base64-encoded, embedded in a data URL together with the
    tagging prompt, and posted to the OpenAI API.

    Args:
        image_path: Path of the image file to send.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without it a stalled connection would block forever.
        model: Name of the model to query. The default is the value the
            notebook originally hard-coded.
            NOTE(review): confirm this model is still served by the API.

    Returns:
        The response body parsed from JSON. The HTTP status is not checked
        here, so an API error payload is returned as-is for the caller to
        inspect.

    Raises:
        RuntimeError: If ``api_key`` is empty or ``None``.
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
    """
    # Fail fast instead of sending "Bearer None" and getting a 401 per image.
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY is not set; cannot call the OpenAI API")

    # Declare the MIME type that matches the file; the original always claimed
    # JPEG, which is still the fallback for unknown extensions.
    extension = os.path.splitext(image_path)[1].lower()
    mime_type = _IMAGE_MIME_TYPES.get(extension, "image/jpeg")

    base64_image = encode_image(image_path)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": _TAG_PROMPT
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()


In [6]:
# Directory containing images
directory_path = r"path_to_the_image_directory"

# File extensions (compared in lower case) that are sent for tagging
image_extensions = ('.png', '.jpg', '.jpeg')

# Loop through each image in the directory, in a stable (sorted) order
for filename in sorted(os.listdir(directory_path)):
    if not filename.lower().endswith(image_extensions):
        continue

    full_path = os.path.join(directory_path, filename)
    try:
        output = process_image(full_path)
    except (requests.exceptions.RequestException, ValueError) as exc:
        # Network failure or a body that is not JSON: report it and move on
        # so one bad request does not abort the whole batch.
        print(f"Failed to process {filename}: {exc}")
        continue

    # The API reports failures as a JSON object with a top-level "error" key;
    # do not store that as if it were a tagging result.
    if isinstance(output, dict) and "error" in output:
        print(f"API error for {filename}: {output['error']}")
        continue

    # Save the output in a JSON file next to the image
    output_filename = os.path.splitext(filename)[0] + '.json'
    output_path = os.path.join(directory_path, output_filename)
    with open(output_path, 'w', encoding='utf-8') as json_file:
        json.dump(output, json_file, indent=4)

    print(f"Processed and saved output for {filename}")

Processed and saved output for 001121e5-406a-4c02-9d49-96cacb33bc78.jpg
Processed and saved output for 002_112_001.jpg
Processed and saved output for 002_121_001.jpg
Processed and saved output for 002_541_001.jpg
Processed and saved output for 002_579_001.jpg
Processed and saved output for 003afd84-635e-4a02-bf08-8c3a6be2c8f6.jpg
Processed and saved output for 004d83ee-e38c-4ad1-8d3d-8d1595aeb57d.jpg
Processed and saved output for 00be5180-9c0b-49e5-aec9-098633c8ab2b.jpg
Processed and saved output for 027_029_001.jpg
Processed and saved output for 035_320_001.jpg
Processed and saved output for 035_321_001.jpg
Processed and saved output for 04f77e3d-4d90-47f7-8d31-ada88fa53788.jpg
Processed and saved output for 071_018_002.jpg
Processed and saved output for 071_018_003.jpg
Processed and saved output for 14.jpg
Processed and saved output for 1408c1b2-c488-4924-afde-0d8835f1070e.jpg
Processed and saved output for 15.jpg
Processed and saved output for 16.jpg
Processed and saved output for 