In [1]:
import openai
from openai import OpenAI
import os
from os import path
import base64
import requests
import json
import time

In [None]:
# Read the API key from the environment so it never has to be pasted into (and
# saved with) the notebook. Falls back to an empty string when the variable is
# not set, which is the value this cell used to hard-code.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
# Also set the key as the openai package's module-level default.
openai.api_key = OPENAI_API_KEY

In [3]:
# SDK client bound to the configured key. The cells shown in this notebook talk
# to the REST endpoint through `requests` and do not call this object.
client = OpenAI(
    api_key=OPENAI_API_KEY,
)

In [4]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode, base64-encoded, and the resulting
    bytes are decoded to a ``str``.
    """
    with open(image_path, mode="rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

In [5]:
# Directory that holds one sub-directory of images per subset.
DATASET_BASE = "../dataset/clic2020-professional/preprocessed"

# Sub-directories of DATASET_BASE to process.
# SUBSETS = ['test']
SUBSETS = ['valid']

# Prompt sent along with every image. The pieces below are adjacent string
# literals, so they are joined with no separator: the prompt is one single
# line in which "image:" is immediately followed by "1. Striking Objects".
QUESTION = (
    "Please give me the following information based on the input image:"
    "1. Striking Objects: a list of names of the three most striking objects, people, or animals in that image."
    "2. Detailed Description: a list that consists of each object's detailed description. The content of each item should be placed in the same line. Each description should be within 10 words."
    "3. Overall Description: an overall description of this image within 50 words."
)

# Pause after each API request, in seconds.
INTERVAL_BT_REQ = 5

In [6]:
# HTTP headers reused for every request: a JSON body and bearer-token
# authorization built from the configured API key.
headers = dict([
    ("Content-Type", "application/json"),
    ("Authorization", f"Bearer {OPENAI_API_KEY}"),
])

## CLIC2020 Test Set

In [8]:
# Ask the model to describe every PNG of each subset in SUBSETS that has no
# answer file yet. For <name>.png the raw reply is written to
# <name>.response.txt and the extracted text to <name>.answer.txt, both next
# to the image. Images whose request fails are reported and skipped without
# writing anything, so re-running this cell retries exactly those images.
for subset in SUBSETS:
    dirname = path.abspath(path.join(DATASET_BASE, subset))
    print(f"Subset: {subset}: {dirname}")
    # Directory snapshot taken once; a set keeps the "already answered?"
    # lookup below constant-time instead of scanning a list per image.
    files = {f for f in os.listdir(dirname) if path.isfile(path.join(dirname, f))}
    for f in sorted(files):
        # Compare the real extension so names that merely contain ".png"
        # (e.g. "x.png.bak") are not treated as images.
        img_name, ext = path.splitext(f)
        if ext != ".png":
            continue

        # Skip images answered by an earlier run.
        if f"{img_name}.answer.txt" in files:
            continue

        img_fullpath = path.join(dirname, f)

        # Getting the base64 string
        base64_image = encode_image(img_fullpath)

        payload = {
            "model": "gpt-4-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": QUESTION
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            "max_tokens": 300
        }

        try:
            # The timeout (seconds) keeps one stalled connection from hanging
            # the whole batch indefinitely.
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=120,
            )
        except requests.RequestException as err:
            print(f"FILE: {f}; REQUEST FAILED: {err}")
            time.sleep(INTERVAL_BT_REQ)
            continue

        # A reply without choices[0].message.content (for example an error
        # body, or a body that is not JSON) has no answer to store.
        try:
            answer = json.loads(response.text)['choices'][0]['message']['content']
        except (ValueError, KeyError, IndexError, TypeError):
            answer = None
        if not isinstance(answer, str):
            print(f"FILE: {f}; NO ANSWER (HTTP {response.status_code}): {response.text[:200]}")
            time.sleep(INTERVAL_BT_REQ)
            continue

        # Writing to files. UTF-8 is set explicitly so characters outside the
        # platform's default code page cannot make the write fail.
        with open(path.join(dirname, f"{img_name}.response.txt"), "w", encoding="utf-8") as res_f:
            res_f.write(response.text)
        with open(path.join(dirname, f"{img_name}.answer.txt"), "w", encoding="utf-8") as ans_f:
            ans_f.write(answer)
        print(f"FILE: {f}; ANSWER: {answer}")
        time.sleep(INTERVAL_BT_REQ)

Subset: test: C:\Users\CrashedBboy\Projects\MISC\dataset\clic2020-professional\preprocessed\test
FILE: 0c0ba31b279da4b8a0b848b63d302e8b.png; ANSWER: 1. **Striking Objects:**  
   - Woman  
   - Glasses  
   - Lights  

2. **Detailed Description:**  
   - Woman: Caucasian, blonde, wearing striped turtleneck, looking contemplative.  
   - Glasses: Large, round, reflective lenses, fashionable accessory.  
   - Lights: Warm, glowing, string of bulbs, blurred background illuminating.  

3. **Overall Description:**  
   The image captures a reflective moment of a woman with blonde hair and fashionable glasses, illuminated by the warm glow of string lights. She appears contemplative amidst the bokeh effect created by the lights, adding a dreamy ambiance to the scene.
FILE: 0d57c99aa83305d93d8fc5d21910343a.png; ANSWER: 1. Striking Objects:
   - Person
   - Mountain
   - Lake

2. Detailed Description:
   - Person in yellow jacket standing on rock.
   - Snow-capped mountain shrouded in mist.
   

## CLIC2020 Valid Set

In [7]:
# Ask the model to describe every PNG of each subset in SUBSETS that has no
# answer file yet. For <name>.png the raw reply is written to
# <name>.response.txt and the extracted text to <name>.answer.txt, both next
# to the image. Images whose request fails are reported and skipped without
# writing anything, so re-running this cell retries exactly those images.
for subset in SUBSETS:
    dirname = path.abspath(path.join(DATASET_BASE, subset))
    print(f"Subset: {subset}: {dirname}")
    # Directory snapshot taken once; a set keeps the "already answered?"
    # lookup below constant-time instead of scanning a list per image.
    files = {f for f in os.listdir(dirname) if path.isfile(path.join(dirname, f))}
    for f in sorted(files):
        # Compare the real extension so names that merely contain ".png"
        # (e.g. "x.png.bak") are not treated as images.
        img_name, ext = path.splitext(f)
        if ext != ".png":
            continue

        # Skip images answered by an earlier run.
        if f"{img_name}.answer.txt" in files:
            continue

        img_fullpath = path.join(dirname, f)

        # Getting the base64 string
        base64_image = encode_image(img_fullpath)

        payload = {
            "model": "gpt-4-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": QUESTION
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            "max_tokens": 300
        }

        try:
            # The timeout (seconds) keeps one stalled connection from hanging
            # the whole batch indefinitely.
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=120,
            )
        except requests.RequestException as err:
            print(f"FILE: {f}; REQUEST FAILED: {err}")
            time.sleep(INTERVAL_BT_REQ)
            continue

        # A reply without choices[0].message.content (for example an error
        # body, or a body that is not JSON) has no answer to store.
        try:
            answer = json.loads(response.text)['choices'][0]['message']['content']
        except (ValueError, KeyError, IndexError, TypeError):
            answer = None
        if not isinstance(answer, str):
            print(f"FILE: {f}; NO ANSWER (HTTP {response.status_code}): {response.text[:200]}")
            time.sleep(INTERVAL_BT_REQ)
            continue

        # Writing to files. UTF-8 is set explicitly so characters outside the
        # platform's default code page cannot make the write fail.
        with open(path.join(dirname, f"{img_name}.response.txt"), "w", encoding="utf-8") as res_f:
            res_f.write(response.text)
        with open(path.join(dirname, f"{img_name}.answer.txt"), "w", encoding="utf-8") as ans_f:
            ans_f.write(answer)
        print(f"FILE: {f}; ANSWER: {answer}")
        time.sleep(INTERVAL_BT_REQ)

Subset: valid: D:\Projects\MISC\dataset\clic2020-professional\preprocessed\valid
FILE: alberto-montalesi-176097.png; ANSWER: 1. Striking Objects:
   - Large Jellyfish
   - Cluster of Small Jellyfish
   - Floating Particles

2. Detailed Description:
   - Large Jellyfish: Predominant, translucent with white spots, tentacles.
   - Cluster of Small Jellyfish: Numerous smaller jellyfish, mostly transparent.
   - Floating Particles: Tiny, speck-like debris suspended in water.

3. Overall Description:
   This image captures a mesmerizing scene underwater, featuring a larger jellyfish with distinctive white spots and flowing tentacles, surrounded by numerous smaller jellyfish and floating particles, all set against a deep blue backdrop.
FILE: alejandro-escamilla-6.png; ANSWER: 1. Striking Objects:
   - Laptop
   - Smartphone
   - Notebook

2. Detailed Description:
   - Laptop: Silver, open on wooden table, black screen.
   - Smartphone: Black, placed next to the laptop.
   - Notebook: Closed w