## ChatGPT

Ask VQA to ChatGPT.

Docs:

* how to upload images: https://platform.openai.com/docs/guides/vision
* types of models: https://stackoverflow.com/questions/75774873/openai-api-error-this-is-a-chat-model-and-not-supported-in-the-v1-completions
* token limits: https://platform.openai.com/settings/organization/limits
* models: https://platform.openai.com/docs/models/model-endpoint-compatibility

In [1]:
#!pip install openai

In [None]:
#####dir_api = '/Users/jnaiman/Dropbox/Paper_JCDL2025/chatgpt_api/' # where to store API results

dir_api = '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/LLM_outputs/chatgpt_api/' #store API results for example-hists
model = "gpt-5-nano-2025-08-07"

#dir_api = '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/LLM_outputs/chatgpt_api_mini/' #store API results for example-hists
#model = "gpt-5-mini-2025-08-07"

key_file = '/Users/jnaiman/.openai/key.txt'

jsons_dir = '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/jsons/' # directory where jsons created with figure are stored
imgs_dir = '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/imgs/' # where images are stored

# for saving temp images for reading in
tmp_dir = '/Users/jnaiman/Downloads/tmp/'

img_format = 'jpeg'

In [13]:
import openai
import base64
from openai import OpenAI
from PIL import Image
import numpy as np
import json
import re
import pickle
import os
from glob import glob

from sys import path
path.append('../')
from utils.llm_utils import parse_qa, load_image, get_img_json_pair, parse_for_errors

In [14]:
def send_to_chatgpt(question_list, client, image_path, encoded_image,
                    model ="gpt-4o-mini", 
                    tmp_dir = '/Users/jnaiman/Downloads/tmp/',
                    test_run = True, fac=1.0, img_format='png',
                    verbose=True, 
                    subset_questions_by_keys = None):
    # models as of ~May 2024
    #model="gpt-4",
    #model="gpt-4o",
    #model ="gpt-4o-mini",
    #model ="gpt-3.5-turbo",
    #model="gpt-3.5-turbo-instruct",
    """
    subset_questions_by_keys : only use a subset of questions to ping to model, e.g. ['median', 'how many gaussians']
    """

    iFac = 1.0 # just in case we want to progressively make the image smaller
    success = False
    is_subset = False
    while not success:
        try:
            # current question format is: ['persona', 'context','question', 'format'] (see readme in example_hists)
            question = question_list['context'] + " " + question_list['question'] + " " + question_list['format']
            # lowercase the first word, just in case
            question = question.lstrip() # no whitespace
            question = question[0].lower() + question[1:]
            if verbose: print('   on question:',question)
            # Prepare the API request
            prompt = f"I am going to show you an image. Here is the image: [Image: {encoded_image}]. Now, {question}"
            prompt_save = f"I am going to show you an image. Here is the image: [Image: <ENCODED IMAGE>]. Now, {question}"

            if subset_questions_by_keys is not None and type(subset_questions_by_keys) == type([]): # make sure list
                for s in subset_questions_by_keys:
                    if s in question:
                        is_subset = True
            else:
                is_subset = True
            
            if not test_run and is_subset:
                # Send the request to the GPT-4o API
                response = client.chat.completions.create(
                    model = model,
                    messages=[
                        {"role": "system", "content": question_list['persona']},
                        {"role":"user", "content": [
                            {
                            "type": "text",
                            "text": prompt
                            },
                            {
                            "type": "image_url",
                            "image_url": {
                                #"url": f"data:image/jpeg;base64,{encoded_image}" # PNG encoding??? does thsi need to change??
                                "url": f"data:image/{img_format};base64,{encoded_image}" 
                            }
                            }
                        ]
                        }
                    ]
                )
                success = True
            elif not is_subset:
                response = 'Not asked'
                success = True
            else:
                success = True
        except Exception as e:
            print(e)
            new_fac = fac/iFac
            print('new fac = ', new_fac)
            encoded_image = load_image(image_path,fac=new_fac, tmp_dir=tmp_dir)
            iFac += 1
    
    if not test_run and is_subset:
        # Get the response from the API
        answer = response.choices[0].message.content
        question_list['raw answer'] = answer
        # format answer
        answer_format = answer.replace("```json\n",'').replace("\n```",'')
        try:
            question_list['Response'] = json.loads(answer_format)
        except:
            question_list['Response'] = answer_format
            question_list['Error'] = 'JSON formatting'
        question_list['Response String'] = answer_format
        success = True
    elif not is_subset:
        answer = response
        question_list['raw answer'] = answer
        question_list['Response'] = answer
        question_list['Response String'] = answer
        success = True
    else:
        question_list['Response'] = 'TEST RUN'
        question_list['Response String'] = 'TEST RUN'

    return question_list, prompt_save

In [15]:
def print_qa(pickle_file, qa_in, subset_questions_by_keys=None, showNotAsked=False):
    if subset_questions_by_keys is not None:
        print('---------- ASKED ----------')
    print(pickle_file)
    print('*********')
    for qa_pairs in qa_in:
        hasSub = False
        if subset_questions_by_keys is not None and type(subset_questions_by_keys) == type([]):
            for s in subset_questions_by_keys:
                if s in qa_pairs['prompt']:
                    hasSub = True
        else:
            hasSub = True

        if hasSub:
            print('Prompt:', qa_pairs['prompt'])
            print('   Real A:', qa_pairs['A'])
            print('ChatGPT A:', qa_pairs['Response'])
            print('')

    if subset_questions_by_keys is not None and showNotAsked:
        print('')
        print('')
        print('------------ NOT ASKED -----------')
        for qa_pairs in qa_in:
            hasSub = False
            if subset_questions_by_keys is not None and type(subset_questions_by_keys) == type([]):
                for s in subset_questions_by_keys:
                    if s in qa_pairs['prompt']:
                        hasSub = True

            if not hasSub:
                print('Prompt:', qa_pairs['prompt'])
                print('   Real A:', qa_pairs['A'])
                print('ChatGPT A:', qa_pairs['Response'])
                print('')

In [16]:
# setup
with open(key_file,'r') as f:
    api_key = f.read()

client = OpenAI(
  api_key=api_key.strip(),  # this is also the default, it can be omitted
)

In [17]:
jsons_to_parse = glob(jsons_dir + '/*.json')
jsons_to_parse[:3]

['/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/jsons/id_0077.json',
 '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/jsons/id_0020.json',
 '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/jsons/id_0036.json']

Look at a possible questions:

In [18]:
json_path = jsons_to_parse[0]
verbose = False
restart = False

fac = 0.5

img_path = imgs_dir + json_path.split('/')[-1].removesuffix('.json') + '.' + img_format
encoded_image, img_format_media, base_json, err = get_img_json_pair(img_path, json_path, dir_api, 
                                                    fac=fac, restart=restart,
                                                    tmp_dir=tmp_dir)

base_json['VQA']

{'Level 1': {'Figure-level questions': {},
  'Plot-level questions': {'nbars': {'plot0': {'Q': 'You are a helpful assistant that can analyze images.  How many bars are there in the specified figure panel? Please format the output as a json as {"nbars":""} for this figure panel, where the "nbars" value should be an integer.',
     'A': {'nbars': 20},
     'persona': 'You are a helpful assistant that can analyze images.',
     'context': '',
     'question': 'How many bars are there in the specified figure panel?',
     'format': 'Please format the output as a json as {"nbars":""} for this figure panel, where the "nbars" value should be an integer.'}}}},
 'Level 2': {'Plot-level questions': {'minimum': {'plot0': {'Q': 'You are a helpful assistant that can analyze images.  What is the minimum value of the data in this figure panel?  Please format the output as a json as {"minimum x":""} for this figure panel, where the "minimum x" value should be a float, calculated from the data values u

In [20]:
iMax = 150 # should be 80
verbose = False
test_run = False # run w/o actually pinging openai
restart = False
#model ="gpt-4o-mini"

subset_questions_by_keys = ['median', 'ngaussians'] # set to None to do all questions

fac = 0.5
for ijson,json_path in enumerate(jsons_to_parse):
    if ijson >= iMax:
        continue

    print('on', ijson+1, 'of', min(iMax,len(jsons_to_parse)))

    # get image and base json
    img_path = imgs_dir + json_path.split('/')[-1].removesuffix('.json') + '.' + img_format
    encoded_image, img_format_media, base_json, err = get_img_json_pair(img_path, json_path, dir_api, 
                                                      fac=fac, restart=restart,
                                                      tmp_dir=tmp_dir)

    if err:
        continue


    ###### create QA ########
    qa = []
    
    for k,v in base_json['VQA']['Level 1']['Figure-level questions'].items():
        out = {'Q':v['Q'], 'A':v['A'], 'Level':'Level 1', 'type':'Figure-level questions', 'Response':""}
        qa.append(out)
    
    # what kinds?
    types = ['(words + list)', '(words)']
    
    # get uniques
    level_parse = 'Level 1'
    plot_level = 'Plot-level questions'
    qa = parse_qa(level_parse, plot_level, qa, base_json['VQA'], types)
    
    level_parse = 'Level 2'
    plot_level = 'Plot-level questions'
    qa = parse_qa(level_parse, plot_level, qa, base_json['VQA'], types)
    
    level_parse = 'Level 3'
    plot_level = 'Plot-level questions'
    qa = parse_qa(level_parse, plot_level, qa, base_json['VQA'], types)

    responses = []
    for question_list in qa:
        response, prompt = send_to_chatgpt(question_list, client, img_path, encoded_image,
                    model = model, img_format = img_format_media,
                    test_run = test_run, subset_questions_by_keys=subset_questions_by_keys)
        responses.append(response)
        question_list['prompt'] = prompt

    # parse for errors
    qa = parse_for_errors(qa)

    # dump to file
    if not test_run:
        with open(dir_api + json_path.split('/')[-1].removesuffix('.json')+ '.pickle', 'wb') as ff:
            pickle.dump([qa, model], ff)
        print("just saved:", dir_api + json_path.split('/')[-1].removesuffix('.json')+ '.pickle')
    else:
        print('Would store at:', dir_api + json_path.split('/')[-1].removesuffix('.json')+ '.pickle')


on 1 of 80
   on question: how many bars are there in the specified figure panel? Please format the output as a json as {"nbars":""} for this figure panel, where the "nbars" value should be an integer.
   on question: what is the maximum value of the data in this figure panel?  Please format the output as a json as {"maximum x":""} for this figure panel, where the "maximum x" value should be a float, calculated from the data values used to create the plot.
   on question: what is the mean value of the data in this figure panel?  Please format the output as a json as {"mean x":""} for this figure panel, where the "mean x" value should be a float, calculated from the data values used to create the plot.
   on question: what is the median value of the data in this figure panel?  Please format the output as a json as {"median x":""} for this figure panel, where the "median x" value should be a float, calculated from the data values used to create the plot.
   on question: what is the minim

## Look at data

Check out one, if you wanna:

In [40]:
pickles = glob(dir_api + '*.pickle')
pickles[:5]

['/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/LLM_outputs/chatgpt_api/id_0039.pickle',
 '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/LLM_outputs/chatgpt_api/id_0044.pickle',
 '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/LLM_outputs/chatgpt_api/id_0027.pickle',
 '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/LLM_outputs/chatgpt_api/id_0056.pickle',
 '/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/LLM_outputs/chatgpt_api/id_0035.pickle']

In [None]:
ifile = 0
with open(pickles[ifile], 'rb') as f:
    qa_in = pickle.load(f)[0]

In [42]:
print_qa(pickles[ifile], qa_in, subset_questions_by_keys=subset_questions_by_keys, showNotAsked=False)

---------- ASKED ----------
/Users/jnaiman/LLM_VQA_JCDL2025/example_hists/LLM_outputs/chatgpt_api/id_0039.pickle
*********
Prompt: I am going to show you an image. Here is the image: [Image: <ENCODED IMAGE>]. Now, what are the median data values  in this figure panel?  Please format the output as a json as {"median x":""} for this figure panel, where the "median x" value should be a float, calculated from the data values used to create the plot.
   Real A: 0.2655385491960254
ChatGPT A: {'median x': 0.27}

Prompt: I am going to show you an image. Here is the image: [Image: <ENCODED IMAGE>]. Now, how many gaussians were used to generate the data for the plot in the figure panel? Please format the output as a json as {"ngaussians":""} for this figure panel, where the "ngaussians" value should be an integer.
   Real A: 3
ChatGPT A: {'ngaussians': 3}

