In [1]:
import os
import json
import torch
import base64
import requests
import argparse
import numpy as np
from PIL import Image
from tqdm import tqdm
from datasets import load_dataset

In [2]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [3]:
def write_completion_request(prompt, base64_image, gpt_model, max_tokens=10, image_mime="image/jpeg"):
    """
    Compose a chat-completion request body with one text part and one image part.

    Args:
        prompt: Text placed in the ``text`` part of the single user message.
        base64_image: Base64-encoded image data, embedded as a ``data:`` URL.
        gpt_model: Model identifier stored under the ``"model"`` key.
        max_tokens: Value stored under the ``"max_tokens"`` key. Defaults to
            10, the value that was previously hard-coded.
        image_mime: MIME type declared in the data URL. Defaults to
            ``"image/jpeg"`` (the previously hard-coded value); pass
            ``"image/png"`` when the encoded file is a PNG.

    Returns:
        dict: The request body, ready to be serialised as JSON.
    """
    image_url = f"data:{image_mime};base64,{base64_image}"
    completion = {
      "model": gpt_model,
      "messages": [
          {"role": "user",
           "content": [
               {"type": "text", "text": prompt},
               {"type": "image_url", "image_url": {"url": image_url}}
           ]}
      ],
      "max_tokens": max_tokens
    }
    return completion

In [4]:
# Model queried by every request in this notebook.
gpt_model="gpt-4o-mini-2024-07-18"

# Read the API key from the environment instead of relying on a variable left
# over in the kernel: no visible cell defines `api_key`, so a fresh
# "Restart & Run All" would otherwise fail with a NameError when the
# Authorization header is built below.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this notebook."
    )

# NOTE(review): the inference cells also read a global `prompt` that is not
# defined in any visible cell -- define it before running them.

# Image names for each split, stored as JSON under the keys 'test', 'fp', 'fn'.
with open('../2024-spatial-join-exp/join_task_data/index.txt', 'r') as f:
    index = json.load(f)
test_index=index['test']
fp_index=index['fp']
fn_index=index['fn']

# Dataset splits matching the three index lists above.
data=load_dataset('beanham/spatial_join')
test_data=data['test']
fp_data=data['fp']
fn_data=data['fn']

# Directory prefixes for the images of each category.
p_path='../2024-spatial-join-exp/join_task_imgs/positive/'
n_path='../2024-spatial-join-exp/join_task_imgs/negative/'
fp_path = '../2024-spatial-join-exp/join_task_imgs/false_positive/'
fn_path = '../2024-spatial-join-exp/join_task_imgs/false_negative/'

# Endpoint and headers shared by every request.
api_web = "https://api.openai.com/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
}

In [None]:
def evaluate_gpt_vision(index, data, split, paths, timeout=60):
    """Query the model once per entry of ``index`` and collect the replies.

    Relies on the notebook-level names ``prompt``, ``gpt_model``, ``api_web``
    and ``headers``, and on ``encode_image`` / ``write_completion_request``.

    Args:
        index: Sequence of image names (without the ``.png`` extension).
        data: Object indexable by ``'sidewalk'`` and ``'road'``; element ``i``
            of each column is paired with ``index[i]``.
        split: ``'test'`` chooses between the positive and negative image
            directories based on whether ``'positive'`` occurs in the image
            name; ``'fp'`` uses the false-positive directory; any other value
            uses the false-negative directory.
        paths: Mapping of directory prefixes. Reads ``'p_path'`` and
            ``'n_path'`` for the test split, ``'fp_path'`` for ``'fp'`` and
            ``'fn_path'`` otherwise.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot hang the loop indefinitely.

    Returns:
        list: The message content of the first choice of each response, in
        the same order as ``index``.

    Raises:
        requests.HTTPError: If the API answers with an error status. Without
            this check the failure would surface as ``KeyError: 'choices'``.
    """
    # Look the two columns up once instead of re-indexing `data` by column
    # name on every iteration.
    sidewalks = data['sidewalk']
    roads = data['road']

    model_outputs=[]
    for i in tqdm(range(len(index))):
        img_name=index[i]
        sidewalk = "\nSidewalk:\n"+str(sidewalks[i])
        road = "\n\nRoad:\n"+str(roads[i])
        message=prompt+sidewalk+road
        if split=='test':
            if 'positive' in img_name:
                img_path=paths['p_path']+img_name+'.png'
            else:
                img_path=paths['n_path']+img_name+'.png'
        elif split=='fp':
            img_path=paths['fp_path']+img_name+'.png'
        else:
            img_path=paths['fn_path']+img_name+'.png'

        base64_image = encode_image(img_path)
        completion = write_completion_request(message, base64_image, gpt_model)
        response = requests.post(api_web, headers=headers, json=completion, timeout=timeout)
        # Fail loudly on rate limits / auth errors rather than on a missing key.
        response.raise_for_status()
        model_outputs.append(response.json()['choices'][0]['message']['content'])
    return model_outputs

In [5]:
# Run the shared inference helper on the test split instead of repeating its
# loop here; for split 'test' it picks the positive or negative image
# directory from each image name.
test_outputs = evaluate_gpt_vision(
    test_index,
    test_data,
    'test',
    {'p_path': p_path, 'n_path': n_path},
)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 762/762 [40:50<00:00,  3.22s/it]


In [8]:
# Run the shared inference helper on the false-positive split instead of
# repeating its loop here; for split 'fp' it reads images from `fp_path`.
fp_outputs = evaluate_gpt_vision(
    fp_index,
    fp_data,
    'fp',
    {'fp_path': fp_path},
)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [04:02<00:00,  2.89s/it]


In [10]:
# Run the shared inference helper on the false-negative split instead of
# repeating its loop here; for split 'fn' it reads images from `fn_path`.
fn_outputs = evaluate_gpt_vision(
    fn_index,
    fn_data,
    'fn',
    {'fn_path': fn_path},
)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 82/82 [03:54<00:00,  2.86s/it]


In [13]:
# Directory that receives the saved model outputs. It is created up front
# because np.save does not create missing parent directories, and a failure
# here would only show up after the long inference run.
save_path='inference_results/'
os.makedirs(save_path, exist_ok=True)

# Reuse the model name from the configuration cell so the output file names
# always match the model that was actually queried.
model_id=gpt_model

In [14]:
# Write one .npy file per split; the split name is the only part of the file
# name that differs between the three outputs.
for split_name, split_outputs in (
    ("test", test_outputs),
    ("fp", fp_outputs),
    ("fn", fn_outputs),
):
    np.save(
        save_path+f"{model_id}-vision_finetuned_False_fewshot_False_{split_name}.npy",
        split_outputs,
    )