In [15]:
from openai import OpenAI
import pandas as pd
import os
import base64

# Module-level OpenAI client used by gpt4_vision_call; constructed with no explicit arguments.
# NOTE(review): presumably credentials are picked up from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

In [24]:
# Natural set: trial table (CSV) and the folder holding its images.
csv_path_natural = "data/natural/affordance_natural.csv"
img_folder_natural = "data/natural/images"

# Synthetic set: same layout under data/synthetic.
csv_path_synthetic = "data/synthetic/affordance_synthetic.csv"
img_folder_synthetic = "data/synthetic/images"

# Load both trial tables; paths are relative to the notebook's working directory.
df_natural = pd.read_csv(csv_path_natural)
df_synthetic = pd.read_csv(csv_path_synthetic)

In [25]:
# Bare expression: the notebook renders the loaded frame as this cell's output.
df_natural

Unnamed: 0,group_id,condition,prompt_type,key_verb,afforded_text,non-afforded_text,related_text,afforded_image,non-afforded_image,related_image
0,1,Brad was sitting in his office when an intrude...,explicit,protect,laptop computer,paper napkin,hunting rifle,laptopcomputer_natural.jpeg,papernapkin_natural.jpeg,huntingrifle_natural.jpeg
1,2,Jenny started her hike in the woods before her...,explicit,point,pen,nickel,string,pen_natural.jpeg,nickel_natural.jpeg,string_natural.jpeg
2,3,Art was having guests over for a cocktail part...,explicit,protect,floppy disks,pencils,old magazines,floppydisks_natural.jpeg,pencils_natural.jpeg,oldmagazines_natural.jpeg
3,4,Mike was freezing while walking up State Stree...,explicit,cover,newspaper,matchbook,ski-mask,newspaper_natural.jpeg,matchbook_natural.jpeg,skimask_natural.jpeg
4,5,"David was playing hide and seek with his son, ...",explicit,hide,clothes dryer,cookie jar,broom closet,clothesdryer_natural.jpeg,cookiejar_natural.jpeg,broomcloset_natural.jpeg
5,6,Bill was working on painting his barn. He reac...,explicit,stand,tractor,hammer,chair,tractor_natural.jpeg,hammer_natural.jpeg,chair_natural.jpeg
6,7,Phil was trying to get a barbecue going early ...,explicit,start,map,rock,bellows,map_natural.jpeg,rock_natural.jpeg,bellows_natural.jpeg
7,8,Peggy was babysitting her infant nephew while ...,explicit,occupy,plastic spoon,large refrigerator,red beanbag,plasticspoon_natural.jpeg,largerefrigerator_natural.jpeg,redbeanbag_natural.jpeg
8,9,Joe's roommates had taken all of the space in ...,explicit,use,vacuum cleaner,electric outlet,door knob,vacuumcleaner_natural.jpeg,electricoutlet_natural.jpeg,doorknob_natural.jpeg
9,10,"Jonathan opened up his car's trunk, but the ic...",explicit,chisel,seven iron,ham sandwich,screw driver,seveniron_natural.jpeg,hamsandwich_natural.jpeg,screwdriver_natural.jpeg


In [26]:
def encode_image(image_path):
    """Read the file at ``image_path`` in binary mode and return it base64-encoded.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes, as a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def encode_images_in_folder(folder_path):
    """Base64-encode every regular file directly inside ``folder_path``.

    Entries that are not regular files (e.g. sub-directories) are skipped;
    no filtering by file extension is done.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        dict mapping each file name (not the full path) to the string
        returned by ``encode_image`` for that file.
    """
    named_paths = (
        (entry, os.path.join(folder_path, entry))
        for entry in os.listdir(folder_path)
    )
    return {
        entry: encode_image(full_path)
        for entry, full_path in named_paths
        if os.path.isfile(full_path)
    }

# Eagerly encode every file in both image folders; each dict is keyed by file name.
encoded_natural_images = encode_images_in_folder(img_folder_natural)
encoded_synthetic_images = encode_images_in_folder(img_folder_synthetic)

In [32]:
def generate_data_url(base64_image, image_name):
    """Wrap a base64 payload in a ``data:`` URL.

    The MIME type is ``image/png`` when ``image_name`` ends with ``.png``
    (case-insensitive) and ``image/jpeg`` for every other name.

    Args:
        base64_image: Base64 text of the image.
        image_name: File name used only to choose the MIME type.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.
    """
    is_png = image_name.lower().endswith('.png')
    mime_type = 'image/png' if is_png else 'image/jpeg'
    return f"data:{mime_type};base64,{base64_image}"

def gpt4_vision_call(prompt, base64_image, image_name=""):
    """Send one passage/image pair to the vision model and return its reply text.

    Args:
        prompt: Passage text, sent as the text part of the user message.
        base64_image: Base64-encoded image payload (without a ``data:`` prefix).
        image_name: File name of the image, used only to choose the MIME type
            of the data URL. Optional; with the default empty string
            ``generate_data_url`` labels the payload ``image/jpeg``.

    Returns:
        The ``content`` of the first choice in the chat-completions response.
    """
    # `image_name` used to be read from an outer scope where it is not
    # guaranteed to exist (NameError, or a stale kernel global); it is now an
    # explicit, optional argument.
    data_url = generate_data_url(base64_image, image_name)

    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {"role": "system",
             "content": """In this task, you will read short passages and look at an image of an object.
Please rate how sensible it would be to take action described in the last sentence using the object in
the image in the context of the whole passage. The scale goes from 1 (virtual nonsense) to 7 (completely sensible).
Be sure to read the sentences carefully. Please respond only with a number between 1 and 7.
"""},

            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": data_url,
                        },
                    },
                ],
            }
        ],
        max_tokens=300
    )
    return response.choices[0].message.content

def iterate_gpt4v(df, encoded_images_dict):
    """Query the model for each row's three images and store the replies on ``df``.

    For every row, the ``condition`` text is paired in turn with the images
    named in the ``afforded_image``, ``non-afforded_image`` and
    ``related_image`` columns. Each image name is looked up in
    ``encoded_images_dict``; a missing name is recorded as the string
    ``"Image encoding not found"`` and no model call is made for it.

    Args:
        df: DataFrame with the ``condition`` column and the three image-name
            columns listed above.
        encoded_images_dict: Mapping of image file name to base64 text.

    Returns:
        The same ``df`` object, modified in place with three added columns:
        ``gpt4v_result_afforded``, ``gpt4v_result_non_afforded`` and
        ``gpt4v_result_related``.
    """
    # Source image column -> column that receives the model's replies.
    result_columns = {
        'afforded_image': 'gpt4v_result_afforded',
        'non-afforded_image': 'gpt4v_result_non_afforded',
        'related_image': 'gpt4v_result_related',
    }
    results = {result_column: [] for result_column in result_columns.values()}

    for _, row in df.iterrows():
        condition = row['condition']

        for image_column, result_column in result_columns.items():
            image_name = row[image_column]
            if image_name in encoded_images_dict:
                base64_image = encoded_images_dict[image_name]
                # Pass the file name along so the data URL gets the right MIME type.
                result = gpt4_vision_call(condition, base64_image, image_name)
            else:
                result = "Image encoding not found"
            results[result_column].append(result)

    for result_column, values in results.items():
        df[result_column] = values

    return df


In [33]:
# Rebind each name to the frame returned by iterate_gpt4v, which carries the
# added gpt4v_result_afforded / _non_afforded / _related columns.
df_natural = iterate_gpt4v(df_natural, encoded_natural_images)
df_synthetic = iterate_gpt4v(df_synthetic, encoded_synthetic_images)

In [37]:
# Output directory for the result tables, relative to the working directory.
folder_path = 'gpt4v_results'

# to_csv does not create missing parent directories, so make sure the folder
# exists first; exist_ok keeps re-running this cell harmless.
os.makedirs(folder_path, exist_ok=True)

file_path_natural = os.path.join(folder_path, 'df_natural.csv')
file_path_synthetic = os.path.join(folder_path, 'df_synthetic.csv')

# index=False: the row index is not written as an extra column.
df_natural.to_csv(file_path_natural, index=False)
df_synthetic.to_csv(file_path_synthetic, index=False)