# Script to prompt LLMs

In [53]:
# import packages
import requests
import json
import pandas as pd
import os
import openai
from openai import OpenAI
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tqdm import tqdm

from visualization import *
from utils import *

# import warning
import warnings
warnings.filterwarnings("ignore")

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# If OpenAI used
# Get API key from the environment variable
api_key = os.environ.get('OPENAI_API_KEY')

# Set the API key
openai.api_key = api_key

In [132]:
# Define function to create a prompt
def create_prompt(object_name, N_responses):
    prompt = f"""
    What is a surprising use for a {object_name}?
    
    Generate exactly {N_responses} alternative uses for the object [{object_name}]. 
    
    Each alternative use should be a concise sentence and follow the same format as the examples below: 
    Sock, Color it and maybe make a snake
    Sock, Use it as a puppet
    """
    return prompt


# Define function to call the LLM API
def call_openai_api(prompt, llm):
    client = OpenAI()
    
    response = client.chat.completions.create(
        model = llm,
        #model = "gpt-3.5-turbo-0125",
        #model="gpt-4-0125-preview", 
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
            ]#,
        #seed=seed,
        #max_tokens=200,
        #temperature=temperature,
        )
    return response


# Define function to store the result in a JSON file
def store_result_json(object_name, alternative_uses, path_folder_to_save, llm_name):
    result = [{"object_name": object_name, "alternative_uses": use} for use in  alternative_uses.split("\n") if use.strip()]
    with open(path_folder_to_save + f"/{llm_name}_aut_{object_name}.json", "w") as json_file:
        json.dump(result, json_file)


# Define function to create a CSV file and Pandas DataFrame
def create_csv_and_dataframe(object_name, path_folder_to_save, llm_name):
    with open(path_folder_to_save + f"/{llm_name}_aut_{object_name}.json", "r") as json_file:
        data = json.load(json_file)
    df = pd.DataFrame(data)
    
    # remove the first and last lines of the alternative uses
    df = df.iloc[1:-1].reset_index(drop=True)
    
    # remove prefix
    df['alternative_uses']= df['alternative_uses'].apply(lambda x: x.split(", ", 1)[1])
    
    #rename the columns to fit ocsai format
    df = df.rename(columns={"object_name": "prompt", "alternative_uses": "response"})
    
    # save for evaluation
    df.to_csv(path_folder_to_save + f"/{llm_name}_aut_{object_name}.csv", index=False)
    
    # Delete the JSON file
    os.remove(path_folder_to_save + f"/{llm_name}_aut_{object_name}.json")
    
    return df

# Define function to estimate the price of an OpenAI API request
def estimate_price(prompt_tokens, response_tokens, llm_name):
    # Define your pricing model (e.g., price per token)
    if llm_name == "gpt35":
        price_per_input_token = 0.50 / 1e6 # gpt3.5-turbo-0125
        price_per_output_token = 1.5 / 1e6 # gpt3.5-turbo-0125
    elif llm_name == "gpt4":
        price_per_input_token = 10 / 1e6 # gpt4-0125-preview
        price_per_output_token = 30 / 1e6 # gpt4-0125-preview
    else:
        price_per_input_token = 0
        price_per_output_token = 0
        print("Error with the price estimation (wrong name of model)")
    
    # Estimate the price based on the total number of tokens
    price = (prompt_tokens * price_per_input_token) + (response_tokens * price_per_output_token)
    return price

def pipeline_object(object_name, llm, llm_name, N_responses, path_folder_to_save):
    prompt = create_prompt(object_name, N_responses)
    response = call_openai_api(prompt, llm)
    
    response_content = response.choices[0].message.content
    system_fingerprint = response.system_fingerprint
    prompt_tokens = response.usage.prompt_tokens
    completion_tokens = response.usage.total_tokens - response.usage.prompt_tokens

    print(f"System fingerprint: {system_fingerprint}")
    print(f"Prompt tokens: {prompt_tokens}")
    print(f"Completion tokens: {completion_tokens}")

    price = estimate_price(prompt_tokens, completion_tokens, llm_name)
    print(f"Estimated price: ${price}")
    
    # store
    store_result_json(object_name, response_content, path_folder_to_save, llm_name)
    df = create_csv_and_dataframe(object_name, path_folder_to_save, llm_name)
    return df

In [96]:
objects = ['brick', 'box', 'knife', 'rope']

## Call to OpenAI API

In [130]:
# Run the pipeline for each object
object_name = objects[0]
path_folder_to_save = "./data_to_process/prompt_6"
llm = "gpt-3.5-turbo-0125"
llm_name = "gpt35"
#llm = "gpt-4-0125-preview"
#llm_name = "gpt4"

df = pipeline_object(object_name, llm, llm_name, 10, path_folder_to_save)
print(f"length of df: {len(df)}")
df.head()

System fingerprint: None
Prompt tokens: 85
Completion tokens: 127
Estimated price: $0.000233
length of df: 8


Unnamed: 0,prompt,response
0,brick,Arrange them for a rustic garden path
1,brick,Stack them vertically for a makeshift barbeque
2,brick,Use as weights for makeshift outdoor workouts
3,brick,Carve out a hollow space for a unique plant pot
4,brick,Create a DIY doorstop by wrapping it in fabric


### Evaluate originality with ocsai

In [108]:
#%%capture
for i in tqdm(range(len(df))):
    eval_ocsai = call_api_ocsai(df.loc[i, 'prompt'], df.loc[i, 'response'])['scores'][0]
    df.loc[i, 'originality'] = eval_ocsai['originality']
    df.loc[i, 'elaboration'] = eval_ocsai['elaboration']
df.head()

100%|██████████| 100/100 [03:22<00:00,  2.02s/it]


Unnamed: 0,prompt,response,originality,elaboration
0,rope,Weave a hammock for backyard relaxation.,1.3,6.0
1,rope,Create a vertical garden by hanging pots.,3.3,7.0
2,rope,Make a sturdy leash for your pet.,1.3,7.0
3,rope,"Design a unique piece of jewelry, like a brace...",2.0,6.0
4,rope,Craft a rustic picture frame.,2.7,5.0


In [109]:
# check if there are any NaNs
if df.isnull().values.any():
    print("There are NaNs in the DataFrame")
    N_rows_deleted = len(df[df.isnull().any(axis=1)])
    
    # remove lines with NaNs
    df = df.dropna().reset_index(drop=True)
    df.to_csv(path_folder_to_save + f"/{llm_name}_aut_{object_name}.csv", index=False)
    print(f"Saved successfully by deleting {N_rows_deleted} row(s) with NaNs")
else:
    df.to_csv(path_folder_to_save + f"/{llm_name}_aut_{object_name}.csv", index=False)
    print("Saved successfully")

Saved successfully


### Combine different objects together

In [110]:
df_brick = pd.read_csv(path_folder_to_save + f"/{llm_name}_aut_brick.csv")
df_box   = pd.read_csv(path_folder_to_save + f"/{llm_name}_aut_box.csv")
df_knife = pd.read_csv(path_folder_to_save + f"/{llm_name}_aut_knife.csv")
df_rope  = pd.read_csv(path_folder_to_save + f"/{llm_name}_aut_rope.csv")

# merge the datasets for all objects
df_merged = pd.concat([df_brick, df_box, df_knife, df_rope])
print(f"Length of the merged dataset: {len(df_merged)}")
print(f"Object: {df_merged['prompt'].value_counts()}")

# save dataset: change name
df_merged.to_csv(path_folder_to_save + f"/{llm_name}_merged_{len(df_merged)}.csv", index=False) 

df_merged.head()

Length of the merged dataset: 391
Object: prompt
knife    100
rope     100
box       97
brick     94
Name: count, dtype: int64


Unnamed: 0,prompt,response,originality,elaboration
0,brick,Doorstop to keep doors from closing,1.0,6.0
1,brick,Paperweight to hold down papers or documents,1.0,7.0
2,brick,Outdoor step for a small elevation adjustment,1.0,7.0
3,brick,Grill press to ensure even cooking of meats or...,3.0,10.0
4,brick,Plant stand to elevate a small planter for bet...,2.0,11.0


## Generate and evaluate for all objects at once

In [135]:
#%%capture # delete the output because OCSAI API calls are outputting warnings

### PARAMETERS ###
path_folder_to_save = "./data_to_process/prompt_6"
#llm = "gpt-3.5-turbo-0125"
#llm_name = "gpt35"
llm = "gpt-4-0125-preview"
llm_name = "gpt4"
Nb_alternative_uses = 100
##################

for i in range(4):
    print("Start generating alternative uses for the object: ", objects[i])
    object_name = objects[i]

    df = pipeline_object(object_name, llm, llm_name, Nb_alternative_uses, path_folder_to_save)

    for i in tqdm(range(len(df))):
        eval_ocsai = call_api_ocsai(df.loc[i, 'prompt'], df.loc[i, 'response'])['scores'][0]
        df.loc[i, 'originality'] = eval_ocsai['originality']
        df.loc[i, 'elaboration'] = eval_ocsai['elaboration']
        
    # check if there are any NaNs
    if df.isnull().values.any():
        print("There are NaNs in the DataFrame")
    else:
        df.to_csv(path_folder_to_save + f"/{llm_name}_aut_{object_name}.csv", index=False)
        print("Saved successfully")
    
df_brick = pd.read_csv(path_folder_to_save + f"/{llm_name}_aut_brick.csv")
df_box   = pd.read_csv(path_folder_to_save + f"/{llm_name}_aut_box.csv")
df_knife = pd.read_csv(path_folder_to_save + f"/{llm_name}_aut_knife.csv")
df_rope  = pd.read_csv(path_folder_to_save + f"/{llm_name}_aut_rope.csv")

# merge the datasets for all objects
df_merged = pd.concat([df_brick, df_box, df_knife, df_rope])

# save dataset
df_merged.to_csv(path_folder_to_save + f"/{llm_name}_merged_{len(df_merged)}.csv", index=False) 

Start generating alternative uses for the object:  brick
System fingerprint: None
Prompt tokens: 86
Completion tokens: 1488
Estimated price: $0.0455


100%|██████████| 98/98 [03:19<00:00,  2.04s/it]


Saved successfully
Start generating alternative uses for the object:  box
System fingerprint: None
Prompt tokens: 86
Completion tokens: 1127
Estimated price: $0.03467


100%|██████████| 98/98 [03:18<00:00,  2.03s/it]


Saved successfully
Start generating alternative uses for the object:  knife
System fingerprint: None
Prompt tokens: 86
Completion tokens: 1302
Estimated price: $0.039920000000000004


100%|██████████| 98/98 [03:16<00:00,  2.01s/it]


Saved successfully
Start generating alternative uses for the object:  rope
System fingerprint: None
Prompt tokens: 87
Completion tokens: 1196
Estimated price: $0.036750000000000005


100%|██████████| 99/99 [03:20<00:00,  2.02s/it]

Saved successfully





In [136]:
print(f"Length of the merged dataset: {len(df_merged)}")
print(f"Object: {df_merged['prompt'].value_counts()}")
df_merged.head()

Length of the merged dataset: 393
Object: prompt
rope     99
brick    98
box      98
knife    98
Name: count, dtype: int64


Unnamed: 0,prompt,response,originality,elaboration
0,brick,Stack under furniture legs to raise the height.,1.0,8.0
1,brick,Create a makeshift bookend on a shelf.,1.0,7.0
2,brick,Use as a hammer for small nails.,2.0,7.0
3,brick,Construct a flower bed edge in your garden.,1.0,8.0
4,brick,Break up into pieces for drainage in plant pots.,2.0,9.0
