# Script to prompt LLMs

In [1]:
# import packages
import requests
import json
import pandas as pd
import os
import openai
from openai import OpenAI
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tqdm import tqdm

from visualization import *
from utils import *

In [2]:
# Reload imported modules automatically before running code, so edits to the
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# If OpenAI used
# Get API key from the environment variable (None when OPENAI_API_KEY is unset)
api_key = os.environ.get('OPENAI_API_KEY')

# Set the API key on the openai module
openai.api_key = api_key

In [24]:
# Define function to create a prompt
def create_prompt(object_name, N_responses):
    """Build the alternative-uses prompt sent to the LLM.

    Args:
        object_name: Object the model should find alternative uses for.
        N_responses: Number of alternative uses requested in the prompt.

    Returns:
        The prompt text, with the object name and requested count filled in and
        two "Sock, ..." lines showing the expected answer format.
    """
    return f"""
    You are a very creative, open-minded person and can propose creative, out-of-the-box ideas while staying realistic. 
    
    You are meant to assist students in group ideation. They are asked to propose alternative
    uses for an object and you should propose yours to give them ideas as well as inspire them to 
    explore other uses. Your ideas will be even more appreciated if they are original or useful in real-life or both.
    
    Generate exactly {N_responses} alternative uses for the object [{object_name}]. 
    
    Each alternative use should be a concise sentence and follow the same format as the examples below: 
    Sock, Color it and maybe make a snake
    Sock, Use it as a puppet
    """


# Define function to call the LLM API
def call_openai_api(prompt, llm, seed=None, temperature=None, max_tokens=None, client=None):
    """Send a single-turn chat completion request and return the raw response.

    Args:
        prompt: Text sent as the user message.
        llm: Model identifier forwarded as ``model``.
        seed: Optional sampling seed; forwarded only when not None.
        temperature: Optional sampling temperature; forwarded only when not None.
        max_tokens: Optional completion length cap; forwarded only when not None.
        client: Optional object exposing ``chat.completions.create``. A new
            ``OpenAI()`` client is created when omitted.

    Returns:
        Whatever ``client.chat.completions.create`` returns.
    """
    if client is None:
        client = OpenAI()

    request = {
        "model": llm,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    }

    # Only forward the controls the caller set, so calling with just
    # (prompt, llm) sends exactly the same request as before.
    optional = {"seed": seed, "temperature": temperature, "max_tokens": max_tokens}
    request.update({key: value for key, value in optional.items() if value is not None})

    return client.chat.completions.create(**request)


# Define function to store the result in a JSON file
def store_result_json(object_name, alternative_uses, path_folder_to_save, llm_name):
    """Split the model reply into lines and save them as JSON records.

    Args:
        object_name: Object the uses were generated for; stored on every record
            and used in the file name.
        alternative_uses: Raw reply text, one alternative use per line.
        path_folder_to_save: Folder receiving the file; created if missing.
        llm_name: Short model label used in the file name.

    The file ``{llm_name}_aut_{object_name}.json`` holds a list of
    ``{"object_name": ..., "alternative_uses": ...}`` dicts, one per non-blank
    line of the reply.
    """
    # Strip each line before storing it (not only when filtering) so carriage
    # returns and padding do not end up in the saved responses.
    cleaned_lines = (line.strip() for line in alternative_uses.split("\n"))
    result = [
        {"object_name": object_name, "alternative_uses": line}
        for line in cleaned_lines
        if line
    ]

    if path_folder_to_save:
        os.makedirs(path_folder_to_save, exist_ok=True)

    with open(path_folder_to_save + f"/{llm_name}_aut_{object_name}.json", "w", encoding="utf-8") as json_file:
        json.dump(result, json_file)


# Define function to create a CSV file and Pandas DataFrame
def create_csv_and_dataframe(object_name, path_folder_to_save, llm_name):
    """Convert the stored JSON reply into a CSV file and a DataFrame.

    Reads ``{llm_name}_aut_{object_name}.json`` from ``path_folder_to_save``,
    removes the leading "<object>, " prefix from every response, renames the
    columns to ``prompt`` / ``response``, writes the matching ``.csv`` file and
    deletes the JSON file.

    Args:
        object_name: Object name used in the file names.
        path_folder_to_save: Folder containing the JSON file; the CSV is written
            to the same folder.
        llm_name: Short model label used in the file names.

    Returns:
        DataFrame with columns ``prompt`` and ``response``.
    """
    base_path = path_folder_to_save + f"/{llm_name}_aut_{object_name}"
    json_path = base_path + ".json"

    with open(json_path, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)

    # Naming the columns keeps them present even when the reply had no lines.
    df = pd.DataFrame(data, columns=["object_name", "alternative_uses"])

    def _strip_prefix(text):
        # Drop everything up to the first ", ". Lines without that separator
        # (e.g. an introductory sentence) are kept whole instead of raising
        # IndexError.
        _, separator, remainder = text.partition(", ")
        return (remainder if separator else text).strip()

    # remove prefix
    df["alternative_uses"] = df["alternative_uses"].map(_strip_prefix)

    # rename the columns to fit ocsai format
    df = df.rename(columns={"object_name": "prompt", "alternative_uses": "response"})

    # save for evaluation
    df.to_csv(base_path + ".csv", index=False)

    # Delete the JSON file
    os.remove(json_path)

    return df

# Define function to estimate the price of an OpenAI API request
def estimate_price(prompt_tokens, response_tokens, llm_name):
    """Estimate the dollar cost of one request from its token counts.

    Args:
        prompt_tokens: Number of input tokens.
        response_tokens: Number of output tokens.
        llm_name: Pricing key, "gpt35" or "gpt4". Any other value prints an
            error message and is priced at zero.

    Returns:
        Input cost plus output cost.
    """
    # (input price, output price) per token
    rates = {
        "gpt35": (0.50 / 1e6, 1.5 / 1e6),  # gpt3.5-turbo-0125
        "gpt4": (10 / 1e6, 30 / 1e6),  # gpt4-0125-preview
    }

    if llm_name not in rates:
        print("Error with the price estimation (wrong name of model)")

    input_rate, output_rate = rates.get(llm_name, (0, 0))
    return (prompt_tokens * input_rate) + (response_tokens * output_rate)

def pipeline_object(object_name, llm, llm_name, N_responses, path_folder_to_save):
    """Generate alternative uses for one object and return them as a DataFrame.

    Builds the prompt, queries the model, prints the system fingerprint, token
    counts and estimated price, then stores the reply as JSON and converts it
    into a CSV file and DataFrame.

    Args:
        object_name: Object to generate alternative uses for.
        llm: Model identifier sent to the API.
        llm_name: Short model label used for pricing and file names.
        N_responses: Number of alternative uses requested in the prompt.
        path_folder_to_save: Folder where the result files are written.

    Returns:
        The DataFrame produced by ``create_csv_and_dataframe``.
    """
    api_response = call_openai_api(create_prompt(object_name, N_responses), llm)

    generated_text = api_response.choices[0].message.content
    fingerprint = api_response.system_fingerprint
    usage = api_response.usage
    n_prompt = usage.prompt_tokens
    # Completion tokens are derived as total minus prompt tokens.
    n_completion = usage.total_tokens - usage.prompt_tokens

    print(f"System fingerprint: {fingerprint}")
    print(f"Prompt tokens: {n_prompt}")
    print(f"Completion tokens: {n_completion}")

    cost = estimate_price(n_prompt, n_completion, llm_name)
    print(f"Estimated price: ${cost}")

    # Store the raw reply, then turn it into the CSV / DataFrame
    store_result_json(object_name, generated_text, path_folder_to_save, llm_name)
    return create_csv_and_dataframe(object_name, path_folder_to_save, llm_name)

In [16]:
# Objects for which alternative uses are generated (one pipeline run per object)
objects = ['brick', 'box', 'knife', 'rope']

## Call to OpenAI API

In [25]:
# Run the pipeline for a single object, selected by its position in `objects`
object_name = objects[1]
path_folder_to_save = "./data_to_process/prompt_1"
llm = "gpt-3.5-turbo-0125"
llm_name = "gpt35"
# Alternative model configuration (swap with the two lines above to use it):
#llm = "gpt-4-0125-preview"
#llm_name = "gpt4"

# Request 100 alternative uses; the parsed frame can hold fewer rows than requested
df = pipeline_object(object_name, llm, llm_name, 100, path_folder_to_save)
print(f"length of df: {len(df)}")
df.head()

System fingerprint: None
Prompt tokens: 165
Completion tokens: 963
Estimated price: $0.0015270000000000001
length of df: 84


Unnamed: 0,prompt,response
0,box,Turn it into a mini herb garden
1,box,Use it as a makeshift dollhouse
2,box,Cut it into pieces and make DIY coasters
3,box,Fill it with soil and grow microgreens
4,box,Decorate it and turn it into a gift box


### Evaluate originality and elaboration with Ocsai

In [10]:
%%capture
for i in tqdm(range(len(df))):
    eval_ocsai = call_api_ocsai(df.loc[i, 'prompt'], df.loc[i, 'response'])['scores'][0]
    df.loc[i, 'originality'] = eval_ocsai['originality']
    df.loc[i, 'elaboration'] = eval_ocsai['elaboration']

In [11]:
# Preview the first rows, now including the originality and elaboration columns
df.head()

Unnamed: 0,prompt,response,originality,elaboration
0,brick,Paint them and use as decorative bookends on s...,2.0,9.0
1,brick,Create a DIY outdoor coffee table by stacking ...,2.0,11.0
2,brick,Turn them into unique planters for succulents ...,3.0,9.0
3,brick,Use as weights for outdoor furniture to preven...,1.7,12.0
4,brick,Build a small makeshift fireplace for outdoor ...,1.0,8.0


In [69]:
# Persist the scored frame only when no cell is missing; otherwise report it
if not df.isnull().values.any():
    df.to_csv(path_folder_to_save + f"/{llm_name}_aut_{object_name}.csv", index=False)
    print("Saved successfully")
else:
    print("There are NaNs in the DataFrame")

Saved successfully


### Combine different objects together

In [70]:
# Load the scored per-object CSVs and combine them into one dataset.
# NOTE(review): the per-object files are read from "_prompt_1" while the merged
# file is written to "prompt_1" — confirm both folders are intended.
input_dir = './data_to_process/_prompt_1'
output_dir = './data_to_process/prompt_1'

df_brick = pd.read_csv(f'{input_dir}/{llm_name}_aut_brick.csv')
df_box   = pd.read_csv(f'{input_dir}/{llm_name}_aut_box.csv')
df_knife = pd.read_csv(f'{input_dir}/{llm_name}_aut_knife.csv')
df_rope  = pd.read_csv(f'{input_dir}/{llm_name}_aut_rope.csv')

# merge the datasets for all objects; ignore_index gives the merged frame a
# unique 0..n-1 index instead of repeating each file's own row labels
df_merged = pd.concat([df_brick, df_box, df_knife, df_rope], ignore_index=True)
print(f"Length of the merged dataset: {len(df_merged)}")
print(f"Object: {df_merged['prompt'].value_counts()}")

# save dataset: the file name includes the number of merged rows
df_merged.to_csv(f'{output_dir}/{llm_name}_merged_{len(df_merged)}.csv', index=False) 

df_merged.head()

Length of the merged dataset: 361
Object: prompt
rope     96
box      91
brick    87
knife    87
Name: count, dtype: int64


Unnamed: 0,prompt,response,originality,elaboration
0,brick,Use as a paperweight for outdoor projects,1.7,7.0
1,brick,Stack to create a makeshift bookshelf,2.0,6.0
2,brick,Shatter into pieces for a mosaic art project,2.0,8.0
3,brick,Use as a rustic doorstop,1.0,5.0
4,brick,Submerge in water as a habitat for aquatic pla...,3.0,9.0
