In [1]:
import os  # imported here too: this cell runs before the notebook's main import cell

## Read the key from the OPENAI_API_KEY environment variable instead of pasting it
## into the notebook (a pasted key is easily leaked when the notebook is shared).
## Falls back to "" -- the original placeholder value -- when the variable is unset.
openai_key = os.environ.get("OPENAI_API_KEY", "")

In [2]:
## Uncomment the two lines below if you run on Colab
##!pip install openai 
##!pip install backoff

In [2]:
import os
import openai
import numpy as np
import pandas as pd
import time
import random
import backoff

## Seed Python's `random` module with a fixed value, then hand the key defined
## in the first cell to the openai client.
random.seed(777)
openai.api_key = openai_key

In [3]:
## Model identifiers used by the notebook.
## No system message is sent; only gpt-3.5 and gpt-4 are tested, but the query
## code also supports the older completion model.
## NOTE(review): the original comment says a snapshot is tested so that results
## are reproducible, but none of the names below carries a snapshot suffix -- confirm.
D003 = "text-davinci-003"
CHATGPT = "gpt-3.5-turbo"
GPT4 = "gpt-4"

## Same identifiers as an ordered list: completion model first, then the two chat models.
models = [D003, CHATGPT, GPT4]

## Retry with exponential backoff on rate-limit errors, since both ChatGPT and GPT-4 are always heavily used.
@backoff.on_exception(backoff.expo, openai.error.RateLimitError, max_time=6000)
def completions_with_backoff(**kwargs):
    """Call ``openai.Completion.create`` with the given keyword arguments.

    The decorator retries the call on ``openai.error.RateLimitError`` using
    ``backoff.expo`` with ``max_time=6000``.
    """
    response = openai.Completion.create(**kwargs)
    return response

@backoff.on_exception(backoff.expo, openai.error.RateLimitError, max_time=6000)
def chat_completions_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with the given keyword arguments.

    The decorator retries the call on ``openai.error.RateLimitError`` using
    ``backoff.expo`` with ``max_time=6000``.
    """
    response = openai.ChatCompletion.create(**kwargs)
    return response

def gptQuery(prompt, model, temperature = 0, n=1, logprobs=1, echo = False, max_tokens = 500, **kwargs):
    """Send one prompt to an OpenAI model and return the stripped response texts.

    Parameters
    ----------
    prompt : str
        Prompt text. For chat models it is sent as a single "user" message
        (no system message).
    model : str
        Must be one of the entries of the module-level ``models`` list.
        ``models[0]`` goes through the completion endpoint, ``models[1]`` and
        ``models[2]`` through the chat endpoint.
    temperature, n :
        Forwarded unchanged to the endpoint.
    logprobs :
        Forwarded to the completion endpoint only; it is not sent for chat models.
    echo : bool
        When true, print the raw response object before returning.
    max_tokens : int
        Response length limit forwarded to the endpoint (default 500).
    **kwargs :
        Extra keyword arguments forwarded to whichever endpoint is used.

    Returns
    -------
    list of str
        One whitespace-stripped text per returned choice.

    Raises
    ------
    ValueError
        If ``model`` is not one of the entries of ``models`` handled here.
    """
    if model == models[0]:
        out = completions_with_backoff(model=model,
                                       prompt=prompt,
                                       logprobs=logprobs,
                                       temperature=temperature,
                                       max_tokens=max_tokens,
                                       n=n, **kwargs)
        if echo:
            print(out)
        return [choice.text.strip() for choice in out.choices]

    if model == models[1] or model == models[2]:
        # Extra kwargs are forwarded here as well, so both branches treat them alike.
        out = chat_completions_with_backoff(model=model,
                                            messages=[{"role":"user","content":prompt}],
                                            temperature=temperature,
                                            max_tokens=max_tokens,
                                            n=n, **kwargs)
        if echo:
            print(out)
        return [choice.message.content.strip() for choice in out.choices]

    # Fail loudly instead of implicitly returning None, which would only surface
    # later as a confusing error when the caller indexes the result.
    raise ValueError("Unsupported model: " + repr(model) + "; expected one of " + repr(models))

## Smoke test: check that the OpenAI key works by sending a short prompt to the
## two chat models. The completion-model check is kept disabled:
## print(gptQuery("Hello Test Test", models[0]))
print(gptQuery("Hello Test Test", models[1]))
print(gptQuery("Hello Test Test", models[2]))

['Hello! How can I assist you today?']
['Hello! How can I help you today? If you have any questions or need assistance, feel free to ask.']


In [46]:
## Reading the annotation
df = pd.read_csv("Obama_annotations_v7.csv")

## New combined variables
## Missing container movements become empty strings.
df['ContainerMovement'] = df['ContainerMovement'].fillna("")

## PhysicalGesture starts out as the plain gesture type ...
df['PhysicalGesture'] = df['GestureType']

## ... and sweep rows are refined to "palm <HandPlane> sweep". This is done with one
## boolean-mask assignment instead of a row-by-row loop with chained indexing.
## NOTE: a sweep row with a missing HandPlane yields a missing PhysicalGesture here
## rather than raising during string concatenation.
## (A disabled variant in the original also prefixed "container" rows with their
## non-empty ContainerMovement; it remains disabled.)
is_sweep = df['GestureType'] == "sweep"
df.loc[is_sweep, 'PhysicalGesture'] = (
    "palm " + df.loc[is_sweep, "HandPlane"] + " " + df.loc[is_sweep, 'GestureType']
)



In [47]:
## Sweep-only subset of the annotations, renumbered 0..n-1 so that positional
## row numbers and index labels coincide.
df_sweep = df.loc[df['GestureType'] == "sweep"].reset_index(drop=True)

In [54]:
def genPrompt(dat, i, output, num_examples, examples=None, context=False, fillin=False):
    """Build a few-shot prompt asking for the gesture used with utterance ``i``.

    Parameters
    ----------
    dat : pandas.DataFrame
        Annotation data. The columns "GestureType", "utterance", "utteranceLong"
        and the column named by ``output`` are read.
    i : int
        Index label of the row to be predicted (the question).
    output : str
        Name of the column whose value is shown as the answer in each example.
    num_examples : int
        Number of examples taken per gesture type ("span", "sweep", "container"),
        in index order and skipping row ``i``. ``-1`` means every row position
        ``0 .. len(dat) - 1`` except ``i``; any ``examples`` passed in are then ignored.
    examples : list of int, optional
        Explicit row labels to use as examples. When empty or ``None`` the
        examples are selected according to ``num_examples``. The list passed in
        is never modified.
    context : bool
        If true the instruction line names the speaker and venue, otherwise a
        generic instruction line is used.
    fillin : bool
        If true use the "he used a ___  gesture." fill-in phrasing instead of
        "he used the following gesture:".

    Returns
    -------
    str
        The instruction line, one line per example (sorted by row label) and
        the unanswered question for row ``i``.

    Raises
    ------
    IndexError
        If a gesture type has fewer than ``num_examples`` rows other than ``i``.
    """
    ## Instruction:
    ## NOTE(review): "Barrack" looks like a misspelling of "Barack"; the string is
    ## left unchanged so that generated prompts stay identical to earlier runs.
    out = "Barrack Obama is giving a speech at Democratic National Convention.\n" if context else "A man is giving a speech.\n"

    # Work on a private copy: a shared default list (or the caller's list) would
    # otherwise accumulate examples across calls and leak them into later prompts.
    examples = [] if examples is None else list(examples)

    if num_examples == -1:
        examples = list(range(dat.shape[0]))
        examples.remove(i)

    ## Add n examples of each gesture type, in index order, never using row i itself
    if len(examples) == 0:
        candidates_by_type = [
            [ind for ind in dat.index[dat["GestureType"] == gesture].to_list() if ind != i]
            for gesture in ("span", "sweep", "container")
        ]
        for j in range(num_examples):
            for candidates in candidates_by_type:
                examples.append(candidates[j])
    examples.sort()

    def _stem(ind):
        # Shared opening of every example line and of the final question.
        return 'He said "' + dat["utteranceLong"][ind] + '" When he said "' + dat["utterance"][ind] + '", he used '

    for ind in examples:
        if not fillin:
            out += _stem(ind) + 'the following gesture: ' + dat[output][ind] + '.\n'
        else:
            out += _stem(ind) + 'a ' + dat[output][ind] + ' gesture.\n'

    ## Add question
    if not fillin:
        out += _stem(i) + 'the following gesture:'
    else:
        out += _stem(i) + 'a ___  gesture.'
    return out



In [64]:
def genOneOutputType(df, model, output, num_examples_list=(0, 2, 4, -1), context_list=(True, False), fillin=False, extra_name="", echo=False, delay=2):
    """Query ``model`` for every row of ``df`` under each prompt setting and save the answers.

    For every combination of ``num_examples_list`` x ``context_list`` a prompt is
    built with ``genPrompt`` for each row, sent with ``gptQuery``, and the first
    returned text is stored.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation data; the "utterance" column and the ``output`` column are
        read here. Rows are addressed by ``0 .. len(df) - 1``, so the frame is
        expected to carry a default integer index.
    model : str
        Model name passed to ``gptQuery``; also used in the output file name.
    output : str
        Column holding the expected answer; also used in the output file name.
    num_examples_list : iterable of int
        ``num_examples`` values passed to ``genPrompt``.
    context_list : iterable of bool
        ``context`` values passed to ``genPrompt``.
    fillin : bool
        Passed to ``genPrompt``.
    extra_name : str
        Suffix appended to the CSV file name before ".csv".
    echo : bool
        When true, print each prompt with the response and the expected answer.
    delay : float
        Seconds to sleep before every request (default 2).

    Returns
    -------
    pandas.DataFrame
        Columns "Utterance", "Annotation" and one "num=<n>_context=<context>"
        column per setting. The same frame is written to
        ``<model>_<output><extra_name>.csv`` in the working directory.
    """
    column_names = ["Utterance", "Annotation"]
    columns = [list(df["utterance"]), list(df[output])]

    for n in num_examples_list:
        for context in context_list:
            responses = []
            for i in range(df.shape[0]):
                prompt = genPrompt(df, i=i, output=output, num_examples=n, examples=[], context=context, fillin=fillin)
                time.sleep(delay)
                response = gptQuery(prompt = prompt, model=model)[0]
                responses.append(response)
                if echo:
                    print(prompt)
                    # str() keeps the echo line from failing on a non-string annotation.
                    print("Response: " + response + ", Expected Response: " + str(df[output][i]))
                    print("\n")
            columns.append(responses)
            column_names.append("num="+str(n)+"_context="+str(context))

    # Kept under its own name so the input frame is not shadowed by the result.
    results = pd.DataFrame(list(zip(*columns)), columns=column_names)
    results.to_csv(model+"_" +output+extra_name+".csv")
    return results

In [None]:
## ChatGPT runs. Each call writes one CSV named "<model>_<output><extra_name>.csv".

## Gesture type, "following gesture" phrasing
genOneOutputType(df, model=CHATGPT, output="GestureType",
                 num_examples_list=[0, 2, 4, 6, -1], context_list=[True],
                 extra_name="-v7", echo=True)

## Gesture type, fill-in phrasing
genOneOutputType(df, model=CHATGPT, output="GestureType",
                 num_examples_list=[0, 2, 4, 6, -1], context_list=[True],
                 fillin=True, extra_name="-v7-fill-in", echo=True)

## Physical gesture
genOneOutputType(df, model=CHATGPT, output="PhysicalGesture",
                 num_examples_list=[2, 4, 6, -1], context_list=[True],
                 extra_name="-v7")

## Semantic description
genOneOutputType(df, model=CHATGPT, output="SemanticDescription",
                 num_examples_list=[2, 4, 6, -1], context_list=[True],
                 extra_name="-v7")

## Physical gesture on the sweep-only subset, using all other rows as examples
genOneOutputType(df_sweep, model=CHATGPT, output="PhysicalGesture",
                 num_examples_list=[-1], context_list=[True],
                 extra_name="SweepOnly-v7")

In [None]:
## GPT-4 runs. Each call writes one CSV named "<model>_<output><extra_name>.csv".

## Gesture type, "following gesture" phrasing
genOneOutputType(df, model=GPT4, output="GestureType",
                 num_examples_list=[0, 2, 4, 6, -1], context_list=[True],
                 extra_name="-v7", echo=True)

## Gesture type, fill-in phrasing
genOneOutputType(df, model=GPT4, output="GestureType",
                 num_examples_list=[0, 2, 4, 6, -1], context_list=[True],
                 fillin=True, extra_name="-v7-fill-in", echo=True)

## Physical gesture
genOneOutputType(df, model=GPT4, output="PhysicalGesture",
                 num_examples_list=[2, 4, 6, -1], context_list=[True],
                 extra_name="-v7", echo=True)

## Semantic description
genOneOutputType(df, model=GPT4, output="SemanticDescription",
                 num_examples_list=[2, 4, 6, -1], context_list=[True],
                 extra_name="-v7")

## Physical gesture on the sweep-only subset, using all other rows as examples
genOneOutputType(df_sweep, model=GPT4, output="PhysicalGesture",
                 num_examples_list=[-1], context_list=[True],
                 extra_name="SweepOnly-v7")