# Setup

In [None]:
#install packages not native to colab
!pip install openai

Collecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai


In [None]:
#import packages
import os, sys, math, random
import requests, json
import re, io, ast
import openai
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import permutations
from google.colab import drive, files, data_table

In [None]:
#set up paths for local modules and data sources
# Mount Google Drive inside the Colab VM; force_remount=True requests a remount even when
# the drive is already mounted.
drive.mount('/content/drive',force_remount=True)
# Add the shared-drive project folder to the import path so the local modules below resolve.
sys.path.append('/content/drive/Shareddrives/MoralLearning/GPT_v1/')
# Folder the stimulus CSVs are read from. The trailing slash matters: file paths are built
# by plain string concatenation further down.
stim_path = "/content/drive/Shareddrives/MoralLearning/Stimuli/"
# Folder that result CSVs are written to and read back from (same trailing-slash convention).
data_path = '/content/drive/Shareddrives/MoralLearning/GPT_v1/Results/'
#import local modules
from GPT import promptGPT2, getEmbeddings
# NOTE(review): the wildcard import hides which names come from utilities. `correlate`,
# used in later cells, is not defined in this notebook, so it presumably comes from
# here - confirm and consider importing it by name.
from utilities import *

In [None]:
#set environment variable with API key
# Ask for the key at run time instead of pasting it into the notebook: the previous
# `%env NAME = value` line echoes the value into the cell output, and a pasted key is
# saved together with the file.
from getpass import getpass

_openai_key = getpass('OpenAI API key: ').strip()
# The environment variable keeps the original "Bearer <key>" form; the openai client
# receives the bare key.
os.environ["OPENAI_API_KEY"] = 'Bearer ' + _openai_key
# os.getenv("OPENAI_API_KEY")
openai.api_key = _openai_key

# Embeddings

In [None]:
def return_embeddings_diff(attributes_a, attributes_b):
  '''
  Average embedding difference between two sets of attribute phrases.

  Every phrase in attributes_a and attributes_b is embedded with getEmbeddings.
  The difference embedding(a) - embedding(b) is formed for every a/b pairing and
  the element-wise mean of those difference vectors is returned as a pandas
  Series (one value per embedding dimension).
  '''
  def embed_as_columns(phrases):
    # one column per phrase, labelled by the phrase's position in the list;
    # each new column is placed in front of the ones already present
    frame = pd.DataFrame()
    for position in range(len(phrases)):
      response = getEmbeddings(phrases[position])
      frame.insert(loc=0, column=position, value=response["data"][0]["embedding"])
    return frame

  emb_high = embed_as_columns(attributes_a)
  emb_low = embed_as_columns(attributes_b)

  # one column per (a, b) pairing, named "<a index>_<b index>"
  pairwise_diff = pd.DataFrame()
  for idx_a in range(len(attributes_a)):
    for idx_b in range(len(attributes_b)):
      pairwise_diff.insert(loc=0,
                           column=f'{idx_a}_{idx_b}',
                           value=emb_high[idx_a] - emb_low[idx_b])

  # collapse the pairings into a single direction vector
  return pairwise_diff.mean(axis=1)


def return_list_embeddings(alist):
  '''
  Embed every item of alist and collect the vectors in one DataFrame.

  Each item becomes a column labelled with the item itself. New columns are
  inserted at the front, so the column order is the reverse of alist; repeated
  items are allowed and produce repeated column labels.
  '''
  embeddings = pd.DataFrame()
  for item in alist:
    response = getEmbeddings(item)
    vector = response["data"][0]["embedding"]
    embeddings.insert(loc=0, column=item, value=vector, allow_duplicates=True)
  return embeddings


def get_projections(stim_list, moral_v, hedonic_v, movement_v):
  '''
  Score each stimulus against the moral, hedonic and movement direction vectors.

  For every item in stim_list the embedding is fetched with getEmbeddings and its
  inner product with each direction vector is computed. Note that this is the raw
  inner product; neither the embedding nor the direction is normalised here.

  Parameters:
    stim_list: items to embed; also used as the index of the result.
    moral_v, hedonic_v, movement_v: direction vectors, anything np.asarray accepts
      (e.g. the Series returned by return_embeddings_diff).

  Returns:
    DataFrame indexed by stim_list with float columns 'moral_v', 'hedonic_v'
    and 'movement_v'.
  '''
  # convert the directions once instead of once per stimulus
  directions = {
    'moral_v': np.asarray(moral_v),
    'hedonic_v': np.asarray(hedonic_v),
    'movement_v': np.asarray(movement_v),
  }

  # Collect the rows first and build the frame in one go. The previous version created
  # an integer-zero frame and wrote floats into it cell by cell, which newer pandas
  # versions flag as an incompatible-dtype assignment.
  rows = []
  for stim in stim_list:
    embedding = np.array(getEmbeddings(stim)["data"][0]["embedding"])
    rows.append([np.inner(embedding, direction) for direction in directions.values()])

  return pd.DataFrame(rows, index=stim_list, columns=list(directions), dtype=float)

In [None]:
# Build one direction vector per attribute (high-minus-low phrase embeddings) and
# store each vector as a CSV in data_path.

# moral direction in GPT embeddings
attributes_morality_high = ['morally virtuous', 'ethical', 'high moral value']
attributes_morality_low = ['morally wrong', 'unethical', 'low moral value']
moral_v = return_embeddings_diff(attributes_morality_high, attributes_morality_low)
moral_v.to_csv(f'{data_path}moral_direction_embeddings.csv')

# hedonic direction in GPT embeddings
attributes_hedonic_high = ['personally rewarding', 'pleasurable for me', 'high hedonic value for me']
attributes_hedonic_low = ['personally costly', 'unpleasurable for me', 'low hedonic value for me']
hedonic_v = return_embeddings_diff(attributes_hedonic_high, attributes_hedonic_low)
hedonic_v.to_csv(f'{data_path}hedonic_direction_embeddings.csv')

# physicality (movement) direction in GPT embeddings
attributes_movement_high = ['physical', 'bodily action', 'high movement']
attributes_movement_low = ['mental', 'minimally active', 'low movement']
movement_v = return_embeddings_diff(attributes_movement_high, attributes_movement_low)
movement_v.to_csv(f'{data_path}movement_direction_embeddings.csv')

In [None]:
# Load the stimuli to project: a header-less CSV whose first column holds the actions.
action_data = pd.read_csv(f"{stim_path}action_neurips.csv", header=None)
action_list_all = action_data.loc[:, 0].tolist()

# Three batches, matching the batches used by the prompting method:
# items 0-17, items 18-35, and everything from item 36 on.
action_list_1, action_list_2, action_list_3 = (
    action_list_all[:18],
    action_list_all[18:36],
    action_list_all[36:],
)

In [None]:
# Project every batch of stimuli onto the three direction vectors.
# Each batch is written to disk as soon as it has been computed.

#batch 1
projections_1 = get_projections(action_list_1, moral_v, hedonic_v, movement_v)
projections_1.to_csv(f"{data_path}vector_projections_batch1.csv")

#batch 2
projections_2 = get_projections(action_list_2, moral_v, hedonic_v, movement_v)
projections_2.to_csv(f"{data_path}vector_projections_batch2.csv")

#batch 3
projections_3 = get_projections(action_list_3, moral_v, hedonic_v, movement_v)
projections_3.to_csv(f"{data_path}vector_projections_batch3.csv")

In [None]:
# Report correlation among attribute embeddings
# Each label names the two direction vectors that are actually passed to correlate;
# the third line previously read "hedonic and hedonic" while comparing hedonic with movement.
print('Correlation between moral and hedonic embedding is '+ str(correlate(moral_v,hedonic_v)[0,1]))
print('Correlation between moral and movement embedding is '+ str(correlate(moral_v,movement_v)[0,1]))
print('Correlation between hedonic and movement embedding is '+ str(correlate(hedonic_v,movement_v)[0,1]))

In [None]:
# Report how the item projections correlate across the three dimensions.

# stack the three batches into a single frame (one row per item)
projection_df = pd.concat((projections_1, projections_2, projections_3), axis=0)
print(projection_df.shape)

print(f'Moral and hedonic item correlation is {correlate(projection_df["moral_v"], projection_df["hedonic_v"])[0,1]!s}')
print(f'Moral and movement item correlation is {correlate(projection_df["moral_v"], projection_df["movement_v"])[0,1]!s}')
print(f'Hedonic and movement item correlation is {correlate(projection_df["hedonic_v"], projection_df["movement_v"])[0,1]!s}')

In [None]:
# Correlations among prompted item distances

# sort & rate prompting results, one file per batch (first CSV column is the index)
ratings_1 = pd.read_csv(f'{data_path}neurips_actions_set1.csv', index_col=0)
ratings_2 = pd.read_csv(f'{data_path}neurips_actions_set2.csv', index_col=0)
ratings_3 = pd.read_csv(f'{data_path}neurips_actions_set3.csv', index_col=0)

# moral vs hedonic correlation inside each batch; the three values are averaged below
c1, c2, c3 = (
    correlate(batch["rescored_moral"], batch["rescored_hedonic"])[0, 1]
    for batch in (ratings_1, ratings_2, ratings_3)
)

print(f'moral - hedonic item correlation with sort-rated distances is {np.mean([c1,c2,c3])!s}')

In [None]:
# Correlation across embedding and prompted distances

def _rating_projection_correlation(ratings, projections, dimension):
  '''
  Correlate the prompted scores of one batch with its embedding projections.

  ratings needs the columns "item" and "rescored_<dimension>"; projections is indexed
  by item and needs the column "<dimension>_v". Returns element [0,1] of the result
  of correlate.
  '''
  #ensure the items match across the sets: pull the projection rows in rating order
  projections_match = projections.loc[ratings["item"].to_list()]
  return correlate(ratings["rescored_" + dimension], projections_match[dimension + "_v"])[0,1]

batch_pairs = [(ratings_1, projections_1), (ratings_2, projections_2), (ratings_3, projections_3)]

#moral (batches 1-3)
c1, c2, c3 = [_rating_projection_correlation(r, p, "moral") for r, p in batch_pairs]
print('Correlation on morality values between embeddings and prompted distances '+ str(np.mean([c1,c2,c3])))

#hedonic (batches 1-3)
c4, c5, c6 = [_rating_projection_correlation(r, p, "hedonic") for r, p in batch_pairs]
# this label previously said "morality" although the hedonic scores are being reported
print('Correlation on hedonic values between embeddings and prompted distances '+ str(np.mean([c4,c5,c6])))

In [None]:
# Plot batch 1
# sort projections by morality (highest first) before plotting
projections_1 = projections_1.sort_values(by='moral_v', ascending=False)
projections_1

In [None]:
# plot moral v hedonic
fig, ax = plt.subplots(figsize=(10, 10))
cmap = plt.get_cmap("tab20")

# One point per item, numbered in row order; the legend maps each number to its item text.
n_items = len(projection_df)
plot_i = 1
for i in range(n_items):
    x = projection_df['hedonic_v'].iloc[i]
    y = projection_df['moral_v'].iloc[i]
    # colour taken from the colormap (it was computed before but never passed to scatter)
    colors = cmap(i/n_items)
    label = str(plot_i) + '. ' + projection_df.index[i].lstrip('[').rstrip(']')
    ax.scatter([x], [y], label=label, color=colors)
    # Reuse the positional x/y values: indexing the string-indexed Series with an integer
    # relied on a positional fallback that newer pandas versions deprecate.
    ax.annotate(str(plot_i), (x+.0005, y-.0001))
    plot_i += 1

ax.set_xlabel('Hedonic')
ax.set_ylabel('Moral')
ax.legend()

In [None]:
# plot moral v movement
fig, ax = plt.subplots(figsize=(10, 10))
cmap = plt.get_cmap("tab20")

# Plot each item: one numbered point per row; the legend maps each number to its item text.
n_items = len(projection_df)
plot_i = 1
for i in range(n_items):
    x = projection_df['movement_v'].iloc[i]
    y = projection_df['moral_v'].iloc[i]
    # colour taken from the colormap (it was computed before but never passed to scatter)
    colors = cmap(i/n_items)
    label = str(plot_i) + '. ' + projection_df.index[i].lstrip('[').rstrip(']')
    ax.scatter([x], [y], label=label, color=colors)
    # Reuse the positional x/y values: indexing the string-indexed Series with an integer
    # relied on a positional fallback that newer pandas versions deprecate.
    ax.annotate(str(plot_i), (x+.0005, y-.0001))
    plot_i += 1

ax.set_xlabel('Movement')
ax.set_ylabel('Moral')
ax.legend()

# GPT prompt ranking

In [None]:
# prompts
def prompt_sort(action_list, dimension):
  '''
  Build the prompt that asks for a list of actions to be sorted along one dimension.

  Parameters:
    action_list: the actions (strings) to sort. The list is NOT modified; a shuffled
      copy is used for the prompt.
    dimension: 'moral', 'hedonic' or 'action'.

  Returns:
    The prompt string, ending with the actions in random order.

  Raises:
    ValueError: if dimension is not one of the three supported values.
  '''
  #randomize the order on a copy, so the caller's list keeps its order
  shuffled_actions = list(action_list)
  random.shuffle(shuffled_actions)

  #turn list into formatted string
  action_string = (', ').join(shuffled_actions)

  # Switch by sorting dimension
  if dimension == 'moral':
    prompt = f'I will sort these actions in terms of their moral value.\
    I am sorting only on morality according to how morally wrong each action is,\
    starting with the most morally wrong and ending with the most morally virtuous.\
    I will format the list as a python list of strings. Here is an example.\
    Sort these actions: walk a dog, rob a bank, rescue a cat, steal food. \
    Response: ["rob a bank", "steal food", "walk a dog", "rescue a cat"]\
    Sort these actions: {action_string}.'

  elif dimension == 'hedonic':
    prompt = f'I will sort these actions in terms of how good and rewarding they are for myself.\
    I am sorting only on how good this action is for me,\
    starting with the most bad and least rewarding, and ending with the most good and rewarding.\
    I will format the list as a python list of strings. Here is an example.\
    Sort these actions: miss the bus, win a sweepstakes, eat a meal, lose my wallet.\
    Response: ["lose my wallet", "miss the bus", "eat a meal", "win a sweepstakes"]\
    Sort these actions: {action_string}.'

  elif dimension == 'action':
    prompt = f'I will sort these actions in terms of how much they involve physical body movement.\
    I am sorting only on how much physical movement this involves,\
    starting with the least movement, and ending with the most movement.\
    I will format the list as a python list of strings. Here is an example.\
    Sort these actions: play soccer, listen to music, pack boxes, order coffee at a cafe.\
    Response: ["listen to music", "order coffee at a cafe", "pack boxes", "play soccer"]\
    Sort these actions: {action_string}.'

  else:
    # ValueError is a subclass of Exception, so existing `except Exception` handlers still work
    raise ValueError("Specify a rating dimension: moral, hedonic, action")

  return prompt

def prompt_rate_sorted(action_list, dimension):
  '''
  Build the prompt that asks for distance ratings between adjacent items of an
  already-sorted action list, along 'moral', 'hedonic' or 'action'.
  The actions are listed in the order given. Raises Exception for any other dimension.
  '''
  # comma-separated rendering of the actions, in the order received
  action_string = (', ').join(action_list)

  if dimension not in ('moral', 'hedonic', 'action'):
    raise Exception("Specify a rating dimension: moral, hedonic, action")

  if dimension == 'moral':
    return f'Given a list of actions, I will rate how similar pairs of actions are in terms of their moral value. \
    I will assign a number to each adjacent pair, rating 0 if the actions are very similar \
    in terms of moral value, and 10 if the actions are very different in terms of moral value. \
    I will give the first action a rating of 0. I will give the second action a rating in comparison to the first action, and so on. \
    I will format the response as a list of python strings, with the action followed by a ":" and its rating. Here is an example:\
    Rate these actions: rob a bank, steal food, walk a dog, rescue a cat. \
    Response: ["rob a bank: 0", "steal food: 3", "walk a dog: 10", "rescue a cat: 6"] \
    Rate these actions: {action_string}.'

  if dimension == 'hedonic':
    return f'Given a list of actions, I will rate how similar pairs of actions are in terms of how good or rewarding they are for me. \
    I will assign a number to each adjacent pair, rating 0 if the actions are very similar \
    in terms of being good for me, and 10 if the actions are very different in terms of being good for me. \
    I will give the first action a rating of 0. I will give the second action a rating in comparison to the first action, and so on. \
    I will format the response as a list of python strings, with the action followed by a ":" and its rating. Here is an example:\
    Rate these actions: lose my wallet, miss the bus, eat a meal, win a sweepstakes. \
    Response: ["lose my wallet: 0", "miss the bus: 2", "eat a meal: 9", "win a sweepstakes: 8"]\
    Rate these actions: {action_string}.'

  # only 'action' is left at this point
  return f'Given a list of actions, I will rate how similar pairs of actions are in terms of how much physical movement they involve. \
    I will assign a number to each adjacent pair, rating 0 if the actions are very similar \
    in terms of physical movement, and 10 if the actions are very different in terms of physical movement. \
    I will give the first action a rating of 0. I will give the second action a rating in comparison to the first action, and so on. \
    I will format the response as a list of python strings, with the action followed by a ":" and its rating. Here is an example:\
    Rate these actions: listen to music, order coffee at a cafe, pack boxes, play soccer. \
    Response: ["listen to music: 0", "order coffee at a cafe: 2", "pack boxes: 6", "play soccer: 9"]\
    Rate these actions: {action_string}.'

In [None]:
# sort and rate function
def _parse_gpt_list(response, stage):
  '''
  Turn a GPT text response into a python list.

  As before, only the text between the first and the second '[' of the response is
  evaluated (with the opening bracket re-attached). Raises ValueError, quoting the
  raw response, when that text is missing or is not a valid python literal.
  '''
  try:
    return ast.literal_eval('[' + response.split('[')[1])
  except (AttributeError, IndexError, SyntaxError, TypeError, ValueError) as err:
    raise ValueError(
      f'Could not parse the GPT {stage} response as a python list: {response!r}') from err


def sort_rate(action_list, sys_prompt, dimension):
  '''
  Have GPT sort the actions along one dimension, then rate the distance between
  neighbouring items, and turn those distances into a 0-100 score.

  Parameters:
    action_list: actions (strings) to process; not modified.
    sys_prompt: passed as the first argument to promptGPT2.
    dimension: 'moral', 'hedonic' or 'action'.

  Returns:
    DataFrame with columns 'item' and 'rescored_<dimension>', where the score is the
    cumulative rating divided by the largest cumulative rating, times 100. If every
    rating is 0 the division is 0/0 and the scores are NaN.

  Raises:
    ValueError: if either GPT response cannot be parsed. Previously the response was
      printed and the function then failed on an unassigned variable.
  '''
  # hand prompt_sort its own copy so the caller's list is never reordered
  sort_prompt = prompt_sort(list(action_list), dimension)
  # NOTE(review): the trailing 0 is presumably the sampling temperature - confirm in GPT.py
  resp_sorted_list = _parse_gpt_list(promptGPT2(sys_prompt, sort_prompt, 0), 'sort')

  # define next prompt, which asks GPT to quantify the distances among items
  rate_prompt = prompt_rate_sorted(resp_sorted_list, dimension)
  rated_entries = _parse_gpt_list(promptGPT2(sys_prompt, rate_prompt, 0), 'rate')

  # Split each "item: rating" entry on its LAST colon, so an item that itself contains
  # a colon still yields exactly two fields.
  rated_rows = []
  for entry in rated_entries:
    item, sep, rating = entry.rpartition(':') if isinstance(entry, str) else ('', '', '')
    if not sep:
      raise ValueError(f'Expected an "item: rating" string but got {entry!r}')
    rated_rows.append([item.strip(), rating.strip()])

  # parse the ratings into a dataframe
  resp_df = pd.DataFrame(rated_rows, columns=['item', 'rating'])
  resp_df['rating'] = pd.to_numeric(resp_df['rating'])
  # position along the dimension = running total of the neighbour distances
  resp_df['rating_sum'] = resp_df['rating'].cumsum()
  resp_df['rescored_' + dimension] = resp_df['rating_sum'] / resp_df['rating_sum'].max() * 100
  resp_df = resp_df.drop(columns=['rating', 'rating_sum'])

  return resp_df

In [None]:
# process actions
def sort_rate_actions(action_list, sys_prompt):
  '''
  Run sort_rate on the moral, hedonic and action dimensions (in that order) and
  combine the three results into one frame, inner-joined on 'item'.
  '''
  per_dimension = [
    sort_rate(action_list, sys_prompt, dimension)
    for dimension in ('moral', 'hedonic', 'action')
  ]

  # fold the frames together left to right; only items present in all of them remain
  merged = per_dimension[0]
  for frame in per_dimension[1:]:
    merged = pd.merge(merged, frame, on='item', how='inner')

  return merged

In [None]:
# Load the action stimuli (header-less CSV) and keep the first column as a plain list.
action_data = pd.read_csv(f"{stim_path}action_neurips.csv", header=None)
action_list = action_data.loc[:, 0].tolist()

In [None]:
# Batch to run through the pipeline: first 18 actions.
# Swap in one of the commented lines below to run the second or third batch instead.
new_list = action_list[0:18]
#new_list = action_list[18:36]
#new_list = action_list[36:]

In [None]:
# Text handed to sort_rate_actions, which forwards it as the first argument of every promptGPT2 call.
sys_prompt = 'I am a diligent human research subject.'

In [None]:
# run the pipeline and get data!
# Sorts and rates new_list on the moral, hedonic and action dimensions (three sort_rate
# calls) and inner-joins the three result frames on 'item'.
full_df = sort_rate_actions(new_list, sys_prompt)

In [None]:
# inspect dataframe
# Show full_df as a Colab data table, 20 rows per page, without the index column.
data_table.DataTable(full_df, include_index=False, num_rows_per_page=20)

In [None]:
# correlations
# full_df also holds the text column 'item'; restrict the correlation to the numeric
# score columns explicitly, because pandas 2.x no longer drops non-numeric columns by default.
full_df.corr(numeric_only=True)

In [None]:
# plot
fig, ax = plt.subplots(figsize=(10, 10))
cmap = plt.get_cmap("tab20")

# Rows are visited last-to-first, so point number 1 is the last row of full_df.
n_items = len(full_df)
plot_i = 1
for i in reversed(range(n_items)):
    x = full_df['rescored_hedonic'].iloc[i]
    y = full_df['rescored_moral'].iloc[i]
    # colour taken from the colormap (it was computed before but never passed to scatter)
    colors = cmap(i/n_items)
    label = str(plot_i) + '. ' + full_df['item'].iloc[i].lstrip('[').rstrip(']')
    ax.scatter([x], [y], label=label, color=colors)
    # Annotate from the same positional x/y values as the point itself, instead of a
    # label lookup that only matches while full_df has a default 0..n-1 index.
    ax.annotate(str(plot_i), (x+2, y-1))
    plot_i += 1

ax.set_xlabel('Hedonic')
ax.set_ylabel('Moral')
ax.legend()

# Shift ranking with context

In [None]:
# define prompt_sort_context

def prompt_sort_context(action_list, dimension, context):
  '''
  Build a sorting prompt that is preceded by a context statement.

  Parameters:
    action_list: the actions (strings) to sort. The list is NOT modified; a shuffled
      copy is used for the prompt.
    dimension: 'moral', 'hedonic' or 'action'.
    context: text placed at the very start of the prompt, followed by '. ' and a newline.

  Returns:
    The prompt string, ending with the actions in random order.

  Raises:
    ValueError: if dimension is not one of the three supported values.
  '''
  #randomize the order on a copy, so the caller's list keeps its order
  shuffled_actions = list(action_list)
  random.shuffle(shuffled_actions)

  #turn list into formatted string
  action_string = (', ').join(shuffled_actions)

  # Switch by sorting dimension
  if dimension == 'moral':
    prompt = f'{context}. \n I will sort these actions in terms of their moral value.\
    I am sorting only on morality according to how morally wrong each action is,\
    starting with the most morally wrong and ending with the most morally virtuous.\
    I will format the list as a python list of strings. Here is an example.\
    Sort these actions: walk a dog, rob a bank, rescue a cat, steal food. \
    Response: ["rob a bank", "steal food", "walk a dog", "rescue a cat"]\
    Sort these actions: {action_string}.'

  elif dimension == 'hedonic':
    prompt = f'{context}. \n I will sort these actions in terms of how good and rewarding they are for myself.\
    I am sorting only on how good this action is for me,\
    starting with the most bad and least rewarding, and ending with the most good and rewarding.\
    I will format the list as a python list of strings. Here is an example.\
    Sort these actions: miss the bus, win a sweepstakes, eat a meal, lose my wallet.\
    Response: ["lose my wallet", "miss the bus", "eat a meal", "win a sweepstakes"]\
    Sort these actions: {action_string}.'

  elif dimension == 'action':
    prompt = f'{context}. \n  I will sort these actions in terms of how much they involve physical body movement.\
    I am sorting only on how much physical movement this involves,\
    starting with the least movement, and ending with the most movement.\
    I will format the list as a python list of strings. Here is an example.\
    Sort these actions: play soccer, listen to music, pack boxes, order coffee at a cafe.\
    Response: ["listen to music", "order coffee at a cafe", "pack boxes", "play soccer"]\
    Sort these actions: {action_string}.'

  else:
    # ValueError is a subclass of Exception, so existing `except Exception` handlers still work
    raise ValueError("Specify a rating dimension: moral, hedonic, action")

  return prompt

In [None]:
# Reload the action stimuli (header-less CSV, first column) and keep the first 18 as batch 1.
action_data = pd.read_csv(f"{stim_path}action_neurips.csv", header=None)
action_list = action_data.loc[:, 0].tolist()
action_list_1 = action_list[:18]

In [None]:
def shift_item_with_context(action_list, dimension, key_item, new_context, reps=10):
  '''
  Add a new item to the list of actions, identify where it falls along a given dimension,
  then change context and see if its position changes.

  Parameters:
    action_list: baseline actions (strings). The list is NOT modified; key_item is
      added to an internal copy.
    dimension: 'moral', 'hedonic' or 'action' (forwarded to prompt_sort_context).
    key_item: the action whose position is tracked.
    new_context: context statement used for the second sort of every repetition.
    reps: number of times the pair of GPT calls is repeated (default 10).

  Returns:
    DataFrame with one row per repetition and the columns 'old_list' (position of
    key_item under the default context) and 'new_list' (position under new_context).
    A response that cannot be parsed, or that does not contain key_item, is printed
    and recorded as NaN, so both columns always stay aligned by repetition.
  '''
  no_context = "I am an average person." # Default context
  sys_prompt = 'I am a diligent human research subject.'

  # Work on a copy: appending to the caller's list made key items from earlier calls
  # pile up in later ones.
  items = list(action_list) + [key_item]

  def key_item_position(context):
    # prompt_sort_context gets its own copy because it may reorder what it is given
    # NOTE(review): the trailing 0 is presumably the sampling temperature - confirm in GPT.py
    resp_sorted = promptGPT2(sys_prompt,
                             prompt_sort_context(list(items), dimension, context), 0)
    try:
      resp_sorted_list = ast.literal_eval('[' + resp_sorted.split('[')[1])
      return resp_sorted_list.index(key_item)
    except Exception:
      # best effort: report the bad response and keep going
      print('GPT crapped out! Response:')
      print(resp_sorted)
      return np.nan

  old_index_list = [] # positions with no context
  new_index_list = [] # positions with new context
  for _ in range(reps):
    old_index_list.append(key_item_position(no_context))
    new_index_list.append(key_item_position(new_context))

  return pd.DataFrame({'old_list': old_index_list, 'new_list': new_index_list})

In [None]:
#@title test context on moral items
# (key item, context statement) pairs; each pair is run as an independent experiment.
context_tests = [
  ('eating a hamburger',
   "I am a vegan, and think it's very wrong to eat meat or consume any product from animals."),
  ('telling a white lie to be polite',
   "I strongly value honesty and believe it is always wrong to lie."),
  ('swearing in public',
   "I think it is morally wrong to be impolite and use crass language."),
  ('being nude at the beach',
   "I am a very modest person and believe that it is wrong to be nude in public."),
  ('driving slowly in the left lane',
   "I believe that driving so slowly that you impede traffic is very dangerous, immoral, and should be outlawed."),
]

context_dfs = []
for key_item, context in context_tests:
  # Pass a fresh copy of the baseline list, so an item added for one test can never
  # carry over into the next one.
  context_dfs.append(shift_item_with_context(list(action_list_1), 'moral', key_item, context))

context_df_1, context_df_2, context_df_3, context_df_4, context_df_5 = context_dfs

In [None]:
# Place the five context experiments side by side: two columns per experiment,
# "<n> -context" (default context) followed by "<n> +context" (new context).
all_df = pd.concat(
    (context_df_1, context_df_2, context_df_3, context_df_4, context_df_5),
    axis=1,
)
all_df.columns = [
    f"{test_n} {sign}context"
    for test_n in range(1, 6)
    for sign in ("-", "+")
]