In [5]:
import os
import openai
import pandas as pd
import ast
from dotenv import load_dotenv
# Load settings from the local .env file before reading the environment below.
load_dotenv() # read local .env file

# The API key is taken from the environment instead of being hard-coded in the
# notebook; a missing 'OPENAI-APIKEY' entry raises KeyError on this line.
openai.api_key  = os.environ['OPENAI-APIKEY']

In [6]:
# Helper that sends a chat conversation to the OpenAI API and returns the reply text
def get_completion_from_messages(messages, model="gpt-4", temperature=0, max_tokens=500):
    """Request a chat completion and return the text of its first choice.

    Args:
        messages: Chat messages, forwarded unchanged as the ``messages`` argument.
        model: Model name forwarded to the API.
        temperature: Forwarded to the API as ``temperature``.
        max_tokens: Forwarded to the API as ``max_tokens``.

    Returns:
        The ``"content"`` entry of the first choice's message.
    """
    request_options = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    completion = openai.ChatCompletion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message["content"]

In [7]:
# Ecological issue categories; the classifier's output list follows this order
ecological_issues = [
    'Pollution and Waste Management',
    'Climate Change and Energy Use',
    'Natural Resource Management and Biodiversity',
    'Wildlife and Agriculture',
    'Societal Challenges',
]

In [18]:
# Function to build instruction to be sent to OpenAI
def find_ecological_issue(user_input,ecological_issues):
    """Ask the model which ecological issues a block of text is about.

    Builds a single system message that embeds the issue list and the text to
    analyse, sends it through ``get_completion_from_messages`` and returns the
    raw reply.

    Args:
        user_input: The text to classify (prompt plus completions).
        ecological_issues: List of issue names; it is interpolated into the
            instruction so the model can answer position by position.

    Returns:
        The model's reply as a string; the instruction asks for a Python list
        literal of 1/0 values such as ``"[1,0,0,1,0]"``.
    """
    delimiter = "####"

    # Remove any delimiter sequence from the text itself so the analysed text
    # cannot close the delimited block early.
    cleaned_input = str(user_input).replace(delimiter, "")

    # The text is wrapped in the same delimiter the instruction announces
    # (previously the instruction said "####" but the block used "###").
    system_message = f"""
    You will be provided with a prompt and two completions in a block of text. \
    Output a python list, where each element is 1 or 0 depending on whether the text
    is about the corresponding ecological issue in this list of five {ecological_issues} or not.
    For example, if the text is "plastic is bad for marine life", the output should be:
    [1,0,0,1,0]. The text you have to analyse is delimited by {delimiter}.
    
    {delimiter}
    
   {cleaned_input}

    
    {delimiter}
    
      
    """

    messages = [
        {'role': 'system', 'content': system_message},
    ]

    return get_completion_from_messages(messages)


In [101]:
%%time
# List to hold the results
results_list = []

# Read CSV file
df = pd.read_csv('H4rmony.csv')


def _parse_issue_flags(response_text, expected_count):
    """Turn the model's reply into a validated list of 0/1 flags.

    The reply is expected to contain a Python list literal. Any text before
    the first '[' or after the last ']' is ignored, so a reply that wraps the
    list in a sentence still parses.

    Raises:
        ValueError: if the reply holds no parsable list, the list does not
            have ``expected_count`` elements, or an element is not 0 or 1.
    """
    text = str(response_text).strip()
    start = text.find('[')
    end = text.rfind(']')
    if start != -1 and end > start:
        text = text[start:end + 1]

    try:
        # literal_eval only evaluates Python literals, never arbitrary code
        flags = ast.literal_eval(text)
    except (ValueError, SyntaxError) as err:
        raise ValueError(f"not a Python list literal: {response_text!r}") from err

    if not isinstance(flags, (list, tuple)):
        raise ValueError(f"expected a list, got {type(flags).__name__}: {response_text!r}")
    if len(flags) != expected_count:
        raise ValueError(f"expected {expected_count} flags, got {len(flags)}: {response_text!r}")
    if any(flag not in (0, 1) for flag in flags):
        raise ValueError(f"flags must be 0 or 1: {response_text!r}")
    return list(flags)


# Loop through each row of the DataFrame
for index, row in df.iterrows():
    # Join columns with line feeds
    chat = '\n'.join([str(row['Prompt']), str(row['BetterCompletion']), str(row['WorseCompletion'])])

    # Pass the variable to your function receive list inside string
    resultstr = find_ecological_issue(chat, ecological_issues)

    # Convert to list, failing with the offending row Id instead of a bare
    # parsing error (or a column-count mismatch much later on)
    try:
        resultlist = _parse_issue_flags(resultstr, len(ecological_issues))
    except ValueError as err:
        raise ValueError(f"Row Id {row['Id']}: {err}") from err

    # Merge id and results
    result = [row['Id'], *resultlist]

    # Append the result to the list
    results_list.append(result)


Wall time: 2min 37s


In [103]:
# Build the results table: one row per input Id, one 0/1 column per issue
# (column abbreviations follow the order of the ecological issue list).
df = pd.DataFrame(results_list,columns=['Id','PWM','CCEU','NRMB','WA','SC'])

# index=False keeps the meaningless RangeIndex out of the file; without it the
# CSV gains an unnamed first column that reappears as "Unnamed: 0" when read back.
df.to_csv("ecoissues.csv", index=False)

In [75]:
# The dataframe shows the id of each prompt and which issues it addresses
# PWM = Pollution and Waste Management
# CCEU = Climate Change and Energy Use
# NRMB = Natural Resource Management and Biodiversity
# WA = Wildlife and Agriculture
# SC = Societal Challenges
df

Unnamed: 0,Id,PWM,CCEU,NRMB,WA,SC
0,1,0,0,1,0,1
1,2,0,1,1,0,0
2,3,1,0,0,0,0
3,4,0,0,1,1,0
4,5,0,1,0,0,0
...,...,...,...,...,...,...
95,96,1,0,0,0,1
96,97,0,1,0,0,1
97,98,0,0,1,0,1
98,99,0,1,0,0,0


In [102]:
# Print the raw rows gathered by the classification loop; each entry is the
# row Id followed by the per-issue values parsed from the model's reply.
print(results_list)


[[1468, 0, 1, 0, 0, 0], [1469, 0, 0, 1, 1, 0], [1470, 0, 0, 1, 1, 0], [1471, 0, 0, 1, 1, 1], [1472, 0, 0, 1, 0, 1], [1473, 0, 0, 1, 0, 0], [1474, 1, 1, 0, 0, 0], [1475, 0, 0, 0, 1, 1], [1476, 0, 1, 0, 1, 0], [1477, 1, 0, 0, 1, 0], [1478, 0, 1, 0, 0, 0], [1479, 0, 1, 0, 0, 1], [1480, 1, 0, 0, 0, 0], [1481, 0, 1, 0, 0, 0], [1482, 0, 0, 1, 1, 0], [1483, 1, 0, 0, 0, 1], [1484, 0, 0, 1, 1, 0], [1485, 0, 0, 1, 1, 0], [1486, 0, 0, 1, 0, 0], [1487, 1, 1, 0, 0, 0], [1488, 1, 0, 0, 0, 1], [1489, 0, 0, 1, 0, 0], [1490, 0, 0, 1, 0, 1], [1491, 0, 0, 0, 1, 1], [1492, 0, 0, 1, 0, 1], [1493, 0, 0, 1, 1, 1], [1494, 0, 0, 1, 1, 1], [1495, 0, 0, 1, 0, 1], [1496, 0, 0, 1, 0, 0], [1497, 0, 0, 0, 1, 1], [1498, 0, 0, 1, 1, 0], [1499, 0, 0, 0, 0, 1], [1500, 0, 0, 1, 1, 0], [1501, 0, 0, 1, 0, 0], [1502, 0, 0, 1, 0, 1], [1503, 0, 0, 1, 1, 0], [1504, 0, 0, 1, 1, 0], [1505, 0, 0, 1, 0, 1], [1506, 0, 0, 0, 1, 1], [1507, 0, 0, 0, 0, 1], [1508, 0, 0, 1, 1, 0], [1509, 0, 0, 0, 1, 0], [1510, 0, 0, 1, 0, 1], [1511, 0, 