In [1]:
from openai import OpenAI
import os
import pandas as pd
import json

In [2]:
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"))

#API confing
with open("Evaluation_Prompt.json", "r") as file:
    data = json.load(file)
    
texts = data["texts"]
prompts = data["prompts"]

#Models to tests
models = ["gpt-4o", "gpt-4o-mini", "o1-mini", "gpt-4-turbo"]

#Initialazing list to store results
results = []

#Combinations loop
for model in models:
    for prompt in prompts:
        for text in texts:
            for repetition in range(5):
                try:
                    #Model input
                    message = prompt + text

                    #OpenAI API calling
                    if model == "o1-mini":
                        response = client.chat.completions.create(
                            model=model,
                            messages=[{"role": "user", "content": message}]
                            )
                    else:
                        response = client.chat.completions.create(
                            model=model,
                            messages=[{"role": "user", "content": message}],
                            temperature=0.5
                            )

                    # storing results
                    results.append({
                        "model": model,
                        "prompt": prompt,
                        "text": text,
                        "repetition": repetition + 1,
                        "output": response.choices[0].message.content
                    })

                except Exception as e:
                    #Ignoring errors
                    results.append({
                        "model": model,
                        "prompt": prompt,
                        "text": text,
                        "repetition" : repetition + 1,
                        "error": str(e)
                    })

print("Iteration completed successfully")

Iteration completed successfully


In [3]:
# Saving results in JSON file
with open("Iteration_results.json", "w") as file:
    json.dump(results, file, indent=4)

#Dataframing results
df = pd.DataFrame(results)
df.to_csv("Iteration_results.csv", index=False)
df.to_excel("Iteration_results.xlsx")

print("Iterarion result saved. Results saving in 'Iteration_results.json', 'Iteration_results.csv', and 'Iteration_results.xlsx'")

Iterarion result saved. Results saving in 'Iteration_results.json', 'Iteration_results.csv', and 'Iteration_results.xlsx'


In [4]:
df

Unnamed: 0,model,prompt,text,repetition,output
0,gpt-4o,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 1 : Dairy beef could help cut greenhouse ...,1,**Suggestion**: Highly appropriate; text is co...
1,gpt-4o,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 1 : Dairy beef could help cut greenhouse ...,2,**Suggestion**: Highly appropriate; text is co...
2,gpt-4o,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 1 : Dairy beef could help cut greenhouse ...,3,**Suggestion**: Highly appropriate; text is co...
3,gpt-4o,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 1 : Dairy beef could help cut greenhouse ...,4,**Suggestion**: Highly appropriate; text is co...
4,gpt-4o,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 1 : Dairy beef could help cut greenhouse ...,5,**Suggestion**: Highly appropriate; text is co...
...,...,...,...,...,...
295,gpt-4-turbo,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 15 : Killing of UnitedHealthcare CEO prom...,1,**Suggestion**: Inappropriate; not recommended...
296,gpt-4-turbo,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 15 : Killing of UnitedHealthcare CEO prom...,2,**Suggestion**: Inappropriate; not recommended...
297,gpt-4-turbo,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 15 : Killing of UnitedHealthcare CEO prom...,3,**Suggestion**: Inappropriate; not recommended...
298,gpt-4-turbo,Prompt : \n#INSTRUCTION\n1. Analyse whether a ...,Text 15 : Killing of UnitedHealthcare CEO prom...,4,**Suggestion**: Inappropriate; not recommended...


In [5]:
# Extract required columns
Results = df[['model', 'text', 'output']]

# Extracting Text Id
Results['text'] = df['text'].str[:7]

# Extracting Suggestion

Results['output'] = df['output'].str.extract(r'\*\*Suggestion\*\*:\s*(Highly appropriate|Mostly appropriate|Partially appropriate|Inappropriate|Highly inappropriate)')[0]
Results

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Results['text'] = df['text'].str[:7]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Results['output'] = df['output'].str.extract(r'\*\*Suggestion\*\*:\s*(Highly appropriate|Mostly appropriate|Partially appropriate|Inappropriate|Highly inappropriate)')[0]


Unnamed: 0,model,text,output
0,gpt-4o,Text 1,Highly appropriate
1,gpt-4o,Text 1,Highly appropriate
2,gpt-4o,Text 1,Highly appropriate
3,gpt-4o,Text 1,Highly appropriate
4,gpt-4o,Text 1,Highly appropriate
...,...,...,...
295,gpt-4-turbo,Text 15,Inappropriate
296,gpt-4-turbo,Text 15,Inappropriate
297,gpt-4-turbo,Text 15,Inappropriate
298,gpt-4-turbo,Text 15,Inappropriate


In [6]:
Results.to_csv('Results.csv')
Results.to_excel('Results.xlsx')

In [3]:
Results = pd.read_csv('Results.csv', index_col=0)

In [4]:
Results

Unnamed: 0,model,text,output
0,gpt-4o,Text 1,Highly appropriate
1,gpt-4o,Text 1,Highly appropriate
2,gpt-4o,Text 1,Highly appropriate
3,gpt-4o,Text 1,Highly appropriate
4,gpt-4o,Text 1,Highly appropriate
...,...,...,...
295,gpt-4-turbo,Text 15,Inappropriate
296,gpt-4-turbo,Text 15,Inappropriate
297,gpt-4-turbo,Text 15,Inappropriate
298,gpt-4-turbo,Text 15,Inappropriate


In [15]:
expected_output = Results.groupby('text')['output'].describe().reset_index()[['text','top']]

#Deleting blank caracters
expected_output['text'] = expected_output['text'].str.strip()
expected_output['top'] = expected_output['top'].str.strip()
expected_output

Unnamed: 0,text,top
0,Text 1,Highly appropriate
1,Text 10,Mostly appropriate
2,Text 11,Highly appropriate
3,Text 12,Mostly appropriate
4,Text 13,Mostly appropriate
5,Text 14,Partially appropriate
6,Text 15,Inappropriate
7,Text 2,Highly inappropriate
8,Text 3,Highly appropriate
9,Text 4,Highly inappropriate


In [16]:
expected_output.to_csv('Expected_Outout.csv')

In [17]:
expected_output.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    15 non-null     object
 1   top     15 non-null     object
dtypes: object(2)
memory usage: 372.0+ bytes
