# In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import os
from openai import OpenAI
import re
# OpenAI credentials. A key may be pasted into API_KEY for a quick local run,
# but leaving it empty and exporting OPENAI_API_KEY keeps the secret out of the
# source file; the environment variable is used whenever API_KEY is empty.
API_KEY = ''
client = OpenAI(api_key=API_KEY or os.environ.get('OPENAI_API_KEY'))

# Load the data: one row per measurement statement to be evaluated.
file_name = 'ist4.csv'
df = pd.read_csv(file_name)

# Per-row labels, stored as the strings 'True' / 'False'.
ground_truths = []
predictions = []

# Loop invariants, built once instead of on every row.
system_prompt = "Evaluate the following electrical measure observation statement. Answer with just one 'True' or 'False' statement at the beginning of the answer. "
# First stand-alone True/False word in the answer; case-insensitive so that
# replies such as "true" or "FALSE" are recognised as well.
verdict_pattern = re.compile(r'\b(True|False)\b', re.IGNORECASE)

for index, row in df.iterrows():

    # Alternative column pair (kept for data sets that use these headers):
    #test_string = f'Is {row["Voltage Measurements"]}  {row["Acceptance limits"]} ?'
    user_prompt = f'Is {row["Measured Values"]}  {row["Expected Values (*)"]} ?'

    # Call the OpenAI API to get the validation result
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=2000,
    )

    # Ask the operator for the reference label. The answer is normalised
    # (surrounding whitespace, letter case) and re-requested until it is one of
    # the two accepted values, so a typo cannot silently be counted as 'False'.
    while True:
        ground_truth = input('Enter the ground truth (True/False): ').strip().capitalize()
        if ground_truth in ('True', 'False'):
            break
        print("Please answer with 'True' or 'False'.")

    # Extract the generated text from the API response; the content may be
    # missing, in which case it is handled like an answer without a verdict.
    validation_result = response.choices[0].message.content or ''

    match = verdict_pattern.search(validation_result)
    if match is None:
        # No verdict found: count it as 'False' (same as any non-'True' answer)
        # instead of crashing on match.group(), and make the case visible.
        print(f'Warning: no True/False verdict in model answer, counting it as False: {validation_result!r}')
        prediction = 'False'
    else:
        # Normalise the matched word to the canonical 'True' / 'False' spelling.
        prediction = match.group(0).capitalize()

    ground_truths.append(ground_truth)
    predictions.append(prediction)

# Encode the string labels as integers: 'True' -> 0, anything else -> 1.
# NOTE(review): with this encoding label 1 (i.e. 'False') is the positive class
# for precision/recall/F1 and for the TP/FP/TN/FN counts below -- confirm that
# this is the intended reading of the results.
ground_truths_binary = [0 if gt == 'True' else 1 for gt in ground_truths]
predictions_binary = [0 if pred == 'True' else 1 for pred in predictions]

# Calculate the confusion matrix. Both labels are passed explicitly so that the
# matrix is always 2x2; without them a run in which only one class occurs
# produces a 1x1 matrix and the four-way unpacking fails.
tn, fp, fn, tp = confusion_matrix(
    ground_truths_binary, predictions_binary, labels=[0, 1]
).ravel()

# Calculate the accuracy, precision, recall, and F1-score
accuracy = accuracy_score(ground_truths_binary, predictions_binary)
precision = precision_score(ground_truths_binary, predictions_binary)
recall = recall_score(ground_truths_binary, predictions_binary)
f1 = f1_score(ground_truths_binary, predictions_binary)

# Print results
print(f'True Positives: {tp}, False Positives: {fp}, True Negatives: {tn}, False Negatives: {fn}')
print(f'Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

# Persist this run as a single summary row in the shared results file.
summary_row = {
    'File Name': file_name,
    'Number of Tests': len(df),
    'True Positives': tp,
    'False Positives': fp,
    'True Negatives': tn,
    'False Negatives': fn,
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1,
}
result_df = pd.DataFrame([summary_row])

# Append mode, no header line and no index column: each run adds exactly one
# data row to 'result_gpt4.csv'.
result_df.to_csv('result_gpt4.csv', header=False, index=False, mode='a')
