Default frame for running CDM tests

In [None]:
import sys
# sys.path.append("FULL_PATH_TO\FYP\Metrics")
from gptzero import GPTZeroAPI
import os
import csv
import time

In [None]:
# GPTZero API key. It is read from the GPTZERO_API_KEY environment variable so
# the secret does not have to be saved inside the notebook; when the variable
# is unset the value is an empty string, exactly as before, and can still be
# overwritten here by hand.
api_key = os.environ.get("GPTZERO_API_KEY", "")

class CsvProcessor:
    """Send one text column of a CSV file to GPTZero and save the verdicts.

    Every input row is copied to the output file with two extra columns: a
    binary label and the reduced API response (written as the ``str`` of a
    dict). Rows whose API call keeps failing are left out of the output.

    Label convention: the binary column is ``0`` when
    ``completely_generated_prob`` is above 0.6 and ``1`` otherwise.
    """

    # Number of API attempts made for a row before that row is skipped.
    max_retries = 3
    # Seconds to wait after a failed API attempt.
    retry_delay = 1

    def __init__(self, input_file, output_file):
        """Store the file paths and create the GPTZero client.

        Args:
            input_file: Path of the CSV file to read.
            output_file: Path of the CSV file to write (overwritten).
        """
        self.input_file = input_file
        self.output_file = output_file
        # The client is built from the module-level ``api_key``.
        self.api_client = GPTZeroAPI(api_key)

    # get the response from the API and process it
    def process_response(self, response):
        """Reduce an API response to the three scores of its first document.

        Args:
            response: Mapping returned by the API client; only its
                ``"documents"`` entry is read.

        Returns:
            A dict with ``average_generated_prob``,
            ``completely_generated_prob`` and ``overall_burstiness`` (each
            defaulting to 0 when absent), or an empty dict when the response
            contains no documents.
        """
        output_dict = {}
        documents = response.get("documents", [])

        if documents:
            document = documents[0]
            # Key order is kept stable because the dict is written to the CSV
            # in its string form.
            for key in (
                "average_generated_prob",
                "completely_generated_prob",
                "overall_burstiness",
            ):
                output_dict[key] = document.get(key, 0)

        return output_dict

    # get the label from the response
    def get_label(self, short_response):
        """Return 1 if ``completely_generated_prob`` exceeds 0.6, else 0.

        Args:
            short_response: Dict produced by :meth:`process_response`; a
                missing probability is treated as 0.
        """
        completely_generated_prob = short_response.get("completely_generated_prob", 0)
        return 1 if completely_generated_prob > 0.6 else 0

    def _predict_with_retries(self, text):
        """Call the API for *text*, retrying up to ``max_retries`` times.

        Returns:
            ``(True, result)`` on the first successful call, or
            ``(False, None)`` when every attempt raised.
        """
        for _ in range(self.max_retries):
            try:
                return True, self.api_client.text_predict(text)
            except Exception as e:
                # Best effort: the client's exception types are not known
                # here, so any failure is reported and retried.
                print(f"Error calling API: {e}. Retrying...")
                time.sleep(self.retry_delay)
        return False, None

    def _process_column(self, text_column, binary_header, full_header):
        """Classify *text_column* of every input row and write the output CSV.

        Args:
            text_column: Name of the input column holding the text to score.
            binary_header: Name of the output column for the binary label.
            full_header: Name of the output column for the reduced response.

        Raises:
            KeyError: If the input file has no column named *text_column*
                (this includes an empty input file).
        """
        # newline="" lets the csv module do its own newline handling, so
        # line breaks inside quoted fields are preserved.
        with open(self.input_file, "r", newline="") as csv_input_file:
            reader = csv.DictReader(csv_input_file)
            input_headers = list(reader.fieldnames or [])

            # Fail once, before the output file is touched, instead of
            # retrying and skipping every single row.
            if text_column not in input_headers:
                raise KeyError(
                    f"Column {text_column!r} not found in {self.input_file}"
                )

            # add the new headers
            headers = input_headers + [binary_header, full_header]

            with open(self.output_file, "w", newline="") as csv_output_file:
                # write the headers
                writer = csv.DictWriter(csv_output_file, fieldnames=headers)
                writer.writeheader()

                count = 0

                for row_number, row in enumerate(reader, start=1):
                    # call the API
                    succeeded, gpt_result = self._predict_with_retries(
                        row[text_column]
                    )
                    if not succeeded:
                        # Fall back to the 1-based row position when the file
                        # has no "index" column.
                        row_id = row.get("index", row_number)
                        print(f"Max retries reached for row {row_id}. Skipping...")
                        continue

                    # get full answer and label
                    gpt_full = self.process_response(gpt_result)
                    gpt_label = self.get_label(gpt_full)

                    # Invert the label: the stored value is 0 when the text
                    # was flagged as generated and 1 otherwise.
                    gpt_label = 0 if gpt_label == 1 else 1

                    row[binary_header] = gpt_label
                    row[full_header] = gpt_full

                    # count
                    count += 1
                    print("row + 1 =", count)
                    print(f"{binary_header} =", gpt_label)

                    writer.writerow(row)

    # process the csv file
    def process_csv(self):
        """Score the ``"GPT Answer"`` column and write the output file."""
        self._process_column(
            "GPT Answer",
            "GPTzero_answer_GPT_binary",
            "GPTzero_answer_GPT_full",
        )

    # process the csv file for human answers
    def process_csv_human(self):
        """Score the ``"Python Code"`` column and write the output file."""
        self._process_column(
            "Python Code",
            "GPTzero_answer_human_binary",
            "GPTzero_answer_human_full",
        )

In [None]:
# Driver cell: build the input/output file names for one data variant and run
# the GPTZero processor over it.

# select variant, count, cdm name and input directory
# `variant` and `count` are interpolated into both file names below;
# `cdm_name` is the prefix of the output file name.
variant = 10
count = "full"
cdm_name = "GPTzero"
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
input_directory = "/Users/yungxinshin/Monash/FYP/FYP/CDM/data"

# define input and output file
input_file = f"variant_{variant}_{count}.csv"
input_path = os.path.join(input_directory, input_file)

# The output name is a bare file name (no directory is joined onto it).
output_file = f"{cdm_name}_variants_prompt_{variant}_{count}.csv"

# process csv
# process_csv() is the variant that reads the "GPT Answer" column;
# process_csv_human() is not called here.
csv_processor = CsvProcessor(input_path, output_file)
csv_processor.process_csv()