## Prompt Generator

- Uses data stored in separate files to create prompts
- Uses the `print_combined_text` function to print generated prompts and copy them to the clipboard
- Uses the `generate_prompts` functions to write generated prompts to JSON / Excel / CSV files

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pyperclip
import json

In [None]:
# I had to do this because I had unorganised data
# Load the three source tables used by the cells below.
# NOTE(review): the reader calls have no file path argument here; each one
# needs a path before this cell can run.
# answer_df is read below via its 'EXAMINEE_ID' and 'TEXT' columns.
answer_df = pd.read_excel()
# question_df is read below via its 'Q_NUM' and 'Q_TEXT' columns.
question_df = pd.read_excel()
# response_df is not referenced by any of the cells visible in this notebook.
response_df = pd.read_csv()

In [None]:
# Prompt prefix: English grading instructions for the
# "Coherence of Claim-Reasoning/Evidence Relationship" criterion, asking for
# a single integer grade from 1 to 5. The text ends by announcing the essay
# prompt and essay, so those are expected to be appended after it.

Explanation_scheme_eng_c3 = """
As an essay grader, your task is to evaluate and assign a grade on a scale from 1 to 5 to the provided Korean text. Please provide your grade as a single integer value without any additional text or formatting. The text you are grading is an essay on an essay prompt, where the problem is stated within the question itself. Your evaluation should be based on the "Coherence of Claim-Reasoning/Evidence Relationship."

Coherence of Claim-Reasoning/Evidence Relationship: Assess whether there is a strong and logical connection established between the claim and the accompanying reasoning/evidence. Verify if the provided reasons or evidence effectively support the claim or subclaims, and if the claim aligns well with the relevant issue.

Please use the following grading scheme:

5 points: Exceptionally appropriate reasoning and evidence, highly persuasive.
4 points: Reasoning and evidence are generally appropriate and persuasive.
3 points: Reasoning and evidence are somewhat acceptable but could be improved.
2 points: Many instances where reasoning and evidence are not appropriate or lack persuasiveness.
1 point: Most of the reasoning and evidence are not appropriate, lacking persuasiveness.

Now, carefully review the following essay prompt and essay, and assign a grade accordingly:
"""

In [None]:
# Function used to check the prompt / copy into clipboard for web api use
def print_combined_text(prompt_type, text_id):
    """Print one complete grading prompt and copy it to the clipboard.

    The prompt is the given prefix followed by the discussion point
    (question text) and the essay belonging to ``text_id``.

    Args:
        prompt_type: Prompt prefix text placed at the top of the prompt.
        text_id: Value looked up in ``answer_df['EXAMINEE_ID']``. Its second
            character is converted to ``int`` and matched against
            ``question_df['Q_NUM']`` (e.g. ``"A1-01"`` -> question 1).

    Raises:
        IndexError: If no essay or no question matches ``text_id``.
        ValueError: If the second character of ``text_id`` is not a digit.
    """
    text_column = answer_df[answer_df['EXAMINEE_ID'] == text_id]['TEXT'].values[0]
    # Take the first match as a scalar; formatting the whole ``.values`` array
    # would embed its repr (brackets and quotes) in the prompt.
    question_column = question_df[question_df['Q_NUM'] == int(text_id[1])]['Q_TEXT'].values[0]
    combined_text = f"{prompt_type}\n**Discussion point:**\n {question_column}\n\n**Essay to grade:**\n [{text_column}]"
    print(combined_text)
    pyperclip.copy(combined_text)
    print("Copied to clipboard!")

In [None]:
# Preview the prompt for essay "A1-01" and copy it to the clipboard.
# NOTE(review): `exo` is not defined in the visible cells; presumably it is a
# prompt-prefix string such as Explanation_scheme_eng_c3 - confirm.
print_combined_text(exo, "A1-01")

In [None]:
# CSV
def generate_promptqa(examinee_id, question_text, text_to_grade):
    """Return a two-element Series: the examinee ID and its prompt text.

    The prompt text places the question under a Korean "question" heading
    and the answer to grade, wrapped in square brackets, under a Korean
    "answer to grade" heading.
    """
    question_part = f"**질문:**\n {question_text}\n\n"
    answer_part = f"**채점할 답변:**\n [{text_to_grade}]"
    prompt_body = question_part + answer_part
    return pd.Series([examinee_id, prompt_body])

def generate_prompts(data):
    """Build a table of (examinee ID, prompt text) rows from essay data.

    Args:
        data: DataFrame with 'EXAMINEE_ID' and 'TEXT' columns. The second
            character of each EXAMINEE_ID is converted to ``int`` and matched
            against ``question_df['Q_NUM']`` to find the question text.

    Returns:
        DataFrame with one row per input row and two columns labelled 0
        (examinee ID) and 1 (prompt text). Empty, with the same two columns,
        when ``data`` has no rows.

    Raises:
        IndexError: If a row's question number has no match in question_df.
    """
    prompts = []
    for _, row in data.iterrows():
        examinee_id = row['EXAMINEE_ID']
        text_column = row['TEXT']
        question_column = question_df[question_df['Q_NUM'] == int(examinee_id[1])]['Q_TEXT'].values[0]
        prompts.append(generate_promptqa(examinee_id, question_column, text_column))

    if not prompts:
        # pd.concat raises ValueError on an empty list, so return an empty
        # frame with the same column labels the non-empty path produces.
        return pd.DataFrame(columns=[0, 1])

    # Each Series becomes a column; transpose so each prompt is a row.
    return pd.concat(prompts, axis=1).T

# Build the prompt table from every essay in answer_df.
prompts_df = generate_prompts(answer_df)  # Pass the correct DataFrame object
# Write the table as CSV without the row index.
# NOTE(review): the output path is blank here; set it before running.
prompts_df.to_csv(r'', index = False)

In [None]:
def generate_prompt(essay_prompt, essay, grade):
    """Package one example as an instruction / input / output record.

    The essay prompt becomes the "instruction", the essay the "input" and
    the grade the "output".
    """
    field_names = ("instruction", "input", "output")
    field_values = (essay_prompt, essay, grade)
    return dict(zip(field_names, field_values))

def generate_prompts(json_file_path):
    """Turn the entries of a JSON file into instruction/input/output records.

    Each entry supplies its question number under 'Q', its essay under
    'TEXT' and its grade under 'C1'. The question number is matched against
    ``question_df['Q_NUM']`` to obtain the essay prompt.

    Returns:
        List of records built by ``generate_prompt``, in file order.
    """
    with open(json_file_path, 'r', encoding='utf-8') as handle:
        records = json.load(handle)

    def _to_prompt(record):
        # Read the entry fields first, then resolve the question text.
        q_num = record['Q']
        body = record['TEXT']
        score = record['C1']
        matching_rows = question_df[question_df['Q_NUM'] == q_num]
        question_text = matching_rows['Q_TEXT'].values[0]
        return generate_prompt(question_text, body, score)

    return [_to_prompt(record) for record in records]


# Set file path
# NOTE(review): the input path is blank here; set it before running.
json_file_path = r""
prompts_list = generate_prompts(json_file_path)

# Saving the prompts list as a JSON file
# ensure_ascii=False writes non-ASCII text as-is instead of \u escapes.
# NOTE(review): the output path is blank here; set it before running.
with open(r'', 'w', encoding='utf-8') as json_file:
    json.dump(prompts_list, json_file, ensure_ascii=False, indent=4)


In [None]:
# Saving data to match LoRA templates

def generate_prompt(essay_prompt, essay, grade):
    """Return a record with "instruction", "input" and "output" keys.

    The keys hold the essay prompt, the essay and the grade respectively.
    """
    record = dict(instruction=essay_prompt, input=essay, output=grade)
    return record

def generate_prompts(json_file_path, excel_file_path):
    """Build instruction/input/output records using grades from an Excel sheet.

    Args:
        json_file_path: JSON file whose entries provide 'EXAMINEE_ID', 'Q'
            (question number) and 'TEXT' (essay).
        excel_file_path: Excel file indexed by its '글 자료 ID' column; the
            grade is read from its 'C1_responses_gpt3.5' column.

    Returns:
        List of records built by ``generate_prompt`` for the entries whose
        EXAMINEE_ID appears in the Excel index. Other entries are skipped.

    Raises:
        IndexError: If an entry's question number has no match in question_df.
    """
    excel_data = pd.read_excel(excel_file_path, index_col='글 자료 ID')
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    prompts = []
    for entry in data:
        examinee_id = entry["EXAMINEE_ID"]
        question_number = entry['Q']

        # Entries without a row in the Excel sheet have no grade; skip them.
        if examinee_id not in excel_data.index:
            continue

        # Get the essay prompt from question_df using the question number.
        essay_prompt = question_df[question_df['Q_NUM'] == question_number]['Q_TEXT'].values[0]
        grade = excel_data.loc[examinee_id, 'C1_responses_gpt3.5']
        # pandas hands back NumPy scalars (e.g. numpy.int64), which json.dump
        # cannot serialise; convert them to the native Python value.
        if isinstance(grade, np.generic):
            grade = grade.item()
        essay = entry['TEXT']

        prompts.append(generate_prompt(essay_prompt, essay, grade))

    return prompts

# Set file paths
# NOTE(review): both input paths are blank here; set them before running.
json_file_path = r""
excel_file_path = r""
prompts_list = generate_prompts(json_file_path, excel_file_path)

# Save the prompts list as a JSON file
# ensure_ascii=False writes non-ASCII text as-is instead of \u escapes.
# NOTE(review): the output path is blank here; set it before running.
with open(r'', 'w', encoding='utf-8') as json_file:
    json.dump(prompts_list, json_file, ensure_ascii=False, indent=4)