# Exported from notebook cell In [8].
import csv
import threading
from concurrent.futures import ThreadPoolExecutor

from openai import OpenAI

def read_csv_file(file_path):
    """Read a CSV file and return its rows as a list of dicts.

    The first line of the file is used as the header; each following record
    becomes one dict keyed by those header names.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, or ``None`` if the file could not
        be opened or read (the error is printed, not raised).
    """
    try:
        # newline='' hands line-ending handling to the csv module, so quoted
        # fields that contain embedded line breaks are returned unaltered
        # instead of being rewritten by universal-newline translation.
        with open(file_path, mode='r', newline='') as csvfile:
            return list(csv.DictReader(csvfile))
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error reading file {file_path}: {e}")
        return None

def write_csv_file(file_path, data):
    """Write evaluation records to a CSV file with a fixed header.

    Args:
        file_path: Destination path; an existing file is overwritten.
        data: Iterable of dicts keyed by 'response', 'file_content',
            'evaluation', 'score' and 'reason'. Entries that are ``None``
            (slots for rows that were never processed) are skipped instead
            of aborting the whole write.

    Returns:
        None. I/O errors are printed rather than raised.
    """
    fieldnames = ['response', 'file_content', 'evaluation', 'score', 'reason']
    try:
        with open(file_path, mode='w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            # DictWriter cannot serialise None; drop unfilled slots so one
            # skipped row does not discard every completed evaluation.
            writer.writerows(row for row in data if row is not None)
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error writing to file {file_path}: {e}")

def evaluate_response(client, response, file_content, prompt):
    """Ask the chat model to evaluate one response against its source text.

    The instructions in ``prompt`` are sent verbatim, followed by the response
    and the file content as clearly labelled sections. (Substituting the bare
    words "response" / "file_content" inside the prompt would rewrite ordinary
    occurrences of those words in the instructions, and would drop the file
    content entirely whenever the prompt never mentions "file_content".)

    Args:
        client: Object exposing ``chat.completions.create(model=, messages=)``
            (an ``openai.OpenAI`` client in this file).
        response: Text to be evaluated.
        file_content: Reference/context text the response is judged against.
        prompt: Evaluation instructions for the model.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``None`` if
        the request or the extraction of the reply failed (the error is
        printed).
    """
    try:
        full_prompt = (
            f"{prompt}\n\n"
            f"### response\n{response}\n\n"
            f"### file_content\n{file_content}"
        )
        api_response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "user", "content": full_prompt},
            ],
        )
        return api_response.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort boundary: a failed call must not abort the other rows.
        print(f"Failed to evaluate response: {e}")
        return None

def process_row(client, row, prompt, results, index, lock):
    """Evaluate one CSV row and store the outcome at ``results[index]``.

    Rows whose "response" or "file_content" field is missing or empty are
    ignored, leaving ``results[index]`` untouched. Otherwise the evaluation
    text is split on '|': the first piece becomes the score and the second the
    reason; with fewer than two pieces, placeholder values are stored instead.
    A falsy evaluation is recorded as a failure.

    Args:
        client: API client forwarded to ``evaluate_response``.
        row: Mapping for one CSV record.
        prompt: Evaluation instructions forwarded to ``evaluate_response``.
        results: Shared list that receives the result dict.
        index: Position in ``results`` reserved for this row.
        lock: Context manager held while writing to ``results``.
    """
    answer = row.get("response", "")
    context = row.get("file_content", "")
    # Guard clause: nothing to evaluate unless both fields carry text.
    if not (answer and context):
        return

    verdict = evaluate_response(client, answer, context, prompt)
    if not verdict:
        outcome = ('Failed to evaluate', 'N/A', 'Failed to evaluate response')
    else:
        pieces = verdict.split('|')
        if len(pieces) < 2:
            grade, rationale = "N/A", "No reason provided"
        else:
            grade, rationale = pieces[0].strip(), pieces[1].strip()
        outcome = (verdict, grade, rationale)

    columns = ('response', 'file_content', 'evaluation', 'score', 'reason')
    record = dict(zip(columns, (answer, context) + outcome))
    with lock:
        results[index] = record

def process_csv_file(input_file, output_file, prompt, max_workers=8):
    """Evaluate every row of ``input_file`` and write results to ``output_file``.

    Rows are evaluated concurrently by a bounded pool of worker threads, and
    each result is stored at the index of its input row so output order
    follows input order. Rows that produced no result (skipped by
    ``process_row``) are left out of the output file.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.
        prompt: Evaluation instructions passed to every row evaluation.
        max_workers: Upper bound on concurrent evaluations. One thread per
            row would open an unbounded number of simultaneous API requests
            on large files.

    Returns:
        None. Nothing is written if the input file cannot be read.
    """
    # SECURITY: no API key in source. With no api_key argument the OpenAI
    # client reads the OPENAI_API_KEY environment variable. The key that was
    # previously hard-coded here should be treated as leaked and revoked.
    client = OpenAI()

    rows = read_csv_file(input_file)
    if rows is None:
        return

    results = [None] * len(rows)
    lock = threading.Lock()

    # Leaving the with-block waits for every submitted task to finish.
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [
            pool.submit(process_row, client, row, prompt, results, index, lock)
            for index, row in enumerate(rows)
        ]

    # Executor tasks keep their exceptions inside the future; report them so
    # a failing row is not lost silently.
    for future in futures:
        error = future.exception()
        if error is not None:
            print(f"Failed to process row: {error}")

    # Skipped rows leave None in their slot, which csv.DictWriter cannot write.
    write_csv_file(output_file, [r for r in results if r is not None])

# Main execution block: evaluate the concept cards in one hard-coded CSV file.
if __name__ == "__main__":
    input_file = "/Users/parthagarwal/Desktop/Concept_Cards/bio2.csv"
    output_file = "/Users/parthagarwal/Desktop/Concept_Cards/evaluated_responses_bio.csv"
    # The three closing options must be quoted *inside* the string. Previously
    # the unescaped quotes ended the literal early and the bare `or` was parsed
    # as a Python operator, silently truncating the prompt after "good,".
    # NOTE(review): process_row splits the model reply on '|' to get score and
    # reason, but this prompt never asks for that format -- confirm intent.
    user_prompt = (
        'Evaluate the concept cards in the "response" column of the CSV file '
        'based on correctness, completeness, formula accuracy, and clarity '
        'from a NEET biology exam preparation perspective. '
        'Determine if the content accurately reflects the context provided, '
        'comprehensively covers all key concepts and data, accurately presents '
        'LaTeX formulas, and if the information is clear and understandable to '
        'a student. '
        'Assign a score from 1 to 100 for each parameter, with 1 being the '
        'worst and 100 being the best, considering a slightly strict scoring '
        'criteria. '
        'Provide an overall score by averaging the parameter scores. '
        'Additionally, assign a confidence score indicating your level of '
        'confidence in the provided scoring.'
        'The evaluation should be strict and large penalties on score should '
        'be given if parameter requirements are not satisfied. '
        'Finally, conclude with one of the following reasons for the overall '
        'score: "There is scope for improvement in the response," '
        '"The response generated is good," or "The response is not good."'
    )
    process_csv_file(input_file, output_file, user_prompt)
