In [1]:
!pip install openai

Collecting openai
  Downloading openai-1.51.2-py3-none-any.whl.metadata (24 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading openai-1.51.2-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.7/383.7 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (325 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.2/325.2 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jiter, openai
Successfully installed jiter-0.6.1 openai-1.51.2


In [None]:
import os
import re
import time

import openai
import pandas as pd

# Prefer a key supplied through the OPENAI_API_KEY environment variable so that no
# secret has to be typed into (and saved with) the notebook. The placeholder is kept
# as the fallback value, so behaviour is unchanged when the variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', '<your openai api key>')

# Instructions sent as the system message on every request.
_SYSTEM_PROMPT = (
    "You are an expert AI assistant tasked with evaluating the completeness and thoroughness of a review. Your goal is to classify reviews as either 'Exhaustive' or 'Trivial' based on their coverage of key sections and aspects. Here are the definitions of the terms:\n\n"

    "1. **Exhaustive**: The review provides comprehensive feedback across multiple sections and aspects of the paper, offering detailed insight into key areas such as methodology, results, experiments, and more. A review should be classified as 'Exhaustive' if it covers a wide range of sections and aspects (e.g., Abstract, Introduction, Methodology, etc.) with depth, leaving no significant sections or questions unaddressed.\n\n"

    "2. **Trivial**: The review lacks depth and does not sufficiently cover critical sections or aspects. It might focus only on one or two areas (e.g., comments on Abstract or Introduction) and fails to address significant sections or aspects in detail. A 'Trivial' review might provide shallow or vague comments that do not contribute much to improving the paper.\n\n"

    "Here are the key sections and aspects you should be aware of:\n"
    "Sections: Abstract (ABS), Introduction (INT), Related Works (RWK), Problem Definition/Idea (PDI), Data/Datasets (DAT), Methodology (MET), Experiments (EXP), Results (RES), Tables & Figures (TNF), Analysis (ANA), Future Work (FWK), Overall (OAL), Bibliography (BIB), External Knowledge (EXT).\n"
    "Aspects: Appropriateness (APR), Originality/Novelty (NOV), Significance/Impact (IMP), Meaningful Comparison (CMP), Presentation/Formatting (PNF), Recommendation (REC), Empirical/Theoretical Soundness (EMP), Substance (SUB), Clarity (CLA).\n\n"

    "For example, a review that comments on several sections like 'Methodology,' 'Experiments,' and 'Results' in detail and provides constructive feedback on 'Originality,' 'Significance,' and 'Empirical Soundness' should be considered exhaustive. A review that only comments on the 'Introduction' or 'Abstract' without providing much insight into other sections should be considered trivial.\n\n"

    "### Important:\n"
    "**Your decision MUST be directly supported by the step-by-step reasoning in the CoT (Chain of Thought).** "
    "Carefully evaluate each section and aspect mentioned in the CoT reasoning before making your final decision.\n\n"

    "### Double-Check:\n"
    "Before you finalize your decision, ask yourself: 'Does the reasoning I've provided support an 'Exhaustive' or 'Trivial' decision?' "
    "Your decision **must align** with the CoT reasoning."
)

# Either label as a whole word, in any letter case.
_LABEL_PATTERN = re.compile(r"\b(exhaustive|trivial)\b", re.IGNORECASE)

# A label that follows an explicit verdict marker on the same line,
# e.g. "Final Decision: **Exhaustive**".
_VERDICT_PATTERN = re.compile(
    r"\b(?:decision|classification|verdict)\b[^\n]{0,80}?\b(exhaustive|trivial)\b",
    re.IGNORECASE,
)


def _extract_decision(result):
    """Return 'Exhaustive' or 'Trivial' from a response text, or None if neither appears."""
    # A plain substring test is not enough: step-by-step reasoning routinely names
    # the label it rejects ("far from Trivial"). Prefer labels attached to an explicit
    # verdict marker; otherwise fall back to the last label mentioned anywhere.
    mentions = _VERDICT_PATTERN.findall(result) or _LABEL_PATTERN.findall(result)
    if not mentions:
        return None
    return mentions[-1].capitalize()


def make_api_call(prompt, model="gpt-4", max_tokens=3000, temperature=0.2):
    """Ask the chat model to classify one review and extract its verdict.

    Args:
        prompt: Text describing the review; it is placed at the start of the
            user message, followed by the evaluation instructions.
        model: Chat model name (default "gpt-4").
        max_tokens: Upper bound on the number of generated tokens (default 3000).
        temperature: Sampling temperature (default 0.2).

    Returns:
        A ``(decision, result)`` tuple of strings. ``decision`` is "Exhaustive",
        "Trivial" or "Error". ``result`` is the model's full response text; when
        the request raised, it is the exception message instead, and when the
        model returned no content it is a short explanatory message.
        "Error" is also returned, with the raw response text, when the response
        names neither label, so unparseable output is never silently counted as
        "Exhaustive".

    Exceptions raised by the request are caught, printed and reported through the
    "Error" decision rather than propagated, so a batch loop can keep going.
    """
    user_message = (
        f"{prompt}\n\n"
        "Evaluate the review's coverage of sections and aspects based on the reasoning provided. "
        "Please ensure the chain of thought reasoning is **step-wise**, following the Chain of Thought (CoT) reasoning process."
    )
    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_message},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        result = response.choices[0].message.content
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return "Error", str(e)

    # Guard against a missing or empty message body before trying to parse it.
    if not result:
        print("Error occurred: model returned no content")
        return "Error", "Model returned no content"

    decision = _extract_decision(result)
    if decision is None:
        print("Error occurred: no 'Exhaustive'/'Trivial' label found in the response")
        return "Error", result
    return decision, result

def _join_cells(frame):
    """Flatten a DataFrame row by row into a single space-separated string."""
    # Missing cells become empty text and every other value is stringified, so
    # numeric columns do not break the join. A frame without columns yields ''.
    return ' '.join(
        ' '.join('' if pd.isna(cell) else str(cell) for cell in row)
        for row in frame.itertuples(index=False, name=None)
    )


def generate_prompt(group):
    """Build the prompt text for one group of review rows.

    Args:
        group: DataFrame with a 'review_text' column and, optionally, columns
            whose names contain 'section_coverage' and/or 'aspect_coverage'.

    Returns:
        A single string of the form
        "Review Text: ... Section Coverage: ... Aspect Coverage: ...", where the
        review text joins all non-missing 'review_text' values with spaces and
        each coverage part joins the matching columns cell by cell, row by row.
        Missing cells contribute an empty string; absent coverage columns
        contribute an empty part instead of raising.
    """
    review_texts = ' '.join(str(text) for text in group['review_text'].dropna())
    section_coverage = _join_cells(group.filter(like='section_coverage'))
    aspect_coverage = _join_cells(group.filter(like='aspect_coverage'))
    return f"Review Text: {review_texts} Section Coverage: {section_coverage} Aspect Coverage: {aspect_coverage}"

def annotate_dataset(df, delay=2):
    """Label every (review_id, review_number) group of ``df`` through the chat model.

    For each group, a prompt is built with ``generate_prompt`` and sent with
    ``make_api_call``; the returned decision and response text are written to
    every row of that group.

    Args:
        df: DataFrame with 'review_id' and 'review_number' columns plus whatever
            columns ``generate_prompt`` reads. It is modified in place: the
            'Decision' and 'CoT_Reasoning' columns are (re)created, starting as None.
        delay: Seconds to pause between consecutive API calls (default 2).
            Values <= 0 disable the pause.

    Returns:
        The same ``df`` object, with 'Decision' and 'CoT_Reasoning' filled in.
    """
    df['Decision'] = None
    df['CoT_Reasoning'] = None

    grouped = df.groupby(['review_id', 'review_number'])
    last_position = grouped.ngroups - 1

    for position, ((review_id, review_number), group) in enumerate(grouped):
        prompt = generate_prompt(group)
        decision, cot_reasoning = make_api_call(prompt)

        # Build the row selector once and reuse it for both columns.
        rows = (df['review_id'] == review_id) & (df['review_number'] == review_number)
        df.loc[rows, 'Decision'] = decision
        df.loc[rows, 'CoT_Reasoning'] = cot_reasoning

        # Space out requests, but do not wait after the final one.
        if delay > 0 and position < last_position:
            time.sleep(delay)

    return df
# Input and output CSV locations (Kaggle input / working directories).
INPUT_CSV = '/kaggle/input/newversion/split_dataset_part_4.csv'
OUTPUT_CSV = '/kaggle/working/GPT_4.csv'

# Load the reviews, label every review group, then save the result
# without writing the DataFrame index as a column.
df = pd.read_csv(INPUT_CSV)
annotated_df = annotate_dataset(df)
annotated_df.to_csv(OUTPUT_CSV, index=False)