In [1]:
import pandas as pd
import anthropic
from dotenv import load_dotenv
import os
import json
from openpyxl import load_workbook

# Pull the variables defined in the .env file into the process environment,
# then read the Anthropic API key from there.
load_dotenv()
api_key = os.environ.get("ANTHROPIC_API_KEY")

# Shared API client used by check_kriterien for every request.
client = anthropic.Anthropic(api_key=api_key)


# IDs whose API call should be repeated. When such an ID is already present in
# the output Excel file, its row is overwritten instead of a new row being added.
# check_kriterien also appends the IDs of failed rows to this list.
error_ids = []

# Module-level state
counter = 0  # number of rows written to the Excel file so far

# Input CSV files
input_path_piloting = 'Auswahlkriterien_Piloting_Literatur.csv'
input_path = 'Auswahlkriterien_Durchführung_Literatur.csv'

# Output Excel workbooks (must already exist; rows are added to them in place)
output_path_piloting = 'Auswahlkriterien_Piloting.xlsx'
output_path = 'Auswahlkriterien_Durchführung.xlsx'

# utf-8-sig strips a leading BOM if the CSV has one; the separator is the
# pandas default (comma).
df = pd.read_csv(input_path, encoding='utf-8-sig')
df_output = pd.read_excel(output_path)

# Optional mode 1: only re-process the rows listed in error_ids.
# df = df[df['ID'].isin(error_ids)]


# Optional mode 2: resume after the highest ID already present in the output file.
# max_id = df_output['ID'].max()
# if pd.isna(max_id):
#     print("No valid IDs in the file.")
# else:
#     print(f"Max ID: {max_id}")
#     df = df[df['ID'] > max_id]


def append_to_excel(row, output_path):
    """Write one evaluated row into the Excel workbook at ``output_path``.

    The row's values are written positionally, starting at column 1. If the
    row's ID is already present in column A of the active sheet AND is listed
    in the module-level ``error_ids``, that existing sheet row is overwritten;
    in every other case the values go into the first row below the current
    data. The workbook is saved after each call and the module-level
    ``counter`` is incremented.

    Args:
        row: Mapping-like record (a pandas Series when called via
            ``DataFrame.apply``) that contains at least an ``'ID'`` entry.
        output_path: Path of an existing ``.xlsx`` file to update in place.
    """
    global counter
    # Snapshot of the values to write, taken before the row is touched below.
    record = pd.DataFrame([row]).iloc[0]

    wb = load_workbook(output_path)
    ws = wb.active

    # Column A is assumed to hold the IDs (the header cell is part of the list).
    ids_in_sheet = [cell.value for cell in ws['A']]

    overwrite = row['ID'] in ids_in_sheet and row['ID'] in error_ids
    if overwrite:
        # List position -> sheet row (Excel rows are 1-indexed).
        target_row = ids_in_sheet.index(row['ID']) + 1
    else:
        target_row = ws.max_row + 1

    for col_idx, cell_value in enumerate(record, start=1):
        ws.cell(row=target_row, column=col_idx, value=cell_value)

    if overwrite:
        # Set on the in-memory row only; the snapshot written above does not
        # contain this entry, so it never reaches the workbook.
        row["Error"] = "Error"
        print(f"Replaced ID: {row['ID']}")
    else:
        print(f"Appended ID: {row['ID']}")

    wb.save(output_path)
    counter += 1
    print(f"Counter: {counter}")


# Criterion IDs in the order their columns are written to the spreadsheet.
_INCLUSION_IDS = ("IC1", "IC2")
_EXCLUSION_IDS = ("EC1", "EC2", "EC3", "EC4")
_CRITERION_IDS = _INCLUSION_IDS + _EXCLUSION_IDS

# Every column check_kriterien adds to a row, in output order. append_to_excel
# writes values by position, so success and error rows must share this layout.
_RESULT_COLUMNS = (
    *_CRITERION_IDS,
    "Auto Decision",
    *(f"{cid}_explanation" for cid in _CRITERION_IDS),
    "input_tokens",
    "output_tokens",
)


def _load_json_object(text):
    """Parse ``text`` as JSON, tolerating text around a single JSON object."""
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span, e.g. when the reply is wrapped
        # in a Markdown code fence or preceded by an introductory sentence.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])


def _flatten_evaluation(data):
    """Turn the parsed model answer into a flat ``column -> value`` dict."""
    evaluation = data["criteria_evaluation"]
    groups = (
        (evaluation["inclusion_criteria"], _INCLUSION_IDS),
        (evaluation["exclusion_criteria"], _EXCLUSION_IDS),
    )
    flat = {"Auto Decision": data["relevant"]}
    for criteria, criterion_ids in groups:
        for cid in criterion_ids:
            entry = criteria[cid]
            flat[cid] = entry.get("decision", None)
            flat[f"{cid}_explanation"] = entry.get("explanation", "")
    return flat


def check_kriterien(row):
    """Screen one study against the review criteria and persist the result.

    Sends the row's title and abstract to the Anthropic API, parses the JSON
    answer and stores the per-criterion decisions, the explanations, the
    overall decision (``"Auto Decision"``) and the token usage on the row.
    The row is then written to the Excel file at the module-level
    ``output_path`` via ``append_to_excel``.

    If anything fails (API error, unparsable or incomplete answer), the error
    is printed, the row's ID is recorded in the module-level ``error_ids`` and
    all result columns are set to ``None``, so error rows have the same column
    layout as successful ones.

    Args:
        row: Record with ``'ID'``, ``'Title'`` and ``'Abstract'`` entries
            (a pandas Series when called through ``DataFrame.apply``).

    Returns:
        The same ``row`` with the result columns added.
    """
    # Bound at module level so the most recent raw API response remains
    # available after the call (presumably for inspecting failures from the
    # notebook).
    global response
    try:
        title = row['Title']
        abstract = row['Abstract']

        systemprompt = "You are a reviewer conducting a systematic literature review, evaluating studies based on their title and abstracts and inclusion and exclusion criteria for their relevance. The title of the systematic literature review is 'Detection of visual deepfakes using machine learning'"

        # NOTE(review): the example inside <desired_output> is not valid JSON
        # (Python-style True/False, trailing comma after the first "decision").
        # If the model mirrors it literally, parsing fails and the row ends up
        # in error_ids. The prompt text is intentionally left unchanged here so
        # that new runs stay comparable with earlier ones.
        prompt = f"""
            1. Carefully read the title in <title> and abstract provided in <abstract>.
            2. Evaluate the study for relevance using the inclusion and exclusion criteria from <inclusion_criteria> and <exclusion_criteria>:
            - Follow this guided approach for each criterion:
                1. Use the **unique ID** assigned to each criterion for traceability.
                2. Identify if the criterion applies based on the title and abstract.
                3. Determine if the criterion is **met** (`true` or `false`)
                4. If an inclusion criterion is not met or an exclusion criterion is met, provide a brief explanation.
            3. Make a **final decision** about the study's relevance:
            - If **all inclusion criteria** are met and **no exclusion criteria** is met, mark the study as `"relevant": true`.
            - Otherwise, mark it as `"relevant": false`.
            4. Return the evaluation results as a transparent JSON object following the structure in <desired_output>. Note: Is just an example for the structure of the output!
            
            <title>
            {title}
            </title>

            <abstract>
            {abstract}
            </abstract>
            
            <inclusion_criteria>
            - **IC1**: The focus of the work is on deepfake detection in videos and photos (visual deepfakes).
            - **IC2**: Deepfake detection is achieved using machine learning methods (e.g. Logistic Regression, Support Vector Machines (SVMs), Convolutional Neural Networks (CNNs), Generative Adversarial Networks (GANs).
            </inclusion_criteria>

            <exclusion_criteria>
            - **EC1**: Not written in English.
            - **EC2**: The focus of the work is not on the detection of visual deepfakes.
            - **EC3**: Focus on preventive methods for detecting and preventing manipulations, such as visual encoding of original images, instead of directly detecting visual manipulations
            - **EC4**: It is a secondary study (e.g., survey, systematic literature review, meta-analysis
            </exclusion_criteria>
            
        <desired_output>
            {{
                "criteria_evaluation": {{
                    "inclusion_criteria": {{
                        "IC1": {{
                            "decision": True,
                        }},
                        "IC2": {{
                            "decision": False,
                            "explanation": "brief explanation here"
                        }}
                    }},
                    "exclusion_criteria": {{
                        "EC1": {{
                            "decision": False
                        }},
                        "EC2": {{
                            "decision": False
                        }},
                        "EC3": {{
                            "decision": True,
                            "explanation": "brief explanation here"
                        }},
                        "EC4": {{
                            "decision": False
                        }}
                    }}
                }},
                "relevant": False
            }}
            </desired_output>

            Just return the json object according to this structure.
            """
        # API call
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2048,
            system=systemprompt,
            temperature=0,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        # Parse the text of the first content block and flatten it into
        # columns. Nothing is written to `row` until everything has parsed, so
        # a failure cannot leave the row half-populated.
        data = _load_json_object(response.content[0].text)
        result = _flatten_evaluation(data)
        result["input_tokens"] = response.usage.input_tokens
        result["output_tokens"] = response.usage.output_tokens

    except Exception as e:
        # Best-effort batch processing: report the failure, remember the ID for
        # a later rerun and continue with the next row.
        print(f"Error bei ID: {row['ID']} : {e}")
        if row['ID'] not in error_ids:
            error_ids.append(row['ID'])
        # Blank out every result column (same names and order as on success).
        result = dict.fromkeys(_RESULT_COLUMNS)

    for column in _RESULT_COLUMNS:
        row[column] = result[column]

    append_to_excel(row, output_path)
    return row

# Evaluate every row of the input DataFrame. Each call to check_kriterien
# performs one API request and writes the row to the output Excel file as a
# side effect; the returned rows (with the added result columns) replace df.
df = df.apply(check_kriterien, axis=1)



FileNotFoundError: [Errno 2] No such file or directory: 'Literature_All.csv'

In [None]:
error_ids

[379, 490]