<a href="https://colab.research.google.com/github/Tomo-for-lab/Code-of-automating-DE/blob/main/analysis/score_calculation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preparation

In [None]:
import copy
import glob
import json
import logging
import os
import re
from urllib.parse import urljoin

import ipykernel
import numpy as np
import pandas as pd
import requests
from google.colab import auth
from googleapiclient.discovery import build
from notebook.notebookapp import list_running_servers

Change the working directory to the path of the current notebook

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so that files stored in Drive can be
# reached through ordinary filesystem paths.
drive.mount('/content/drive')


def get_notebook_path():
    """Return the path of the notebook that is running on this kernel.

    The kernel ID is taken from the kernel's connection-file name, then the
    sessions of every running notebook server are queried until one is found
    whose kernel ID matches.

    Returns:
        The server's ``notebook_dir`` joined with the session's notebook path,
        or ``None`` when no running server reports a session for this kernel.

    Raises:
        AttributeError: if the connection-file name does not look like
            ``kernel-<id>.json``.
    """
    # Raw string with an escaped dot so that only a literal ".json" suffix matches.
    kernel_id = re.search(r'kernel-(.*)\.json', ipykernel.get_connection_file()).group(1)
    for server in list_running_servers():
        response = requests.get(urljoin(server['url'], 'api/sessions'),
                                params={'token': server.get('token', '')})
        for session in json.loads(response.text):
            if session['kernel']['id'] == kernel_id:
                relative_path = session['notebook']['path']
                return os.path.join(server['notebook_dir'], relative_path)
    # No server knows this kernel; make the "not found" result explicit.
    return None

def get_folder_path(folder_id):
    """Build the '/'-joined Drive path of a folder by walking up its parents.

    Args:
        folder_id: Drive ID of the folder; a falsy value yields ''.

    Returns:
        The folder's name prefixed by the names of its ancestors (following
        the first listed parent at each level), separated by '/'.
    """
    if not folder_id:
        return ''

    metadata = drive_service.files().get(fileId=folder_id, fields="name, parents").execute()
    name = metadata.get('name')
    parent_ids = metadata.get('parents')

    # A folder without parents is the top of the chain.
    if not parent_ids:
        return name
    return get_folder_path(parent_ids[0]) + '/' + name

def get_file_path(file_id):
    """Return the Drive path of the folder that contains a file.

    Args:
        file_id: Drive ID of the file.

    Returns:
        The '/'-joined path of the file's first parent folder as built by
        ``get_folder_path``; the file's own name is not appended. When the
        file has no parents, the file's name is returned instead.
    """
    metadata = drive_service.files().get(fileId=file_id, fields="name, parents").execute()
    parent_ids = metadata.get('parents')
    if not parent_ids:
        return metadata.get('name')
    return get_folder_path(parent_ids[0])

# Get the path of the current notebook
notebook_path = get_notebook_path()
if notebook_path is None:
    raise RuntimeError("Could not find a running notebook session for this kernel.")


# Authenticate and initialize the Google Drive API
auth.authenticate_user()
drive_service = build('drive', 'v3')

# Extract the file ID from the notebook path (assumes file ID is part of the path).
# Drive IDs can contain '-' in addition to word characters, so accept both;
# matching only \w+ would cut the ID short at the first hyphen.
file_id_match = re.search(r'fileId=([\w-]+)', notebook_path)
if file_id_match is None:
    raise RuntimeError(f"No 'fileId=' component found in notebook path: {notebook_path!r}")
file_id = file_id_match.group(1)

# Get the Drive path of the folder containing the notebook, using the file ID
file_path = get_file_path(file_id)

# Map the Drive root folder onto the Colab mount point. The root is recognised
# under its Japanese name and as 'MyDrive' / 'My Drive'; only the first
# occurrence is rewritten so a nested folder with the same name is left alone.
drive_root_pattern = r'(^|/)(?:マイドライブ|My ?Drive)($|/)'
if re.search(drive_root_pattern, file_path):
    converted_path = re.sub(drive_root_pattern, '/content/drive/MyDrive/', file_path, count=1)
else:
    # No root component in the path: treat it as relative to the Drive root.
    converted_path = '/content/drive/MyDrive/' + file_path

# Change the working directory to the converted path
os.chdir(converted_path)

Mounted at /content/drive


# Prepare the human-extracted data

In [None]:
# Specify the file path and sheet name for the Excel file.
# The path is relative, so it is resolved against the current working directory.
file_path = 'DE.xlsx'
sheet_name = 'R1_FINAL'


# Read the Excel file, specifying header=1 because the first row is blank
# (the column names are therefore taken from the second row of the sheet).
df = pd.read_excel(file_path, sheet_name=sheet_name, header=1)

# Analysis

## Get the list of arm names

In [None]:
# Load the table that pairs GPT-extracted arm names with human arm names.
file_path = 'arm_matched_list.xlsx'
sheet_name = 'Sheet1'

df_arm_matched = pd.read_excel(file_path, sheet_name=sheet_name)
# Drop the first data row. The label is taken from this frame's own index
# rather than from the unrelated `df`, so the correct row is removed even if
# the two frames are indexed differently.
df_arm_matched = df_arm_matched.drop(df_arm_matched.index[0])

In [None]:
# Arm-matching tables keyed by shot name; only the "chat" setting is registered.
df_arm_matched_list = {"chat": df_arm_matched}

## Create output storage dict

In [None]:
# Define the path to the JSON file
json_file_path = 'original_description_insomnia.json'

# Load the JSON data from the file. The encoding is given explicitly so the
# result does not depend on the platform default, matching how the other JSON
# files in this notebook are read.
with open(json_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Keep the entries whose declared type is numeric (integer or number) ...
numeric_fields = [field for field in data if field['type'] in ('integer', 'number')]
# ... and collect their names; these are the variables that get scored.
numeric_list = [item['name'] for item in numeric_fields]


## Calculate accuracy



In [None]:
# Compare the GPT-extracted numeric fields with the human extraction, arm by
# arm, and derive per-fold accuracy, sensitivity and specificity.
#
# Results:
#   output            row-by-row comparison (GPT row, human row, verdict row,
#                     blank separator row) for every scored arm
#   output_analysis   one metrics row per fold, plus a separator row per n_index
#   mean_combined_df  metrics averaged over the folds

N_FOLDS = 10  # folds per n_index; also the divisor used for the mean metrics


def _report_skip(shot_name, message):
    """Record a skipped arm in the logger and in error_log_<shot_name>.txt."""
    logging.error(message)
    # One entry per line; without the newline successive entries run together.
    with open(f'error_log_{shot_name}.txt', 'a') as log_file:
        log_file.write(message + "\n")


def _is_reported(gpt_value):
    """Return True unless a GPT value is -1, "-1" or "null"."""
    return (gpt_value != -1) and (str(gpt_value) != "-1") and (str(gpt_value) != "null")


def _same_to_one_decimal(human_value, gpt_value):
    """Return True when two numbers are equal after scaling by 10 and rounding.

    Non-numeric operands, and values that cannot be rounded to an integer
    (NaN, infinity), never match.
    """
    if not (isinstance(human_value, (int, float)) and isinstance(gpt_value, (int, float))):
        return False
    try:
        return round(human_value * 10) == round(gpt_value * 10)
    except (ValueError, OverflowError):
        return False


# Containers kept for inspection after the run
accuracy_output = {}
sensitivity_output = {}
specificity_output = {}

mean_accuracy_dict = {}
mean_sensitivity_dict = {}
mean_specificity_dict = {}

# Frames are collected in lists and concatenated once at the end.
output_frames = []
analysis_frames = []

shot_name_list = ["chat"]  # List of shot names

# Loop through each shot name
for shot_name_num, shot_name in enumerate(shot_name_list):
    df_arm_matched = df_arm_matched_list[shot_name]

    # Basic logging configuration. basicConfig has no effect when the root
    # logger already has handlers configured.
    logging.basicConfig(level=logging.ERROR, filename=f'error_log_{shot_name}.log', filemode='a',
                        format='%(asctime)s - %(levelname)s - %(message)s')

    for n_index in range(5, 6):
        accuracy_output[n_index] = {}
        sensitivity_output[n_index] = {}
        specificity_output[n_index] = {}

        accuracy_total = 0
        sensitivity_total = 0
        specificity_total = 0

        for fold_index in range(N_FOLDS):
            # Counters for this fold
            non_text_match_count = 0  # The number of data matched
            non_text_total = 0  # The total number of data
            judge_count_TP = 0  # The number of true positives
            judge_count_FN = 0  # The number of false negatives
            judge_count_P = 0  # The total number of variables for which data exists
            judge_count_FP = 0  # The number of false positives
            judge_count_TN = 0  # The number of true negatives
            judge_count_N = 0  # The total number of variables for which data does not exist

            for eval_index in range(5):
                try:
                    # Find the arm-matching row for this (n, fold, eval) combination
                    is_current = (
                        (df_arm_matched['n_index'].astype(int) == n_index)
                        & (df_arm_matched['fold_index'].astype(int) == fold_index)
                        & (df_arm_matched['eval_index'].astype(int) == eval_index)
                    )
                    filtered_df = df_arm_matched[is_current].copy()
                    filtered_df.fillna("", inplace=True)
                    index_list = filtered_df.index
                    # index_list[0] raises IndexError when no row matches
                    paper_name = filtered_df.loc[index_list[0], "paper_name"]

                    # Map each GPT arm name (key_i) to its human arm name (value_i)
                    arm_dict = {}
                    for i in range(5):
                        key = filtered_df.loc[index_list[0], f"key_{i+1}"]
                        value = filtered_df.loc[index_list[0], f"value_{i+1}"]
                        if (key != "") and (value != ""):
                            arm_dict[key] = value
                except Exception:
                    # No usable arm-matching row for this combination: nothing to score.
                    continue

                if not arm_dict:
                    continue

                # The GPT output file is the same for every arm of this evaluation,
                # so it is read once here instead of once per arm.
                GPT_arm_path = f"extracted_data_log_GPT_v3/{shot_name}/{n_index}_paper/{fold_index}_fold/extracted_data_{eval_index}.json"
                with open(GPT_arm_path, 'r', encoding='utf-8') as file:
                    extracted_data = json.load(file)

                # GPT_arm: arm name extracted by GPT; human_arm: matching human arm name
                for GPT_arm, human_arm in arm_dict.items():
                    context = f"{n_index}_paper/{fold_index}_fold/{eval_index}/{paper_name}/{GPT_arm}/{human_arm}"

                    try:
                        # Extract data for the GPT arm and keep only the numeric fields
                        _extracted_data_GPT = extracted_data[paper_name][GPT_arm]
                        extracted_data_GPT = {key: _extracted_data_GPT[key] for key in numeric_list if key in _extracted_data_GPT}
                    except Exception as e:
                        _report_skip(shot_name, f"{context} --> no arm name: {e}")
                        continue

                    try:
                        # Take the last human row for this study and arm. Missing
                        # cells stay NaN here and are mapped to -1 further down.
                        selected_rows = df[(df['study'] == paper_name) & (df['Arm'] == human_arm)].copy()
                        extracted_data_human = selected_rows.to_dict(orient='records')[-1]
                    except Exception as e:
                        _report_skip(shot_name, f"{context} --> no selected_rows: {e}")
                        continue

                    # Restrict the human data to the keys present in the GPT data
                    summarized_extracted_data_human = {key: extracted_data_human.get(key, None) for key in extracted_data_GPT.keys()}
                    answer_dict = {key: None for key in extracted_data_GPT}  # verdict per key

                    # Exclude RCTs with "EXCLUDE" in the Year field
                    if "EXCLUDE" in str(summarized_extracted_data_human["Year"]):
                        _report_skip(shot_name, f"{context} --> excluded")
                        continue

                    # Exclude RCTs with "None" in the Year field for GPT data
                    if extracted_data_GPT["Year"] == "None":
                        _report_skip(shot_name, f"{context} --> GPT hasn't extracted")
                        continue

                    # Score each key: value1 is the human value, value2 the GPT value;
                    # -1 stands for "no data".
                    for key_DE in extracted_data_GPT:
                        value1 = summarized_extracted_data_human[key_DE]
                        value2 = extracted_data_GPT[key_DE]

                        # Handle missing or null values in human data
                        if (str(value1) == "nan") or ("*" in str(value1)) or (str(value1) == "") or (str(value1) == "null"):
                            value1 = -1

                        if key_DE == "ICC_for_cRCT":  # Protocol sets this to 0.05, but human extraction often uses '*'
                            value2 = -1

                        if (value2 == "-1") or (str(value2) == "null"):
                            value2 = -1

                        # If GPT reported all three endpoint severity values, the
                        # change-of-severity fields do not need to be extracted.
                        if key_DE in ["Severity_ch_mean", "Severity_ch_sd", "Severity_ch_n"]:
                            endpoint_values = [extracted_data_GPT[_key] for _key in ["Severity_ep_mean", "Severity_ep_sd", "Severity_ep_n"]]
                            if all(_is_reported(_value) for _value in endpoint_values):
                                answer_dict[key_DE] = "NA"
                                continue

                        # Same rule for the long-term change-of-severity fields
                        if re.search(r"Severity_ch_.*_long", key_DE):
                            long_values = [extracted_data_GPT[_key] for _key in ["Severity_mean_long", "Severity_sd_long", "Severity_n_long"]]
                            if all(_is_reported(_value) for _value in long_values):
                                answer_dict[key_DE] = "NA"
                                continue

                        # Count total data
                        non_text_total += 1

                        # Tally true/false negatives and positives
                        if value1 == -1:
                            judge_count_N += 1
                            if value2 == -1:
                                judge_count_TN += 1
                            else:
                                judge_count_FP += 1
                        else:
                            judge_count_P += 1
                            if value2 != -1:
                                judge_count_TP += 1
                            else:
                                judge_count_FN += 1

                        # A value matches when it is equal, or when both are numbers
                        # that agree after rounding to one decimal place.
                        # NOTE(review): confirm one decimal place is the intended tolerance.
                        if value1 == value2 or _same_to_one_decimal(value1, value2):
                            answer_dict[key_DE] = "True"
                            non_text_match_count += 1
                        else:
                            answer_dict[key_DE] = "False"

                    # Combine GPT data, human data and the verdicts into one table
                    all_dicts = [extracted_data_GPT, summarized_extracted_data_human, answer_dict]
                    all_df = pd.DataFrame(all_dicts)
                    all_df.insert(0, 'arm_name', [GPT_arm, human_arm, ""])
                    all_df.insert(0, 'paper_name', [paper_name, "", ""])
                    all_df.insert(0, 'eval_index', [eval_index, "", ""])
                    all_df.insert(0, 'fold_index', [fold_index, "", ""])
                    all_df.insert(0, 'n_index', [n_index, "", ""])
                    all_df.insert(0, 'shot_name', [shot_name, "", ""])

                    # Add a row of NaN values to separate different evaluations
                    all_df.loc[len(all_df)] = [np.nan] * len(all_df.columns)

                    output_frames.append(all_df)
                    print(n_index, fold_index, eval_index, "done.")

            # Metrics for this fold; a fold with nothing to score yields 0
            non_text_accuracy = non_text_match_count / non_text_total if non_text_total > 0 else 0
            sensitivity = judge_count_TP / judge_count_P if judge_count_P > 0 else 0
            specificity = judge_count_TN / judge_count_N if judge_count_N > 0 else 0

            # Accumulate totals for averaging later
            accuracy_total += non_text_accuracy
            sensitivity_total += sensitivity
            specificity_total += specificity

            # One metrics row for this fold, with the three groups side by side
            accuracy_output_df = pd.DataFrame({"match_count": [non_text_match_count], "text_total": [non_text_total], "accuracy": [non_text_accuracy]})
            sensitivity_output_df = pd.DataFrame({"TP": [judge_count_TP], "total_P": [judge_count_P], "sensitivity": [sensitivity]})
            specificity_output_df = pd.DataFrame({"TN": [judge_count_TN], "total_N": [judge_count_N], "specificity": [specificity]})
            combined_df = pd.concat([accuracy_output_df, sensitivity_output_df, specificity_output_df], axis=1)
            analysis_frames.append(combined_df)

        # Separator row after the folds of this n_index (carries n_index + 1)
        accuracy_output_df = pd.DataFrame({"match_count": [n_index+1], "text_total": [""], "accuracy": [""]})
        sensitivity_output_df = pd.DataFrame({"TP": [""], "total_P": [""], "sensitivity": [""]})
        specificity_output_df = pd.DataFrame({"TN": [""], "total_N": [""], "specificity": [""]})
        combined_df = pd.concat([accuracy_output_df, sensitivity_output_df, specificity_output_df], axis=1)
        analysis_frames.append(combined_df)

        # Calculate mean accuracy, sensitivity, and specificity over the folds
        mean_accuracy = accuracy_total / N_FOLDS
        mean_sensitivity = sensitivity_total / N_FOLDS
        mean_specificity = specificity_total / N_FOLDS

        # Store the mean values in dictionaries
        mean_accuracy_dict[n_index] = {"mean_accuracy": mean_accuracy}
        mean_sensitivity_dict[n_index] = {"mean_sensitivity": mean_sensitivity}
        mean_specificity_dict[n_index] = {"mean_specificity": mean_specificity}

# Assemble the result tables
output = pd.concat(output_frames, ignore_index=True) if output_frames else pd.DataFrame()
output_analysis = pd.concat(analysis_frames, ignore_index=True)

# Create DataFrames from the mean dictionaries
mean_accuracy_df = pd.DataFrame(mean_accuracy_dict)
mean_sensitivity_df = pd.DataFrame(mean_sensitivity_dict)
mean_specificity_df = pd.DataFrame(mean_specificity_dict)

mean_combined_df = pd.concat([mean_accuracy_df, mean_sensitivity_df, mean_specificity_df])

5 0 0 done.
5 0 0 done.
5 0 1 done.
5 0 1 done.
5 0 2 done.
5 0 2 done.
5 0 2 done.
5 0 3 done.
5 0 3 done.
5 0 4 done.
5 0 4 done.
5 1 0 done.
5 1 0 done.
5 1 1 done.
5 1 1 done.
5 1 2 done.
5 1 2 done.


ERROR:root:5_paper/1_fold/4/Kaplan2018/CBTI-BP with RISE-UP/RISE-UP --> excluded
ERROR:root:5_paper/1_fold/4/Kaplan2018/Psychoeducation (PE)/PE --> excluded


5 2 0 done.
5 2 0 done.
5 2 2 done.
5 2 2 done.
5 2 3 done.
5 2 3 done.
5 3 1 done.
5 3 1 done.
5 3 3 done.
5 3 3 done.
5 3 4 done.
5 3 4 done.
5 4 0 done.
5 4 0 done.
5 4 0 done.


ERROR:root:5_paper/4_fold/1/Gehrman2021/in-person CBT-I/In-person --> no selected_rows: list index out of range


5 4 1 done.
5 4 1 done.
5 4 2 done.
5 4 2 done.
5 4 3 done.
5 4 3 done.
5 4 4 done.
5 4 4 done.
5 5 0 done.
5 5 0 done.
5 5 0 done.
5 5 1 done.
5 5 1 done.
5 5 1 done.
5 5 2 done.
5 5 2 done.
5 5 3 done.
5 5 3 done.
5 5 4 done.
5 5 4 done.
5 6 0 done.
5 6 0 done.
5 6 0 done.


ERROR:root:5_paper/6_fold/2/Mottaghi2015/Music and Cognitive-Behavioral Therapy (MAT)/MAT --> excluded
ERROR:root:5_paper/6_fold/2/Mottaghi2015/Cognitive-Behavioral Therapy (CBT)/CBT --> excluded
ERROR:root:5_paper/6_fold/2/Mottaghi2015/Control/Control --> excluded
ERROR:root:5_paper/6_fold/3/Belleville2007/Taper intervention alone/Taper --> excluded
ERROR:root:5_paper/6_fold/3/Belleville2007/Taper intervention combined with self-help CBT/Combined --> excluded


5 6 4 done.
5 6 4 done.


ERROR:root:5_paper/7_fold/2/Huberty2021/Calm app/Intervention --> excluded
ERROR:root:5_paper/7_fold/2/Huberty2021/wait-list control/Wait-list --> excluded


5 7 3 done.
5 7 3 done.
5 7 4 done.
5 7 4 done.
5 8 0 done.
5 8 0 done.
5 8 1 done.
5 8 1 done.
5 8 3 done.
5 8 3 done.


ERROR:root:5_paper/8_fold/4/Margolies2013/CBT-I with adjunctive IRT/CBTI --> excluded
ERROR:root:5_paper/8_fold/4/Margolies2013/waitlist control/Waitlist --> excluded


5 9 0 done.
5 9 0 done.
5 9 1 done.
5 9 1 done.
5 9 1 done.
5 9 2 done.
5 9 2 done.
5 9 4 done.
5 9 4 done.


In [None]:
# Save the output, mean combined, and output analysis DataFrames to an Excel file
with pd.ExcelWriter(f'output_analysis_{shot_name}.xlsx') as writer:
    output.to_excel(writer, sheet_name='Sheet1', index=False) # Save the 'output' DataFrame to Sheet1
    mean_combined_df.to_excel(writer, sheet_name='Sheet2', index=True) # Save the 'mean_combined_df' DataFrame to Sheet2
    output_analysis.to_excel(writer, sheet_name='Sheet3', index=False) # Save the 'output_analysis' DataFrame to Sheet3

## Calculate sensitivity and specificity

In [None]:
# Label every GPT-extracted numeric field as TN, FP, TP or FN against the
# human extraction, arm by arm. The result table `output` holds, for every
# scored arm, the GPT row, the human row, the label row and a blank separator.


def _report_skip(shot_name, message):
    """Record a skipped arm in the logger and in error_log_<shot_name>.txt."""
    logging.error(message)
    # One entry per line; without the newline successive entries run together.
    with open(f'error_log_{shot_name}.txt', 'a') as log_file:
        log_file.write(message + "\n")


def _is_reported(gpt_value):
    """Return True unless a GPT value is -1, "-1" or "null"."""
    return (gpt_value != -1) and (str(gpt_value) != "-1") and (str(gpt_value) != "null")


# Containers and totals are reset here; this cell only fills the counters
# and `output`.
accuracy_output = {}
sensitivity_output = {}
specificity_output = {}

mean_accuracy_dict = {}
mean_sensitivity_dict = {}
mean_specificity_dict = {}

# Frames are collected in a list and concatenated once at the end.
output_frames = []

# List of shot names
shot_name_list = ["chat"]

# Loop through each shot name
for shot_name_num, shot_name in enumerate(shot_name_list):
    df_arm_matched = df_arm_matched_list[shot_name]

    # basicConfig has no effect when the root logger already has handlers configured.
    logging.basicConfig(level=logging.ERROR, filename=f'error_log_{shot_name}.log', filemode='a',
                        format='%(asctime)s - %(levelname)s - %(message)s')

    for n_index in range(5, 6):
        accuracy_output[n_index] = {}
        sensitivity_output[n_index] = {}
        specificity_output[n_index] = {}

        accuracy_total = 0
        sensitivity_total = 0
        specificity_total = 0

        for fold_index in range(10):
            # Counters for this fold
            non_text_match_count = 0  # The number of data matched
            non_text_total = 0  # The total number of data
            judge_count_TP = 0  # The number of true positives
            judge_count_FN = 0  # The number of false negatives
            judge_count_P = 0  # The total number of variables for which data exists
            judge_count_FP = 0  # The number of false positives
            judge_count_TN = 0  # The number of true negatives
            judge_count_N = 0  # The total number of variables for which data does not exist

            for eval_index in range(5):
                try:
                    # Find the arm-matching row for this (n, fold, eval) combination
                    is_current = (
                        (df_arm_matched['n_index'].astype(int) == n_index)
                        & (df_arm_matched['fold_index'].astype(int) == fold_index)
                        & (df_arm_matched['eval_index'].astype(int) == eval_index)
                    )
                    filtered_df = df_arm_matched[is_current].copy()
                    filtered_df.fillna("", inplace=True)
                    index_list = filtered_df.index
                    # index_list[0] raises IndexError when no row matches
                    paper_name = filtered_df.loc[index_list[0], "paper_name"]

                    # Map each GPT arm name (key_i) to its human arm name (value_i)
                    arm_dict = {}
                    for i in range(5):
                        key = filtered_df.loc[index_list[0], f"key_{i+1}"]
                        value = filtered_df.loc[index_list[0], f"value_{i+1}"]
                        if (key != "") and (value != ""):
                            arm_dict[key] = value
                except Exception:
                    # No usable arm-matching row for this combination: nothing to score.
                    continue

                if not arm_dict:
                    continue

                # The GPT output file is the same for every arm of this evaluation,
                # so it is read once here instead of once per arm.
                GPT_arm_path = f"extracted_data_log_GPT_v3/{shot_name}/{n_index}_paper/{fold_index}_fold/extracted_data_{eval_index}.json"
                with open(GPT_arm_path, 'r', encoding='utf-8') as file:
                    extracted_data = json.load(file)

                # GPT_arm: arm name extracted by GPT; human_arm: matching human arm name
                for GPT_arm, human_arm in arm_dict.items():
                    context = f"{n_index}_paper/{fold_index}_fold/{eval_index}/{paper_name}/{GPT_arm}/{human_arm}"

                    try:
                        # Extract data for the GPT arm and keep only the numeric fields
                        _extracted_data_GPT = extracted_data[paper_name][GPT_arm]
                        extracted_data_GPT = {key: _extracted_data_GPT[key] for key in numeric_list if key in _extracted_data_GPT}
                    except Exception as e:
                        _report_skip(shot_name, f"{context} --> no arm name: {e}")
                        continue

                    try:
                        # Take the last human row for this study and arm. Missing
                        # cells stay NaN here and are mapped to -1 further down.
                        selected_rows = df[(df['study'] == paper_name) & (df['Arm'] == human_arm)].copy()
                        extracted_data_human = selected_rows.to_dict(orient='records')[-1]
                    except Exception as e:
                        _report_skip(shot_name, f"{context} --> no selected_rows: {e}")
                        continue

                    # Restrict the human data to the keys present in the GPT data
                    summarized_extracted_data_human = {key: extracted_data_human.get(key, None) for key in extracted_data_GPT.keys()}
                    answer_dict = {key: None for key in extracted_data_GPT}  # label per key

                    # Exclude RCTs with "EXCLUDE" in the Year field
                    if "EXCLUDE" in str(summarized_extracted_data_human["Year"]):
                        _report_skip(shot_name, f"{context} --> excluded")
                        continue

                    # Exclude RCTs with "None" in the Year field for GPT data
                    if extracted_data_GPT["Year"] == "None":
                        _report_skip(shot_name, f"{context} --> GPT hasn't extracted")
                        continue

                    # Label each key: value1 is the human value, value2 the GPT value;
                    # -1 stands for "no data".
                    for key_DE in extracted_data_GPT:
                        value1 = summarized_extracted_data_human[key_DE]
                        value2 = extracted_data_GPT[key_DE]

                        # Handle missing or null values
                        if (str(value1) == "nan") or ("*" in str(value1)) or (str(value1) == "") or (str(value1) == "null"):
                            value1 = -1

                        if key_DE == "ICC_for_cRCT":
                            value2 = -1
                        if (value2 == "-1") or (str(value2) == "null"):
                            value2 = -1

                        # If GPT reported all three endpoint severity values, the
                        # change-of-severity fields are marked "NA" and not counted.
                        if key_DE in ["Severity_ch_mean", "Severity_ch_sd", "Severity_ch_n"]:
                            endpoint_values = [extracted_data_GPT[_key] for _key in ["Severity_ep_mean", "Severity_ep_sd", "Severity_ep_n"]]
                            if all(_is_reported(_value) for _value in endpoint_values):
                                answer_dict[key_DE] = "NA"
                                continue

                        # Same rule for the long-term change-of-severity fields
                        if re.search(r"Severity_ch_.*_long", key_DE):
                            long_values = [extracted_data_GPT[_key] for _key in ["Severity_mean_long", "Severity_sd_long", "Severity_n_long"]]
                            if all(_is_reported(_value) for _value in long_values):
                                answer_dict[key_DE] = "NA"
                                continue

                        # Store one of TN, FP, TP, or FN in answer_dict
                        if value1 == -1:
                            judge_count_N += 1
                            if value2 == -1:
                                judge_count_TN += 1
                                answer_dict[key_DE] = "TN"
                            else:
                                judge_count_FP += 1
                                answer_dict[key_DE] = "FP"
                        else:
                            judge_count_P += 1
                            if value2 != -1:
                                judge_count_TP += 1
                                answer_dict[key_DE] = "TP"
                            else:
                                judge_count_FN += 1
                                answer_dict[key_DE] = "FN"

                    # Combine GPT data, human data and the labels into one table
                    all_dicts = [extracted_data_GPT, summarized_extracted_data_human, answer_dict]
                    all_df = pd.DataFrame(all_dicts)
                    all_df.insert(0, 'arm_name', [GPT_arm, human_arm, ""])
                    all_df.insert(0, 'paper_name', [paper_name, "", ""])
                    all_df.insert(0, 'eval_index', [eval_index, "", ""])
                    all_df.insert(0, 'fold_index', [fold_index, "", ""])
                    all_df.insert(0, 'n_index', [n_index, "", ""])
                    all_df.insert(0, 'shot_name', [shot_name, "", ""])
                    # Blank row to separate different evaluations
                    all_df.loc[len(all_df)] = [np.nan] * len(all_df.columns)

                    output_frames.append(all_df)
                    print(n_index, fold_index, eval_index, "done.")

# Assemble the result table
output = pd.concat(output_frames, ignore_index=True) if output_frames else pd.DataFrame()

5 0 0 done.
5 0 0 done.
5 0 1 done.
5 0 1 done.
5 0 2 done.
5 0 2 done.
5 0 2 done.
5 0 3 done.
5 0 3 done.
5 0 4 done.
5 0 4 done.
5 1 0 done.
5 1 0 done.
5 1 1 done.
5 1 1 done.
5 1 2 done.
5 1 2 done.


ERROR:root:5_paper/1_fold/4/Kaplan2018/CBTI-BP with RISE-UP/RISE-UP --> excluded
ERROR:root:5_paper/1_fold/4/Kaplan2018/Psychoeducation (PE)/PE --> excluded


5 2 0 done.
5 2 0 done.
5 2 2 done.
5 2 2 done.
5 2 3 done.
5 2 3 done.
5 3 1 done.
5 3 1 done.
5 3 3 done.
5 3 3 done.
5 3 4 done.
5 3 4 done.
5 4 0 done.
5 4 0 done.
5 4 0 done.


ERROR:root:5_paper/4_fold/1/Gehrman2021/in-person CBT-I/In-person --> no selected_rows: list index out of range


5 4 1 done.
5 4 1 done.
5 4 2 done.
5 4 2 done.
5 4 3 done.
5 4 3 done.
5 4 4 done.
5 4 4 done.
5 5 0 done.
5 5 0 done.
5 5 0 done.
5 5 1 done.
5 5 1 done.
5 5 1 done.
5 5 2 done.
5 5 2 done.
5 5 3 done.
5 5 3 done.
5 5 4 done.
5 5 4 done.
5 6 0 done.
5 6 0 done.


ERROR:root:5_paper/6_fold/2/Mottaghi2015/Music and Cognitive-Behavioral Therapy (MAT)/MAT --> excluded
ERROR:root:5_paper/6_fold/2/Mottaghi2015/Cognitive-Behavioral Therapy (CBT)/CBT --> excluded
ERROR:root:5_paper/6_fold/2/Mottaghi2015/Control/Control --> excluded


5 6 0 done.


ERROR:root:5_paper/6_fold/3/Belleville2007/Taper intervention alone/Taper --> excluded
ERROR:root:5_paper/6_fold/3/Belleville2007/Taper intervention combined with self-help CBT/Combined --> excluded
ERROR:root:5_paper/7_fold/2/Huberty2021/Calm app/Intervention --> excluded


5 6 4 done.
5 6 4 done.


ERROR:root:5_paper/7_fold/2/Huberty2021/wait-list control/Wait-list --> excluded


5 7 3 done.
5 7 3 done.
5 7 4 done.
5 7 4 done.
5 8 0 done.
5 8 0 done.
5 8 1 done.
5 8 1 done.
5 8 3 done.
5 8 3 done.


ERROR:root:5_paper/8_fold/4/Margolies2013/CBT-I with adjunctive IRT/CBTI --> excluded
ERROR:root:5_paper/8_fold/4/Margolies2013/waitlist control/Waitlist --> excluded


5 9 0 done.
5 9 0 done.
5 9 1 done.
5 9 1 done.
5 9 1 done.
5 9 2 done.
5 9 2 done.
5 9 4 done.
5 9 4 done.


In [None]:
# Save the output DataFrames to an Excel file
with pd.ExcelWriter(f'output_analysis_{shot_name}_sensitivity_specificity.xlsx') as writer:
    output.to_excel(writer, sheet_name='Sheet1', index=False)