# Manual Evaluation Explorer

In [1]:
import pandas as pd
from sklearn.metrics import jaccard_score
from pathlib import Path
import sys

# Make the reproscreener source directory importable so `manual_eval` can be
# loaded directly. The path is relative, so it is resolved against the
# notebook's current working directory.
module_path = Path("../src/reproscreener")
# Only add the directory once: an unconditional append would stack duplicate
# sys.path entries every time this cell is re-run.
if str(module_path) not in sys.path:
    sys.path.append(str(module_path))
from manual_eval import ManualEvaluationParser

# Point the parser at the manual_evaluations directory inside the source tree.
parser = ManualEvaluationParser(str(module_path / "manual_evaluations"))
# `evaluations` is indexed by dataset name further down (e.g. evaluations["abstract"]).
evaluations = parser.load_all_evaluations()
dataset_names = list(evaluations.keys())
dataset_names

['abstract', 'agreement_gpt', 'manuscript', 'combined_abstract']

In [13]:
# Show the metric names available in the parser's standardized_metrics mapping.
parser.standardized_metrics.keys()

dict_keys(['problem', 'objective', 'research_method', 'research_questions', 'pseudocode', 'dataset', 'hypothesis', 'prediction', 'code_avail', 'software_dependencies', 'experiment_setup', 'empirical_dataset', 'code_avail_article', 'pwc_link_avail', 'pwc_link_match', 'result_replication_code_avail', 'package', 'wrapper_scripts', 'hardware_specifications', 'will_it_reproduce', 'parsed_readme'])

### 1. `abstract`

In [18]:
# Build the "abstract" frame in one pass: take the raw evaluations and drop
# the evaluation_type / source_file / paper_id columns together with every
# column whose name contains "_description".
df_abstract = evaluations["abstract"].pipe(
    lambda frame: frame.drop(
        columns=["evaluation_type", "source_file", "paper_id"]
        + [name for name in frame.columns if "_description" in name]
    )
)
df_abstract.head()

Unnamed: 0_level_0,problem,objective,research_method,research_questions,pseudocode,dataset,hypothesis,prediction,code_available,software_dependencies,experiment_setup
paper_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1606.04671,False,False,False,False,False,False,False,False,False,False,False
1903.09668,False,False,True,False,False,False,False,False,False,False,False
1904.10554,False,False,False,False,False,False,False,False,False,False,False
1908.05659,False,False,False,False,False,False,False,False,False,False,False
1909.00931,False,False,False,False,False,False,False,False,False,False,False


In [6]:
# Column-wise sum: for the True/False metric columns of df_abstract this is the
# number of papers marked True for each metric.
df_abstract.sum()

problem                  6
objective                1
research_method          1
research_questions       1
pseudocode               0
dataset                  1
hypothesis               0
prediction               0
code_available           3
software_dependencies    0
experiment_setup         0
dtype: int64

### 2. `agreement_gpt`

In [15]:
df_agreement_gpt = evaluations["agreement_gpt"]
# Drop the evaluation_type / source_file / paper_id columns and every column
# whose name contains "_description".
df_agreement_gpt = df_agreement_gpt.drop(
    columns=["evaluation_type", "source_file", "paper_id"]
    + [col for col in df_agreement_gpt.columns if "_description" in col]
)
# Strip only the leading "gpt_" from column names. str.replace("gpt_", "")
# would also delete any later "gpt_" occurring inside a name, so the prefix is
# sliced off instead (the startswith guard guarantees it is present).
df_agreement_gpt = df_agreement_gpt.rename(
    columns={
        col: col[len("gpt_"):]
        for col in df_agreement_gpt.columns
        if col.startswith("gpt_")
    }
)
df_agreement_gpt.head()

Unnamed: 0_level_0,problem,problem_agreement,objective,objective_agreement,research_method,research_method_agreement,research_questions,research_questions_agreement,pseudocode,pseudocode_agreement,...,hypothesis,hypothesis_agreement,prediction,prediction_agreement,code_available,code_available_agreement,software_dependencies,software_dependencies_agreement,experiment_setup,experiment_setup_agreement
paper_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1606.04671,True,True,True,False,True,False,False,True,False,True,...,False,True,True,True,False,True,False,True,False,True
1903.09668,True,True,True,True,True,True,False,True,False,True,...,False,False,False,True,False,True,False,True,True,False
1904.10554,True,True,True,True,True,True,False,True,False,True,...,False,True,False,True,False,True,False,True,False,False
1908.05659,True,False,True,True,False,True,False,True,False,True,...,False,True,False,True,False,True,False,True,False,True
1909.00931,True,True,True,True,True,True,False,True,False,True,...,True,True,True,False,False,True,True,True,True,False


### 3. `manuscript`

### 1. `abstract`

### 4. `combined_abstract`