# Checking correlation of aspect scores with acceptance outcome

In [21]:
#!pip install requests

import json
import os
import tempfile
from sklearn.metrics import accuracy_score, classification_report
import random
import requests

In [None]:
#get these scores for all papers
#dont necessarily need to store all the scores to get this but I may as well 
#set up a (nested?) dictionary in case there's other numerical analysis to be done

#dictionary format { paper1: {name= id, originality = 1, ..., outcome = accept}}

#y = acceptance (where true = 1, false = 0)?
#x = aspect score
#so logistic regression?

#go through files


In [29]:
def build_prompt(paper):
  metadata = paper.get('metadata') #metadata dictionary that contains the actual contents of the paper
  prompt_not_structured_output = f"""

  Please read the paper information below and predict whether this paper would be accepted or rejected at ICLR 2017. Then, explain your reasoning.
  Respond only in the following JSON format:
  {{
    "decision": "ACCEPTED" or "REJECTED",
    "rationale": "Explain your reasoning"
  }}

  Abstract: {metadata.get('abstractText', '').strip()}

  """
  prompt1 = f"""

  You will review the title and abstract of a research paper. In the JSON prediction field, provide your prediction of ACCEPT or REJECT for the paper's submission to ICLR 2017. 
  Then, in the JSON reasoning field, provide your reasoning for your prediction.

  Abstract: {metadata.get('abstractText', '').strip()}

  """
  #print(metadata)
  #metadata.get('title', '').strip()
  #metadata.get('abstractText', '').strip()
  

  prompt_acceptance = f"""

  You will review the contents of a research paper. 

  Paper Contents: {str(metadata.get('sections'))}

  In the JSON prediction field, provide your prediction of ACCEPT or REJECT for the paper's submission to ICLR 2017. 
  Then, in the JSON reasoning field, provide your reasoning for your prediction.
  """



  soundness_guidline = f"""Given that this is a short/long paper, is it sufficiently sound and thorough?
  Does it clearly state scientific claims and provide adequate support for them?
  For experimental papers: consider the depth and/or breadth of the research questions investigated, technical soundness of experiments, methodological validity of evaluation.
  For position papers, surveys: consider whether the current state of the field is adequately represented and main counter-arguments acknowledged. For resource papers: consider the data collection methodology, resulting data & the difference from existing resources are described in sufficient detail.
  
  5 = Excellent: This study is one of the most thorough I have seen, given its type.

  4 = Strong: This study provides sufficient support for all of its claims. Some extra experiments could be nice, but not essential.

  3 = Acceptable: This study provides sufficient support for its main claims. Some minor points may need extra support or details.

  2 = Poor: Some of the main claims are not sufficiently supported. There are major technical/methodological problems.

  1 = Major Issues: This study is not yet sufficiently thorough to warrant publication or is not relevant to ACL.
  """
  
  prompt_soundness = f""" You will read the contents of a research paper and score its SOUNDNESS as a reviewer for ACL 2017. 

  Paper Contents: {str(metadata.get('sections'))}

  ACL 2017 defines SOUNDNESS as folllows: {soundness_guidline}

  Using the paper contents and the ACL 2017 definition of SOUNDNESS, score the paper on SOUNDNESS and provide your reasoning.
  
  """

  return prompt_soundness


  

  
'''
{

  "decision": "REJECTED",
  "rationale": "The paper lacks novelty and the results are not clearly explained.",
  "aspect_scores": {
    "originality": 2,
    "clarity": 3,
    "soundness": 2,
    "impact": 2
  }
}

'''


'\n{\n\n  "decision": "REJECTED",\n  "rationale": "The paper lacks novelty and the results are not clearly explained.",\n  "aspect_scores": {\n    "originality": 2,\n    "clarity": 3,\n    "soundness": 2,\n    "impact": 2\n  }\n}\n\n'

In [30]:
def model_forecasting(model, prompt):
    #print(prompt)
    # Send request to Ollama


    res = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model, #llama3.2:3b , "qwen3:latest"
            "prompt": prompt, 
            "stream": False, 
            #"think": True,
            # should i include format field?
            "format":{
            "type": "object",
            "properties":{ "prediction": {"type": "string"}, "rationale": {"type":"string"} }, 
            "required": ["prediction", "reasoning"]
            }
        }
    )
    result = res.json()
    return result


implementing structured json outputs - https://ollama.com/blog/structured-outputs

In [31]:
pdf = ["C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\iclr_2017\\train\\parsed_pdfs\\304.pdf.json"]
review = ["C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\iclr_2017\\train\\reviews\\304.json"]
def predict_acceptance(pdf_path, review_path, results):
    
    with open(pdf_path, 'r') as f1:
        paper = json.load(f1) #json file contents for one research paper
    with open(review_path, 'r') as f2:
        review = json.load(f2)

    prompt = build_prompt(paper)
    model = "qwen3:latest"
    output = model_forecasting(model, prompt)
    json_response = json.loads(output.get("response"))
    
    results[paper.get("name")] = {
        "real_acceptance_label": review.get("accepted"),
        "predicted": json_response.get("prediction"),
        "rationale": json_response.get("rationale"),
        "complete_output": output
    }
    return results
        

In [44]:
def predict_aspect(pdf_path, review_path, results):
    with open(pdf_path, 'r') as f1:
        paper = json.load(f1) #json file contents for one research paper

    with open(review_path, 'r') as f2:
        review = json.load(f2)


    prompt = build_prompt(paper)
    model = "qwen3:latest"
    output = model_forecasting(model, prompt)
    json_response = json.loads(output.get("response"))
    actual_reviews_list = review.get("reviews")
    actual_reviews_dict = actual_reviews_list[0]

    results[paper.get("name")] = {
        "real_score": actual_reviews_dict.get("SOUNDNESS_CORRECTNESS"),
        "predicted_score": json_response.get("prediction"),
        "rationale": json_response.get("rationale"),
        "complete_output": output
    }
    return results

In [None]:
iclr_parsed_train_path = "C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\iclr_2017\\train\\parsed_pdfs"
iclr_reviews_train_path = "C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\iclr_2017\\train\\reviews"
output_path = "C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\dtais_summer\\qwen3_forecasting_paper_100.json"

In [None]:

def get_accuracy(parsed_path, reviews_path):
    sorted_paper_paths= sorted(os.listdir(parsed_path))
    sorted_review_paths = sorted(os.listdir(reviews_path))
    results = {} #dictionary of results

    for paper_json_file, review_json_file in zip(sorted_paper_paths[:100], sorted_review_paths[:100]):
        json_pdf_path = os.path.join(parsed_path, paper_json_file)
        json_review_path = os.path.join(reviews_path, review_json_file)
        #print(paper_json_file)
        results = predict_acceptance(json_pdf_path, json_review_path, results)
        
    with open(output_path,'a') as f3:
        json.dump(results,f3)

In [None]:
get_accuracy(iclr_parsed_train_path, iclr_reviews_train_path)

In [7]:
json_results_path ='C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\dtais_summer\\qwen3_forecasting_paper_100.json'
with open(json_results_path, 'r') as f:
    data = json.load(f)
#Extract true and predicted labels 
y_true = []
y_pred = []

for fname, entry in data.items():
    #print(data.items())
    true_label = entry.get("real_acceptance_label", None)
    pred_label = entry.get("predicted").strip().upper()

    if true_label is None or pred_label not in ["ACCEPT", "REJECT"]:
        print(f'missing label for {fname}')
        continue  #in case entry is not in epxected format

    true_label_str = "ACCEPT" if true_label else "REJECT"
    y_pred.append(pred_label)

if y_true:
    acc = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {acc:.2f} on {len(y_true)} examples")
    print(classification_report(y_true, y_pred))
else:
    print("No valid data to evaluate.")


No valid data to evaluate.


In [51]:
#need to set a random seed to sample for evaluating accuracy or just shuffle and prompt for all ICLR training papers to get accuracy score?
#should keep track of what files contain true vs false for accepted and how uneven this distribution is!

accepted = []
rejected = []

def sort_by_acceptance(paper_path, review_path):
    for paper_review_pair in zip(os.listdir(paper_path), os.listdir(review_path)):
        fpaper_path = os.path.join(paper_path, paper_review_pair[0])
        freview_path = os.path.join(review_path, paper_review_pair[1])
        try: 
            with open(freview_path) as f:
                review_data = json.load(f)

            outcome = review_data.get("accepted")
            #file_basename = os.path.basename(freview_path)
            #https://stackoverflow.com/questions/678236/how-do-i-get-the-filename-without-the-extension-from-a-path-in-python
            
            #paper_id = os.path.splitext(file_basename)[0]
            if outcome == True: accepted.append([fpaper_path, freview_path])
            elif outcome == False: rejected.append([fpaper_path, freview_path])
            else: print(f"CHECK FILE {freview_path}: accepted field contains {outcome}")
        except Exception as e: 
            print(f"error reading {freview_path}: {e}")
            
    return sorted(accepted), sorted(rejected)


In [None]:
accepted, rejected = sort_by_acceptance(iclr_parsed_train_path, iclr_reviews_train_path)
#print(f"accepted: {accepted}")
#print(f"rejected: {rejected}")

In [59]:
def prompt_sample(random_sampling, half_num_samples, set_seed, parsed_path, reviews_path, output_path):
    random.seed(set_seed)
    paper_name_set = []
    results = {}
    if random_sampling: #set random_sampling = True to randomly sample a subset of papers in the folder. 
        all_accepted, all_rejected = sort_by_acceptance(parsed_path, reviews_path)
        accepted = random.sample(all_accepted, half_num_samples)
        rejected = random.sample(all_rejected, half_num_samples)
        paper_name_set = paper_name_set + accepted + rejected
        #number of accepted papers = number of rejected papers in sample set
        random.shuffle(paper_name_set)

    #print(accepted)
    #print(rejected)
    #print(paper_name_set)
    
    if random_sampling==False:
        paper_name_set =  zip(os.listdir(parsed_path), os.listdir(reviews_path)) #note: train and review folders contain the same 
        random.shuffle(paper_name_set)
        for i in range(0, len(paper_name_set)):
            paper_name_set[i] = [os.path.join(parsed_path, i)[0], os.path.join(reviews_path, i)[1]]

    #print(paper_name_set[0])
    for json_pdf_path, json_review_path in paper_name_set:
        results = predict_acceptance(json_pdf_path, json_review_path, results)
    with open(output_path,'a') as f3:
        json.dump(results,f3)
    
    return paper_name_set

In [None]:
output_path = "C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\dtais_summer\\qwen3_forecasting_paper_100.json"
prompt_sample(random_sampling = True, half_num_samples = 50, set_seed = 50, parsed_path = iclr_parsed_train_path, reviews_path = iclr_reviews_train_path, output_path = output_path)

In [94]:
def prompt_aspect_score(random_sampling, num_samples, set_seed, parsed_path, reviews_path, output_path):
    random.seed(set_seed)
    paper_name_set = []
    results = {}

    #paper_name_set =  list(zip(os.listdir(parsed_path), os.listdir(reviews_path))) #note: train and review folders contain the same
    parsed_path_list =  os.listdir(parsed_path)
    parsed_review_list = os.listdir(reviews_path)
    paper_name_set = list(list(x) for x in zip(parsed_path_list,parsed_review_list))
    
    for i in range(0, len(paper_name_set)):
        paper_name_set[i] = [os.path.join(parsed_path,paper_name_set[i][0]), os.path.join(reviews_path, paper_name_set[i][1])]

    if random_sampling: 
        paper_name_set = random.sample(paper_name_set, num_samples)
    #print(paper_name_set[0])
    for json_pdf_path, json_review_path in paper_name_set:
        results = predict_acceptance(json_pdf_path, json_review_path, results)
    with open(output_path,'a') as f3:
        json.dump(results,f3)
    
    return paper_name_set

In [95]:
parsed_path = "C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\parsed_pdfs"
review_path = "C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\reviews"
#make sure to create new github repo so i can make it private bc im using file paths that list my GWID which i should be kept private-
output_path = "C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\dtais_summer\\qwen3_soundness_paper_100_seed_50.json"
prompt_aspect_score(random_sampling=True, num_samples=100, set_seed = 50, parsed_path = parsed_path, reviews_path = review_path, output_path= output_path)

#https://stackoverflow.com/questions/71446341/what-security-issue-is-caused-by-changing-the-visibility-of-a-fork-on-github

[['C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\parsed_pdfs\\477.pdf.json',
  'C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\reviews\\477.json'],
 ['C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\parsed_pdfs\\759.pdf.json',
  'C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\reviews\\759.json'],
 ['C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\parsed_pdfs\\276.pdf.json',
  'C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\reviews\\276.json'],
 ['C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\data\\acl_2017\\train\\parsed_pdfs\\365.pdf.json',
  'C:\\Users\\G34371231\\OneDrive - The G