# Checking correlation of aspect scores with acceptance outcome

In [1]:
#!pip install requests

import json
import os
import random
import tempfile

import requests
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Goal: get these aspect scores for all papers.
# We don't necessarily need to store every score to compute the correlation, but we may as well
# set up a (nested?) dictionary in case there is other numerical analysis to be done.

# Dictionary format: {paper1: {"name": id, "originality": 1, ..., "outcome": "accept"}}

# y = acceptance (where True = 1, False = 0)?
# x = aspect score
# So: logistic regression?

# TODO: go through the files


In [None]:
def build_prompt(paper):
  """Build the acceptance-forecasting prompt for one parsed paper.

  Args:
    paper: dict loaded from a parsed-PDF JSON file. Its 'metadata' entry is
      expected to be a dict; the value stored under metadata['sections'] is
      embedded in the prompt via str().

  Returns:
    The prompt string asking for an ACCEPT/REJECT prediction plus reasoning.

  Raises:
    AttributeError: if paper has no 'metadata' entry (paper.get returns None).
  """
  metadata = paper.get('metadata') #metadata dictionary that contains the actual contents of the paper

  # Two earlier abstract-only prompt variants used to be built here without ever
  # being returned. They were removed: evaluating them called .strip() on
  # metadata['abstractText'], which crashed on papers whose abstract is null even
  # though only the full-paper prompt below is used.
  #
  # NOTE(review): the prompt calls the explanation field "reasoning", while the
  # response schema in model_forecasting names it "rationale" — consider aligning.
  prompt = f"""

  You will review the contents of a research paper. 

  Paper Contents: {str(metadata.get('sections'))}

  In the JSON prediction field, provide your prediction of ACCEPT or REJECT for the paper's submission to ICLR 2017. 
  Then, in the JSON reasoning field, provide your reasoning for your prediction.
  """
  return prompt


# Example of a richer response format that also carries per-aspect scores.
# NOTE(review): the schema sent by model_forecasting only declares "prediction" and
# "rationale" — presumably this is a planned extension; confirm before relying on it.
'''
{
  "decision": "REJECTED",
  "rationale": "The paper lacks novelty and the results are not clearly explained.",
  "aspect_scores": {
    "originality": 2,
    "clarity": 3,
    "soundness": 2,
    "impact": 2
  }
}

'''


In [None]:
def model_forecasting(model, prompt):
    """Send one prompt to the local Ollama server and return its decoded reply.

    Args:
        model: Ollama model tag, e.g. "llama3.2:3b" or "qwen3:latest".
        prompt: Full prompt text to send.

    Returns:
        The JSON body of the /api/generate response, decoded into Python objects.
        Callers in this notebook read the generated text from its "response" key.

    Raises:
        requests.HTTPError: if the server answers with an error status code.
    """
    # JSON schema passed through Ollama's "format" field (structured outputs).
    response_schema = {
        "type": "object",
        "properties": {
            "prediction": {"type": "string"},
            "rationale": {"type": "string"},
        },
        # "required" must name keys declared in "properties". It previously listed
        # "reasoning", which is not declared and is not the key the results code
        # reads ("rationale"), so the rationale was never actually mandatory.
        "required": ["prediction", "rationale"],
    }

    res = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            #"think": True,
            "format": response_schema,
        },
    )
    # Fail at the request instead of handing an error payload to the caller.
    res.raise_for_status()
    return res.json()


Implementing structured JSON outputs with Ollama — see https://ollama.com/blog/structured-outputs

In [None]:
# Single-paper example inputs (paper 304): the parsed PDF and its review file.
pdf = [r"C:\Users\G34371231\OneDrive - The George Washington University\Desktop\PeerRead\data\iclr_2017\train\parsed_pdfs\304.pdf.json"]
review = [r"C:\Users\G34371231\OneDrive - The George Washington University\Desktop\PeerRead\data\iclr_2017\train\reviews\304.json"]
def get_response_prefinetuning(pdf_path, review_path, results, model="qwen3:latest"):
    """Query the model for one paper and record its prediction in `results`.

    Args:
        pdf_path: Path to the parsed-PDF JSON file of the paper.
        review_path: Path to the review JSON file; its "accepted" field is stored
            as the ground-truth label.
        results: Dict to update in place, keyed by paper name.
        model: Ollama model tag to query. Defaults to "qwen3:latest".

    Returns:
        The same `results` dict, with one entry added for this paper.

    Raises:
        ValueError: if the model output has no "response" field, or if that field
            is not valid JSON (json.JSONDecodeError is a ValueError subclass).
    """
    # Read as UTF-8 explicitly; the platform default encoding (e.g. cp1252 on
    # Windows) can fail on non-ASCII characters in paper text.
    with open(pdf_path, 'r', encoding='utf-8') as f1:
        paper = json.load(f1) #json file contents for one research paper
    with open(review_path, 'r', encoding='utf-8') as f2:
        review = json.load(f2)

    prompt = build_prompt(paper)
    output = model_forecasting(model, prompt)

    raw_response = output.get("response")
    if raw_response is None:
        # json.loads(None) would raise an opaque TypeError; say which paper failed.
        raise ValueError(f"model output for {pdf_path} has no 'response' field: {output}")
    json_response = json.loads(raw_response)

    # Fall back to the file name so papers without a "name" field do not all
    # collapse onto the same None key.
    paper_key = paper.get("name") or os.path.basename(pdf_path)
    results[paper_key] = {
        "real_acceptance_label": review.get("accepted"),
        "predicted": json_response.get("prediction"),
        "rationale": json_response.get("rationale"),
        "complete_output": output
    }
    return results
        

In [None]:
# Local PeerRead checkout and the ICLR 2017 training split inside it.
_peerread_root = r"C:\Users\G34371231\OneDrive - The George Washington University\Desktop\PeerRead"
_iclr_train_dir = _peerread_root + r"\data\iclr_2017\train"

iclr_parsed_train_path = _iclr_train_dir + r"\parsed_pdfs"
iclr_reviews_train_path = _iclr_train_dir + r"\reviews"
# File that receives the forecasting results.
output_path = _peerread_root + r"\dtais_summer\qwen3_forecasting_paper_100.json"

In [None]:

def get_accuracy(parsed_path, reviews_path, num_papers=100):
    """Run the forecasting prompt on the first papers of a split and save the results.

    Despite its name, this function does not compute an accuracy; it only collects
    model predictions and writes them, as one JSON object, to the module-level
    `output_path`.

    Args:
        parsed_path: Directory of parsed-PDF JSON files (e.g. "304.pdf.json").
        reviews_path: Directory of review JSON files (e.g. "304.json").
        num_papers: How many papers (in sorted file-name order) to process.
            Defaults to 100; pass None to process all of them.
    """
    # Index reviews by paper id (the text before the first dot) so each parsed PDF
    # is matched to its own review rather than by position in two separate
    # listings, which silently misaligns if either directory has an extra file.
    reviews_by_id = {name.split(".", 1)[0]: name for name in os.listdir(reviews_path)}

    paired_files = []
    for paper_json_file in sorted(os.listdir(parsed_path)):
        review_json_file = reviews_by_id.get(paper_json_file.split(".", 1)[0])
        if review_json_file is None:
            print(f"no review file found for {paper_json_file}; skipping")
            continue
        paired_files.append((paper_json_file, review_json_file))

    results = {} #dictionary of results
    for paper_json_file, review_json_file in paired_files[:num_papers]:
        json_pdf_path = os.path.join(parsed_path, paper_json_file)
        json_review_path = os.path.join(reviews_path, review_json_file)
        results = get_response_prefinetuning(json_pdf_path, json_review_path, results)

    # Overwrite rather than append: appending a second JSON object to the file
    # produces a document that json.load can no longer parse.
    with open(output_path, 'w') as f3:
        json.dump(results, f3)

In [None]:
# Forecast the first 100 ICLR training papers; results are written to output_path.
get_accuracy(
    parsed_path=iclr_parsed_train_path,
    reviews_path=iclr_reviews_train_path,
)

In [None]:
# Score the saved predictions against the real acceptance labels.
json_results_path ='C:\\Users\\G34371231\\OneDrive - The George Washington University\\Desktop\\PeerRead\\dtais_summer\\qwen3_forecasting_paper_100.json'
with open(json_results_path, 'r') as f:
    data = json.load(f)

#Extract true and predicted labels 
y_true = []
y_pred = []

for fname, entry in data.items():
    true_label = entry.get("real_acceptance_label", None)
    # "predicted" may be missing or null; normalise to "" so the entry is reported
    # as unusable below instead of crashing on None.strip().
    pred_label = str(entry.get("predicted") or "").strip().upper()

    if true_label is None or pred_label not in ["ACCEPT", "REJECT"]:
        print(f'missing label for {fname}')
        continue  #in case entry is not in expected format

    # Append both labels together so y_true and y_pred stay aligned. The true label
    # was previously computed but never stored, which left y_true empty and made
    # the script always report "No valid data to evaluate."
    y_true.append("ACCEPT" if true_label else "REJECT")
    y_pred.append(pred_label)

if y_true:
    acc = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {acc:.2f} on {len(y_true)} examples")
    print(classification_report(y_true, y_pred))
else:
    print("No valid data to evaluate.")


In [None]:
# Should we set a random seed and sample a subset for evaluating accuracy, or just shuffle and prompt on all ICLR training papers to get the accuracy score?
# We should keep track of which files are accepted vs. rejected, and of how uneven this distribution is!

# Module-level placeholders; they are rebound from sort_by_acceptance's return value.
accepted = []
rejected = []

def sort_by_acceptance(paper_path, review_path):
    """Split the papers of a dataset folder into accepted and rejected ones.

    Parsed papers and reviews are matched by paper id, i.e. the file name up to its
    first dot ("304.pdf.json" <-> "304.json"). Papers without a matching review
    file are reported and skipped.

    Args:
        paper_path: Directory containing the parsed-PDF JSON files.
        review_path: Directory containing the review JSON files.

    Returns:
        A tuple (accepted, rejected). Each is a sorted list of
        [paper_file_path, review_file_path] pairs, chosen by the boolean
        "accepted" field of the review file.
    """
    # Fresh lists on every call: accumulating into the module-level lists made each
    # repeated call return every pair one more time.
    accepted_pairs = []
    rejected_pairs = []

    # os.listdir returns entries in arbitrary order, so pair files by id instead of
    # zipping two independent listings.
    reviews_by_id = {name.split(".", 1)[0]: name for name in os.listdir(review_path)}

    for paper_file in sorted(os.listdir(paper_path)):
        review_file = reviews_by_id.get(paper_file.split(".", 1)[0])
        if review_file is None:
            print(f"no review file found for {paper_file}; skipping")
            continue

        fpaper_path = os.path.join(paper_path, paper_file)
        freview_path = os.path.join(review_path, review_file)
        try:
            with open(freview_path, encoding="utf-8") as f:
                review_data = json.load(f)

            outcome = review_data.get("accepted")
            if outcome is True:
                accepted_pairs.append([fpaper_path, freview_path])
            elif outcome is False:
                rejected_pairs.append([fpaper_path, freview_path])
            else:
                print(f"CHECK FILE {freview_path}: accepted field contains {outcome}")
        except Exception as e:
            # Best effort: report the unreadable review and carry on with the rest.
            print(f"error reading {freview_path}: {e}")

    return sorted(accepted_pairs), sorted(rejected_pairs)


In [None]:
# Split the ICLR training papers into accepted / rejected [paper, review] path pairs.
accepted, rejected = sort_by_acceptance(
    paper_path=iclr_parsed_train_path,
    review_path=iclr_reviews_train_path,
)
#print(f"accepted: {accepted}")
#print(f"rejected: {rejected}")

In [None]:
def prompt_sample(random_sampling, half_num_samples, set_seed, parsed_path, reviews_path, output_path):
    """Prompt the model on a shuffled set of papers and save the predictions.

    Args:
        random_sampling: If truthy, draw a balanced sample of `half_num_samples`
            accepted and `half_num_samples` rejected papers. Otherwise use every
            paper in `parsed_path` that has a matching review file.
        half_num_samples: Number of papers drawn per class when sampling.
        set_seed: Seed for the `random` module, so the selection and order repeat.
        parsed_path: Directory containing the parsed-PDF JSON files.
        reviews_path: Directory containing the review JSON files.
        output_path: File the results dict is written to as JSON. It is
            overwritten, so it always holds a single valid JSON document.

    Returns:
        The list of [paper_file_path, review_file_path] pairs that were prompted,
        in prompting order.

    Raises:
        ValueError: from random.sample, if `half_num_samples` exceeds the number of
            accepted or rejected papers available.
    """
    random.seed(set_seed)

    if random_sampling: #set random_sampling = True to randomly sample a subset of papers in the folder.
        all_accepted, all_rejected = sort_by_acceptance(parsed_path, reviews_path)
        # Defensive de-duplication (order-preserving): each paper can be drawn at
        # most once even if a listing contains repeated pairs.
        all_accepted = [list(pair) for pair in dict.fromkeys(map(tuple, all_accepted))]
        all_rejected = [list(pair) for pair in dict.fromkeys(map(tuple, all_rejected))]
        #number of accepted papers = number of rejected papers in sample set
        paper_name_set = (random.sample(all_accepted, half_num_samples)
                          + random.sample(all_rejected, half_num_samples))
    else:
        # Match each parsed paper to its review by id (file name up to the first
        # dot) and build full paths. The previous code called the non-existent
        # os.list, shuffled a zip object and joined a path with an integer index.
        reviews_by_id = {name.split(".", 1)[0]: name for name in os.listdir(reviews_path)}
        paper_name_set = []
        for paper_file in sorted(os.listdir(parsed_path)):
            review_file = reviews_by_id.get(paper_file.split(".", 1)[0])
            if review_file is None:
                print(f"no review file found for {paper_file}; skipping")
                continue
            paper_name_set.append([os.path.join(parsed_path, paper_file),
                                   os.path.join(reviews_path, review_file)])

    random.shuffle(paper_name_set)

    results = {}
    for json_pdf_path, json_review_path in paper_name_set:
        results = get_response_prefinetuning(json_pdf_path, json_review_path, results)

    # 'w', not 'a': appending a second JSON object makes the file unparseable.
    with open(output_path, 'w') as f3:
        json.dump(results, f3)

    return paper_name_set

In [None]:
# Balanced run: 50 accepted + 50 rejected training papers, sampled with seed 50.
output_path = r"C:\Users\G34371231\OneDrive - The George Washington University\Desktop\PeerRead\dtais_summer\qwen3_forecasting_paper_100.json"
prompt_sample(
    random_sampling=True,
    half_num_samples=50,
    set_seed=50,
    parsed_path=iclr_parsed_train_path,
    reviews_path=iclr_reviews_train_path,
    output_path=output_path,
)