# Processing the papers for the expert validation study

Automatic creation of short introduction texts for the papers.

## Add missing URLs

Start by importing a file containing missing URLs for the papers. The missing URLs were manually added.

In [1]:
import csv
import os
import requests
from bs4 import BeautifulSoup
from openai import OpenAI
import json
import requests

In [2]:
def extract_csv_to_list(folderpath, filename):
    """
    Extracts data from a CSV file and returns a list of dictionaries.

    The research questions are read from the column "Research Questions (max. 4)"
    when that header exists and from "Research Questions" otherwise. The second
    header is the key this function itself emits, so CSV files that were written
    from previously extracted rows can be loaded again.

    Parameters:
    folderpath (str): The path to the folder containing the CSV file.
    filename (str): The name of the CSV file.

    Returns:
    list[dict]: A list of dictionaries where each dictionary represents a row in the CSV file.
                Each dictionary contains the keys:
                - "Paper Name"
                - "Research Questions"
                - "URL"
                - "Abstract" (if present in the CSV file)

    Raises:
    KeyError: If "Paper Name", "URL", or both research-question headers are missing.
    """
    csv_file_path = os.path.join(folderpath, filename)
    # Accepted headers for the research questions, in order of preference
    rq_columns = ("Research Questions (max. 4)", "Research Questions")
    extracted_data = []

    # 'utf-8-sig' reads plain UTF-8 as well as files that start with a byte order
    # mark, which would otherwise become part of the first header name.
    with open(csv_file_path, newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.DictReader(csvfile)

        for row in reader:
            # Fall back to the first header so a missing column still raises KeyError
            rq_column = next((name for name in rq_columns if name in row), rq_columns[0])
            subset = {
                "Paper Name": row["Paper Name"],
                "Research Questions": row[rq_column],
                "URL": row["URL"]
            }
            # Check if "Abstract" column exists and add it
            if "Abstract" in row:
                subset["Abstract"] = row["Abstract"]

            extracted_data.append(subset)

    return extracted_data


In [3]:
def write_to_csv(data, filename, folder='csv_files'):
    """
    Writes a list of dictionaries to a CSV file inside the given folder.

    The header is the union of the keys of all rows, in the order in which the
    keys are first seen. Rows that lack a key get an empty cell for it, so rows
    with differing keys can be written together.

    Parameters:
    data (list of dict): A list of dictionaries where each dictionary represents a row in the CSV file.
    filename (str): The name of the CSV file to be created.
    folder (str): The folder the file is written to; created if it does not exist.
                  Defaults to 'csv_files'.

    Returns:
    None: The function writes the CSV file and prints a confirmation message upon success.

    Raises:
    ValueError: If data is empty, because no header can be derived from it.
    """
    if not data:
        raise ValueError("Cannot write CSV file: 'data' contains no rows")

    # Ensure the target directory exists
    os.makedirs(folder, exist_ok=True)  # Creates the folder if it doesn't exist

    # Construct the full file path by joining the folder name with the filename
    file_path = os.path.join(folder, filename)

    # Collect the keys of every row; dict.fromkeys removes duplicates and keeps the order
    fieldnames = list(dict.fromkeys(key for row in data for key in row))

    # Open the file in write mode, create a CSV DictWriter object
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        # Write the header (fieldnames)
        writer.writeheader()

        # Write the rows (data)
        writer.writerows(data)

    print(f"Data successfully written to {file_path}")

In [1]:
# Load the papers from the CSV file in which the missing URLs were added manually.
# NOTE: extract_csv_to_list has to be defined in the running session before this
# cell is executed; otherwise the call fails with a NameError.
folderpath = "../paper_selection/csv_files/papers_expert_study"
csv_file = "updated_url_papers_expert_val.csv"
sampled_papers_list = extract_csv_to_list(folderpath, csv_file)

NameError: name 'extract_csv_to_list' is not defined

## Add abstracts

In [4]:
def get_abstracts(url):
    """
    Scrapes abstracts from the given URL using predefined CSS selectors.

    This function sends an HTTP request to the specified URL, parses the HTML content
    using BeautifulSoup, and extracts the abstract text based on a list of possible
    selectors. It attempts to handle different website structures, including ArXiv,
    IEEE Xplore, and ACM Digital Library.

    Request failures (invalid URL, connection error, timeout) and non-200 responses
    are reported with a printed message and yield an empty list, so one bad URL
    does not abort a batch run.

    Parameters:
    url (str): The URL of the research paper or article from which to extract the abstract.

    Returns:
    list[str]: A list of extracted abstracts (as strings) without empty or repeated
               entries. If no abstracts are found, an empty list is returned.
    """
    headers = {"User-Agent": "Mozilla/5.0"}  # Avoid blocking by the server

    try:
        # The timeout keeps a single unresponsive host from blocking forever
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch page: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch page, status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Adjust these selectors based on the website structure
    possible_selectors = [
        'section[role="doc-abstract"] div[role="paragraph"]',  # for dl.acm.org
        'div.abstract-text div.col-12 div.u-mb-1 div[xplmathjax]',  # ieeexplore.ieee.org
        'span.abstract.mathjax',
        'div.abstract',
        'p.abstract',
        'section.abstract',
        'span.abstract',
        'blockquote.abstract.mathjax'  # for arxiv.org
    ]

    abstracts = []
    for selector in possible_selectors:
        for elem in soup.select(selector):
            if selector == 'blockquote.abstract.mathjax':
                # Join the text pieces inside the blockquote and drop the "Abstract:" descriptor
                abstract_text = " ".join(text for text in elem.stripped_strings if text != "Abstract:")
            else:
                abstract_text = elem.get_text(strip=True)

            # Some selectors overlap (e.g. 'span.abstract.mathjax' and 'span.abstract'),
            # so the same element can match more than once; keep each text only once.
            if abstract_text and abstract_text not in abstracts:
                abstracts.append(abstract_text)

    if not abstracts:
        print("Could not scrape abstract")

    return abstracts

In [5]:
def add_abstracts_to_papers(sampled_papers_list):
    """
    Adds abstracts to the list of papers.

    For every paper the abstract is scraped from the paper's "URL" with
    get_abstracts and stored under the key "Abstract" as a single string.
    get_abstracts returns a list of text fragments; the fragments are joined
    with a space so that the CSV export and the prompts receive plain text
    instead of the string representation of a list. A paper for which nothing
    could be scraped gets an empty string.

    Parameters:
    sampled_papers_list (list of dict): The papers; each dictionary must contain the key "URL".

    Returns:
    list of dict: The same list object, with every dictionary updated in place.
    """
    for paper in sampled_papers_list:
        fragments = get_abstracts(paper["URL"])  # Get the abstract using the URL
        paper["Abstract"] = " ".join(fragments)  # Add the "Abstract" to the dictionary

    return sampled_papers_list

Scrape the abstract:

In [40]:
# Scrape an abstract for every paper. The dictionaries are updated in place, so
# updated_papers_list refers to the same list object as sampled_papers_list.
updated_papers_list = add_abstracts_to_papers(sampled_papers_list)

Failed to fetch page, status code: 418
Could not scrape abstract
Could not scrape abstract
Failed to fetch page, status code: 418
Failed to fetch page, status code: 418
Failed to fetch page, status code: 418
Failed to fetch page, status code: 418
Could not scrape abstract
Failed to fetch page, status code: 418
Failed to fetch page, status code: 418
Failed to fetch page, status code: 418
Failed to fetch page, status code: 418
Could not scrape abstract
Failed to fetch page, status code: 418
Could not scrape abstract
Failed to fetch page, status code: 418
Could not scrape abstract
Could not scrape abstract
Could not scrape abstract
Could not scrape abstract
Could not scrape abstract
Could not scrape abstract
Paper Name: Towards Understanding Fairness and its Composition in Ensemble Machine Learning


KeyError: 'Research Questions (max. 4)'

Create an artifact:

In [52]:
# Store the papers together with the scraped abstracts in a CSV file.
filename = 'abstracts_papers_expert_val.csv'
write_to_csv(updated_papers_list, filename)

Data successfully written to csv_files/papers_expert_study/abstracts_papers_expert_val.csv


## Create introductory texts for each paper

First, import the file in which the missing abstracts were completed manually:

In [6]:
def post_process(result):
    """
    Decodes a JSON document.

    Parameters:
    result (str): The JSON text to decode.

    Returns:
    The Python object produced by json.loads for the given text.
    """
    return json.loads(result)

In [7]:
def create_intro_text(paper_abstract, SYSTEM_PROMPT_GER, model="gpt-4o-2024-11-20",
                      url="http://172.26.92.115/chat_completion", timeout=120):
    """
    Requests an introductory text for an abstract from the chat completion endpoint.

    The system prompt and the abstract are sent as a system and a user message
    in a single POST request. The API key is read from the environment variable
    OPENAI_API_KEY and passed as a bearer token.

    Parameters:
    paper_abstract (str): The abstract, sent as the user message.
    SYSTEM_PROMPT_GER (str): The system prompt, sent as the system message.
    model (str): The model name placed in the request body.
    url (str): The endpoint the request is posted to.
    timeout (float): Seconds to wait for the server before requests raises a timeout error.

    Returns:
    dict or str: The decoded JSON response if the status code is 200; otherwise the
                 string "Error <status code>: <response text>".
    """
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT_GER},
            {"role": "user", "content": paper_abstract}
        ]
    }

    # If the variable is not set, api_key is None and the header reads "Bearer None"
    api_key = os.environ.get("OPENAI_API_KEY")

    # Send request
    response = requests.post(
        url,
        headers={'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'},
        json=data,
        timeout=timeout
    )

    # Check response
    if response.status_code == 200:
        return response.json()  # Return full JSON response
    else:
        return f"Error {response.status_code}: {response.text}"


In [None]:
folderpath = "csv_files"

In [11]:
# Load the CSV file in which the missing abstracts were completed manually.
csv_file = "completed_abstracts_expert_val.csv"

papers_list = extract_csv_to_list(folderpath, csv_file)

Send the request to the OpenAI API:

In [10]:
#Debugging: Create intro text for a single paper
paper_abstract = """In software engineering, interruptions during tasks can have significant implications for productivity and well-being. While previous studies have investigated the effect of interruptions on productivity, to the best of our knowledge, no prior work has yet distinguished the effectofdifferenttypesofinterruptions onsoftwareengineering activities. This study explores the impact of interruptions on software engineering tasks, analyzing in-person and on-screen interruptions with different levels of urgency and dominance. Participants completed code writing, code comprehension, and code review tasks while experiencing interruptions. We collect physiological data using the Empatica EmbracePlus wristband and self-perceived evaluations through surveys. Results show that on-screen interruptions with high dominance of requester significantly increase time spent on code comprehension. In-person and on-screen interruptions combined significantly affect the time spent on code review, with varied effects based on specific interruption combinations. Both interruption type and task significantly influence stress measures, with code comprehension and review tasks associated with lower stress measures compared to code writing. Interestingly, in-person interruptions present a positive impact on physiological measures, indicating reduced stress measures. However, participants’ selfperceived stress scores do not align with physiological data, with higher stress reported during in-person interruptions despite lower physiological stress measures. These findings shed light on and emphasize the potential importance of considering the complex relationship between interruptions, objective measures, and subjective experiences in software development. We discuss insights that we hope can inform interruption management and implications on stress among software engineers."""

try:
    result = create_intro_text(paper_abstract, SYSTEM_PROMPT_GER_VER2)
except Exception as e:
    # Clear `result` so that a later cell cannot print a response left over from
    # an earlier run, and show the actual error instead of a fixed message.
    result = None
    print(f"Exception at paper: {e!r}")


In [11]:
#Debugging: Create intro text for a single paper
# Read the "content" field of the first choice in the response and print it.
content = result["choices"][0]["message"]["content"]
print(content)

Unterbrechungen während der Arbeit können in der Softwareentwicklung erhebliche Auswirkungen auf Produktivität und Wohlbefinden haben. Diese Arbeit untersucht den Einfluss verschiedener Arten von Unterbrechungen auf softwarebezogene Aufgaben wie Codierung, Codeverständnis und Codeüberprüfung. Dabei werden physiologische Daten und subjektive Wahrnehmungen genutzt, um die komplexen Beziehungen zwischen Unterbrechungstypen, Aufgabenanforderungen und Stressindikatoren zu analysieren.


### Create intro texts in German

In [12]:
def get_introductory_text(papers_list):
    """
    Iterates through papers_list, adds the key 'Introductory text' to each paper
    that has an 'Abstract', and populates it using the create_intro_text function
    with the prompt SYSTEM_PROMPT_GER.

    A failure for one paper is printed together with the paper name and does not
    stop the loop. Such a paper keeps an already existing 'Introductory text' or
    receives an empty string, so that all processed papers share the same keys.

    :param papers_list: List of dictionaries containing paper details.
    :return: The same list object; the dictionaries are modified in place.
    """
    for paper in papers_list:
        if 'Abstract' not in paper:
            continue

        try:
            result = create_intro_text(paper['Abstract'], SYSTEM_PROMPT_GER)
            # create_intro_text returns an "Error <status>: ..." string instead of
            # a dictionary when the request was not answered with status 200
            if not isinstance(result, dict):
                raise RuntimeError(result)
            paper['Introductory text'] = result["choices"][0]["message"]["content"]
        except Exception as e:
            # paper is a dictionary, so it cannot be concatenated to the message
            paper_name = paper.get('Paper Name', '<unknown paper>')
            print(f"Exception at {paper_name}: {e!r}")
            paper.setdefault('Introductory text', '')

    return papers_list

In [14]:
# System prompt for the German introductory texts. The line breaks and the
# indentation of the continuation lines are part of the string.
SYSTEM_PROMPT_GER = """Write a concise introductory text in German for a paper based on the following abstract. 
                The text should provide a brief overview of the main themes and context of the paper without delving into specific methods, 
                results, or contributions. It should be written in an impersonal, third-person perspective (avoid using first-person plural like 
                'we' or 'our'). The tone should remain formal and academic. The introduction should be no longer than 2-3 sentences and should 
                strictly avoid mentioning the paper's contributions, findings, or implications. Focus solely on the broader subject matter and 
                relevance of the research field."""

In [15]:
# Fallback prompt: use it for papers whose result came back in English instead of
# German. It is the German prompt with one more sentence at the end that repeats
# the language requirement.
SYSTEM_PROMPT_GER_VER2 = """Write a concise introductory text in German for a paper based on the following abstract. 
                The text should provide a brief overview of the main themes and context of the paper without delving into specific methods, 
                results, or contributions. It should be written in an impersonal, third-person perspective (avoid using first-person plural like 
                'we' or 'our'). The tone should remain formal and academic. The introduction should be no longer than 2-3 sentences and should 
                strictly avoid mentioning the paper's contributions, findings, or implications. Focus solely on the broader subject matter and 
                relevance of the research field. The resulting text should be in german."""

In [None]:
# Create an introductory text for each entry.
# The papers are modified in place: list_intro_texts is the papers_list object itself.
list_intro_texts = get_introductory_text(papers_list)

In [26]:
# Store the papers together with their German introductory texts.
filename = "papers_with_intro_texts_german.csv"
write_to_csv(list_intro_texts, filename)

Data successfully written to csv_files/papers_with_intro_texts_german.csv


### Create intro texts in English

In [18]:
# System prompt for the English introductory texts. The line breaks and the
# indentation of the continuation lines are part of the string.
SYSTEM_PROMPT_ENG = """Write a concise introductory text for a paper based on the following abstract. 
                The text should provide a brief overview of the main themes and context of the paper without delving into specific methods, 
                results, or contributions. It should be written in an impersonal, third-person perspective (avoid using first-person plural like 
                'we' or 'our'). The tone should remain formal and academic. The introduction should be no longer than 2-3 sentences and should 
                strictly avoid mentioning the paper's contributions, findings, or implications. Focus solely on the broader subject matter and 
                relevance of the research field."""

In [19]:
def get_introductory_text_eng(papers_list):
    """
    Iterates through papers_list, adds the key 'Introductory text' to each paper
    that has an 'Abstract', and populates it using the create_intro_text function
    with the prompt SYSTEM_PROMPT_ENG.

    A failure for one paper is printed together with the paper name and does not
    stop the loop. Such a paper keeps an already existing 'Introductory text' or
    receives an empty string, so that all processed papers share the same keys.

    :param papers_list: List of dictionaries containing paper details.
    :return: The same list object; the dictionaries are modified in place.
    """
    for paper in papers_list:
        if 'Abstract' not in paper:
            continue

        try:
            result = create_intro_text(paper['Abstract'], SYSTEM_PROMPT_ENG)
            # create_intro_text returns an "Error <status>: ..." string instead of
            # a dictionary when the request was not answered with status 200
            if not isinstance(result, dict):
                raise RuntimeError(result)
            paper['Introductory text'] = result["choices"][0]["message"]["content"]
        except Exception as e:
            # paper is a dictionary, so it cannot be concatenated to the message
            paper_name = paper.get('Paper Name', '<unknown paper>')
            print(f"Exception at {paper_name}: {e!r}")
            paper.setdefault('Introductory text', '')

    return papers_list

In [34]:
# Create the English texts. The function writes to the key 'Introductory text' of
# the dictionaries in papers_list and returns that same list, so values already
# stored under this key are replaced.
list_intro_texts_eng = get_introductory_text_eng(papers_list)

In [35]:
# Store the papers together with their English introductory texts.
filename = "papers_with_intro_texts_english.csv"
write_to_csv(list_intro_texts_eng, filename)

Data successfully written to csv_files/papers_with_intro_texts_english.csv


In [20]:
#Debugging: Create intro text for a single paper
paper_abstract = """In software engineering, interruptions during tasks can have significant implications for productivity and well-being. While previous studies have investigated the effect of interruptions on productivity, to the best of our knowledge, no prior work has yet distinguished the effectofdifferenttypesofinterruptions onsoftwareengineering activities. This study explores the impact of interruptions on software engineering tasks, analyzing in-person and on-screen interruptions with different levels of urgency and dominance. Participants completed code writing, code comprehension, and code review tasks while experiencing interruptions. We collect physiological data using the Empatica EmbracePlus wristband and self-perceived evaluations through surveys. Results show that on-screen interruptions with high dominance of requester significantly increase time spent on code comprehension. In-person and on-screen interruptions combined significantly affect the time spent on code review, with varied effects based on specific interruption combinations. Both interruption type and task significantly influence stress measures, with code comprehension and review tasks associated with lower stress measures compared to code writing. Interestingly, in-person interruptions present a positive impact on physiological measures, indicating reduced stress measures. However, participants’ selfperceived stress scores do not align with physiological data, with higher stress reported during in-person interruptions despite lower physiological stress measures. These findings shed light on and emphasize the potential importance of considering the complex relationship between interruptions, objective measures, and subjective experiences in software development. We discuss insights that we hope can inform interruption management and implications on stress among software engineers."""

try:
    result = create_intro_text(paper_abstract, SYSTEM_PROMPT_ENG)
except Exception as e:
    # Clear `result` so that a later cell cannot print a response left over from
    # an earlier run, and show the actual error instead of a fixed message.
    result = None
    print(f"Exception at paper: {e!r}")


In [21]:
#Debugging: Create intro text for a single paper
# Read the "content" field of the first choice in the response and print it.
content = result["choices"][0]["message"]["content"]
print(content)

Interruptions are a pervasive aspect of modern work environments, with significant implications for productivity and well-being. In the context of software engineering, where tasks often demand high levels of concentration and cognitive effort, the impact of interruptions remains a critical area of study. This research examines the effects of different types of interruptions on software engineering activities, focusing on their influence across various tasks and physiological and self-perceived stress measures.


## Extract the properties for the research questions

In [41]:
# System prompt for extracting the measured properties per research question.
# The line breaks and the indentation of the continuation lines are part of the string.
SYSTEM_PROMPT_PROPERRTIES = """Using the provided abstract and research questions, identify the key properties measured to address each research 
                            question. These properties may include, but are not limited to, accuracy, usability, reliability, performance, 
                            portability, CPU usage, and runtime.
                            For each research question, list the relevant properties without restating the question. Organize your answers by 
                            the research question number (e.g., RQ1, RQ2, etc.), ensuring that each property corresponds directly to the 
                            information in the abstract."""

In [35]:
def extract_properies(paper_abstract, paper_rqs, SYSTEM_PROMPT_PROPERRTIES, model="gpt-4o-2024-11-20",
                      url="http://172.26.92.115/chat_completion", timeout=120):
    """
    Requests the properties measured for each research question from the chat
    completion endpoint.

    The system prompt is sent as the system message; the abstract and the
    research questions follow as two separate user messages in a single POST
    request. The API key is read from the environment variable OPENAI_API_KEY
    and passed as a bearer token.

    Parameters:
    paper_abstract (str): The abstract, sent as the first user message.
    paper_rqs (str): The research questions, sent as the second user message.
    SYSTEM_PROMPT_PROPERRTIES (str): The system prompt, sent as the system message.
    model (str): The model name placed in the request body.
    url (str): The endpoint the request is posted to.
    timeout (float): Seconds to wait for the server before requests raises a timeout error.

    Returns:
    dict or str: The decoded JSON response if the status code is 200; otherwise the
                 string "Error <status code>: <response text>".
    """
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT_PROPERRTIES},
            {"role": "user", "content": paper_abstract},
            {"role": "user", "content": paper_rqs}
        ]
    }

    # If the variable is not set, api_key is None and the header reads "Bearer None"
    api_key = os.environ.get("OPENAI_API_KEY")

    # Send request
    response = requests.post(
        url,
        headers={'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'},
        json=data,
        timeout=timeout
    )

    # Check response
    if response.status_code == 200:
        return response.json()  # Return full JSON response
    else:
        return f"Error {response.status_code}: {response.text}"


In [39]:
# Debugging
paper_abstract = """['Recently, deep learning models have been widely applied in program understanding tasks, and these models achieve state-of-the-art results on many benchmark datasets. A major challenge of deep learning for program understanding is that the effectiveness of these approaches depends on the quality of their datasets, and these datasets often contain noisy data samples. A typical kind of noise in program understanding datasets is label noise, which means that the target outputs for some inputs are incorrect. Researchers have proposed various approaches to alleviate the negative impact of noisy labels, and formed a new research topic: noisy label learning (NLL). In this paper, we conduct an empirical study on the effectiveness of noisy label learning on deep learning for program understanding datasets. We evaluate various NLL approaches and deep learning models on three tasks: program classification, vulnerability detection, and code summarization. From the evaluation results, we come to the following findings: 1) small trained-from-scratch models are prone to label noises in program understanding, while large pre-trained models are highly robust against them. 2) NLL approaches significantly improve the program classification accuracies for small models on noisy training sets, but they only slightly benefit large pre-trained models in classification accuracies. 3) NLL can effectively detect synthetic noises in program understanding, but struggle in detecting real-world noises. We believe our findings can provide insights on the abilities of NLL in program understanding, and shed light on future works in tackling noises in software engineering datasets. We have released our code at this https URL .']"""
paper_rqs = """RQ1:Howdodifferenttypes of synthetic label noises in program classification affect the performance of deep learning models when NLL is not introduced? • 
            RQ2:Howdoexisting NLL approaches perform on different synthetic noises in program classification? 
            • RQ3:HowdoNLLapproaches perform on program understanding tasks with real-world noises?"""

try:
    result = extract_properies(paper_abstract, paper_rqs, SYSTEM_PROMPT_PROPERRTIES)
except Exception as e:
    # Clear `result` so that a later cell cannot print a response left over from
    # an earlier run, and show the actual error instead of a fixed message.
    result = None
    print(f"Exception at paper: {e!r}")


In [40]:
#Debugging
# Read the "content" field of the first choice in the response and print it.
content = result["choices"][0]["message"]["content"]
print(content)

Here are the key properties measured to answer each research question based on the abstract:

### **RQ1: How do different types of synthetic label noises in program classification affect the performance of deep learning models when NLL is not introduced?**
- **Performance**: The effectiveness of the deep learning models in program classification tasks is analyzed under different synthetic label noise conditions.
- **Accuracy**: The classification accuracy of the deep learning models is measured to determine the impact of synthetic label noise.
- **Reliability**: The robustness of deep learning models against noise is considered, particularly in the absence of NLL.

---

### **RQ2: How do existing NLL approaches perform on different synthetic noises in program classification?**
- **Accuracy**: Improvements in classification accuracy when NLL approaches are applied to handle synthetic label noises.
- **Effectiveness**: The overall capability of NLL methods to mitigate the adverse effects

In [None]:
# NOTE: Calling this function sends one request per paper to the chat completion endpoint.
def get_properties(papers_list):
    """
    Iterates through papers_list, adds the key 'Properties' to each paper that has
    both an 'Abstract' and 'Research Questions', and populates it using the
    extract_properies function with the prompt SYSTEM_PROMPT_PROPERRTIES.

    A failure for one paper is printed together with the paper name and does not
    stop the loop. Such a paper keeps already existing 'Properties' or receives
    an empty string, so that all processed papers share the same keys.

    :param papers_list: List of dictionaries containing paper details.
    :return: The same list object; the dictionaries are modified in place.
    """
    for paper in papers_list:
        # Both keys are required; "'Abstract' and 'Research Questions' in paper"
        # would only test the second one
        if 'Abstract' not in paper or 'Research Questions' not in paper:
            continue

        try:
            result = extract_properies(paper['Abstract'], paper['Research Questions'], SYSTEM_PROMPT_PROPERRTIES)
            # extract_properies returns an "Error <status>: ..." string instead of
            # a dictionary when the request was not answered with status 200
            if not isinstance(result, dict):
                raise RuntimeError(result)
            paper['Properties'] = result["choices"][0]["message"]["content"]
        except Exception as e:
            # paper is a dictionary, so it cannot be concatenated to the message
            paper_name = paper.get('Paper Name', '<unknown paper>')
            print(f"Exception at {paper_name}: {e!r}")
            paper.setdefault('Properties', '')

    return papers_list

## Save everything in a csv file

In [None]:
# To-Do: Reuse the old function write_to_csv instead of writing a new function??
def write_to_csv(data, filename, folder='csv_files/papers_expert_study'):
    """
    Writes a list of dictionaries to a CSV file inside the given folder.

    The header is the union of the keys of all rows, in the order in which the
    keys are first seen. Rows that lack a key get an empty cell for it, so rows
    with differing keys can be written together.

    Parameters:
    data (list of dict): A list of dictionaries where each dictionary represents a row in the CSV file.
    filename (str): The name of the CSV file to be created.
    folder (str): The folder the file is written to; created if it does not exist.
                  Defaults to 'csv_files/papers_expert_study'.

    Returns:
    None: The function writes the CSV file and prints a confirmation message upon success.

    Raises:
    ValueError: If data is empty, because no header can be derived from it.
    """
    if not data:
        raise ValueError("Cannot write CSV file: 'data' contains no rows")

    # Ensure the target directory exists
    os.makedirs(folder, exist_ok=True)  # Creates the folder if it doesn't exist

    # Construct the full file path by joining the folder name with the filename
    file_path = os.path.join(folder, filename)

    # Collect the keys of every row; dict.fromkeys removes duplicates and keeps the order
    fieldnames = list(dict.fromkeys(key for row in data for key in row))

    # Open the file in write mode, create a CSV DictWriter object
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        # Write the header (fieldnames)
        writer.writeheader()

        # Write the rows (data)
        writer.writerows(data)

    print(f"Data successfully written to {file_path}")
