# Extraction of the sessions and the corresponding papers/talks

Load the dependencies:

In [74]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
from collections import defaultdict
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from enum import Enum
import matplotlib.pyplot as plt
import csv
import os

Extraction of the content of papers:

In [55]:
def _first_text_node(element):
    """Return the stripped first direct text child of *element*, or "" if it has none."""
    for child in element.contents:
        if isinstance(child, str):  # NavigableString is a str subclass
            return child.strip()
    return ""


def _paper_title(cell):
    """Return the paper title found in a program-table cell, or "Unknown Paper"."""
    strong = cell.find("strong")
    if strong is None:
        return "Unknown Paper"

    link = strong.a
    if link is not None and link.contents:
        first_child = link.contents[0]
        if isinstance(first_child, str):
            # Only the leading text of the link is the title; trailing child
            # tags inside the <a> are ignored.
            return first_child.strip()

    # No <a>, or the link starts with a nested tag: fall back to the full
    # text of the <strong> element instead of raising.
    return strong.get_text(strip=True) or "Unknown Paper"


def _parse_paper_row(row):
    """Return the paper dict for one visible table row, or None if the row is skipped."""
    if row.get("data-is-visible") != "true" or row.get("style") == "display: none;":
        return None

    cells = row.find_all("td")
    if len(cells) < 4:
        return None

    # The paper details live in the 4th cell of the row.
    cell = cells[3]
    authors_div = cell.find("div", class_="performers")
    link_elem = cell.find("a", class_="publication-link")

    return {
        "name": _paper_title(cell),
        "authors": [a.get_text(strip=True) for a in authors_div.find_all("a")] if authors_div else [],
        "url": link_elem['href'] if link_elem else "No URL available",
    }


def extract_sessions_papers(url, wait_seconds=5):
    """
    Extracts session and paper details from a given conference URL on researchr.

    The page is rendered in headless Chrome (the program is built by
    JavaScript), then the rendered HTML is parsed with BeautifulSoup.

    Args:
        url (str): The URL of the conference page from which to extract data.
        wait_seconds (float): Seconds to sleep after loading the page so that
            the JavaScript-rendered content is present. Defaults to 5.

    Returns:
        dict: A dictionary mapping each session name to ``{"papers": [...]}``.
              Each entry of the ``"papers"`` list is a dictionary with:
              - 'name': The title of the paper (str)
              - 'authors': A list of authors' names (list of str)
              - 'url': The URL to the paper (str), or "No URL available"
              Sessions without any visible paper are omitted.

    Note:
        If two sessions on the page carry exactly the same name, the later one
        replaces the earlier one in the result.
    """
    # Set up headless Chrome (no GUI)
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    try:
        driver.get(url)
        # Give the page time to finish rendering before grabbing the HTML.
        time.sleep(wait_seconds)
        page_source = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    # A list passed to class_ matches divs carrying either of the classes.
    all_divs = soup.find_all("div", class_=["hidable", "band"])
    visible_divs = [div for div in all_divs if div.get("data-is-visible") == "true"]

    sessions_details = {}

    for visible_div in visible_divs:
        session_table = visible_div.find("table", class_="session-table")
        if session_table is None:
            continue

        tbody = session_table.find("tbody")
        if tbody is None:
            continue

        # The session name is the first text node of the info element (the
        # text before its <span>). Reset per session so a missing element
        # never reuses the previous session's name.
        session_info_elem = tbody.find(class_="session-info-in-table")
        session_name = _first_text_node(session_info_elem) if session_info_elem is not None else ""
        session_name = session_name or "Unknown Session"

        # Papers start from the third row, so the first two rows are skipped.
        papers = []
        for row in tbody.find_all("tr")[2:]:
            paper = _parse_paper_row(row)
            if paper is not None:
                papers.append(paper)

        if papers:
            sessions_details[session_name] = {"papers": papers}

    return sessions_details

In [3]:
def print_sessions_and_papers(conference_dict):
    """
    Print every session followed by the title, authors and URL of each of its papers.

    Parameters:
    conference_dict (dict): Maps session names to dictionaries whose 'papers' entry
        is a list of paper dictionaries with the keys 'name', 'authors' and 'url'.
    """
    for title, session in conference_dict.items():
        print(f"Session: {title}")
        for entry in session['papers']:
            print(f" - Paper: {entry['name']}")
            author_line = ', '.join(entry['authors'])
            print(f"   Authors: {author_line}")
            print(f"   URL: {entry['url']}")
        # Visual gap between sessions (newline plus print's own newline).
        print("\n")

In [4]:
def print_session_names(conference_dict):
    """
    Print each session name of the dictionary on its own line.

    Parameters:
    conference_dict (dict): A dictionary whose keys are session names.
    """
    # Iterating a dict yields its keys in insertion order.
    for name in conference_dict:
        print(name)

In [5]:
def roman_to_int(roman):
    """
    Converts a Roman numeral to an integer. The numeral must be standalone: it has to
    sit at the start of the string (leading whitespace is allowed) and be followed by
    whitespace or the end of the string.

    Parameters:
    roman (str): A Roman numeral as a string (upper-case letters, 1 to 3999).

    Returns:
    int: The integer representation of the Roman numeral, or None if the input does not
         start with a valid standalone Roman numeral.
    """
    roman_numerals = {
        'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100,
        'D': 500, 'M': 1000
    }

    # Standard grammar: thousands, hundreds, tens, ones. The first lookahead
    # rejects the empty numeral (every group is optional on its own); the
    # second requires whitespace or end-of-string right after the numeral.
    pattern = re.compile(
        r'\s*(?=[IVXLCDM])'
        r'(M{0,3}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3}))'
        r'(?=\s|$)'
    )

    match = pattern.match(roman)
    if not match:
        return None

    numeral = match.group(1)

    # Scan right to left: a symbol smaller than the one to its right is
    # subtractive (e.g. the I in IV), otherwise it is added.
    total = 0
    prev_value = 0
    for char in reversed(numeral):
        value = roman_numerals[char]
        if value < prev_value:
            total -= value
        else:
            total += value
        prev_value = value

    return total

In [6]:
def merge_sessions_by_name(conference_dict):
    """
    Merges sessions with the same base topic into a single session, e.g. "Testing 1" and "Testing II".

    A trailing number (Arabic digits, or a run of the letters I, V, X, L, C, D, M) that is
    separated from the rest of the name by whitespace is stripped, and the papers of all
    sessions that share the remaining base name are concatenated in input order.

    Parameters:
    conference_dict (dict): A dictionary where:
        - The keys are session names (strings), which may include a base topic and an optional number suffix (e.g., "Testing 1", "Testing II").
        - The values are dictionaries with a key 'papers', which maps to a list of dictionaries. Each dictionary in the 'papers' list contains:
            - 'name' (str): The title of the paper.
            - 'authors' (list of str): A list of authors of the paper.
            - 'url' (str): The URL to access the paper.

    Returns:
    dict: A dictionary where:
        - The keys are base session names (strings) derived from the input session names.
        - The values are dictionaries with a key 'papers' that contains a list of all papers associated with that base session name.

    The input dictionary and its paper lists are not modified.
    """
    merged_sessions = {}

    # Base name, optionally followed by whitespace and a number suffix.
    # NOTE(review): the suffix class accepts any run of Roman-numeral letters,
    # so a trailing word such as "ML" or "C" is also treated as a number and
    # stripped - confirm this is acceptable for the session names in use.
    pattern = re.compile(r'^(.*?)(?:\s+(\d+|[IVXLCDM]+))?$')

    for session_name, details in conference_dict.items():
        match = pattern.match(session_name)
        base_name = match.group(1).strip() if match else ''

        # Keep the session under its original name when the pattern does not
        # match (e.g. a name containing a line break) or when stripping leaves
        # nothing, rather than dropping it or filing it under "".
        if not base_name:
            base_name = session_name

        merged_sessions.setdefault(base_name, {'papers': []})['papers'].extend(details['papers'])

    return merged_sessions

In [7]:
def remove_prefix_of_sessions(conference_dict):
    """
    Cleans the session names in the dictionary by removing the prefix 'Technical Session' along with its number and
    the dash ('-'). Only the part after the dash ('-') is kept as the session name. Sessions whose cleaned names
    coincide are combined, with their papers concatenated in input order.

    Parameters:
    conference_dict (dict): A dictionary where:
        - The keys are session names (strings), which may include 'Technical Session', a number, and a dash ('-') prefix.
        - The values are dictionaries with a key 'papers', which maps to a list of dictionaries. Each dictionary in the 'papers' list contains:
            - 'name' (str): The title of the paper.
            - 'authors' (list of str): A list of authors of the paper.
            - 'url' (str): The URL to access the paper.

    Returns:
    dict: A dictionary where:
        - The keys are cleaned session names (strings), derived from the input session names.
        - The values are dictionaries with a key 'papers' that contains a list of all papers associated with that cleaned session name.

    The input dictionary and its paper lists are not modified.
    """
    cleaned_sessions = {}

    # 'Technical Session <number> - <name>': group 1 is the part to keep.
    pattern = re.compile(r'^Technical Session \d+ - (.+)$')

    for session_name, details in conference_dict.items():
        match = pattern.match(session_name)
        # Names without the prefix are kept unchanged.
        cleaned_name = match.group(1).strip() if match else session_name

        if cleaned_name in cleaned_sessions:
            cleaned_sessions[cleaned_name]['papers'].extend(details['papers'])
        else:
            # Store a copy with its own paper list: a later extend() for a
            # session with the same cleaned name must not grow the caller's list.
            cleaned_sessions[cleaned_name] = {**details, 'papers': list(details['papers'])}

    return cleaned_sessions

In [8]:
def merge_sessions(dicts):
    """
    Combine several session dictionaries, concatenating the papers of sessions whose names are identical.

    Args:
        dicts (list of dict): Session dictionaries; each maps a session name to a
            dictionary with a 'papers' list.

    Returns:
        dict: Session names mapped to {"papers": [...]}, where the list holds the papers
            of every same-named session, in the order the dictionaries were given.
    """
    combined = {}

    for sessions in dicts:
        for title, info in sessions.items():
            # First occurrence of a name starts a fresh paper list.
            bucket = combined.setdefault(title, {"papers": []})
            bucket["papers"].extend(info["papers"])

    return combined

## ASE 2022-2023

### ASE 2023

In [9]:
# ASE 2023: program page filtered to the "ASE Research Papers" track.
ase2023_url = "https://conf.researchr.org/track/ase-2023/ase-2023-papers?track=ASE%20Research%20Papers#program"
# Scrape the page into {session name: {"papers": [...]}}.
ase2023 = extract_sessions_papers(ase2023_url)

# Number of sessions that contained at least one visible paper.
print("Sessions in ASE 2023", len(ase2023), "\n")

Sessions in ASE 2023 36 



In [10]:
#print_sessions_and_papers(ase2023)
#print_session_names(ase2023)

In [11]:
# Merge sessions that differ only by a trailing number (e.g. "Testing 1" / "Testing II")
ase2023_cleaned = merge_sessions_by_name(ase2023)

print("Sessions in ASE 2023 after cleaning", len(ase2023_cleaned), "\n")

# List the merged session names for inspection.
print_session_names(ase2023_cleaned)

Sessions in ASE 2023 after cleaning 22 

Cloud and Distributed Systems
Testing AI Systems
Infrastructure, Build, and Logs
Open Source and Software Ecosystems
Smart Contracts, Blockchain, Energy efficiency, and green software
Vulnerability and Security
Code Generation
Web Development
Testing Tools and Techniques
Code Quality and Code Smells
Program Repair
Program Analysis
Code Summarization
Program Verification
Code Change Analysis
Software Testing for Specialized Systems
Bug Detection
Autonomous Systems and Agents
Mobile Development
Debugging
Fuzzing
Configuration and Version Management


### ASE 2022

In [12]:
# ASE 2022: program page filtered to the "ASE Research Papers" track.
ase2022_url = "https://conf.researchr.org/program/ase-2022/program-ase-2022/?track=ASE%20Research%20Papers"
# Scrape the page into {session name: {"papers": [...]}}.
ase2022 = extract_sessions_papers(ase2022_url)

# Number of sessions that contained at least one visible paper.
print("Sessions in ASE 2022", len(ase2022), "\n")

Sessions in ASE 2022 35 



In [13]:
# Strip the "Technical Session <n> - " prefix from the session names before merging.
ase2022_noprefix = remove_prefix_of_sessions(ase2022)

In [14]:
# Merge sessions that differ only by a trailing number (e.g. "Testing 1" / "Testing II")
ase2022_cleaned = merge_sessions_by_name(ase2022_noprefix)

print("Sessions in ASE 2022 after cleaning", len(ase2022_cleaned), "\n")

#print_session_names(ase2022_cleaned)

Sessions in ASE 2022 after cleaning 26 



### Merge both ASE dictionaries

In [15]:
# Combine both ASE years; sessions with identical names get their papers concatenated
merged_dict_ase = merge_sessions([ase2022_cleaned, ase2023_cleaned])

print("Sessions in both ASE dictionaries", len(merged_dict_ase), "\n")

#print_sessions_and_papers(merged_dict)
# List the combined session names for inspection.
print_session_names(merged_dict_ase)

#print(merged_dict_ase)

Sessions in both ASE dictionaries 47 

Welcome to Day
AI for SE
Debugging and Troubleshooting
Fuzzing
Mobile Apps
Code Analysis
Source Code Manipulation
Security and Privacy
Testing
Builds and Versions
Analysis and Types
Application Domains
Bug Prediction and Localization
Compilers and Languages
Software Vulnerabilities
Formal Methods and Models
SE for AI
Web, Cloud, Networking
Security
Code Summarization and Recommendation
Human Aspects
Software Repairs
Dynamic and Concolic Analysis
Safety-Critical and Self-Adaptive Systems
Code Similarities and Refactoring
Builds and Dependencies
Cloud and Distributed Systems
Testing AI Systems
Infrastructure, Build, and Logs
Open Source and Software Ecosystems
Smart Contracts, Blockchain, Energy efficiency, and green software
Vulnerability and Security
Code Generation
Web Development
Testing Tools and Techniques
Code Quality and Code Smells
Program Repair
Program Analysis
Code Summarization
Program Verification
Code Change Analysis
Software Testing 

## FSE 2023-2024

### FSE 2024

In [16]:
# FSE 2024: program page filtered to the "FSE Research Papers" track.
fse2024_url = "https://2024.esec-fse.org/program/program-fse-2024/?track=FSE%20Research%20Papers"
# Scrape the page into {session name: {"papers": [...]}}.
fse2024 = extract_sessions_papers(fse2024_url)

# Number of sessions that contained at least one visible paper.
print("Sessions in FSE 2024", len(fse2024), "\n")

Sessions in FSE 2024 32 



In [17]:
#print_sessions_and_papers(fse2024)

In [18]:
# Merge sessions that differ only by a trailing number (e.g. "Testing 1" / "Testing II")
fse2024_cleaned = merge_sessions_by_name(fse2024)

print("Sessions in FSE 2024 after cleaning", len(fse2024_cleaned), "\n")

# List the merged session names for inspection.
print_session_names(fse2024_cleaned)

Sessions in FSE 2024 after cleaning 15 

Software Maintenance and Comprehension
Human Aspects
Formal Verification
Code Search and Completion
Processes, Requirements, and Architecture
Empirical Studies
Testing
AI4SE
Program Analysis and Performance
Program Repair and Synthesis
Fault Diagnosis and Root Cause Analysis
SE4AI
Security and Privacy
Log Analysis and Debugging
Fuzzing


### FSE 2023

In [19]:
# FSE 2023: program page filtered to the "ESEC/FSE Research Papers" track.
fse2023_url = "https://2023.esec-fse.org/program/program-fse-2023/?track=ESEC%2FFSE%20Research%20Papers"

# Scrape the page into {session name: {"papers": [...]}}.
fse2023 = extract_sessions_papers(fse2023_url)

# Number of sessions that contained at least one visible paper.
print("Sessions in FSE 2023", len(fse2023), "\n")

Sessions in FSE 2023 32 



In [20]:
#print_sessions_and_papers(fse2023)

In [21]:
# Merge sessions that differ only by a trailing number (e.g. "Testing 1" / "Testing II")
fse2023_cleaned = merge_sessions_by_name(fse2023)

print("Sessions in FSE 2023 after cleaning", len(fse2023_cleaned), "\n")

# List the merged session names for inspection.
print_session_names(fse2023_cleaned)

Sessions in FSE 2023 after cleaning 16 

Human Aspects
Testing
Machine Learning
Automated Repair
Empirical Studies
Software Evolution
Program Analysis
Code Search and Text to Code
Log Analysis and Debugging
Fault Diagnosis and Root Cause Analysis
Clone and Similarity Detection
Performance
Security
Fuzzing
Formal Verification
Models of Code and Documentation


### Merge both FSE dictionaries

In [22]:
# Combine both FSE years; sessions with identical names get their papers concatenated
merged_dict_fse = merge_sessions([fse2023_cleaned, fse2024_cleaned])

print("Sessions in both FSE dictionaries", len(merged_dict_fse), "\n")

#print_sessions_and_papers(merged_dict)
# List the combined session names for inspection.
print_session_names(merged_dict_fse)

Sessions in both FSE dictionaries 24 

Human Aspects
Testing
Machine Learning
Automated Repair
Empirical Studies
Software Evolution
Program Analysis
Code Search and Text to Code
Log Analysis and Debugging
Fault Diagnosis and Root Cause Analysis
Clone and Similarity Detection
Performance
Security
Fuzzing
Formal Verification
Models of Code and Documentation
Software Maintenance and Comprehension
Code Search and Completion
Processes, Requirements, and Architecture
AI4SE
Program Analysis and Performance
Program Repair and Synthesis
SE4AI
Security and Privacy


## ICSE 2023-2024

### ICSE 2024

In [23]:
# ICSE 2024: program page filtered to the "ICSE Research Track".
icse2024_url = "https://conf.researchr.org/program/icse-2024/program-icse-2024/?track=ICSE%20Research%20Track"
# Scrape the page into {session name: {"papers": [...]}}.
icse2024 = extract_sessions_papers(icse2024_url)

# Number of sessions that contained at least one visible paper.
print("Sessions in ICSE 2024: ", len(icse2024), "\n")

Sessions in ICSE 2024:  71 



In [24]:
# Merge sessions that differ only by a trailing number (e.g. "Testing 1" / "Testing II")
icse2024_cleaned = merge_sessions_by_name(icse2024)

print("Sessions in ICSE 2024 after cleaning", len(icse2024_cleaned), "\n")

# List the merged session names for inspection.
print_session_names(icse2024_cleaned)

Sessions in ICSE 2024 after cleaning 24 

AI & Security
Evolution & AI
Testing
Analysis
Human and Social
Generative AI studies
Language Models and Generated Code
Program Repair
Analytics
Security
Evolution
Analysis and Debugging
LLM, NN and other AI technologies
Dependability and Formal methods
Analytics & AI
Program binaries - evolvability
Testing: various bug types
Human and Social Aspects, and Requirements
Fuzzing
Requirements
Testing with and for AI
Vulnerability Detection
Static Detection Techniques
Testing of AI systems


### ICSE 2023

In [25]:
# ICSE 2023: program page filtered to the "ICSE Technical Track".
icse2023_url = "https://conf.researchr.org/program/icse-2023/program-icse-2023/?track=ICSE%20Technical%20Track"
# Scrape the page into {session name: {"papers": [...]}}.
icse2023 = extract_sessions_papers(icse2023_url)

# Number of sessions that contained at least one visible paper.
print("Sessions in ICSE 2023", len(icse2023), "\n")

Sessions in ICSE 2023 63 



In [26]:
# Merge sessions that differ only by a trailing number (e.g. "Testing 1" / "Testing II")
icse2023_cleaned = merge_sessions_by_name(icse2023)

print("Sessions in ICSE 2023 after cleaning", len(icse2023_cleaned), "\n")

# List the merged session names for inspection.
print_session_names(icse2023_cleaned)

Sessions in ICSE 2023 after cleaning 60 

AI models for SE
Fuzzing: applications
Mining software repositories
Fault localization
Formal verification
APIs and libraries
Blockchain/smart contracts
Cognitive aspects of software development
Code smells and clones
Fuzzing: techniques and tools
Software architectures and design
Software security and privacy
AI systems engineering
Debugging
Defect analysis
Developers' behaviors
Program translation and synthesis
Posters
Documentation
Software logging
Test generation
SE for security
Development and evolution of AI-intensive systems
Vulnerability analysis and assessment
Defect detection and prediction
Studies on gender in SE
AI testing
Code review
Program repair techniques and applications
Requirements elicitation and understanding
Software verification
Testing of mobile, web and games
Recommender systems
Program repair with and for AI
Programming languages
AI bias and fairness
Requirements engineering
Software Evolution
Test quality and improve

### Merge both ICSE dictionaries

In [27]:
# Combine both ICSE years; sessions with identical names get their papers concatenated
merged_dict_icse = merge_sessions([icse2023_cleaned, icse2024_cleaned])

print("Sessions in both ICSE dictionaries", len(merged_dict_icse), "\n")

#print_sessions_and_papers(merged_dict)
# List the combined session names for inspection.
print_session_names(merged_dict_icse)

Sessions in both ICSE dictionaries 84 

AI models for SE
Fuzzing: applications
Mining software repositories
Fault localization
Formal verification
APIs and libraries
Blockchain/smart contracts
Cognitive aspects of software development
Code smells and clones
Fuzzing: techniques and tools
Software architectures and design
Software security and privacy
AI systems engineering
Debugging
Defect analysis
Developers' behaviors
Program translation and synthesis
Posters
Documentation
Software logging
Test generation
SE for security
Development and evolution of AI-intensive systems
Vulnerability analysis and assessment
Defect detection and prediction
Studies on gender in SE
AI testing
Code review
Program repair techniques and applications
Requirements elicitation and understanding
Software verification
Testing of mobile, web and games
Recommender systems
Program repair with and for AI
Programming languages
AI bias and fairness
Requirements engineering
Software Evolution
Test quality and improveme

# Merge all dictionaries

In [28]:
# Combine the per-conference dictionaries (ICSE, FSE, ASE) into one;
# sessions with identical names across conferences get their papers concatenated
merged_dict_all = merge_sessions([merged_dict_icse, merged_dict_fse, merged_dict_ase])

print("Sessions in both all dictionaries", len(merged_dict_all), "\n")

#print_sessions_and_papers(merged_dict_all)
# List every session name of the combined dictionary.
print_session_names(merged_dict_all)

Sessions in both all dictionaries 143 

AI models for SE
Fuzzing: applications
Mining software repositories
Fault localization
Formal verification
APIs and libraries
Blockchain/smart contracts
Cognitive aspects of software development
Code smells and clones
Fuzzing: techniques and tools
Software architectures and design
Software security and privacy
AI systems engineering
Debugging
Defect analysis
Developers' behaviors
Program translation and synthesis
Posters
Documentation
Software logging
Test generation
SE for security
Development and evolution of AI-intensive systems
Vulnerability analysis and assessment
Defect detection and prediction
Studies on gender in SE
AI testing
Code review
Program repair techniques and applications
Requirements elicitation and understanding
Software verification
Testing of mobile, web and games
Recommender systems
Program repair with and for AI
Programming languages
AI bias and fairness
Requirements engineering
Software Evolution
Test quality and improveme

## Debugging: Check whether the session names inside the dictionary (merged_dict_all) match the session names that we have written in our text file

Reason: There was a spelling mistake that led to problems in the following code

In [None]:
# Snapshot of the session names currently held in merged_dict_all, in dictionary order
list1 = list(merged_dict_all.keys())

# Print list1 to verify that session names have been saved
print(list1)

list2 = [
    "AI models for SE",
    "Fuzzing: applications",
    "Mining software repositories",
    "Fault localization",
    "Formal verification",
    "APIs and libraries",
    "Blockchain/smart contracts",
    "Cognitive aspects of software development",
    "Code smells and clones",
    "Fuzzing: techniques and tools",
    "Software architectures and design",
    "Software security and privacy",
    "AI systems engineering",
    "Debugging",
    "Defect analysis",
    "Developers' behaviors",
    "Program translation and synthesis",
    "Posters",
    "Documentation",
    "Software logging",
    "Test generation",
    "SE for security",
    "Development and evolution of AI-intensive systems",
    "Vulnerability analysis and assessment",
    "Defect detection and prediction",
    "Studies on gender in SE",
    "AI testing",
    "Code review",
    "Program repair techniques and applications",
    "Requirements elicitation and understanding",
    "Software verification",
    "Testing of mobile, web and games",
    "Recommender systems",
    "Program repair with and for AI",
    "Programming languages",
    "AI bias and fairness",
    "Requirements engineering",
    "Software Evolution",
    "Test quality and improvement",
    "Runtime analysis and self-adaptation",
    "Developers' forums",
    "Program comprehension",
    "Reverse engineering",
    "Software processes",
    "Static analysis",
    "Testing of database and low-level software",
    "Software performance",
    "Code generation",
    "Software development tools",
    "Fault injection and mutation",
    "Vulnerability detection",
    "Issue reporting and reproduction",
    "Software quality",
    "SE education methods and tools",
    "Metamorphic testing",
    "Pre-trained and few shot learning for SE",
    "Program analysis",
    "Vulnerability testing and patching",
    "Cyber-physical systems testing",
    "Software ecosystems",
    "AI & Security",
    "Evolution & AI",
    "Testing",
    "Analysis",
    "Human and Social",
    "Generative AI studies",
    "Language Models and Generated Code",
    "Program Repair",
    "Analytics",
    "Security",
    "Evolution",
    "Analysis and Debugging",
    "LLM, NN and other AI technologies",
    "Dependability and Formal methods",
    "Analytics & AI",
    "Program binaries - evolvability",
    "Testing: various bug types",
    "Human and Social Aspects, and Requirements",
    "Fuzzing",
    "Requirements",
    "Testing with and for AI",
    "Vulnerability Detection",
    "Static Detection Techniques",
    "Testing of AI systems",
    "Human Aspects",
    "Machine Learning",
    "Automated Repair",
    "Empirical Studies",
    "Program Analysis",
    "Code Search and Text to Code",
    "Log Analysis and Debugging",
    "Fault Diagnosis and Root Cause Analysis",
    "Clone and Similarity Detection",
    "Performance",
    "Formal Verification",
    "Models of Code and Documentation",
    "Software Maintenance and Comprehension",
    "Code Search and Completion",
    "Processes, Requirements, and Architecture",
    "AI4SE",
    "Program Analysis and Performance",
    "Program Repair and Synthesis",
    "SE4AI",
    "Security and Privacy",
    "Welcome to Day",
    "AI for SE",
    "Debugging and Troubleshooting",
    "Mobile Apps",
    "Code Analysis",
    "Source Code Manipulation",
    "Builds and Versions",
    "Analysis and Types",
    "Application Domains",
    "Bug Prediction and Localization",
    "Compilers and Languages",
    "Software Vulnerabilities",
    "Formal Methods and Models",
    "SE for AI",
    "Web, Cloud, Networking",
    "Code Summarization and Recommendation",
    "Software Repairs",
    "Dynamic and Concolic Analysis",
    "Safety-Critical and Self-Adaptive Systems",
    "Code Similarities and Refactoring",
    "Builds and Dependencies",
    "Cloud and Distributed Systems",
    "Testing AI Systems",
    "Infrastructure, Build, and Logs",
    "Open Source and Software Ecosystems",
    "Smart Contracts, Blockchain, Energy efficiency, and green software",
    "Vulnerability and Security",
    "Code Generation",
    "Web Development",
    "Testing Tools and Techniques",
    "Code Quality and Code Smells",
    "Code Summarization",
    "Program Verification",
    "Code Change Analysis",
    "Software Testing for Specialized Systems",
    "Bug Detection",
    "Autonomous Systems and Agents",
    "Mobile Development",
    "Configuration and Version Management"
]

print(list2)

['AI models for SE', 'Fuzzing: applications', 'Mining software repositories', 'Fault localization', 'Formal verification', 'APIs and libraries', 'Blockchain/smart contracts', 'Cognitive aspects of software development', 'Code smells and clones', 'Fuzzing: techniques and tools', 'Software architectures and design', 'Software security and privacy', 'AI systems engineering', 'Debugging', 'Defect analysis', "Developers' behaviors", 'Program translation and synthesis', 'Posters', 'Documentation', 'Software logging', 'Test generation', 'SE for security', 'Development and evolution of AI-intensive systems', 'Vulnerability analysis and assessment', 'Defect detection and prediction', 'Studies on gender in SE', 'AI testing', 'Code review', 'Program repair techniques and applications', 'Requirements elicitation and understanding', 'Software verification', 'Testing of mobile, web and games', 'Recommender systems', 'Program repair with and for AI', 'Programming languages', 'AI bias and fairness', '

In [30]:
# Print every session name of the combined dictionary, one per line
for session_name in merged_dict_all:
    print(session_name)

AI models for SE
Fuzzing: applications
Mining software repositories
Fault localization
Formal verification
APIs and libraries
Blockchain/smart contracts
Cognitive aspects of software development
Code smells and clones
Fuzzing: techniques and tools
Software architectures and design
Software security and privacy
AI systems engineering
Debugging
Defect analysis
Developers' behaviors
Program translation and synthesis
Posters
Documentation
Software logging
Test generation
SE for security
Development and evolution of AI-intensive systems
Vulnerability analysis and assessment
Defect detection and prediction
Studies on gender in SE
AI testing
Code review
Program repair techniques and applications
Requirements elicitation and understanding
Software verification
Testing of mobile, web and games
Recommender systems
Program repair with and for AI
Programming languages
AI bias and fairness
Requirements engineering
Software Evolution
Test quality and improvement
Runtime analysis and self-adaptation


In [57]:

def check_lists(list1, list2):
    """
    Print the items that occur in only one of the two given lists.

    Items of list1 that are absent from list2 are reported first, followed by
    the items of list2 that are absent from list1. Each item is reported in
    the order in which it occurs in its own list.

    Args:
        list1: First collection of items.
        list2: Second collection of items.

    Returns:
        None. The differences are only printed.
    """
    # Direction 1: what list1 has that list2 lacks
    only_in_first = [entry for entry in list1 if entry not in list2]
    for entry in only_in_first:
        print(f"Item '{entry}' from list1 is not in list2.")

    # Direction 2: what list2 has that list1 lacks
    only_in_second = [entry for entry in list2 if entry not in list1]
    for entry in only_in_second:
        print(f"Item '{entry}' from list2 is not in list1.")

# Print every item that appears in only one of the two lists.
# NOTE(review): list1 and list2 are not defined in this cell; presumably they
# are created in an earlier cell - confirm before re-running.
check_lists(list1, list2)

# Mark distinguished papers

## First create a list of distinguished papers

In [32]:
# Hand-maintained reference list of distinguished papers.
# Each entry is a dict with:
#   "title":   the paper title (str)
#   "authors": the author names (list of str)
# The functions in this part of the notebook that consume the list
# (mark_distinguished_papers, find_missing_distinguished_papers) read only
# "title"; "authors" is kept for reference.
# NOTE(review): the author strings are not uniform - some entries use full
# names, some use abbreviated initials (e.g. "E. First"), and some carry an
# affiliation in parentheses (e.g. "Petros Maniatis (Google)"). Normalise them
# before using "authors" for any matching.
distinguished_papers = [
    {
        "title": "Merge-Replay: Efficient IFDS-Based Taint Analysis by Consolidating Equivalent Value Flows",
        "authors": ["Yujiang Gui", "Dongjie He", "Jingling Xue"]
    },
    {
        "title": "Domain Adaptive Code Completion via Language Models and Decoupled Domain Databases",
        "authors": ["Ze Tang", "Jidong Ge", "Shangqing Liu", "Tingwei Zhu", "Tongtong Xu", "Liguo Huang", "Bin Luo"]
    },
    {
        "title": "EndWatch: A Practical Method for Detecting Non-Termination in Real-World Software",
        "authors": ["Yao Zhang", "Xiaofei Xie", "Yi Li", "Sen Chen", "Cen Zhang", "Xiaohong Li"]
    },
    {
        "title": "Generative Type Inference for Python",
        "authors": ["Yun Peng", "Chaozheng Wang", "Wenxuan Wang", "Cuiyun Gao", "Michael Lyu"]
    },
    {
        "title": "Detecting Smart Home Automation Application Interferences with Domain Knowledge",
        "authors": ["Tao Wang", "Wei Chen", "Liwei Liu", "Guoquan Wu", "Jun Wei", "Tao Huang"]
    },
    {
        "title": "An Empirical Study on Fine-tuning Large Language Models of Code for Automated Program Repair",
        "authors": ["Kai Huang", "Xiangxin Meng", "Jian Zhang", "Yang Liu", "Wenjie Wang", "Shuhao Li", "Yuqing Zhang"]
    },
    {
        "title": "LeakPair: Proactive Repairing of Memory Leaks in Single Page Web Applications",
        "authors": ["Arooba Shahoor", "Askar Yeltayuly Khamit", "Jooyong Yi", "Dongsun Kim"]
    },
    {
        "title": "DeepScaler: Holistic Autoscaling for Microservices Based on Spatiotemporal GNN with Adaptive Graph Learning",
        "authors": ["Chunyang Meng", "Shijie Song", "Haogang Tong", "Maolin Pan", "Yang Yu"]
    },
    {
        "title": "Mutation-based Fault Localization of Deep Neural Networks",
        "authors": ["Ali Ghanbari", "Deepak-George Thomas", "Muhammad Arbab Arshad", "Hridesh Rajan"]
    },
    {
        "title": "PHYFU: Fuzzing Modern Physics Simulation Engines",
        "authors": ["Dongwei Xiao", "Zhibo Liu", "Shuai Wang"]
    },
    {
        "title": "Boosting the Revealing of Detected Violations in Deep Learning Testing: A Diversity-Guided Method",
        "authors": ["Xiaoyuan Xie", "Pengbo Yin", "Songqiang Chen"]
    },
    {
        "title": "CARGO: AI-Guided Dependency Analysis for Migrating Monolithic Applications to Microservices Architecture",
        "authors": ["Vikram Nitin", "Shubhi Asthana", "Baishakhi Ray", "Rahul Krishna"]
    },
    {
        "title": "Compiler Testing using Template Java Programs",
        "authors": ["Zhiqiang Zang", "Nathan Wiatrek", "Milos Gligoric", "August Shi"]
    },
    {
        "title": "CrystalBLEU: Precisely and Efficiently Measuring the Similarity of Code",
        "authors": ["Aryaz Eghbali", "Michael Pradel"]
    },
    {
        "title": "Detecting Blocking Errors in Go Programs using Localized Abstract Interpretation",
        "authors": ["Oskar Haarklou Veileborg", "Georgian-Vlad Saioc", "Anders Møller"]
    },
    {
        "title": "Efficient Greybox Fuzzing to Detect Memory Errors",
        "authors": ["Jinsheng Ba", "Gregory J. Duck", "Abhik Roychoudhury"]
    },
    {
        "title": "Fuzzle: Making a Puzzle for Fuzzers",
        "authors": ["Haeun Lee", "Soomin Kim", "Sang Kil Cha"]
    },
    {
        "title": "Has My Release Disobeyed Semantic Versioning? Static Detection Based On Semantic Differencing",
        "authors": ["Lyuye Zhang", "Chengwei Liu", "Zhengzi Xu", "Sen Chen", "Lingling Fan", "Bihuan Chen", "Yang Liu"]
    },
    {
        "title": "HyperAST: Enabling Efficient Analysis of Software Histories at Scale",
        "authors": ["Quentin Le-dilavrec", "Djamel Eddine Khelladi", "Arnaud Blouin", "Jean-Marc Jézéquel"]
    },
    {
        "title": "Learning to Construct Better Mutation Faults",
        "authors": ["Zhao Tian", "Junjie Chen", "Qihao Zhu", "Junjie Yang", "Lingming Zhang"]
    },
    {
        "title": "QATest: A Uniform Fuzzing Framework for Question Answering Systems",
        "authors": ["Zixi Liu", "Yang Feng", "Yining Yin", "Jingyu Sun", "Zhenyu Chen", "Baowen Xu"]
    },
    {
        "title": "Baldur: Whole-Proof Generation and Repair with Large Language Models",
        "authors": ["E. First", "M. Rabe", "T. Ringer", "Y. Brun"]
    },
    {
        "title": "Speeding up SMT Solving via Compiler Optimization",
        "authors": ["B. Mikek", "Q. Zhang"]
    },
    {
        "title": "Mate! Are You Really Aware? An Explainability-Guided Testing Framework for Robustness of Malware Detectors",
        "authors": ["R. Sun", "M. Xue", "G. Tyson", "T. Dong", "S. Li", "S. Wang", "H. Zhu", "S. Camtepe", "S. Nepal"]
    },
    {
        "title": "A Highly Scalable, Hybrid, Cross-Platform Timing Analysis Framework Providing Accurate Differential Throughput Estimation via Instruction-Level Tracing",
        "authors": ["M. Hsu", "F. Hetzelt", "D. Gens", "M. Maitland", "M. Franz"]
    },
    {
        "title": "An Automated Approach to Extracting Local Variables",
        "authors": ["X. Chi", "H. Liu", "G. Li", "W. Wang", "Y. Xia", "Y. Jiang", "Y. Zhang", "W. Ji"]
    },
    {
        "title": "Can Machine Learning Pipelines Be Better Configured?",
        "authors": ["Y. Wang", "Y. Wang", "T. Zhang", "Y. Yu", "S. Cheung", "H. Yu", "Z. Zhu"]
    },
    {
        "title": "NeuRI: Diversifying DNN Generation via Inductive Rule Inference",
        "authors": ["J. Liu", "J. Peng", "Y. Wang", "L. Zhang"]
    },
    {
        "title": "Benchmarking Robustness of AI-enabled Multi-sensor Fusion Systems: Challenges and Opportunities",
        "authors": ["X. Gao", "Z. Wang", "Y. Feng", "L. Ma", "Z. Chen", "B. Xu"]
    },
    {
        "title": "A Longitudinal Study of Student Contributions to OSS vs. OSS4SG with a Lightweight Intervention",
        "authors": ["Z. Fang", "M. Endres", "T. Zimmermann", "D. Ford", "W. Weimer", "K. Leach", "Y. Huang"]
    },
    {
        "title": "LExecutor: Learning-Guided Execution",
        "authors": ["B. Souza", "M. Pradel"]
    },
    {
        "title": "TransRacer: Function Dependence-Guided Transaction Race Detection for Smart Contracts",
        "authors": ["C. Ma", "W. Song", "J. Huang"]
    },
    {
        "title": "Recommending Analogical APIs via Knowledge Graph Embedding",
        "authors": ["M. Liu", "Y. Yang", "Y. Lou", "X. Peng", "Z. Zhou", "X. Du", "T. Yang"]
    },
    {
        "title": "Component Security Ten Years Later: An Empirical Study of Cross-Layer Threats in Real-World Mobile Applications",
        "authors": ["Keke Lian", "Lei Zhang", "Guangliang Yang", "Shuo Mao", "Xinjie Wang", "Yuan Zhang", "Min Yang"]
    },
    {
        "title": "Static Application Security Testing (SAST) Tools for Smart Contracts: How Far Are We?",
        "authors": ["Kaixuan Li", "Yue Xue", "Sen Chen", "Han Liu", "Kairan Sun", "Ming Hu", "Haijun Wang", "Yang Liu", "Yixiang Chen"]
    },
    {
        "title": "ProveNFix: Temporal Property-Guided Program Repair",
        "authors": ["Yahui Song", "Xiang Gao", "Wenhua Li", "Wei-Ngan Chin", "Abhik Roychoudhury"]
    },
    {
        "title": "Predictive Program Slicing via Execution Knowledge-Guided Dynamic Dependence Learning",
        "authors": ["Aashish Yadavally", "Yi Li", "Tien Nguyen"]
    },
    {
        "title": "TraStrainer: Adaptive Sampling for Distributed Traces with System Runtime State",
        "authors": ["Haiyu Huang", "Xiaoyu Zhang", "Pengfei Chen", "Zilong He", "Zhiming Chen", "Guangba Yu", "Hongyang Chen", "Chen Sun"]
    },
    {
        "title": "Fast Graph Simplification for Path-Sensitive Typestate Analysis through Tempo-Spatial Multi-Point Slicing",
        "authors": ["Xiao Cheng", "Jiawei Ren", "Yulei Sui"]
    },
    {
        "title": "Understanding Developers’ Discussions and Perceptions on Non-functional Requirements: The Case of the Spring Ecosystem",
        "authors": ["Anderson Oliveira", "João Correia", "Wesley K. G. Assunção", "Juliana Alves Pereira", "Rafael de Mello", "Daniel Coutinho", "Caio Barbosa", "Paulo Libório", "Alessandro Garcia"]
    },
    {
        "title": "A Transferability Study of Interpolation-Based Hardware Model Checking to Software Verification",
        "authors": ["Dirk Beyer", "Po-Chun Chien", "Marek Jankola", "Nian-Ze Lee"]
    },
    {
        "title": "Only diff Is Not Enough: Generating Commit Messages Leveraging Reasoning and Action of Large Language Model",
        "authors": ["Jiawei Li", "David Faragó", "Christian Petrov", "Iftekhar Ahmed"]
    },
    {
        "title": "“The Law Doesn’t Work Like a Computer”: Exploring Software Licensing Issues Faced by Legal Practitioners",
        "authors": ["Nathan Wintersgill", "Trevor Stalnaker", "Laura A. Heymann", "Oscar Chaparro", "Denys Poshyvanyk"]
    },
    {
        "title": "Improving the Learning of Code Review Successive Tasks with Cross-Task Knowledge Distillation",
        "authors": ["Oussama Ben Sghaier", "Houari Sahraoui"]
    },
    {
        "title": "Do I Belong? Modeling Sense of Virtual Community Among Linux Kernel",
        "authors": ["Bianca Trinkenreich", "Klaas-Jan Stol", "Anita Sarma", "Daniel M German", "Marco Gerosa", "Igor Steinmacher"]
    },
    {
        "title": "Compatible Remediation on Vulnerabilities from Third-Party Libraries for Java Projects",
        "authors": ["Lyuye Zhang", "Chengwei Liu", "Zhengzi Xu", "Sen Chen", "Lingling Fan", "Lida Zhao", "Jiahui Wu", "Yang Liu"]
    },
    {
        "title": "A Qualitative Study on the Implementation Design Decisions of Developers",
        "authors": ["Jenny T. Liang", "Maryam Arab", "Minhyuk Ko", "Amy J. Ko", "Thomas D. LaToza"]
    },
    {
        "title": "STILL AROUND: Experiences and Survival Strategies of Veteran Women Software Developers",
        "authors": ["Sterre van Breukelen", "Ann Barcomb", "Sebastian Baltes", "Alexander Serebrenik"]
    },
    {
        "title": "Understanding and Detecting On-the-Fly Configuration Bugs",
        "authors": ["Teng Wang", "Zhouyang Jia", "Shanshan Li", "Si Zheng", "Yue Yu", "Erci Xu", "Shaoliang Peng", "Xiangke Liao"]
    },
    {
        "title": "Testing Database Engines via Query Plan Guidance",
        "authors": ["Jinsheng Ba", "Manuel Rigger"]
    },
    {
        "title": "Sibyl: Improving Software Engineering Tools with SMT Selection",
        "authors": ["Will Leeson", "Matthew B Dwyer", "Antonio Filieri"]
    },
    {
        "title": "Lejacon: A Lightweight and Efficient Approach to Java Confidential Computing on SGX",
        "authors": ["Xinyuan Miao", "Ziyi Lin", "Shaojun Wang", "Lei Yu", "Sanhong Li", "Zihan Wang", "Pengbo Nie", "Yuting Chen", "Beijun Shen", "He Jiang"]
    },
    {
        "title": "Efficiency Matters: Speeding Up Automated Testing with GUI Rendering Inference",
        "authors": ["Sidong Feng", "Mulong Xie", "Chunyang Chen"]
    },
    {
        "title": "Rete: Learning Namespace Representation for Program Repair",
        "authors": ["Nikhil Parasaram", "Earl Barr", "Sergey Mechtaev"]
    },
    {
        "title": "EDEFuzz: A Web API Fuzzer for Excessive Data Exposures",
        "authors": ["Lianglu Pan", "Shaanan Cohney", "Toby Murray", "Thuan Pham"]
    },
    {
        "title": "Modularizing while Training: a New Paradigm for Modularizing DNN Models",
        "authors": ["Binhang Qi", "Hailong Sun", "Hongyu Zhang", "Ruobing Zhao", "Xiang Gao"]
    },
    {
        "title": "FAIR: Flow Type-Aware Pre-Training of Compiler Intermediate Representations",
        "authors": ["Changan Niu", "Chuanyi Li", "Vincent Ng", "David Lo", "Bin Luo"]
    },
    {
        "title": "Attention! Your Copied Data is Under Monitoring: A Systematic Study of Clipboard Usage in Android Apps",
        "authors": ["Yongliang Chen", "Ruoqin Tang", "Chaoshun Zuo", "Xiaokuan Zhang", "Lei Xue", "Xiapu Luo", "Qingchuan Zhao"]
    },
    {
        "title": "Breaking the Flow: A Study of Interruptions During Software Engineering Activities",
        "authors": ["Yimeng Ma", "Yu Huang", "Kevin Leach"]
    },
    {
        "title": "Code Search is All You Need? Improving Code Suggestions with Code Search",
        "authors": ["Junkai Chen", "Xing Hu", "Zhenhao Li", "Cuiyun Gao", "Xin Xia", "David Lo"]
    },
    {
        "title": "Analyzing and Debugging Normative Requirements via Satisfiability Checking",
        "authors": ["Nick Feng", "Lina Marsso", "Sinem Getir Yaman", "Yesugen Baatartogtokh", "Reem Ayad", "Victória Oldemburgo de Mello", "Beverley Townsend", "Isobel Standen", "Ioannis Stefanakos", "Calum Imrie", "Genaína Nunes Rodrigues", "Ana Cavalcanti", "Radu Calinescu", "Marsha Chechik"]
    },
    {
        "title": "Property-based testing in practice",
        "authors": ["Harrison Goldstein", "Joseph W. Cutler", "Daniel Dickstein", "Benjamin C. Pierce", "Andrew Head"]
    },
    {
        "title": "Causal Relationships and Programming Outcomes: A Transcranial Magnetic Stimulation Experiment",
        "authors": ["Hammad Ahmad", "Madeline Endres", "Kaia Newman", "Priscila Santiesteban", "Emma Shedden", "Westley Weimer"]
    },
    {
        "title": "GenderMag Improves Discoverability in the Field, Especially for Women",
        "authors": ["Emerson Murphy-Hill", "Alberto Elizondo", "Ambar Murillo", "Marian Harbach", "Bogdan Vasilescu", "Delphine Carlson", "Florian Dessloch"]
    },
    {
        "title": "Hard to Read and Understand Pythonic Idioms? DeIdiom and Explain Them in Non-Idiomatic Equivalent Code",
        "authors": ["Zejun Zhang", "Zhenchang Xing", "Dehai Zhao", "Qinghua Lu", "Xiwei (Sherry) Xu", "Liming Zhu"]
    },
    {
        "title": "Towards Finding Accounting Errors in Smart Contracts",
        "authors": ["Brian Zhang"]
    },
    {
        "title": "Semantic-Enhanced Static Vulnerability Detection in Baseband Firmware",
        "authors": ["Yiming Liu", "Cen Zhang", "Feng Li", "Yeting Li", "Jianhua Zhou", "Jian Wang", "Lanlan Zhan", "Yang Liu", "Wei Huo"]
    },
    {
        "title": "Safeguarding DeFi Smart Contracts against Oracle Deviations",
        "authors": ["Xun Deng", "Sidi Mohamed Beillahi", "Cyrus Minwalla", "Han Du", "Andreas Veneris", "Fan Long"]
    },
    {
        "title": "Video-based Training for Meeting Communication Skills",
        "authors": ["Matthias Galster", "Antonija Mitrovic", "Sanna Malinen", "Sreedevi Sankara Iyer", "Ja’afaru Musa", "Jay Holland"]
    },
    {
        "title": "Resolving Code Review Comments with Machine Learning",
        "authors": ["Alexander Frömmgen", "Jacob Austin", "Peter Choy", "Nimesh Ghelani", "Lera Kharatyan", "Gabriela Surita", "Elena Khrapko", "Pascal Lamblin", "Pierre-Antoine Manzagol", "Marcus Revaj", "Maxim Tabachnyk", "Danny Tarlow", "Kevin Villela", "Daniel Zheng", "Satish Chandra", "Petros Maniatis (Google)"]
    },
    {
        "title": "GWP-ASan: Sampling-Based Detection of Memory-Safety Bugs in Production",
        "authors": ["Kostya Serebryany", "Chris Kennelly", "Mitch Phillips", "Matt Denton", "Marco Elver", "Alexander Potapenko (Google)", "Matt Morehouse", "Vlad Tsyrklevich (unaffiliated)", "Christian Holler (Mozilla Corporation)", "Julian Lettner", "David Kilzer (Apple)", "Lander Brandt (Meta)"]
    },
    {
        "title": "User-Centric Deployment of Automated Program Repair at Bloomberg",
        "authors": ["David Williams", "James Callan (University College London)", "Serkan Kirbas (Bloomberg LP)", "Sergey Mechtaev", "Justyna Petke (University College London)", "Thomas Prideaux-Ghee (Bloomberg LP)", "Federica Sarro (University College London)"]
    },
    {
        "title": "On the Costs and Benefits of Adopting Lifelong Learning for Software Analytics - Empirical Study on Brown Build and Risk Prediction",
        "authors": ["Doriane Olewicki (Queen's University)", "Sarra Habchi", "Mathieu Nayrolles (Ubisoft Montréal)", "Mojtaba Faramarzi (Université de Montréal)", "Sarath Chandar (Polytechnique Montréal)", "Bram Adams (Queen's University)"]
    },
    {
        "title": "Beyond Self-Promotion: How Software Engineering Research Is Discussed on LinkedIn",
        "authors": ["Marvin Wyrich", "Justus Bogner"]
    }
]

Mark the distinguished papers inside the dictionary

In [63]:
def mark_distinguished_papers(merged_dict_all, distinguished_papers):
    """
    Mark distinguished papers in the merged dictionary.

    Every paper in every session receives a boolean 'distinguished' key that
    tells whether its title ('name') matches one of the titles in
    `distinguished_papers`.

    Titles are compared on a normalized form rather than on the raw strings:
    typographic quotes and dashes are mapped to their ASCII counterparts, runs
    of whitespace are collapsed to a single space, and case is ignored. This
    way two renderings of the same title (e.g. a curly versus a straight
    apostrophe) still match.

    Args:
        merged_dict_all (dict): A dictionary where each key is a session name and
                                the value is a dictionary containing a list of papers
                                under the key 'papers'. Each paper is a dictionary
                                with at least the key 'name' (paper title).
                                Sessions without a 'papers' key are skipped.
        distinguished_papers (list): A list of dictionaries where each dictionary
                                     contains a 'title' key representing the paper title.

    Returns:
        dict: The same `merged_dict_all` object, modified in place, with a
              'distinguished' key (True/False) set on each paper.
    """
    # Typographic characters that commonly differ between two sources of the same title
    punctuation_map = str.maketrans({
        "\u2018": "'", "\u2019": "'",   # single curly quotes
        "\u201c": '"', "\u201d": '"',   # double curly quotes
        "\u2013": "-", "\u2014": "-",   # en dash / em dash
    })

    def normalize_title(title):
        # split()/join collapses any run of whitespace (incl. non-breaking spaces)
        return " ".join(title.translate(punctuation_map).split()).casefold()

    # Build the lookup set once so each paper costs a single set membership test
    distinguished_titles = {normalize_title(paper['title']) for paper in distinguished_papers}

    for session_data in merged_dict_all.values():
        for paper in session_data.get('papers', []):
            # The scraped papers store their title under 'name', not 'title'
            paper['distinguished'] = normalize_title(paper['name']) in distinguished_titles

    return merged_dict_all

In [None]:
def print_distinguished_papers(merged_dict_all):
    """
    Print title, authors and URL of every distinguished paper, grouped by session.

    Sessions that contain no distinguished paper produce no output at all.
    For the other sessions a "Session: ..." header is printed, followed by one
    block per distinguished paper and an empty line after each block.

    Parameters:
    merged_dict_all (dict): Maps session names to session data dictionaries.
                            Each session data dictionary must provide a 'papers' list.
                            Each paper dictionary is read for 'name' (title),
                            'authors' (list of author names), the optional 'url'
                            and the optional boolean 'distinguished'; a paper
                            without that flag is treated as not distinguished.
    """
    for session_name, session_data in merged_dict_all.items():
        # Collect the flagged papers of this session first
        flagged = []
        for candidate in session_data['papers']:
            if candidate.get('distinguished', False):
                flagged.append(candidate)

        # Nothing to report for this session
        if not flagged:
            continue

        print(f"Session: {session_name}")
        for entry in flagged:
            author_line = ', '.join(entry['authors'])
            link = entry.get('url', 'No URL available')
            print(f" Title: {entry['name']}")
            print(f" Authors: {author_line}")
            print(f" URL: {link}")
            print()

In [76]:
def calculate_distinguished_papers(merged_dict_all, distinguished_papers=None):
    """
    Count the papers flagged as distinguished and relate the count to the reference list.

    Args:
        merged_dict_all (dict): Maps session names to session data dictionaries
                                holding a 'papers' list. A paper counts when its
                                'distinguished' key is truthy. Sessions without
                                a 'papers' key contribute nothing.
        distinguished_papers (list, optional): The reference list of distinguished
                                papers; only its length is used. When omitted, the
                                module-level `distinguished_papers` is used, which
                                keeps existing one-argument calls working.

    Returns:
        tuple: (distinguished_count, percentage) where `distinguished_count` is the
               number of flagged paper entries and `percentage` is that count
               relative to the length of the reference list (0 for an empty list).
               A paper that is listed in several sessions is counted once per
               listing, so the percentage can exceed 100.

    Raises:
        NameError: If `distinguished_papers` is omitted and no module-level
                   `distinguished_papers` exists.
    """
    if distinguished_papers is None:
        # The parameter shadows the global of the same name, hence the explicit lookup
        try:
            distinguished_papers = globals()['distinguished_papers']
        except KeyError:
            raise NameError("name 'distinguished_papers' is not defined") from None

    reference_total = len(distinguished_papers)

    distinguished_count = sum(
        1
        for session_data in merged_dict_all.values()
        for paper in session_data.get('papers', [])
        if paper.get('distinguished', False)
    )

    # Guard against an empty reference list (division by zero)
    if reference_total > 0:
        percentage = (distinguished_count / reference_total) * 100
    else:
        percentage = 0

    return distinguished_count, percentage

In [67]:
# Set the 'distinguished' flag on every paper; the function modifies the
# dictionary in place and returns it, so the name is rebound to the same object.
merged_dict_all = mark_distinguished_papers(merged_dict_all, distinguished_papers)

# Uncomment to list the flagged papers per session.
#print_distinguished_papers(merged_dict_all)

In [77]:
# distinguished_count: number of papers flagged in merged_dict_all.
# percentage: that count relative to len(distinguished_papers), i.e. the share
# of the reference list that was matched.
distinguished_count, percentage = calculate_distinguished_papers(merged_dict_all)
print(f"Total Distinguished Papers: {distinguished_count}")
print(f"Percentage of matched Distinguished Papers: {percentage:.2f}%")


Total Distinguished Papers: 62
Percentage of matched Distinguished Papers: 83.78%


In [80]:
def find_missing_distinguished_papers(distinguished_papers, merged_dict_all):
    """
    Report the distinguished papers that are not flagged anywhere in the merged dictionary.

    A distinguished paper counts as found when some paper in `merged_dict_all`
    has a truthy 'distinguished' key and a matching title. Titles are compared
    on a normalized form (typographic quotes and dashes mapped to ASCII,
    whitespace collapsed, case ignored), so a paper flagged under a slightly
    different rendering of its title is not reported as missing.

    Args:
        distinguished_papers (list): List of dictionaries with a 'title' key.
        merged_dict_all (dict): Maps session names to session data dictionaries
                                holding a 'papers' list; each paper has a 'name'
                                (title) and may have a 'distinguished' flag.
                                Sessions without a 'papers' key are skipped.

    Returns:
        list: The titles (as written in `distinguished_papers`, in list order)
              that were not found. Each of them is also printed.
    """
    # Typographic characters that commonly differ between two sources of the same title
    punctuation_map = str.maketrans({
        "\u2018": "'", "\u2019": "'",   # single curly quotes
        "\u201c": '"', "\u201d": '"',   # double curly quotes
        "\u2013": "-", "\u2014": "-",   # en dash / em dash
    })

    def normalize_title(title):
        # split()/join collapses any run of whitespace (incl. non-breaking spaces)
        return " ".join(title.translate(punctuation_map).split()).casefold()

    # Normalized titles of all papers that carry the 'distinguished' flag
    found_titles = {
        normalize_title(paper['name'])
        for session_data in merged_dict_all.values()
        for paper in session_data.get('papers', [])
        if paper.get('distinguished', False)
    }

    missing_dis_papers = []
    for paper in distinguished_papers:
        if normalize_title(paper['title']) not in found_titles:
            print(f"Missing Distinguished Paper Title: {paper['title']}")
            missing_dis_papers.append(paper['title'])

    return missing_dis_papers
    
# Print and collect the titles from distinguished_papers that are not flagged
# in merged_dict_all.
missing_dis_papers = find_missing_distinguished_papers(distinguished_papers, merged_dict_all)

Missing Distinguished Paper Title: TransRacer: Function Dependence-Guided Transaction Race Detection for Smart Contracts
Missing Distinguished Paper Title: Component Security Ten Years Later: An Empirical Study of Cross-Layer Threats in Real-World Mobile Applications
Missing Distinguished Paper Title: ProveNFix: Temporal Property-Guided Program Repair
Missing Distinguished Paper Title: Understanding Developers’ Discussions and Perceptions on Non-functional Requirements: The Case of the Spring Ecosystem
Missing Distinguished Paper Title: “The Law Doesn’t Work Like a Computer”: Exploring Software Licensing Issues Faced by Legal Practitioners
Missing Distinguished Paper Title: Do I Belong? Modeling Sense of Virtual Community Among Linux Kernel
Missing Distinguished Paper Title: STILL AROUND: Experiences and Survival Strategies of Veteran Women Software Developers
Missing Distinguished Paper Title: Video-based Training for Meeting Communication Skills
Missing Distinguished Paper Title: Res

### The papers that were not matched automatically were marked as distinguished manually in the CSV file (only those that are actually present in the dictionary and the CSV file):

# Write dictionary including session and papers into a csv file

In [79]:
def export_dict_to_csv(merged_dict_all, output_filename='session and papers all conferences.csv'):
    """
    Export the session/paper dictionary to a CSV file with one row per paper.

    The file is written to the 'csv_files' folder below the current working
    directory; the folder is created when it does not exist yet, and an
    existing file of the same name is overwritten. The columns are
    'category' (session name), 'name', 'authors' (joined with ', '), 'url'
    and 'distinguished'.

    Args:
        merged_dict_all (dict): Maps session names to session data dictionaries
                                holding a 'papers' list. Each paper needs 'name'
                                and 'authors'; 'url' and 'distinguished' are
                                optional and default to '' and False. Sessions
                                without a 'papers' key produce no rows.
        output_filename (str): Name of output CSV file

    Returns:
        None
    """
    # Create the csv_files folder if it doesn't exist
    os.makedirs('csv_files', exist_ok=True)

    # Full path to save the CSV file
    full_path = os.path.join('csv_files', output_filename)

    # newline='' lets the csv module control the line endings itself
    with open(full_path, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['category', 'name', 'authors', 'url', 'distinguished']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for category, data in merged_dict_all.items():
            for paper in data.get('papers', []):
                writer.writerow({
                    'category': category,
                    'name': paper['name'],
                    'authors': ', '.join(paper['authors']),
                    # Optional fields: same defaults the rest of the notebook assumes
                    'url': paper.get('url', ''),
                    'distinguished': paper.get('distinguished', False),
                })

# Write all sessions and papers to the csv_files folder under the default file name.
export_dict_to_csv(merged_dict_all)