# Extraction of the sessions and the corresponding papers/talks

Load the dependencies:

In [165]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
from collections import defaultdict
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity

Extraction of dynamic content:

In [55]:
def _session_name_from_tbody(tbody):
    """Return the session title found in a session table body, or 'Unknown Session'."""
    session_info_elem = tbody.find(class_="session-info-in-table")
    if session_info_elem is None:
        return "Unknown Session"

    # Only the first text node is the title; later nodes belong to nested tags
    # (e.g. the <span> that follows the title).
    for part in session_info_elem.contents:
        if isinstance(part, str):
            return part.strip() or "Unknown Session"
    return "Unknown Session"


def _paper_from_row(row):
    """Return the paper dict described by a table row, or None if the row is hidden or not a paper row."""
    if row.get("data-is-visible") != "true" or row.get("style") == "display: none;":
        return None

    td_elements = row.find_all("td")
    if len(td_elements) < 4:
        return None

    # The 4th cell holds the title, the performers and the publication link
    details_cell = td_elements[3]
    paper_name_elem = details_cell.find("strong")
    authors_div = details_cell.find("div", class_="performers")
    url_elem = details_cell.find("a", class_="publication-link")

    return {
        "name": paper_name_elem.get_text(strip=True) if paper_name_elem else "Unknown Paper",
        "authors": [a.get_text(strip=True) for a in authors_div.find_all("a")] if authors_div else [],
        # .get() so that an anchor without an href does not abort the whole extraction
        "url": url_elem.get("href", "No URL available") if url_elem else "No URL available",
    }


def extract_sessions_papers(url):
    """
    Extracts session and paper details from a given conference URL.

    The page is loaded in a headless Chrome instance so that JavaScript-rendered
    content is present, and the resulting HTML is parsed with BeautifulSoup.

    Args:
        url (str): The URL of the conference page from which to extract data.

    Returns:
        dict: A dictionary where the keys are session names and each value is a dictionary
              with a single key 'papers' mapping to a list of paper detail dictionaries.
              Each paper detail dictionary contains:
              - 'name': The title of the paper (str)
              - 'authors': A list of authors' names (list of str)
              - 'url': The URL to the paper (str), or "No URL available"
              Sessions without any visible paper are omitted.
    """
    # Set up Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run in headless mode (no GUI)

    # Set up the Chrome WebDriver using ChromeDriverManager
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    try:
        # Open the webpage
        driver.get(url)

        # Wait for the page to fully load (adjust time as needed)
        time.sleep(5)

        # Get the page source after JavaScript has rendered
        page_source = driver.page_source
    finally:
        # Always close the browser, even if loading the page failed
        driver.quit()

    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')

    # Initialize the dictionary to store sessions with additional details
    sessions_details = {}

    # Iterate over the divs carrying the class 'hidable' or 'band'
    for div in soup.find_all("div", class_=["hidable", "band"]):
        # Keep only visible divs
        if div.get("data-is-visible") != "true":
            continue

        session_table = div.find("table", class_="session-table")
        tbody = session_table.find("tbody") if session_table is not None else None
        if tbody is None:
            continue

        # Resolved per table, so a table without a title never inherits the
        # name of the previously processed session.
        session_name = _session_name_from_tbody(tbody)

        # Papers start from the third row, so we skip the first two
        papers = []
        for row in tbody.find_all("tr")[2:]:
            paper = _paper_from_row(row)
            if paper is not None:
                papers.append(paper)

        # Store the session details in the dictionary
        # NOTE(review): a repeated session name replaces the earlier entry.
        if papers:
            sessions_details[session_name] = {
                "papers": papers
            }

    return sessions_details

In [60]:
def print_sessions_and_papers(conference_dict):
    """
    Writes every session and the papers it contains to standard output.

    For each session a "Session: <name>" header is printed, followed by the
    title, the comma-separated authors and the URL of each paper, and finally
    a blank separator.

    Parameters:
    conference_dict (dict): Maps session names to dictionaries holding a 'papers' list;
                            each paper provides 'name', 'authors' and 'url'.
    """
    for title, session in conference_dict.items():
        print(f"Session: {title}")
        for entry in session['papers']:
            print(f" - Paper: {entry['name']}")
            joined_authors = ', '.join(entry['authors'])
            print(f"   Authors: {joined_authors}")
            print(f"   URL: {entry['url']}")
        # Separator between two sessions
        print("\n")

In [70]:
def print_session_names(conference_dict):
    """
    Writes each session name of the dictionary on its own line.

    Parameters:
    conference_dict (dict): A dictionary whose keys are the session names.
    """
    # Iterating a dict yields its keys in insertion order
    for title in conference_dict:
        print(f"{title}")

In [92]:
def roman_to_int(roman):
    """
    Converts a Roman numeral to an integer.

    The numeral must stand at the beginning of the string (optionally preceded by a
    single whitespace character) and must be followed by whitespace or the end of
    the string. Only well-formed numerals in the range 1-3999 are accepted,
    including composite ones such as "XIV" or "MCMXCIV".

    Parameters:
    roman (str): A Roman numeral as a string.

    Returns:
    int: The integer representation of the Roman numeral, or None if the input is not a
         valid standalone Roman numeral (this includes the empty string).
    """
    roman_numerals = {
        'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100,
        'D': 500, 'M': 1000
    }

    # Thousands, hundreds, tens and units are matched in sequence so that composite
    # numerals are recognised. Every part may be empty, hence the lookahead that
    # demands at least one numeral character.
    pattern = re.compile(
        r'(?:^|\s)'
        r'(?=[IVXLCDM])'
        r'(M{0,3}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3}))'
        r'(?:\s|$)'
    )

    # Find if the input matches the Roman numeral pattern
    match = pattern.match(roman)

    if not match:
        return None

    numeral = match.group(1)  # Extract the Roman numeral part from the match

    # Walk from right to left: a symbol smaller than its right neighbour is subtractive
    total = 0
    prev_value = 0
    for char in reversed(numeral):
        value = roman_numerals[char]
        if value < prev_value:
            total -= value
        else:
            total += value
        prev_value = value

    return total

In [93]:
def merge_sessions_by_name(conference_dict):
    """
    Merges sessions with the same base topic into a single session, e.g. "Testing 1" and "Testing II".

    A trailing, whitespace-separated token made only of digits or only of the letters
    I, V, X, L, C, D, M is treated as the session number and removed; the remaining text
    is the base name. Papers of all sessions sharing a base name are concatenated in
    the order in which the sessions appear in the input. The input is not modified.

    Note that the suffix is recognised purely by its characters, so a name whose last
    word consists only of those letters (e.g. a trailing "C") loses that word as well.

    Parameters:
    conference_dict (dict): A dictionary where:
        - The keys are session names (strings), which may include a base topic and an optional number suffix (e.g., "Testing 1", "Testing II").
        - The values are dictionaries with a key 'papers', which maps to a list of dictionaries. Each dictionary in the 'papers' list contains:
            - 'name' (str): The title of the paper.
            - 'authors' (list of str): A list of authors of the paper.
            - 'url' (str): The URL to access the paper.

    Returns:
    dict: A dictionary where:
        - The keys are base session names (strings) derived from the input session names.
        - The values are dictionaries with a key 'papers' that contains a list of all papers associated with that base session name.

    """
    # Dictionary to group papers by session base name
    merged_sessions = defaultdict(lambda: {'papers': []})

    # Base name followed by an optional Arabic or Roman number. DOTALL lets the base
    # name span line breaks, so every session name matches and none is dropped.
    pattern = re.compile(r'^(.*?)(?:\s+(\d+|[IVXLCDM]+))?$', re.DOTALL)

    for session_name, details in conference_dict.items():
        match = pattern.match(session_name)
        # The pattern matches any string; the fallback is purely defensive.
        base_name = match.group(1).strip() if match else session_name.strip()

        # Append papers to the corresponding base name in the merged_sessions
        merged_sessions[base_name]['papers'].extend(details['papers'])

    # Convert defaultdict to a regular dict
    return dict(merged_sessions)

In [85]:
def remove_prefix_of_sessions(conference_dict):
    """
    Cleans the session names in the dictionary by removing the prefix 'Technical Session' along with its number and
    the dash ('-'). Only the part after the dash ('-') is kept as the session name.

    Names that do not carry the prefix are kept unchanged. Sessions that end up with the
    same cleaned name have their papers concatenated. The input dictionary and its
    paper lists are not modified.

    Parameters:
    conference_dict (dict): A dictionary where:
        - The keys are session names (strings), which may include 'Technical Session', a number, and a dash ('-') prefix.
        - The values are dictionaries with a key 'papers', which maps to a list of dictionaries. Each dictionary in the 'papers' list contains:
            - 'name' (str): The title of the paper.
            - 'authors' (list of str): A list of authors of the paper.
            - 'url' (str): The URL to access the paper.

    Returns:
    dict: A dictionary where:
        - The keys are cleaned session names (strings), derived from the input session names.
        - The values are dictionaries with a key 'papers' that contains a list of all papers associated with that cleaned session name.
    """

    cleaned_sessions = {}

    # Regular expression to match 'Technical Session' followed by a number and a dash
    pattern = re.compile(r'^Technical Session \d+ - (.+)$')

    for session_name, details in conference_dict.items():
        match = pattern.match(session_name)
        # Keep the original name if it does not match the pattern
        cleaned_name = match.group(1).strip() if match else session_name

        if cleaned_name in cleaned_sessions:
            cleaned_sessions[cleaned_name]['papers'].extend(details['papers'])
        else:
            # Store a copy with its own paper list: a later extend() for the same
            # cleaned name must not grow the caller's list.
            cleaned_sessions[cleaned_name] = {**details, 'papers': list(details['papers'])}

    return cleaned_sessions

In [102]:
def merge_sessions(dicts):
    """
    Combine several session dictionaries into one, matching sessions by their exact name.

    Args:
        dicts (list of dict): Session dictionaries; each maps a session name to a
            dictionary holding a 'papers' list.

    Returns:
        dict: One entry per distinct session name, whose 'papers' list is the
            concatenation of the papers found under that name, in input order.
    """
    combined = {}

    for source in dicts:
        for title in source:
            # Create the bucket on first sight, then append this source's papers
            bucket = combined.setdefault(title, {"papers": []})
            bucket["papers"].extend(source[title]["papers"])

    return combined

## ASE 2022-2023

### ASE 2023

In [132]:
# Scrape the ASE 2023 research-papers programme (opens a headless browser)
ase2023_url = "https://conf.researchr.org/track/ase-2023/ase-2023-papers?track=ASE%20Research%20Papers#program"
ase2023 = extract_sessions_papers(ase2023_url)

# Number of sessions that contain at least one visible paper
print("Sessions in ASE 2023", len(ase2023), "\n")

Sessions in ASE 2023 36 



In [133]:
#print_sessions_and_papers(ase2023)
#print_session_names(ase2023)

In [134]:
# Merge numbered sessions of the same topic (e.g. "Testing 1", "Testing II")
ase2023_cleaned = merge_sessions_by_name(ase2023)

print("Sessions in ASE 2023 after cleaning", len(ase2023_cleaned), "\n")

print_session_names(ase2023_cleaned)

Sessions in ASE 2023 after cleaning 22 

Cloud and Distributed Systems
Testing AI Systems
Infrastructure, Build, and Logs
Open Source and Software Ecosystems
Smart Contracts, Blockchain, Energy efficiency, and green software
Vulnerability and Security
Code Generation
Web Development
Testing Tools and Techniques
Code Quality and Code Smells
Program Repair
Program Analysis
Code Summarization
Program Verification
Code Change Analysis
Software Testing for Specialized Systems
Bug Detection
Autonomous Systems and Agents
Mobile Development
Debugging
Fuzzing
Configuration and Version Management


### ASE 2022

In [135]:
# Scrape the ASE 2022 research-papers programme (opens a headless browser)
ase2022_url = "https://conf.researchr.org/program/ase-2022/program-ase-2022/?track=ASE%20Research%20Papers"
ase2022 = extract_sessions_papers(ase2022_url)

# Number of sessions that contain at least one visible paper
print("Sessions in ASE 2022", len(ase2022), "\n")

Sessions in ASE 2022 35 



In [136]:
# Drop the "Technical Session <n> - " prefix from the session names
ase2022_noprefix = remove_prefix_of_sessions(ase2022)
#print_session_names(ase2022_noprefix)

In [137]:
# Merge numbered sessions of the same topic, starting from the prefix-free names
ase2022_cleaned = merge_sessions_by_name(ase2022_noprefix)

print("Sessions in ASE 2022 after cleaning", len(ase2022_cleaned), "\n")

print_session_names(ase2022_cleaned)

Sessions in ASE 2022 after cleaning 26 

Welcome to Day
AI for SE
Debugging and Troubleshooting
Fuzzing
Mobile Apps
Code Analysis
Source Code Manipulation
Security and Privacy
Testing
Builds and Versions
Analysis and Types
Application Domains
Bug Prediction and Localization
Compilers and Languages
Software Vulnerabilities
Formal Methods and Models
SE for AI
Web, Cloud, Networking
Security
Code Summarization and Recommendation
Human Aspects
Software Repairs
Dynamic and Concolic Analysis
Safety-Critical and Self-Adaptive Systems
Code Similarities and Refactoring
Builds and Dependencies


### Merge both ASE dictionaries

In [139]:
# Merging the ASE 2022 and ASE 2023 dictionaries (sessions are matched by exact name)
merged_dict_ase = merge_sessions([ase2022_cleaned, ase2023_cleaned])

print("Sessions in both ASE dictionaries", len(merged_dict_ase), "\n")

#print_sessions_and_papers(merged_dict_ase)
print_session_names(merged_dict_ase)

Sessions in both ASE dictionaries 47 

Welcome to Day
AI for SE
Debugging and Troubleshooting
Fuzzing
Mobile Apps
Code Analysis
Source Code Manipulation
Security and Privacy
Testing
Builds and Versions
Analysis and Types
Application Domains
Bug Prediction and Localization
Compilers and Languages
Software Vulnerabilities
Formal Methods and Models
SE for AI
Web, Cloud, Networking
Security
Code Summarization and Recommendation
Human Aspects
Software Repairs
Dynamic and Concolic Analysis
Safety-Critical and Self-Adaptive Systems
Code Similarities and Refactoring
Builds and Dependencies
Cloud and Distributed Systems
Testing AI Systems
Infrastructure, Build, and Logs
Open Source and Software Ecosystems
Smart Contracts, Blockchain, Energy efficiency, and green software
Vulnerability and Security
Code Generation
Web Development
Testing Tools and Techniques
Code Quality and Code Smells
Program Repair
Program Analysis
Code Summarization
Program Verification
Code Change Analysis
Software Testing 

## FSE 2023-2024

### FSE 2024

In [119]:
# Scrape the FSE 2024 research-papers programme (opens a headless browser)
fse2024_url = "https://2024.esec-fse.org/program/program-fse-2024/?track=FSE%20Research%20Papers"
fse2024 = extract_sessions_papers(fse2024_url)

# Number of sessions that contain at least one visible paper
print("Sessions in FSE 2024", len(fse2024), "\n")

Sessions in FSE 2024 32 



In [120]:
#print_sessions_and_papers(fse2024)

In [121]:
# Merge numbered sessions of the same topic (e.g. "Testing 1", "Testing II")
fse2024_cleaned = merge_sessions_by_name(fse2024)

print("Sessions in FSE 2024 after cleaning", len(fse2024_cleaned), "\n")

print_session_names(fse2024_cleaned)

Sessions in FSE 2024 after cleaning 15 

Software Maintenance and Comprehension
Human Aspects
Formal Verification
Code Search and Completion
Processes, Requirements, and Architecture
Empirical Studies
Testing
AI4SE
Program Analysis and Performance
Program Repair and Synthesis
Fault Diagnosis and Root Cause Analysis
SE4AI
Security and Privacy
Log Analysis and Debugging
Fuzzing


### FSE 2023

In [122]:
# Scrape the ESEC/FSE 2023 research-papers programme (opens a headless browser)
fse2023_url = "https://2023.esec-fse.org/program/program-fse-2023/?track=ESEC%2FFSE%20Research%20Papers"

fse2023 = extract_sessions_papers(fse2023_url)

# Number of sessions that contain at least one visible paper
print("Sessions in FSE 2023", len(fse2023), "\n")

Sessions in FSE 2023 32 



In [123]:
#print_sessions_and_papers(fse2023)

In [124]:
# Merge numbered sessions of the same topic (e.g. "Testing 1", "Testing II")
fse2023_cleaned = merge_sessions_by_name(fse2023)

print("Sessions in FSE 2023 after cleaning", len(fse2023_cleaned), "\n")

print_session_names(fse2023_cleaned)

Sessions in FSE 2023 after cleaning 16 

Human Aspects
Testing
Machine Learning
Automated Repair
Empirical Studies
Software Evolution
Program Analysis
Code Search and Text to Code
Log Analysis and Debugging
Fault Diagnosis and Root Cause Analysis
Clone and Similarity Detection
Performance
Security
Fuzzing
Formal Verification
Models of Code and Documentation


### Merge both FSE dictionaries

In [140]:
# Merging the FSE 2023 and FSE 2024 dictionaries (sessions are matched by exact name)
merged_dict_fse = merge_sessions([fse2023_cleaned, fse2024_cleaned])

print("Sessions in both FSE dictionaries", len(merged_dict_fse), "\n")

#print_sessions_and_papers(merged_dict_fse)
print_session_names(merged_dict_fse)

Sessions in both FSE dictionaries 24 

Human Aspects
Testing
Machine Learning
Automated Repair
Empirical Studies
Software Evolution
Program Analysis
Code Search and Text to Code
Log Analysis and Debugging
Fault Diagnosis and Root Cause Analysis
Clone and Similarity Detection
Performance
Security
Fuzzing
Formal Verification
Models of Code and Documentation
Software Maintenance and Comprehension
Code Search and Completion
Processes, Requirements, and Architecture
AI4SE
Program Analysis and Performance
Program Repair and Synthesis
SE4AI
Security and Privacy


## ICSE 2023-2024

### ICSE 2024

In [150]:
# Scrape the ICSE 2024 research-track programme (opens a headless browser)
icse2024_url = "https://conf.researchr.org/program/icse-2024/program-icse-2024/?track=ICSE%20Research%20Track"
icse2024 = extract_sessions_papers(icse2024_url)

# Number of sessions that contain at least one visible paper
print("Sessions in ICSE 2024: ", len(icse2024), "\n")

Sessions in ICSE 2024:  71 



In [151]:
# Merge numbered sessions of the same topic (e.g. "Testing 1", "Testing II")
icse2024_cleaned = merge_sessions_by_name(icse2024)

print("Sessions in ICSE 2024 after cleaning", len(icse2024_cleaned), "\n")

print_session_names(icse2024_cleaned)

Sessions in ICSE 2024 after cleaning 24 

AI & Security
Evolution & AI
Testing
Analysis
Human and Social
Generative AI studies
Language Models and Generated Code
Program Repair
Analytics
Security
Evolution
Analysis and Debugging
LLM, NN and other AI technologies
Dependability and Formal methods
Analytics & AI
Program binaries - evolvability
Testing: various bug types
Human and Social Aspects, and Requirements
Fuzzing
Requirements
Testing with and for AI
Vulnerability Detection
Static Detection Techniques
Testing of AI systems


### ICSE 2023

In [152]:
# Scrape the ICSE 2023 technical-track programme (opens a headless browser)
icse2023_url = "https://conf.researchr.org/program/icse-2023/program-icse-2023/?track=ICSE%20Technical%20Track"
icse2023 = extract_sessions_papers(icse2023_url)

# Number of sessions that contain at least one visible paper
print("Sessions in ICSE 2023", len(icse2023), "\n")

Sessions in ICSE 2023 63 



In [153]:
# Merge numbered sessions of the same topic (e.g. "Testing 1", "Testing II")
icse2023_cleaned = merge_sessions_by_name(icse2023)

print("Sessions in ICSE 2023 after cleaning", len(icse2023_cleaned), "\n")

print_session_names(icse2023_cleaned)

Sessions in ICSE 2023 after cleaning 60 

AI models for SE
Fuzzing: applications
Mining software repositories
Fault localization
Formal verification
APIs and libraries
Blockchain/smart contracts
Cognitive aspects of software development
Code smells and clones
Fuzzing: techniques and tools
Software architectures and design
Software security and privacy
AI systems engineering
Debugging
Defect analysis
Developers' behaviors
Program translation and synthesis
Posters
Documentation
Software logging
Test generation
SE for security
Development and evolution of AI-intensive systems
Vulnerability analysis and assessment
Defect detection and prediction
Studies on gender in SE
AI testing
Code review
Program repair techniques and applications
Requirements elicitation and understanding
Software verification
Testing of mobile, web and games
Recommender systems
Program repair with and for AI
Programming languages
AI bias and fairness
Requirements engineering
Software Evolution
Test quality and improve

### Merge both ICSE dictionaries

In [154]:
# Merging the ICSE 2023 and ICSE 2024 dictionaries (sessions are matched by exact name)
merged_dict_icse = merge_sessions([icse2023_cleaned, icse2024_cleaned])

print("Sessions in both ICSE dictionaries", len(merged_dict_icse), "\n")

#print_sessions_and_papers(merged_dict_icse)
print_session_names(merged_dict_icse)

Sessions in both ICSE dictionaries 84 

AI models for SE
Fuzzing: applications
Mining software repositories
Fault localization
Formal verification
APIs and libraries
Blockchain/smart contracts
Cognitive aspects of software development
Code smells and clones
Fuzzing: techniques and tools
Software architectures and design
Software security and privacy
AI systems engineering
Debugging
Defect analysis
Developers' behaviors
Program translation and synthesis
Posters
Documentation
Software logging
Test generation
SE for security
Development and evolution of AI-intensive systems
Vulnerability analysis and assessment
Defect detection and prediction
Studies on gender in SE
AI testing
Code review
Program repair techniques and applications
Requirements elicitation and understanding
Software verification
Testing of mobile, web and games
Recommender systems
Program repair with and for AI
Programming languages
AI bias and fairness
Requirements engineering
Software Evolution
Test quality and improveme

## Merge all dictionaries

In [156]:
# Merging the ICSE, FSE and ASE dictionaries (sessions are matched by exact name)
merged_dict_all = merge_sessions([merged_dict_icse, merged_dict_fse, merged_dict_ase])

print("Sessions in all dictionaries", len(merged_dict_all), "\n")

#print_sessions_and_papers(merged_dict_all)
print_session_names(merged_dict_all)

Sessions in both all dictionaries 143 

AI models for SE
Fuzzing: applications
Mining software repositories
Fault localization
Formal verification
APIs and libraries
Blockchain/smart contracts
Cognitive aspects of software development
Code smells and clones
Fuzzing: techniques and tools
Software architectures and design
Software security and privacy
AI systems engineering
Debugging
Defect analysis
Developers' behaviors
Program translation and synthesis
Posters
Documentation
Software logging
Test generation
SE for security
Development and evolution of AI-intensive systems
Vulnerability analysis and assessment
Defect detection and prediction
Studies on gender in SE
AI testing
Code review
Program repair techniques and applications
Requirements elicitation and understanding
Software verification
Testing of mobile, web and games
Recommender systems
Program repair with and for AI
Programming languages
AI bias and fairness
Requirements engineering
Software Evolution
Test quality and improveme

Grouping the sessions by textual similarity

In [166]:
def group_sessions_by_semantics(session_names, n_clusters=10):
    """
    Clusters session names by the similarity of the words they contain.

    Each name is turned into a TF-IDF vector and the vectors are partitioned with
    KMeans, using a fixed random_state of 42.

    Parameters:
    session_names (list of str): A list of session names.
    n_clusters (int): The number of clusters to form (default is 10).

    Returns:
    dict: A dictionary where keys are the cluster labels assigned by KMeans and values
          are lists of the session names in that cluster, in input order.
    """
    # Dense TF-IDF representation, one row per session name
    features = TfidfVectorizer().fit_transform(session_names).toarray()

    # One cluster label per row
    model = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_ids = model.fit_predict(features)

    # Collect the names under their cluster label
    buckets = {}
    for cluster_id, name in zip(cluster_ids, session_names):
        buckets.setdefault(cluster_id, []).append(name)

    return buckets

In [167]:
# Hard-coded list of the session names to cluster; it is passed to
# group_sessions_by_semantics below.
# NOTE(review): looks like a snapshot of the keys of merged_dict_all — confirm
# and regenerate if the scraped programmes change.
session_names = [
    "AI models for SE",
    "Fuzzing: applications",
    "Mining software repositories",
    "Fault localization",
    "Formal verification",
    "APIs and libraries",
    "Blockchain/smart contracts",
    "Cognitive aspects of software development",
    "Code smells and clones",
    "Fuzzing: techniques and tools",
    "Software architectures and design",
    "Software security and privacy",
    "AI systems engineering",
    "Debugging",
    "Defect analysis",
    "Developers' behaviors",
    "Program translation and synthesis",
    "Posters",
    "Documentation",
    "Software logging",
    "Test generation",
    "SE for security",
    "Development and evolution of AI-intensive systems",
    "Vulnerability analysis and assessment",
    "Defect detection and prediction",
    "Studies on gender in SE",
    "AI testing",
    "Code review",
    "Program repair techniques and applications",
    "Requirements elicitation and understanding",
    "Software verification",
    "Testing of mobile, web and games",
    "Recommender systems",
    "Program repair with and for AI",
    "Programming languages",
    "AI bias and fairness",
    "Requirements engineering",
    "Software Evolution",
    "Test quality and improvement",
    "Runtime analysis and self-adaptation",
    "Developers' forums",
    "Program comprehension",
    "Reverse engineering",
    "Software processes",
    "Static analysis",
    "Testing of database and low-level software",
    "Software performance",
    "Code generation",
    "Software development tools",
    "Fault injection and mutation",
    "Vulnerability detection",
    "Issue reporting and reproduction",
    "Software quality",
    "SE education methods and tools",
    "Metamorphic testing",
    "Pre-trained and few shot learning for SE",
    "Program analysis",
    "Vulnerability testing and patching",
    "Cyber-physical systems testing",
    "Software ecosystems",
    "AI & Security",
    "Evolution & AI",
    "Testing",
    "Analysis",
    "Human and Social",
    "Generative AI studies",
    "Language Models and Generated Code",
    "Program Repair",
    "Analytics",
    "Security",
    "Evolution",
    "Analysis and Debugging",
    "LLM, NN and other AI technologies",
    "Dependability and Formal methods",
    "Analytics & AI",
    "Program binaries - evolvability",
    "Testing: various bug types",
    "Human and Social Aspects, and Requirements",
    "Fuzzing",
    "Requirements",
    "Testing with and for AI",
    "Vulnerability Detection",
    "Static Detection Techniques",
    "Testing of AI systems",
    "Human Aspects",
    "Machine Learning",
    "Automated Repair",
    "Empirical Studies",
    "Program Analysis",
    "Code Search and Text to Code",
    "Log Analysis and Debugging",
    "Fault Diagnosis and Root Cause Analysis",
    "Clone and Similarity Detection",
    "Performance",
    "Formal Verification",
    "Models of Code and Documentation",
    "Software Maintenance and Comprehension",
    "Code Search and Completion",
    "Processes, Requirements, and Architecture",
    "AI4SE",
    "Program Analysis and Performance",
    "Program Repair and Synthesis",
    "SE4AI",
    "Security and Privacy",
    "Welcome to Day",
    "AI for SE",
    "Debugging and Troubleshooting",
    "Mobile Apps",
    "Code Analysis",
    "Source Code Manipulation",
    "Builds and Versions",
    "Analysis and Types",
    "Application Domains",
    "Bug Prediction and Localization",
    "Compilers and Languages",
    "Software Vulnerabilities",
    "Formal Methods and Models",
    "SE for AI",
    "Web, Cloud, Networking",
    "Code Summarization and Recommendation",
    "Software Repairs",
    "Dynamic and Concolic Analysis",
    "Safety-Critical and Self-Adaptive Systems",
    "Code Similarities and Refactoring",
    "Builds and Dependencies",
    "Cloud and Distributed Systems",
    "Testing AI Systems",
    "Infrastructure, Build, and Logs",
    "Open Source and Software Ecosystems",
    "Smart Contracts, Blockchain, Energy efficiency, and green software",
    "Vulnerability and Security",
    "Code Generation",
    "Web Development",
    "Testing Tools and Techniques",
    "Code Quality and Code Smells",
    "Code Summarization",
    "Program Verification",
    "Code Change Analysis",
    "Software Testing for Specialized Systems",
    "Bug Detection",
    "Autonomous Systems and Agents",
    "Mobile Development",
    "Configuration and Version Management"
]

In [168]:
# Group sessions by semantics with 10 clusters
grouped_sessions = group_sessions_by_semantics(session_names, n_clusters=10)

# Print grouped sessions; the group id is the cluster label, and groups are
# listed in the order in which their first session appears in session_names
for group_id, sessions in grouped_sessions.items():
    print(f"Group {group_id}:")
    for session in sessions:
        print(f" - {session}")
    print("\n")

Group 2:
 - AI models for SE
 - Software security and privacy
 - SE for security
 - Studies on gender in SE
 - Pre-trained and few shot learning for SE
 - AI & Security
 - Security
 - Security and Privacy
 - AI for SE
 - SE for AI
 - Vulnerability and Security


Group 4:
 - Fuzzing: applications
 - Formal verification
 - Blockchain/smart contracts
 - Debugging
 - Defect analysis
 - Developers' behaviors
 - Posters
 - Documentation
 - Test generation
 - Vulnerability analysis and assessment
 - Programming languages
 - Runtime analysis and self-adaptation
 - Developers' forums
 - Reverse engineering
 - Static analysis
 - Analysis
 - Analytics
 - Analysis and Debugging
 - Fuzzing
 - Machine Learning
 - Empirical Studies
 - Log Analysis and Debugging
 - Fault Diagnosis and Root Cause Analysis
 - Performance
 - Formal Verification
 - AI4SE
 - SE4AI
 - Welcome to Day
 - Mobile Apps
 - Analysis and Types
 - Application Domains
 - Web, Cloud, Networking
 - Dynamic and Concolic Analysis


Group