### File Structure of the Dataset Folder
```
├── journal-meta/
│   ├── 0968-090X.csv         # TRC
│   ├── mini-dataset.csv      # A mini dataset for tutorial, extracted from TRC
│   └── ... (other journal CSV files)
├── journal-full-text/
│   ├── 0968-090X/
│   │   └── 10.1016_j.trc.2023.104311.xml
│   └── ... (other journal ISSN folders)
```

In [3]:
import pandas as pd
import os
import re
import xml.etree.ElementTree as ET
# Root folder of the downloaded dataset.
# Set the RR_DATASET_ROOT environment variable to point at your own copy; when it is
# not set, the original author's location is used so existing runs keep working.
dataset_root = os.environ.get('RR_DATASET_ROOT', '/Users/junyi/Work/RR/rr-measure-dataset')
full_text_folder = os.path.join(dataset_root, 'journal-full-text')
meta_folder = os.path.join(dataset_root, 'journal-meta')
# List all CSV files in the metadata folder (one per journal, plus the mini dataset)
journals = [f for f in os.listdir(meta_folder) if f.endswith('.csv')]
# Journal short name -> ISSN. The mini dataset is extracted from TRC, so it shares TRC's
# ISSN and therefore TRC's full-text folder.
journal_issn_list = [['TRA','0965-8564'],
                     ['TRB','0191-2615'],
                     ['TRC','0968-090X'],
                     ['TRD','1361-9209'],
                     ['TRE','1366-5545'],
                     ['TRF','1369-8478'],
                     ['TRIP','2590-1982'],
                     ['mini-dataset','0968-090X']]
journal_issn_df = pd.DataFrame(journal_issn_list, columns=['journal','issn'])
# Folder that receives the per-article JSON results written later in the notebook
save_folder = os.path.join(dataset_root, 'results', 'GitHub')

In [4]:
# This section demonstrates how to work with the dataset, 
# utilizing the dataframe's apply method for efficient iteration in a loop.
# An example here is to clean up the abstract.
def cleanup_abstract(abstract):
    """
    Normalize the whitespace of a piece of text.

    Every run of whitespace characters (spaces, tabs, newlines, ...) is collapsed
    into one single space, and whitespace at the start and end is dropped.

    Args:
        abstract (str): The text to normalize, e.g. the abstract of an article.

    Returns:
        str: The text with single spaces between words and no leading or
             trailing whitespace.

    Raises:
        ValueError: If ``abstract`` is not a string (for example a missing
                    value read from a CSV file).

    Example:
        >>> cleanup_abstract("  This  is   an example   abstract.  ")
        'This is an example abstract.'
    """
    if isinstance(abstract, str):
        # Collapse first, trim afterwards: at most one space can remain at each end.
        collapsed = re.sub(r'\s+', ' ', abstract)
        return collapsed.strip()

    raise ValueError("Input must be a string.")
# Clean the abstracts of the tutorial dataset and write the result back to its CSV file.
for journal in journals:
    if journal != 'mini-dataset.csv':  # only the mini dataset is used in this tutorial
        continue
    # Look up the ISSN registered for this journal name in journal_issn_df
    issn_mask = journal_issn_df['journal'] == journal.replace('.csv', '')
    journal_issn = journal_issn_df[issn_mask]['issn'].values[0]
    journal_meta = pd.read_csv(os.path.join(meta_folder, journal))
    journal_meta['issn'] = journal_issn
    # Normalize the whitespace of every abstract
    journal_meta['abstract'] = journal_meta['abstract'].apply(cleanup_abstract)
    # Finally, overwrite the source CSV with the cleaned dataset
    journal_meta.to_csv(os.path.join(meta_folder, journal), index=False)

In [7]:
# This section demonstrates how to link the doi with the full text
def doi_to_unique_id(doi):
    """
    Turn a DOI into the identifier used for the full-text file names.

    Every slash in the DOI is replaced with an underscore; all other
    characters (including dots) are kept unchanged.

    Args:
        doi (str): The DOI of a journal article.

    Returns:
        str: The DOI with each '/' replaced by '_'.

    Example:
        >>> doi_to_unique_id("10.1016/j.trc.2023.104311")
        '10.1016_j.trc.2023.104311'
    """
    return '_'.join(doi.split('/'))

import xml.etree.ElementTree as ET

def _full_text(element):
    """Return the text of ``element`` concatenated with the text of all its descendants."""
    return ''.join(element.itertext())


def _find_section_by_child_text(root, child_tag, wanted_text, namespaces):
    """
    Return the first descendant <ce:section> of ``root`` (document order) that has a
    direct child ``child_tag`` whose full text equals ``wanted_text``; None if there is none.

    The comparison is done in Python instead of an XPath predicate such as
    ``[ce:section-title='...']`` because interpolating the text into the predicate
    fails as soon as the text contains a quote (e.g. "Authors' contributions").
    """
    if wanted_text is None:
        return None
    for candidate in root.iterfind('.//ce:section', namespaces):
        for child in candidate.iterfind(child_tag, namespaces):
            if _full_text(child) == wanted_text:
                return candidate
    return None


def _label_of(section_elem, namespaces):
    """Return the text of the section's direct <ce:label> child, or "" when it has no label element."""
    label_elem = section_elem.find('ce:label', namespaces)
    return label_elem.text if label_elem is not None else ""


def _joined_paragraphs(section_elem, namespaces):
    """Join the full text of all direct <ce:para> children of a section with single spaces."""
    return ' '.join(_full_text(para) for para in section_elem.findall('ce:para', namespaces))


def _titled_child_sections(section_elem, namespaces):
    """Yield (child, title_text) for each direct <ce:section> child that has a <ce:section-title>."""
    for child in section_elem.findall('ce:section', namespaces):
        title_elem = child.find('ce:section-title', namespaces)
        if title_elem is not None:
            yield child, title_elem.text


def extract_sections_and_text_from_xml(file_path):
    """
    Extracts sections and text from an XML file.

    One entry is produced for every <xocs:item-toc-entry> (at any nesting depth) whose
    matching <ce:section> can be found in the document. The section is looked up by its
    label when the table-of-contents entry has one, otherwise by its title
    (e.g. unnumbered sections). Entries without a title, or without a matching
    section, are skipped.

    For each section, "text" holds all paragraphs that are direct children of the
    section (also those that follow a subsection); paragraphs nested inside
    subsections are reported under "subsections" / "subsubsections" instead.
    Child sections without a title element are ignored.

    If the document contains a <ce:data-availability> element, a final entry titled
    "Data Availability" with its full text is appended.

    Args:
        file_path (str): The path to the XML file.

    Returns:
        list: A list of dictionaries with the keys "label", "title", "text" and
              "subsections". Each subsection is a dictionary with "label", "title",
              "text" and "subsubsections"; each subsubsection has "label", "title"
              and "text".

    Raises:
        Whatever ``xml.etree.ElementTree.parse`` raises for a missing or malformed file.

    Example:
        >>> extract_sections_and_text_from_xml('/path/to/file.xml')
        [{'label': '1', 'title': 'Introduction', 'text': 'This is the introduction...', 'subsections': []}]
    """
    # Parse the XML file
    root = ET.parse(file_path).getroot()

    # Prefix -> URI map used to resolve the prefixed tag names below
    namespaces = {
        'xocs': 'http://www.elsevier.com/xml/xocs/dtd',
        'ce': 'http://www.elsevier.com/xml/common/dtd',
        'ja': 'http://www.elsevier.com/xml/ja/dtd',
        'mml': 'http://www.w3.org/1998/Math/MathML'
    }

    sections = []
    # The table of contents drives the extraction
    for item in root.findall('.//xocs:item-toc-entry', namespaces):
        section_title = item.find('xocs:item-toc-section-title', namespaces)
        section_label = item.find('xocs:item-toc-label', namespaces)

        # Without a title there is nothing to look the section up by
        if section_title is None:
            continue

        if section_label is not None and section_label.text is not None:
            # Use the label to find the corresponding <ce:section>
            section_elem = _find_section_by_child_text(
                root, 'ce:label', section_label.text.strip(), namespaces)
        else:
            # No usable label: find the section by title only (e.g., References)
            section_elem = _find_section_by_child_text(
                root, 'ce:section-title', section_title.text, namespaces)

        if section_elem is None:
            continue

        subsections = []
        for sub_elem, sub_title in _titled_child_sections(section_elem, namespaces):
            subsubsections = [
                {
                    "label": _label_of(subsub_elem, namespaces),
                    "title": subsub_title,
                    "text": _joined_paragraphs(subsub_elem, namespaces)
                }
                for subsub_elem, subsub_title in _titled_child_sections(sub_elem, namespaces)
            ]
            subsections.append({
                "label": _label_of(sub_elem, namespaces),
                "title": sub_title,
                "text": _joined_paragraphs(sub_elem, namespaces),
                "subsubsections": subsubsections
            })

        sections.append({
            # An empty label element is reported as "" so the label is always a string
            "label": (section_label.text or "") if section_label is not None else "",
            "title": section_title.text,
            "text": _joined_paragraphs(section_elem, namespaces),
            "subsections": subsections
        })

    # Extract the data availability section separately
    data_availability = root.find('.//ce:data-availability', namespaces)
    if data_availability is not None:
        sections.append({
            "label": "",
            "title": "Data Availability",
            "text": _full_text(data_availability),
            "subsections": []
        })

    return sections

# Function to postprocess sections, subsections, and subsubsections
def postprocess_sections(data):
    """
    Rebuild the section hierarchy from a flat list of labelled entries.

    The input may list a subsection both as an entry of its own and below its parent.
    This function nests entries purely by their labels:

    * an entry whose label is ``<section label>.<x>`` (exactly one dot) becomes a
      subsection of that section;
    * every entry whose label starts with ``<subsection label>.`` becomes a
      subsubsection of that subsection;
    * entries nested this way are no longer emitted as top-level sections;
    * a subsection that received subsubsections gets an empty "text".

    The "subsections" lists already present in the input entries are not used.

    Args:
        data (list): Dictionaries with at least the keys "label", "title" and "text".

    Returns:
        list: Dictionaries with the keys "label", "title", "text" and "subsections".
              Each subsection has "label", "title", "text" and "subsubsections";
              each subsubsection has "label", "title" and "text".

    Example:
        >>> flat = [{"label": "1", "title": "Intro", "text": "a"},
        ...         {"label": "1.1", "title": "Scope", "text": "b"}]
        >>> postprocess_sections(flat)
        [{'label': '1', 'title': 'Intro', 'text': 'a', 'subsections': [{'label': '1.1', 'title': 'Scope', 'text': 'b', 'subsubsections': []}]}]
    """
    nested_labels = set()   # labels already placed below a parent
    hierarchy = []

    for top in data:
        top_label = top["label"]
        if top_label in nested_labels:
            # Already attached to a parent section earlier on
            continue

        top_title = top["title"]
        top_text = top["text"]
        child_prefix = top_label + "."
        children = []

        for candidate in data:
            child_label = candidate["label"]
            # Direct children look like "<parent>.<x>": right prefix and exactly one dot
            if not child_label.startswith(child_prefix) or child_label.count('.') != 1:
                continue

            child_title = candidate["title"]
            child_text = candidate["text"]
            nested_labels.add(child_label)

            grandchild_prefix = child_label + "."
            grandchildren = [
                {
                    "label": entry["label"],
                    "title": entry["title"],
                    "text": entry["text"]
                }
                for entry in data
                if entry["label"].startswith(grandchild_prefix)
            ]
            nested_labels.update(entry["label"] for entry in grandchildren)

            children.append({
                "label": child_label,
                "title": child_title,
                # The text is blanked once the content is carried by subsubsections
                "text": "" if grandchildren else child_text,
                "subsubsections": grandchildren
            })

        hierarchy.append({
            "label": top_label,
            "title": top_title,
            "text": top_text,
            "subsections": children
        })

    return hierarchy


# search the "github.com" across all the text in all the sections, subsections, and subsubsections
# and extract the full github url, like https://github.com/username/repository
def extract_github_urls(text):
    """
    Find GitHub repository URLs in a piece of text.

    Only the ``http(s)://github.com/<user>/<repository>`` part of a link is returned;
    anything after the repository name (sub-paths, a trailing period, ...) is cut off.
    User and repository names are matched as runs of word characters and hyphens.

    Args:
        text (str): The text to search.

    Returns:
        list: The matched URLs in order of appearance; duplicates are kept.
    """
    repository_url = re.compile(r"https?://github\.com/[\w-]+/[\w-]+")
    return [match.group(0) for match in repository_url.finditer(text)]


In [8]:
# For every article of the mini dataset, print the GitHub URLs found anywhere in its full text.
for journal in journals:
    if journal == 'mini-dataset.csv': # to take the mini dataset for tutorial
        journal_issn = journal_issn_df[journal_issn_df['journal'] == journal.replace('.csv', '')]['issn'].values[0]
        journal_meta = pd.read_csv(os.path.join(meta_folder, journal))
        journal_meta['unique_id'] = journal_meta['doi'].apply(doi_to_unique_id) # to convert the doi to a unique id
        # here we use an explicit for loop to help understand how it works, it can be done in one line of code later
        for unique_id in journal_meta['unique_id']:
            github_urls = []
            # Full texts are stored as <full_text_folder>/<issn>/<unique_id>.xml
            fulltext_path = os.path.join(full_text_folder, journal_issn, unique_id + '.xml')
            sections = extract_sections_and_text_from_xml(fulltext_path)
            reorganized_sections = postprocess_sections(sections)
            # Collect the URLs of every level of the hierarchy. The text is whitespace-normalized
            # first; list.extend() returns None, so the URL lists are extended directly
            # (extending with an empty list is a no-op).
            for section in reorganized_sections:
                github_urls.extend(extract_github_urls(cleanup_abstract(section['text'])))
                for subsection in section['subsections']:
                    github_urls.extend(extract_github_urls(cleanup_abstract(subsection['text'])))
                    for subsubsection in subsection['subsubsections']:
                        github_urls.extend(extract_github_urls(cleanup_abstract(subsubsection['text'])))
            if github_urls:
                print(github_urls)

['https://github.com/pabloguarda/isuelogit', 'https://github.com/pabloguarda/pesuelogit']
['https://github.com/zhandongxu/GP_RTAP']
['https://github.com/ethz-coss/voting_traffic']
['https://github.com/tjzxh/EADC']
['https://github.com/LehmannJonas/2E-MT-VRP-PTW-Instances', 'https://github.com/LehmannJonas/2E-MT-VRP-PTW-Instances']
['https://github.com/LiBiyue/MAST-GNN']
['https://github.com/HDDL/DPRDDM']


In [24]:
import ollama
def get_context_with_url(location_text, url, context_up_range=2, context_down_range=2):
    """
    Return the sentence that mentions ``url`` together with its neighbouring sentences.

    The text is split into sentences after '.', '!' or '?' followed by whitespace.
    The first sentence containing the URL is the target; the result is the target
    plus up to ``context_up_range`` sentences before it and up to
    ``context_down_range`` sentences after it, joined with single spaces.

    Args:
        location_text (str): The input text containing the URL.
        url (str): The URL to find in the text.
        context_up_range (int): Number of sentences to include before the target sentence.
        context_down_range (int): Number of sentences to include after the target sentence.

    Returns:
        str: The context around the URL. If the URL does not occur in the text,
             the message "URL not found in the text." is returned; if it occurs in
             the text but in no single sentence, "URL not found in any sentence.".
    """
    # Nothing to do when the URL is absent from the text
    if location_text.find(url) == -1:
        return "URL not found in the text."

    # Split after sentence-ending punctuation followed by whitespace
    sentences = re.split(r'(?<=[.!?])\s+', location_text)

    # Position of the first sentence that mentions the URL
    target_index = next(
        (position for position, sentence in enumerate(sentences) if url in sentence),
        None,
    )
    if target_index is None:
        return "URL not found in any sentence."

    # Clamp the window to the bounds of the sentence list
    first = max(0, target_index - context_up_range)
    last = min(len(sentences), target_index + context_down_range + 1)
    return " ".join(sentences[first:last])
def determine_repository_tool(context_info):
    """
    Ask the "phi3:14b" model through ``ollama.chat`` whether the repository is an
    implementation of the article's method.

    The question is appended directly to ``context_info`` and sent as a single
    user message.

    Args:
        context_info (str): Text surrounding the repository URL in the article.

    Returns:
        The "content" of the "message" in the chat response
        (the model is asked to answer 'yes' or 'no' and explain why).
    """
    question = (
        "Based on the description above, please determine if the code repository is an implementation of the method mentioned in the article."
        " Respond with 'yes' or 'no' and why."
    )
    user_message = {"role": "user", "content": context_info + question}
    reply = ollama.chat(model="phi3:14b", messages=[user_message])
    return reply["message"]["content"]

import openai
import yaml
import json
# Load API key from config.yaml
# The key is read from a YAML file instead of being written in the notebook.
# NOTE(review): the path is absolute and machine-specific — point it at your own config.yaml.
with open("/Users/junyi/Work/RR/config.yaml", "r") as config_file:
    # safe_load only builds plain Python objects from the YAML content
    config = yaml.safe_load(config_file)

# Set the key on the openai module, which the determine_repository_tool_openai* functions call;
# the file must provide it under the "openai_api_key" entry.
openai.api_key = config["openai_api_key"]
def _strip_json_code_fence(content):
    """
    Remove a surrounding Markdown code fence (``` or ```json) from a model reply.

    Surrounding whitespace is ignored when detecting the fence, and the closing fence
    is only removed when it is really there, so replies with a trailing newline or a
    missing closing fence are still reduced to the bare JSON text.
    Replies that do not start with a fence are returned unchanged.
    """
    trimmed = content.strip()
    if not trimmed.startswith("```"):
        return content
    trimmed = trimmed[3:]
    if trimmed[:4].lower() == "json":
        trimmed = trimmed[4:]
    if trimmed.endswith("```"):
        trimmed = trimmed[:-3]
    return trimmed.strip()


def _request_implementation_verdict(model, context_info, prompt, **request_options):
    """
    Send ``context_info + prompt`` to an OpenAI chat model and decode the JSON reply.

    Shared by the public ``determine_repository_tool_openai*`` functions.
    ``request_options`` are passed through to ``openai.ChatCompletion.create``.

    Returns:
        The decoded JSON reply, or a dictionary with an "error" key: together with
        "rawResponse" when the reply is not valid JSON, or alone when the request
        (or reading the reply) raised an exception. This function does not raise.
    """
    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an expert researcher specializing in transportation systems and software tools."},
                {"role": "user", "content": (
                    context_info + prompt
                )},
            ],
            temperature=0,
            max_tokens=16383,
            **request_options
        )

        # Extract the model's response and drop code block markers if present
        content = _strip_json_code_fence(response.choices[0].message.content)

        # Ensure the output is valid JSON
        try:
            return json.loads(content)  # Return as a Python dictionary
        except json.JSONDecodeError:
            # Handle cases where the output isn't valid JSON
            return {
                "error": "Invalid JSON format returned by the model, even after cleanup.",
                "rawResponse": content
            }

    except Exception as e:
        # Deliberately broad: any API/response problem is reported to the caller as an
        # error dictionary instead of aborting a long batch run.
        return {
            "error": f"An error occurred: {str(e)}"
        }


def determine_repository_tool_openai(context_info):
    """
    Uses OpenAI's API ("gpt-4o-mini") to determine if the repository implements the
    described method. The JSON output format is requested in the prompt itself, and
    Markdown code fences around the reply are removed before it is decoded.

    Args:
        context_info (str): Text surrounding the repository URL in the article.

    Returns:
        dict: The decoded reply (requested keys: "isImplementation", "reason",
              "keyIndicators"), or a dictionary with an "error" key (plus
              "rawResponse" for invalid JSON) when something went wrong.
              Callers must check for "error" before reading "isImplementation".
    """
    prompt = """
    **Task**:
    Based on the description above, evaluate whether the provided code repository is an actual implementation of the method discussed in the article.

    **Guidelines**:

    - **Definition of Implementation**: The code is considered an implementation if it is written to test the method or to provide code for reproducing the results presented in the article or to share the code and data for the article.
    
    - **Non-Implementation**: Some repositories may be cited because they utilize existing tools or datasets rather than providing an implementation of the described method. If the GitHub link is just for an existing tool or dataset and not an implementation of the method, please respond with "no".

    - **Indicators of Implementation**:
    - Look for explicit statements such as "the code is available/provided/can be accessed at" or "the code is available at".
    - Such statements are good indicators that the repository contains an implementation of the method.

    Please respond with a structured output in the following format:

    ```json
    {
    "isImplementation": "yes" or "no",
    "reason": "[Provide a brief explanation here]",
    "keyIndicators": ["[Optional: List any key points or observations supporting your reasoning (direct quotes from the text)]"]
    }
    ```
    """
    return _request_implementation_verdict("gpt-4o-mini", context_info, prompt)


def determine_repository_tool_openai_4o(context_info):
    """
    Uses OpenAI's API ("gpt-4o") to determine if the repository implements the
    described method. The output structure is requested through a strict JSON schema
    passed as ``response_format`` instead of being described in the prompt.

    Args:
        context_info (str): Text surrounding the repository URL in the article.

    Returns:
        dict: The decoded reply (schema keys: "isImplementation" ("yes"/"no"),
              "reason", "keyIndicators"), or a dictionary with an "error" key (plus
              "rawResponse" for invalid JSON) when something went wrong.
              Callers must check for "error" before reading "isImplementation".
    """
    # NOTE(review): the prompt ends with a lone ``` line, which looks like a leftover of the
    # JSON example used in determine_repository_tool_openai; it is kept so the text sent to
    # the model stays unchanged.
    prompt = """
    **Task**:
    Based on the description above, evaluate whether the provided code repository is an actual implementation of the method discussed in the article.

    **Guidelines**:

    - **Definition of Implementation**: The code is considered an implementation if it is written to test the method or to provide code for reproducing the results presented in the article or to share the code and data for the article.
    
    - **Non-Implementation**: Some repositories may be cited because they utilize existing tools or datasets rather than providing an implementation of the described method. If the GitHub link is just for an existing tool or dataset and not an implementation of the method, please respond with "no".

    - **Indicators of Implementation**:
    - Look for explicit statements such as "the code is available/provided/can be accessed at" or "the code is available at".
    - Such statements are good indicators that the repository contains an implementation of the method.

    ```
    """
    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "implementation_schema",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "isImplementation": {
                        "type": "string",
                        "description": "Indicates whether the implementation is present or not.",
                        "enum": [
                            "yes",
                            "no"
                        ]
                    },
                    "reason": {
                        "type": "string",
                        "description": "A brief explanation supporting the isImplementation value."
                    },
                    "keyIndicators": {
                        "type": "array",
                        "description": "A list of key points or observations supporting the reasoning.",
                        "items": {
                            "type": "string"
                        }
                    }
                },
                "required": [
                    "isImplementation",
                    "reason",
                    "keyIndicators"
                ],
                "additionalProperties": False
            }
        }
    }
    return _request_implementation_verdict(
        "gpt-4o",
        context_info,
        prompt,
        response_format=response_format,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
        

In [25]:
def _iter_text_units(reorganized_sections):
    """Yield every section, then each of its subsections followed by that subsection's subsubsections."""
    for section in reorganized_sections:
        yield section
        for subsection in section['subsections']:
            yield subsection
            yield from subsection['subsubsections']


# For every TRC article: find the GitHub URLs in its full text, ask the model whether each
# URL points to an implementation of the article's method, and save the verdicts as JSON.
for journal in journals:
    if journal != '0968-090X.csv': # to take the trc dataset for test
        continue
    journal_issn = journal.replace('.csv', '')
    # Several names can share an ISSN (TRC and the mini dataset); the first match is used
    journal_name = journal_issn_df[journal_issn_df['issn'] == journal_issn]['journal'].values[0]
    print(journal_name)
    journal_meta = pd.read_csv(os.path.join(meta_folder, journal))
    journal_meta['unique_id'] = journal_meta['doi'].apply(doi_to_unique_id) # to convert the doi to a unique id
    journal_save_folder = os.path.join(save_folder, journal_name)
    count = 0  # number of articles that mention at least one GitHub URL
    for unique_id in journal_meta['unique_id']:
        fulltext_path = os.path.join(full_text_folder, journal_issn, unique_id + '.xml')
        sections = extract_sections_and_text_from_xml(fulltext_path)
        reorganized_sections = postprocess_sections(sections)

        # Collect the URLs from every level of the hierarchy (whitespace-normalized text)
        github_urls = []
        for text_unit in _iter_text_units(reorganized_sections):
            github_urls.extend(extract_github_urls(cleanup_abstract(text_unit['text'])))
        if not github_urls:
            continue
        count += 1

        results = []
        # dict.fromkeys drops repeated URLs but keeps their order, so each URL is sent to
        # the model once per place it occurs instead of once per mention.
        for url in dict.fromkeys(github_urls):
            for text_unit in _iter_text_units(reorganized_sections):
                if url not in text_unit['text']:
                    continue
                location_text = cleanup_abstract(text_unit['text'])
                context_info = get_context_with_url(location_text, url, context_up_range=20, context_down_range=2)
                tool_or_not = determine_repository_tool_openai_4o(context_info)
                print(tool_or_not)
                # The verdict may be an error dictionary without 'isImplementation',
                # hence .get() instead of indexing.
                if tool_or_not.get('isImplementation') == 'yes':
                    print(url)
                # Exactly one record per (url, location), errors included, so failed
                # requests stay visible in the saved file.
                results.append({
                    "url": url,
                    "context_identified": tool_or_not,
                    "context_info": context_info
                })

        # save the results to a json file for postprocessing
        os.makedirs(journal_save_folder, exist_ok=True)  # also creates save_folder if needed
        save_context_path = os.path.join(journal_save_folder, f'{unique_id}.json')
        with open(save_context_path, 'w') as f:
            json.dump(results, f, indent=4)
    print(f'{journal_name}: {count}/{len(journal_meta)}')

TRC
{'isImplementation': 'yes', 'reason': 'The repository contains Matlab code specifically for conducting numerical experiments related to the performance of different models and data representations in a random missing scenario, as described in the article.', 'keyIndicators': ['The repository is explicitly mentioned in the context of conducting numerical experiments.', "The description indicates that the code is used to test different models and data representations, which aligns with the article's focus.", 'The repository is likely to contain scripts or functions that implement the methods discussed in the article.']}
https://github.com/lijunsun/bgcp_imputation
{'isImplementation': 'no', 'reason': 'The GitHub repository mentioned is a collection of transportation network datasets, including the Sioux Falls network, rather than an implementation of a specific method or algorithm discussed in the article.', 'keyIndicators': ["The repository is titled 'TransportationNetworks', indicati

In [60]:
# Inspect `results` as left by the loop above. The list is rebuilt for every article that
# mentions a GitHub URL, so it only holds the records of the last such article.
results

[{'url': 'https://github.com/DanqingZ/CPS_TRC',
  'context_identified': {'isImplementation': 'yes',
   'reason': 'The provided GitHub link contains code developed for the experiments described, specifically for generating synthetic populations and simulating social networks within the San Francisco Bay Area context as discussed in the article.',
   'keyIndicators': ['The code we developed in this work is available at https://github.com/DanqingZ/CPS_TRC.',
    'To sum up, in this experiment section, we perform an illustrative experiment by generating a synthetic connected population.']}}]

In [31]:
# Key indicators of the verdict most recently assigned to `tool_or_not`
# (raises KeyError if that verdict is an error dictionary).
tool_or_not['keyIndicators']

['The repository specifically mentions the BGCP model implementation.',
 'MCMC sampling algorithms are included in the code, as discussed in the experiments.',
 "The numerical experiments are structured to evaluate model performance, consistent with the article's exploration of different representations."]

In [32]:
# Display the metadata frame, which now also carries the derived `unique_id` column.
journal_meta

Unnamed: 0,title,doi,volume,date,year,month,abstract,unique_id
0,A copula-based estimation of distribution algo...,10.1016/j.trc.2018.12.008,98,2019-01-01,2019,1,\n The importance of calibrat...,10.1016_j.trc.2018.12.008
1,A dynamic two-dimensional (D2D) weight-based m...,10.1016/j.trc.2018.12.009,98,2019-01-01,2019,1,\n Existing map-Matching (MM)...,10.1016_j.trc.2018.12.009
2,Where shall we sync? Clustering passenger flow...,10.1016/j.trc.2018.12.013,98,2019-01-01,2019,1,\n Minimizing passenger trans...,10.1016_j.trc.2018.12.013
3,Machine learning approach to predict aircraft ...,10.1016/j.trc.2018.09.007,98,2019-01-01,2019,1,\n Reliable and predictable g...,10.1016_j.trc.2018.09.007
4,Data-driven activity scheduler for agent-based...,10.1016/j.trc.2018.12.002,98,2019-01-01,2019,1,\n Activity-based modelling i...,10.1016_j.trc.2018.12.002
...,...,...,...,...,...,...,...,...
2045,A novel framework of the alternating direction...,10.1016/j.trc.2024.104843,169,2024-12-01,2024,12,\n This paper proposes a nove...,10.1016_j.trc.2024.104843
2046,A time-embedded attention-based transformer fo...,10.1016/j.trc.2024.104831,169,2024-12-01,2024,12,\n The real-time crash likeli...,10.1016_j.trc.2024.104831
2047,Copula-based transferable models for synthetic...,10.1016/j.trc.2024.104830,169,2024-12-01,2024,12,\n Population synthesis invol...,10.1016_j.trc.2024.104830
2048,The role of individual compensation and accept...,10.1016/j.trc.2024.104834,169,2024-12-01,2024,12,"\n High demand, rising custom...",10.1016_j.trc.2024.104834
