In [1]:
import pandas as pd
import os
import re
folder = '../journal-full-text'
# List all files in the folder with csv
journals = [f for f in os.listdir(folder) if f.endswith('.csv')]
journal_issn_list = [['TRA','0965-8564'],
                     ['TRB','0191-2615'],
                     ['TRC','0968-090X'],
                     ['TRD','1361-9209'],
                     ['TRE','1366-5545'],
                     ['TRF','1369-8478'],
                     ['TRIP','2590-1982']]
journal_issn_df = pd.DataFrame(journal_issn_list, columns=['journal','issn'])

In [2]:
for journal in journals:
    # get the journal without the .csv
    journal_issn = journal.split('.csv')[0]
    journal_folder = os.path.join(folder, journal_issn)
    files = os.listdir(journal_folder)
    count = 0
    for file in files:
        # filter the file with .txt
        if file.endswith('.txt'):
            # read the file
            with open(journal_folder + '/' + file, 'r') as f:
                text = f.read()
                if 'github.com' in text:
                    count += 1
    journal_name = journal_issn_df[journal_issn_df['issn'] == journal_issn]['journal'].values[0]
    print(f"{journal_name}:{(count/len(files) * 100):.2f}%")
    # print the journal name in match with the issn

TRIP:3.04%
TRA:3.67%
TRB:10.05%
TRE:3.40%
TRF:1.43%
TRC:12.49%
TRD:3.35%


## Example of llama for the code and data availability

In [70]:
import xml.etree.ElementTree as ET

def extract_sections_and_text_from_xml(file_path):
    # Parse the XML file
    tree = ET.parse(file_path)
    root = tree.getroot()

    # Namespace to handle XML namespaces
    namespaces = {
        'xocs': 'http://www.elsevier.com/xml/xocs/dtd',
        'ce': 'http://www.elsevier.com/xml/common/dtd',
        'ja': 'http://www.elsevier.com/xml/ja/dtd'
    }

    # Extracting the sections using the item-toc element
    sections = []
    for item in root.findall('.//xocs:item-toc-entry', namespaces):
        section_title = item.find('xocs:item-toc-section-title', namespaces)
        section_label = item.find('xocs:item-toc-label', namespaces)
        section_text = []
        
        # Use the section label to find the corresponding section id in <ce:section>
        if section_label is not None:
            label_text = section_label.text.strip()
            section_elem = root.find(f".//ce:section[ce:label='{label_text}']", namespaces)
            if section_elem is not None:
                section_id = section_elem.get('id')
                if section_id:
                    # Now use the section id to extract paragraphs
                    paragraphs = section_elem.findall('.//ce:para', namespaces=namespaces)
                    section_text = ' '.join([para.text for para in paragraphs if para is not None])  # Get all paragraph texts for the specific section
        
        if section_title is not None and section_label is not None:
            sections.append({
                "label": section_label.text,
                "title": section_title.text,
                "text": section_text
            })

    return sections

# Example usage
file_path = '../10.1016_j.trb.2018.10.011.xml'
sections = extract_sections_and_text_from_xml(file_path)
for section in sections:
    if 'experiment' in section['title']:
        print(f"Section {section['label']}: {section['title']}")
        print(f"Text: {section['text']}")
        sectiontext = section['text']
        print("-" * 50)

Section 6: Numerical experiments
Text: This section reports on the results of the experiments conducted to show the effectiveness and efficiency of the Lagrangian relaxation-based method. The solution algorithm is implemented in C using the CPLEX of version 12.6. Experiments are performed on an Intel Xeon (2.1 GHz) Desktop PC with 16 GB RAM. In  We generate the test data based on practical conditions of the Pearl River Delta (PRD) of China 
                         
                         
                         Consider a planning period of 5 hours (i.e.,  
                         For the Lagrangian relaxation-based method proposed in  
                         
                         
                         We next examine the convergence of the Lagrangian relaxation-based method for solving the large instances having 80 and 100 vessels.  To prove computationally that the proposed Lagrangian relaxation-based method is of practical interest in producing quick and good solutio

In [71]:
import ollama

response = ollama.chat(
    model="llama3.2",
    messages=[
        {
            "role": "user",
            "content": sectiontext + "Which programing language the algorithm is deployed? Only with the name.",
        },
    ],
)
print(response["message"]["content"])

CPLEX


In [72]:
import ollama

response = ollama.chat(
    model="llama3.2",
    messages=[
        {
            "role": "user",
            "content": sectiontext + "Is the data mentioned here publicly accessible? Can I get the data? Answer with Yes or No.",
        },
    ],
)
print(response["message"]["content"])

No, the data mentioned in this text does not appear to be publicly accessible. The text mentions that the location data of 20 vessels sailing inside the PRD zone is captured from an automatic identification system (AIS), but it does not provide information on how to access this data or where it was originally obtained.


## Sankey diagram toy example for results

In [73]:
import plotly.graph_objects as go

# Define nodes (categories)
nodes = dict(
    label=["Input", "Output 1", "Output 2", "Output 3", "Another Input", "Output A", "Output B"],  # Labels for nodes
    pad=20,       # Padding between nodes
    thickness=20, # Thickness of nodes
    color=["blue", "orange", "green", "red", "purple", "yellow", "cyan"]  # Colors for nodes
)

# Define links (flows between nodes)
links = dict(
    source=[0, 0, 0, 0, 4, 4],  # Indices of source nodes
    target=[1, 2, 3, 4, 5, 6],  # Indices of target nodes
    value=[100, 30, 20, 50, 15, 35],  # Values of the flows
    color=["blue", "orange", "green", "red", "purple", "yellow"]  # Colors for links (optional)
)

# Create the Sankey diagram
fig = go.Figure(go.Sankey(
    node=nodes,
    link=links
))

# Add a title and show the figure
fig.update_layout(title_text="Sankey Diagram Example", font_size=14)
fig.write_html("../sankey_diagram.html")