In [19]:
import pandas as pd
import os
from tqdm import tqdm
import json
from utils import *
# replace it with your downloaded folder path
full_data_folder = '/Users/junyi/Work/RR/rr-measure-dataset/journal-full-text'
meta_data_folder = '/Users/junyi/Work/RR/rr-measure-dataset/journal-meta'
result_data_folder = '/Users/junyi/Work/RR/rr-measure-dataset/journal-results'

In [53]:
import openai
from openai import OpenAI
import yaml
with open("/Users/junyi/Work/RR/config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)
openai.api_key = config["openai_api_key"]
client = OpenAI(api_key=config["openai_api_key"])

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time

import string

def correct_url(url):
    return url.rstrip(string.punctuation)
corrected_url = correct_url(url)
corrected_url

def fetch_redirected_url_with_selenium(doi_url):
    try:
        # Set up Selenium WebDriver with headless mode
        options = Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')

        service = Service()  # Ensure ChromeDriver is correctly installed or provide the path
        driver = webdriver.Chrome(service=service, options=options)

        driver.get(doi_url)
        time.sleep(5)  # Allow time for redirection if needed

        redirected_url = driver.current_url
        driver.quit()
        return redirected_url
    except Exception as e:
        return f"Error: {str(e)}"
    
def is_dataset_link(url, client=client):
    """
    Determines if a given URL is a dataset link.

    Args:
        client (OpenAI): The OpenAI client instance.
        url (str): The URL to evaluate.

    Returns:
        bool: True if the URL is a dataset link, False otherwise.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Is this a link to host the dataset by university?" + url
                    }
                ]
            }
        ],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "my_schema",
                "strict": True,
                "schema": {
                    "type": "object",
                    "properties": {
                        "is_dataset_link": {
                            "type": "boolean"
                        }
                    },
                    "required": [
                        "is_dataset_link"
                    ],
                    "additionalProperties": False
                }
            }
        },
        temperature=0,
        max_completion_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    # Extract and return the result
    return response.choices[0].message.content


In [21]:
data = pd.read_csv(os.path.join(meta_data_folder, 'full-meta-dataset.csv'))
data.columns

Index(['title', 'doi', 'volume', 'date', 'year', 'month', 'abstract', 'issn',
       'journal_name', 'unique_id', 'is_github', 'num_of_github_urls',
       'is_availablity_statement', 'is_data_mentioned_in_section_title',
       'is_experiment_mentioned_in_section_title',
       'is_link_in_avaiablity_statement',
       'num_of_links_in_avaiablity_statement'],
      dtype='object')

In [22]:
if not os.path.exists(result_data_folder + '/data-links'):
    os.makedirs(result_data_folder + '/data-links')
# Regular expression to find URLs
url_pattern = r'https?://[^\s\)\.]+(?:[^\s\)]+)' # Matches 'http://' or 'https://' followed by non-whitespace characters
full_url_list = []
for i in tqdm(range(len(data))):
# for i in tqdm(range(2000, 2101)):
    # Initialize data context
    data_context = ''
    label = 0
    try:
        # Construct paths for journal and paper
        journal_path = os.path.join(full_data_folder, data['issn'][i])
        paper_path = os.path.join(journal_path, f"{data['unique_id'][i]}.xml")
        
        # get the abstract from the xml
        abstract = extract_abstract_from_xml(paper_path)
        if abstract:
            # if "data" in abstract.lower():
            if "data" in abstract.lower():
                label = 1
        data_context += (abstract + '\n')
        # Extract and process sections from the paper XML
        sections = extract_sections_and_text_from_xml(paper_path)
        reorganized_sections = postprocess_sections(sections)

        # Traverse the sections and collect data-related text
        for section in reorganized_sections:
            if 'data' in section['title'].lower() and 'supplementary' not in section['title'].lower():
                data_context += (section['text'] + '\n')
                label = 1
            for subsection in section.get('subsections', []):
                if 'data' in subsection['title'].lower():
                    data_context += (subsection['text'] + '\n')
                    label = 1
                for subsubsection in subsection.get('subsubsections', []):
                    if 'data' in subsubsection['title'].lower():
                        data_context += (subsubsection['text'] + '\n')
                        label = 1
    except KeyError as e:
        print(f"Missing key in data: {e}")
    except FileNotFoundError as e:
        print(f"File not found: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")
    if data_context:
        # Find all URLs in the text
        urls = re.findall(url_pattern, data_context)
        if urls:
            print(f"Found {len(urls)} URLs in the text")
            full_url_list.extend(urls)
            # Save the URLs to a file
            with open(os.path.join(result_data_folder, 'data-links', f"{data['unique_id'][i]}.txt"), 'w') as f:
                for url in urls:
                    f.write(url + '\n')
            # check if doi.org is in the url
            for url in urls:
                # result = is_dataset_link(url)
                if (
                    'doi.org' in url
                    or 'osf.io' in url
                    or 'figshare.com' in url
                    or 'data.mendeley.com' in url
                    or 'zenodo' in url
                    or 'dataverse' in url
                    or 'github' in url
                    or 'dataverse.harvard.edu' in url
                    or 'data-in-brief' in url
                    or 'datadryad.org' in url
                ):
                    print(f"Found DOI URL: {url}")

  0%|          | 0/10990 [00:00<?, ?it/s]

  0%|          | 41/10990 [00:00<01:58, 92.20it/s]

Found 1 URLs in the text
Found 1 URLs in the text


  1%|          | 126/10990 [00:01<01:48, 100.03it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://github.com/numenta/NAB/tree/master/data


  1%|▏         | 150/10990 [00:01<01:48, 99.63it/s] 

Found 1 URLs in the text


  2%|▏         | 175/10990 [00:01<01:38, 109.56it/s]

Found 4 URLs in the text


  2%|▏         | 222/10990 [00:02<01:45, 102.20it/s]

Found 1 URLs in the text
Found DOI URL: http://doi.org/10.17605/OSF.IO/DE8KM.
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.7910/DVN/VGMDPC


  2%|▏         | 246/10990 [00:02<01:46, 101.00it/s]

Found 7 URLs in the text
Found 2 URLs in the text


  3%|▎         | 279/10990 [00:02<01:51, 95.92it/s] 

Found 4 URLs in the text
Found DOI URL: https://datasketch.github.io/mobility-actions/.
Found 4 URLs in the text


  3%|▎         | 341/10990 [00:03<01:48, 98.36it/s] 

Found 2 URLs in the text


  3%|▎         | 363/10990 [00:03<01:49, 96.86it/s]

Found 4 URLs in the text
Found 3 URLs in the text


  4%|▎         | 395/10990 [00:04<01:52, 94.28it/s] 

Found 1 URLs in the text


  4%|▍         | 435/10990 [00:04<02:09, 81.47it/s]

Found 2 URLs in the text


  4%|▍         | 465/10990 [00:04<02:11, 80.11it/s]

Found 1 URLs in the text
Found 1 URLs in the text


  5%|▍         | 522/10990 [00:05<02:12, 79.12it/s]

Found 1 URLs in the text
Found 1 URLs in the text


  5%|▌         | 580/10990 [00:06<02:09, 80.58it/s]

Found 1 URLs in the text


  6%|▌         | 627/10990 [00:06<02:10, 79.62it/s]

Found 3 URLs in the text


  6%|▌         | 652/10990 [00:07<02:31, 68.35it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.17632/tfkgpmgf9t.1.


  6%|▌         | 686/10990 [00:08<02:55, 58.56it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.6084/m9.figshare.13171862.


  6%|▋         | 712/10990 [00:08<02:43, 62.94it/s]

Found 1 URLs in the text
Found DOI URL: https://dx.doi.org/10.17632/5gy6s8trbm.2


  7%|▋         | 728/10990 [00:08<02:27, 69.62it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/Lilyhanig/transit_covid_precautions.


  7%|▋         | 751/10990 [00:09<03:02, 56.23it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found 1 URLs in the text


  7%|▋         | 793/10990 [00:09<02:24, 70.50it/s]

Found 1 URLs in the text


  8%|▊         | 825/10990 [00:10<02:41, 63.06it/s]

Found 1 URLs in the text


  8%|▊         | 867/10990 [00:10<02:14, 75.08it/s]

Found 1 URLs in the text


  8%|▊         | 886/10990 [00:10<02:05, 80.82it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.6084/m9.figshare.20311812.v2
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.6084/m9.figshare.24231706.


  8%|▊         | 914/10990 [00:11<02:03, 81.40it/s]

Found 4 URLs in the text
Found 2 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.25338/B8035D
Found 3 URLs in the text
Found 1 URLs in the text


  8%|▊         | 932/10990 [00:11<02:05, 79.89it/s]

Found DOI URL: https://doi.org/10.4121/e4324d0c-2a82-4f03-8cbb-64dd207cd522.


  9%|▊         | 961/10990 [00:11<02:08, 78.11it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/sw8k2/.


  9%|▉         | 987/10990 [00:12<02:07, 78.39it/s]

Found 1 URLs in the text


  9%|▉         | 1025/10990 [00:12<02:02, 81.43it/s]

Found 1 URLs in the text


 10%|▉         | 1050/10990 [00:13<02:33, 64.91it/s]

Found 2 URLs in the text
Found DOI URL: https://zenodo.org/doi/10.5281/zenodo.10778028.
Found DOI URL: https://zenodo.org/doi/10.5281/zenodo.10778028
Found 1 URLs in the text


 10%|▉         | 1077/10990 [00:13<02:14, 73.55it/s]

Found 6 URLs in the text


 10%|▉         | 1094/10990 [00:13<02:18, 71.41it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.17605/OSF.IO/X7AER
Found 1 URLs in the text


 10%|█         | 1143/10990 [00:14<02:23, 68.44it/s]

Found 1 URLs in the text
Found DOI URL: https://datadryad.org/stash/dataset/doi:10.5061/dryad.rv15dv4f0
Found 2 URLs in the text
Found DOI URL: https://doi.org/10.4121/a59fcc0f-4603-490a-b0b8-8b2287141e30.


 11%|█         | 1157/10990 [00:14<02:34, 63.83it/s]

Found 1 URLs in the text


 11%|█         | 1205/10990 [00:15<02:20, 69.58it/s]

Found 1 URLs in the text


 11%|█         | 1229/10990 [00:15<02:32, 64.21it/s]

Found 4 URLs in the text
Found 1 URLs in the text


 12%|█▏        | 1269/10990 [00:16<02:04, 77.91it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 12%|█▏        | 1285/10990 [00:16<02:38, 61.26it/s]

Found 2 URLs in the text


 12%|█▏        | 1302/10990 [00:16<02:20, 68.96it/s]

Found 1 URLs in the text


 12%|█▏        | 1341/10990 [00:17<02:17, 70.07it/s]

Found 3 URLs in the text


 12%|█▏        | 1363/10990 [00:17<02:50, 56.44it/s]

Found 1 URLs in the text


 13%|█▎        | 1388/10990 [00:18<03:12, 49.88it/s]

Found 3 URLs in the text
Found 2 URLs in the text


 13%|█▎        | 1412/10990 [00:18<03:12, 49.80it/s]

Found 1 URLs in the text


 13%|█▎        | 1457/10990 [00:19<02:16, 69.72it/s]

Found 1 URLs in the text
Found 6 URLs in the text
Found 1 URLs in the text


 14%|█▎        | 1498/10990 [00:20<02:59, 52.79it/s]

Found 1 URLs in the text


 14%|█▍        | 1528/10990 [00:20<02:23, 65.89it/s]

Found 3 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/JusteRaimbault/EnergyPriceAt


 14%|█▍        | 1542/10990 [00:21<02:39, 59.12it/s]

Found 2 URLs in the text
Found 2 URLs in the text


 14%|█▍        | 1549/10990 [00:21<02:52, 54.73it/s]

Found 3 URLs in the text
Found 1 URLs in the text


 14%|█▍        | 1569/10990 [00:21<02:32, 61.62it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 2 URLs in the text


 14%|█▍        | 1576/10990 [00:21<02:28, 63.29it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 15%|█▍        | 1623/10990 [00:22<03:12, 48.62it/s]

Found 1 URLs in the text


 15%|█▍        | 1648/10990 [00:22<03:05, 50.45it/s]

Found 8 URLs in the text
Found 1 URLs in the text


 15%|█▌        | 1671/10990 [00:23<03:11, 48.63it/s]

Found 1 URLs in the text
Found 3 URLs in the text


 15%|█▌        | 1685/10990 [00:23<02:59, 51.88it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 16%|█▌        | 1723/10990 [00:24<03:45, 41.17it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 16%|█▌        | 1739/10990 [00:24<02:50, 54.39it/s]

Found 1 URLs in the text


 16%|█▌        | 1753/10990 [00:25<02:44, 56.29it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 16%|█▌        | 1784/10990 [00:25<02:24, 63.53it/s]

Found 1 URLs in the text


 16%|█▋        | 1799/10990 [00:25<02:28, 62.00it/s]

Found 1 URLs in the text


 16%|█▋        | 1813/10990 [00:26<02:33, 59.91it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found 3 URLs in the text


 17%|█▋        | 1827/10990 [00:26<02:36, 58.71it/s]

Found 1 URLs in the text


 17%|█▋        | 1862/10990 [00:27<02:37, 58.02it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/uky-transport-data-science/forecastcarddata.
Found 1 URLs in the text
Found DOI URL: https://github.com/CxAalto/gtfspy


 17%|█▋        | 1908/10990 [00:27<02:45, 54.72it/s]

Found 2 URLs in the text


 18%|█▊        | 1927/10990 [00:28<02:52, 52.60it/s]

Found 2 URLs in the text
Found 2 URLs in the text


 18%|█▊        | 1942/10990 [00:28<02:43, 55.25it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 3 URLs in the text


 18%|█▊        | 1989/10990 [00:29<02:38, 56.66it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 3 URLs in the text


 18%|█▊        | 2005/10990 [00:29<02:17, 65.48it/s]

Found 1 URLs in the text


 18%|█▊        | 2032/10990 [00:30<03:03, 48.83it/s]

Found 5 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 19%|█▊        | 2045/10990 [00:30<02:49, 52.84it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.5107737
Found 1 URLs in the text
Found 2 URLs in the text


 19%|█▉        | 2069/10990 [00:30<02:21, 63.18it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/djxs6/?view_only=036e96dae2f94c0b9976372b10a0801b


 19%|█▉        | 2114/10990 [00:31<02:28, 59.69it/s]

Found 1 URLs in the text


 19%|█▉        | 2135/10990 [00:32<02:39, 55.44it/s]

Found 5 URLs in the text
Found 1 URLs in the text


 20%|█▉        | 2149/10990 [00:32<02:41, 54.65it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 20%|█▉        | 2166/10990 [00:32<03:10, 46.35it/s]

Found 1 URLs in the text


 20%|█▉        | 2187/10990 [00:33<02:42, 54.06it/s]

Found 3 URLs in the text
Found 3 URLs in the text


 20%|██        | 2215/10990 [00:33<02:21, 62.04it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 20%|██        | 2230/10990 [00:33<02:16, 64.13it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/CSSEGISandData/COVID-19
Found 1 URLs in the text
Found 3 URLs in the text


 20%|██        | 2250/10990 [00:34<02:39, 54.79it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/websense/CIAMpassengers
Found 4 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 21%|██        | 2262/10990 [00:34<02:46, 52.54it/s]

Found 2 URLs in the text
Found 3 URLs in the text
Found DOI URL: https://data.mendeley.com/datasets/m2ccbv7rcr/1.


 21%|██        | 2322/10990 [00:35<03:36, 39.99it/s]

Found 1 URLs in the text


 21%|██        | 2332/10990 [00:36<03:44, 38.48it/s]

Found 1 URLs in the text


 21%|██▏       | 2338/10990 [00:36<03:57, 36.39it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/2t48x/?view_only=9f580f8ba16e4cc69ecf3a43e8bd96eb
Found 1 URLs in the text


 21%|██▏       | 2344/10990 [00:36<03:29, 41.30it/s]

Found 3 URLs in the text
Found 13 URLs in the text


 22%|██▏       | 2369/10990 [00:37<05:54, 24.30it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/giovanni-cal/future-transit.
Found 3 URLs in the text


 22%|██▏       | 2389/10990 [00:38<03:54, 36.61it/s]

Found 2 URLs in the text
Found DOI URL: https://www.doi.org/10.17632/f38zrhc2k7.1.
Found DOI URL: https://doi.org/10.17632/f38zrhc2k7.1.
Found 2 URLs in the text


 22%|██▏       | 2401/10990 [00:38<03:21, 42.57it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found 4 URLs in the text


 22%|██▏       | 2421/10990 [00:38<02:56, 48.50it/s]

Found 3 URLs in the text
Found DOI URL: https://github.com/UTEL-UIUC/Ridesharing-Scale-Effects.
Found 1 URLs in the text


 22%|██▏       | 2433/10990 [00:38<02:53, 49.41it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/chechezhe/IEUATMP
Found 2 URLs in the text
Found DOI URL: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/FSGDQM.


 22%|██▏       | 2466/10990 [00:39<02:37, 54.02it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.7910/DVN/UOAKTM.


 23%|██▎       | 2478/10990 [00:39<03:16, 43.21it/s]

Found 2 URLs in the text


 23%|██▎       | 2493/10990 [00:40<03:21, 42.20it/s]

Found 2 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://github.com/ghirte/WorkingFromSelfDrivingCar.
Found DOI URL: https://github.com/ghirte/WorkingFromCar.


 23%|██▎       | 2503/10990 [00:40<03:32, 39.90it/s]

Found 3 URLs in the text


 23%|██▎       | 2510/10990 [00:40<03:21, 42.12it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 23%|██▎       | 2522/10990 [00:40<03:18, 42.70it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 23%|██▎       | 2540/10990 [00:41<03:15, 43.21it/s]

Found 4 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/matsim-scenarios/matsim-vulkaneifel.
Found 3 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://zenodo.org/record/6803956#.Y-HdfXZKiUk
Found 1 URLs in the text
Found DOI URL: http://doi.org/10.17035/d.2023.0298704521.


 23%|██▎       | 2554/10990 [00:41<02:57, 47.60it/s]

Found 1 URLs in the text


 23%|██▎       | 2568/10990 [00:41<02:48, 50.12it/s]

Found 1 URLs in the text
Found 2 URLs in the text


 23%|██▎       | 2580/10990 [00:42<03:18, 42.47it/s]

Found 6 URLs in the text


 24%|██▎       | 2587/10990 [00:42<03:10, 44.08it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 24%|██▎       | 2605/10990 [00:42<03:02, 46.05it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 24%|██▍       | 2615/10990 [00:42<03:23, 41.09it/s]

Found 1 URLs in the text


 24%|██▍       | 2625/10990 [00:43<03:27, 40.38it/s]

Found 3 URLs in the text


 24%|██▍       | 2648/10990 [00:43<03:12, 43.39it/s]

Found 1 URLs in the text
Found DOI URL: https://data.mendeley.com/datasets/xt69cnwh56/1


 24%|██▍       | 2666/10990 [00:44<03:27, 40.07it/s]

Found 1 URLs in the text


 25%|██▍       | 2693/10990 [00:44<03:00, 45.97it/s]

Found 1 URLs in the text


 25%|██▍       | 2715/10990 [00:45<02:58, 46.39it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/yuki-oyama/pedzone-design.


 25%|██▍       | 2720/10990 [00:45<03:52, 35.57it/s]

Found 7 URLs in the text
Found 3 URLs in the text
Found 2 URLs in the text
Found 4 URLs in the text


 25%|██▍       | 2735/10990 [00:45<03:22, 40.70it/s]

Found 2 URLs in the text


 25%|██▍       | 2740/10990 [00:46<03:33, 38.70it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 3 URLs in the text
Found 1 URLs in the text


 25%|██▌       | 2763/10990 [00:46<04:04, 33.70it/s]

Found 2 URLs in the text


 25%|██▌       | 2771/10990 [00:46<03:46, 36.24it/s]

Found 1 URLs in the text
Found 11 URLs in the text


 25%|██▌       | 2781/10990 [00:47<03:35, 38.13it/s]

Found 1 URLs in the text


 25%|██▌       | 2793/10990 [00:47<04:22, 31.28it/s]

Found 2 URLs in the text
Found 3 URLs in the text


 25%|██▌       | 2801/10990 [00:47<04:28, 30.52it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/goat-community/goat.


 26%|██▌       | 2840/10990 [00:48<03:20, 40.66it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/GretaGalliani/dynamic-OD-estimation-railway-network.
Found DOI URL: https://github.com/GretaGalliani/dynamic-OD-estimation-railway-network
Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.4121/f1272944-9b3d-480f-a37a-b41d972aec7d
Found 4 URLs in the text
Found 1 URLs in the text


 27%|██▋       | 2915/10990 [00:51<06:02, 22.28it/s]

Found 1 URLs in the text


 27%|██▋       | 2924/10990 [00:51<04:47, 28.02it/s]

Found 1 URLs in the text


 27%|██▋       | 2990/10990 [00:54<05:58, 22.30it/s]

Found 1 URLs in the text


 27%|██▋       | 2996/10990 [00:54<06:11, 21.50it/s]

Found 1 URLs in the text


 28%|██▊       | 3087/10990 [00:58<05:34, 23.63it/s]

Found 2 URLs in the text


 28%|██▊       | 3106/10990 [00:59<05:31, 23.79it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 29%|██▉       | 3163/10990 [01:02<05:54, 22.10it/s]

Found 4 URLs in the text


 29%|██▉       | 3173/10990 [01:02<06:08, 21.19it/s]

Found 1 URLs in the text


 30%|██▉       | 3249/10990 [01:06<06:46, 19.04it/s]

Found 1 URLs in the text


 30%|██▉       | 3262/10990 [01:06<05:49, 22.12it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/CATS-Lab-USF.
Found DOI URL: https://github.com/CATS-Lab-USF


 30%|██▉       | 3274/10990 [01:07<05:47, 22.20it/s]

Found 3 URLs in the text


 30%|██▉       | 3277/10990 [01:07<06:43, 19.11it/s]

Found 1 URLs in the text


 30%|██▉       | 3287/10990 [01:08<07:06, 18.08it/s]

Found 3 URLs in the text


 30%|███       | 3315/10990 [01:09<06:28, 19.77it/s]

Found 1 URLs in the text


 30%|███       | 3349/10990 [01:11<06:57, 18.31it/s]

Found 2 URLs in the text


 31%|███       | 3364/10990 [01:11<06:09, 20.63it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://github.com/Lyurlik/multidirectional-traffic-model,


 31%|███       | 3415/10990 [01:14<07:27, 16.93it/s]

Found 3 URLs in the text
Found DOI URL: https://data.mendeley.com/datasets/72z2mb843k/1..
Found 1 URLs in the text


 31%|███       | 3421/10990 [01:15<08:36, 14.66it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/deborahmit/TasteNet-MNL.
Found 2 URLs in the text
Found DOI URL: https://doi.org/10.13140/RG.2.2.22522.11209.


 31%|███       | 3426/10990 [01:15<07:46, 16.22it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/BUILTNYU/transit-data-game.
Found 1 URLs in the text


 31%|███▏      | 3442/10990 [01:16<05:27, 23.03it/s]

Found 1 URLs in the text


 31%|███▏      | 3453/10990 [01:16<04:48, 26.14it/s]

Found 1 URLs in the text
Found DOI URL: http://dx.doi.org/10.17632/jybm8hkp9y.1.
Found 1 URLs in the text
Found DOI URL: https://github.com/xr2006/AMXL.git


 32%|███▏      | 3472/10990 [01:17<05:43, 21.87it/s]

Found 1 URLs in the text


 32%|███▏      | 3518/10990 [01:20<05:57, 20.90it/s]

Found 2 URLs in the text


 32%|███▏      | 3559/10990 [01:22<06:55, 17.88it/s]

Found 1 URLs in the text


 33%|███▎      | 3589/10990 [01:24<07:04, 17.45it/s]

Found 1 URLs in the text


 33%|███▎      | 3595/10990 [01:24<09:11, 13.40it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/pabloguarda/isuelogit.


 33%|███▎      | 3639/10990 [01:27<06:58, 17.58it/s]

Found 3 URLs in the text


 33%|███▎      | 3670/10990 [01:29<07:43, 15.80it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/valhalla/valhalla


 34%|███▍      | 3723/10990 [01:32<05:29, 22.03it/s]

Found 1 URLs in the text


 34%|███▍      | 3732/10990 [01:32<05:27, 22.19it/s]

Found 2 URLs in the text


 34%|███▍      | 3742/10990 [01:33<05:38, 21.39it/s]

Found 1 URLs in the text
Found 2 URLs in the text


 34%|███▍      | 3764/10990 [01:34<05:44, 20.99it/s]

Found 1 URLs in the text


 34%|███▍      | 3779/10990 [01:35<05:07, 23.44it/s]

Found 1 URLs in the text


 35%|███▍      | 3805/10990 [01:36<04:30, 26.54it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org//10-2014/15


 35%|███▌      | 3886/10990 [01:39<06:14, 18.97it/s]

Found 1 URLs in the text


 36%|███▌      | 3935/10990 [01:42<05:05, 23.10it/s]

Found 1 URLs in the text


 36%|███▋      | 3984/10990 [01:44<04:45, 24.50it/s]

Found 2 URLs in the text


 36%|███▋      | 3993/10990 [01:44<05:19, 21.92it/s]

Found 3 URLs in the text
Found DOI URL: https://doi.org/10.4121/uuid:5725add4-7fe8-41d1-a452-b1fc011e0bae
Found DOI URL: https://doi.org/10.4121/uuid:5725add4-7fe8-41d1-a452-b1fc011e0bae
Found 1 URLs in the text


 36%|███▋      | 4007/10990 [01:45<04:19, 26.88it/s]

Found 1 URLs in the text


 37%|███▋      | 4027/10990 [01:46<05:30, 21.06it/s]

Found 1 URLs in the text


 37%|███▋      | 4071/10990 [01:48<05:00, 22.99it/s]

Found 1 URLs in the text


 37%|███▋      | 4090/10990 [01:49<04:46, 24.07it/s]

Found 1 URLs in the text


 38%|███▊      | 4139/10990 [01:51<05:41, 20.06it/s]

Found 1 URLs in the text


 39%|███▊      | 4240/10990 [01:56<04:05, 27.48it/s]

Found 5 URLs in the text
Found DOI URL: http://dx.doi.org/10.17632/zsmk3dxh7s.1


 39%|███▊      | 4246/10990 [01:56<04:55, 22.80it/s]

Found 1 URLs in the text


 39%|███▊      | 4256/10990 [01:57<05:17, 21.23it/s]

Found 4 URLs in the text
Found 1 URLs in the text


 39%|███▉      | 4268/10990 [01:57<04:40, 23.95it/s]

Found 1 URLs in the text


 39%|███▉      | 4283/10990 [01:58<04:54, 22.79it/s]

Found 1 URLs in the text


 39%|███▉      | 4318/10990 [02:00<05:30, 20.20it/s]

Found 2 URLs in the text


 39%|███▉      | 4324/10990 [02:00<04:47, 23.19it/s]

Found 1 URLs in the text


 40%|███▉      | 4361/10990 [02:02<05:14, 21.07it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 40%|████      | 4415/10990 [02:05<05:05, 21.50it/s]

Found 1 URLs in the text


 40%|████      | 4439/10990 [02:06<05:30, 19.85it/s]

Found 1 URLs in the text


 41%|████      | 4451/10990 [02:07<05:28, 19.88it/s]

Found 2 URLs in the text


 41%|████      | 4469/10990 [02:08<06:00, 18.08it/s]

Found 2 URLs in the text


 41%|████      | 4519/10990 [02:10<04:08, 26.02it/s]

Found 1 URLs in the text


 42%|████▏     | 4563/10990 [02:12<04:42, 22.73it/s]

Found 1 URLs in the text


 42%|████▏     | 4583/10990 [02:13<04:38, 22.99it/s]

Found 2 URLs in the text


 42%|████▏     | 4589/10990 [02:14<05:45, 18.54it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.17863/CAM.85777.


 42%|████▏     | 4598/10990 [02:14<04:29, 23.68it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.48328/tudatalib-800.


 42%|████▏     | 4635/10990 [02:16<05:52, 18.05it/s]

Found 1 URLs in the text


 42%|████▏     | 4657/10990 [02:17<04:50, 21.77it/s]

Found 1 URLs in the text


 42%|████▏     | 4664/10990 [02:18<07:09, 14.72it/s]

Found 1 URLs in the text


 43%|████▎     | 4707/10990 [02:20<04:43, 22.18it/s]

Found 1 URLs in the text


 43%|████▎     | 4733/10990 [02:21<04:59, 20.87it/s]

Found 2 URLs in the text


 43%|████▎     | 4763/10990 [02:23<04:17, 24.19it/s]

Found 3 URLs in the text
Found DOI URL: https://github.com/418399/COVID-19-related-data.git.


 43%|████▎     | 4767/10990 [02:23<03:56, 26.27it/s]

Found 1 URLs in the text


 44%|████▍     | 4810/10990 [02:26<05:29, 18.75it/s]

Found 2 URLs in the text


 44%|████▍     | 4828/10990 [02:26<04:41, 21.86it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 44%|████▍     | 4877/10990 [02:29<04:27, 22.86it/s]

Found 1 URLs in the text


 44%|████▍     | 4886/10990 [02:29<04:51, 20.94it/s]

Found 4 URLs in the text


 45%|████▍     | 4909/10990 [02:30<05:40, 17.88it/s]

Found 1 URLs in the text


 45%|████▍     | 4912/10990 [02:31<06:09, 16.45it/s]

Found 1 URLs in the text


 45%|████▍     | 4919/10990 [02:31<05:05, 19.89it/s]

Found 3 URLs in the text


 45%|████▍     | 4941/10990 [02:32<04:36, 21.86it/s]

Found 2 URLs in the text
Found 3 URLs in the text
Found 1 URLs in the text
Found 3 URLs in the text


 45%|████▌     | 4951/10990 [02:32<05:16, 19.11it/s]

Found 1 URLs in the text


 45%|████▌     | 4958/10990 [02:33<04:55, 20.44it/s]

Found 5 URLs in the text


 45%|████▌     | 4965/10990 [02:33<04:35, 21.85it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/DanieleGioia/PerishableMEC.


 45%|████▌     | 4978/10990 [02:34<06:22, 15.71it/s]

Found 2 URLs in the text
Found DOI URL: https://doi.org/10.25592/uhhfdm.10389
Found DOI URL: https://doi.org/10.25592/uhhfdm.11182


 45%|████▌     | 4984/10990 [02:34<05:49, 17.18it/s]

Found 2 URLs in the text
Found 3 URLs in the text
Found DOI URL: https://github.com/amatc2/Graph-Multi-Attention-Network.
Found 2 URLs in the text


 46%|████▌     | 5018/10990 [02:36<06:08, 16.20it/s]

Found 1 URLs in the text


 46%|████▌     | 5032/10990 [02:37<05:20, 18.59it/s]

Found 1 URLs in the text


 46%|████▌     | 5058/10990 [02:38<05:03, 19.56it/s]

Found 1 URLs in the text


 46%|████▌     | 5078/10990 [02:39<04:52, 20.20it/s]

Found 1 URLs in the text
Found 3 URLs in the text
Found 1 URLs in the text


 46%|████▋     | 5104/10990 [02:40<04:55, 19.91it/s]

Found 1 URLs in the text


 47%|████▋     | 5133/10990 [02:42<04:11, 23.26it/s]

Found 1 URLs in the text
Found DOI URL: https://data.mendeley.com/preview/kcjftcdxyz?a=602bac27-ef73-4f25-8a3d-b7a346f48251.
Found 1 URLs in the text
Found DOI URL: https://github.com/burnsaar/Parking_Slot_Assignment_Problem-Public.


 47%|████▋     | 5146/10990 [02:43<06:29, 14.99it/s]

Found 1 URLs in the text


 47%|████▋     | 5150/10990 [02:43<06:48, 14.28it/s]

Found 1 URLs in the text


 47%|████▋     | 5166/10990 [02:44<05:11, 18.72it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/FengYinLeo/PGI-SDMI.


 47%|████▋     | 5207/10990 [02:46<04:19, 22.32it/s]

Found 1 URLs in the text


 48%|████▊     | 5241/10990 [02:48<04:14, 22.56it/s]

Found 1 URLs in the text
Found 15 URLs in the text


 48%|████▊     | 5265/10990 [02:49<04:26, 21.47it/s]

Found 2 URLs in the text


 48%|████▊     | 5289/10990 [02:50<06:38, 14.31it/s]

Found 1 URLs in the text


 48%|████▊     | 5296/10990 [02:50<05:01, 18.91it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/lgraff/nomad


 48%|████▊     | 5303/10990 [02:51<04:33, 20.77it/s]

Found 1 URLs in the text


 48%|████▊     | 5316/10990 [02:51<04:24, 21.49it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/Shenglin807/SARP_RL_Dataset/tree/master.


 50%|████▉     | 5483/10990 [02:53<01:15, 73.38it/s] 

Found 1 URLs in the text


 50%|█████     | 5534/10990 [02:54<01:04, 84.74it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.17605/OSF.IO/8JE5C.
Found 3 URLs in the text


 51%|█████     | 5620/10990 [02:55<01:14, 72.37it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://github.com/jramshur/ECG_Viewer.


 52%|█████▏    | 5736/10990 [02:57<00:57, 91.96it/s]

Found 1 URLs in the text


 53%|█████▎    | 5771/10990 [02:57<01:23, 62.74it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/doi:10.7282/t3-9d68-0k53.
Found 2 URLs in the text


 54%|█████▎    | 5884/10990 [02:59<01:05, 77.62it/s]

Found 1 URLs in the text


 54%|█████▎    | 5901/10990 [02:59<01:10, 72.36it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 54%|█████▍    | 5944/10990 [03:00<01:06, 76.44it/s]

Found 1 URLs in the text


 55%|█████▍    | 5992/10990 [03:00<01:01, 81.14it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.4121/14483220.


 55%|█████▍    | 6001/10990 [03:00<01:00, 82.27it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/bazilinskyy/coupled-sim.
Found DOI URL: https://doi.org/10.4121/14096067.
Found 1 URLs in the text
Found 3 URLs in the text
Found DOI URL: https://github.com/sccn/labstreaminglayer/blob/master/README.rst


 55%|█████▌    | 6058/10990 [03:01<00:57, 85.72it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 55%|█████▌    | 6095/10990 [03:02<01:07, 72.00it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/sxf4u/


 56%|█████▌    | 6147/10990 [03:02<01:02, 77.64it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.4121/16866709.


 56%|█████▌    | 6164/10990 [03:03<01:04, 74.72it/s]

Found 1 URLs in the text


 57%|█████▋    | 6213/10990 [03:03<01:04, 74.29it/s]

Found 1 URLs in the text


 57%|█████▋    | 6299/10990 [03:04<00:58, 80.77it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 58%|█████▊    | 6336/10990 [03:05<01:03, 73.67it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/3g5jm/


 58%|█████▊    | 6344/10990 [03:05<01:27, 52.96it/s]

Found 1 URLs in the text
Found DOI URL: https://www.doi.org/10.17605/OSF.IO/6G8VS


 58%|█████▊    | 6383/10990 [03:06<01:14, 61.75it/s]

Found 2 URLs in the text
Found DOI URL: https://osf.io/k9w8q/
Found DOI URL: https://osf.io/k9w8q/
Found 2 URLs in the text
Found DOI URL: https://doi.org/10.4121/21952685.
Found DOI URL: https://github.com/bazilinskyy/risk-dash-crowdsourcing.
Found 2 URLs in the text


 58%|█████▊    | 6421/10990 [03:06<00:56, 80.28it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/rndxz/
Found 1 URLs in the text
Found DOI URL: https://osf.io/kq7f5/?view_only=421edb20fa4f499abd6385f1b1389d0b.
Found 1 URLs in the text
Found DOI URL: https://osf.io/k3cmn.


 59%|█████▊    | 6430/10990 [03:06<01:04, 70.97it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.5107737.
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.4121/21069820.


 59%|█████▉    | 6461/10990 [03:07<00:53, 84.52it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.48328/tudatalib-1030.


 59%|█████▉    | 6537/10990 [03:08<00:58, 75.92it/s]

Found 1 URLs in the text


 60%|█████▉    | 6580/10990 [03:08<01:02, 70.56it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.17605/OSF.IO/FN4VW.


 60%|██████    | 6632/10990 [03:09<01:04, 68.09it/s]

Found 2 URLs in the text
Found DOI URL: https://doi.org/10.17605/OSF.IO/4KNVP.
Found DOI URL: https://osf.io/fd4a7/.
Found 1 URLs in the text


 60%|██████    | 6648/10990 [03:09<01:03, 68.32it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/j6t45/.


 61%|██████    | 6686/10990 [03:10<01:06, 64.44it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/qbtgh/.
Found 1 URLs in the text


 61%|██████    | 6710/10990 [03:10<01:02, 68.15it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/r7whj/?view_only=0f465dc9edf54377be06edf34e4c0bde.
Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://osf.io/h27nz


 62%|██████▏   | 6775/10990 [03:11<00:53, 78.22it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.17605/OSF.IO/J7FYS.


 62%|██████▏   | 6800/10990 [03:11<00:58, 71.14it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/p6emd/?view_only=97afb5a9d61c40a6b2b1410eea043045.
Found 1 URLs in the text
Found DOI URL: https://github.com/NUS-DBE/Pretrain-Finetune-safety-climate.
Found 1 URLs in the text


 62%|██████▏   | 6816/10990 [03:12<00:57, 72.28it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://osf.io/5d23m/


 62%|██████▏   | 6824/10990 [03:12<01:01, 67.71it/s]

Found 1 URLs in the text
Found DOI URL: https://osf.io/p2wme.
Found 8 URLs in the text


 62%|██████▏   | 6854/10990 [03:12<01:04, 64.36it/s]

Found 1 URLs in the text


 62%|██████▏   | 6867/10990 [03:13<01:16, 54.09it/s]

Found 16 URLs in the text
Found 1 URLs in the text
Found DOI URL: http://doi.org/10.4121/uuid:1ccca375-68ca-4cb6-8fc0-926712f50404


 63%|██████▎   | 6879/10990 [03:13<01:29, 46.08it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.1205229.


 63%|██████▎   | 6923/10990 [03:14<01:46, 38.34it/s]

Found 1 URLs in the text


 63%|██████▎   | 6960/10990 [03:15<01:52, 35.70it/s]

Found 2 URLs in the text


 63%|██████▎   | 6978/10990 [03:16<01:45, 38.18it/s]

Found 2 URLs in the text


 64%|██████▎   | 7006/10990 [03:16<02:03, 32.35it/s]

Found 3 URLs in the text


 64%|██████▍   | 7057/10990 [03:18<01:53, 34.51it/s]

Found 2 URLs in the text


 64%|██████▍   | 7068/10990 [03:18<01:47, 36.51it/s]

Found 7 URLs in the text


 65%|██████▍   | 7104/10990 [03:19<02:02, 31.78it/s]

Found 1 URLs in the text


 65%|██████▍   | 7116/10990 [03:20<02:39, 24.30it/s]

Found 1 URLs in the text


 65%|██████▍   | 7136/10990 [03:21<02:54, 22.14it/s]

Found 1 URLs in the text


 66%|██████▌   | 7201/10990 [03:23<01:51, 34.05it/s]

Found 1 URLs in the text


 66%|██████▌   | 7229/10990 [03:24<02:04, 30.20it/s]

Found 1 URLs in the text


 66%|██████▌   | 7256/10990 [03:25<01:46, 34.91it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/DrKeHan/DTD.


 66%|██████▌   | 7267/10990 [03:25<01:30, 41.31it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/zhiyongc/Seattle-Loop-Data


 66%|██████▋   | 7286/10990 [03:25<01:47, 34.49it/s]

Found 2 URLs in the text


 67%|██████▋   | 7328/10990 [03:27<01:30, 40.63it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/MeixinZhu/Velocity_control.


 67%|██████▋   | 7350/10990 [03:27<01:48, 33.49it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://github.com/zhiyongc/Seattle-Loop-Data
Found DOI URL: https://github.com/liyaguang/DCRNN


 67%|██████▋   | 7365/10990 [03:28<02:06, 28.71it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 67%|██████▋   | 7382/10990 [03:28<01:48, 33.34it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/venktesh22/ExpressLanes_Deep-RL.


 67%|██████▋   | 7391/10990 [03:29<01:52, 32.05it/s]

Found 1 URLs in the text


 67%|██████▋   | 7413/10990 [03:29<01:47, 33.37it/s]

Found 1 URLs in the text


 68%|██████▊   | 7470/10990 [03:31<01:49, 32.18it/s]

Found 1 URLs in the text


 68%|██████▊   | 7480/10990 [03:32<01:36, 36.19it/s]

Found 2 URLs in the text


 68%|██████▊   | 7508/10990 [03:33<02:27, 23.61it/s]

Found 1 URLs in the text


 68%|██████▊   | 7525/10990 [03:34<02:15, 25.66it/s]

Found 5 URLs in the text
Found DOI URL: https://github.com/twintproject/twint.
Found 2 URLs in the text


 69%|██████▊   | 7538/10990 [03:34<02:06, 27.32it/s]

Found 3 URLs in the text
Found 2 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://github.com/gm-tools/gm-tools


 69%|██████▊   | 7546/10990 [03:35<02:17, 25.12it/s]

Found 1 URLs in the text
Found 4 URLs in the text


 69%|██████▉   | 7578/10990 [03:36<01:49, 31.19it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/wangleicuail/EVsharingBasicData


 69%|██████▉   | 7607/10990 [03:37<01:58, 28.59it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/LiTrans/reslogit-example.
Found 1 URLs in the text
Found DOI URL: https://osf.io/n46k5/?view_only=3f6f91f014f942c982d71a72c90c508a.


 69%|██████▉   | 7620/10990 [03:37<01:51, 30.35it/s]

Found 1 URLs in the text


 70%|██████▉   | 7648/10990 [03:38<01:41, 32.77it/s]

Found 1 URLs in the text


 70%|██████▉   | 7661/10990 [03:38<01:42, 32.37it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/RomainLITUD/DGCN_traffic_forecasting.
Found 1 URLs in the text


 70%|██████▉   | 7683/10990 [03:39<02:09, 25.59it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/vita-epfl/RRB.


 70%|███████   | 7701/10990 [03:40<01:38, 33.45it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 70%|███████   | 7717/10990 [03:40<01:56, 28.08it/s]

Found 3 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.3939792.
Found DOI URL: https://doi.org/10.5281/zenodo.1205229.


 70%|███████   | 7728/10990 [03:41<01:38, 33.00it/s]

Found 1 URLs in the text


 70%|███████   | 7742/10990 [03:41<01:53, 28.60it/s]

Found 3 URLs in the text


 70%|███████   | 7745/10990 [03:41<01:54, 28.42it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 71%|███████   | 7761/10990 [03:42<01:48, 29.88it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/Turbo87/utm.


 71%|███████   | 7811/10990 [03:44<01:38, 32.13it/s]

Found 1 URLs in the text


 71%|███████   | 7818/10990 [03:44<01:54, 27.64it/s]

Found 1 URLs in the text


 71%|███████▏  | 7831/10990 [03:44<02:00, 26.13it/s]

Found 1 URLs in the text
Found 3 URLs in the text


 71%|███████▏  | 7849/10990 [03:45<02:06, 24.88it/s]

Found 1 URLs in the text


 72%|███████▏  | 7867/10990 [03:46<01:29, 34.76it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/rgrahn/FMLM.
Found 1 URLs in the text


 72%|███████▏  | 7896/10990 [03:46<01:23, 37.13it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://github.com/snu-adsl/ddpgcn-dataset
Found 2 URLs in the text


 72%|███████▏  | 7918/10990 [03:47<01:38, 31.18it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10–2019/11


 72%|███████▏  | 7926/10990 [03:48<01:38, 31.18it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/FinestStone/AV-Interactions-City-Traffic.
Found 2 URLs in the text


 72%|███████▏  | 7930/10990 [03:48<01:32, 32.98it/s]

Found 1 URLs in the text


 72%|███████▏  | 7941/10990 [03:48<01:51, 27.26it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 72%|███████▏  | 7951/10990 [03:49<02:00, 25.31it/s]

Found 3 URLs in the text
Found 1 URLs in the text


 72%|███████▏  | 7965/10990 [03:49<01:58, 25.55it/s]

Found 3 URLs in the text
Found DOI URL: https://github.com/ChengTraffic/Polynomial-Arrival-Queue-PAQ.
Found 3 URLs in the text


 73%|███████▎  | 7972/10990 [03:49<01:53, 26.69it/s]

Found 2 URLs in the text


 73%|███████▎  | 7987/10990 [03:50<02:22, 21.11it/s]

Found 1 URLs in the text


 73%|███████▎  | 7999/10990 [03:51<02:23, 20.89it/s]

Found 2 URLs in the text


 73%|███████▎  | 8013/10990 [03:51<02:03, 24.07it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.6011886


 73%|███████▎  | 8022/10990 [03:52<02:14, 22.05it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/goaheand/AdapGL-pytorch.


 73%|███████▎  | 8040/10990 [03:52<01:57, 25.14it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/HaoZhouGT/openpilot
Found 2 URLs in the text


 73%|███████▎  | 8047/10990 [03:53<02:05, 23.39it/s]

Found 1 URLs in the text


 73%|███████▎  | 8066/10990 [03:54<01:43, 28.22it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://s-attack.github.io/.


 73%|███████▎  | 8077/10990 [03:54<02:03, 23.67it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.1205229.


 74%|███████▎  | 8090/10990 [03:55<01:57, 24.64it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/HaoZhouGT/openpilot.


 74%|███████▎  | 8093/10990 [03:55<02:18, 20.87it/s]

Found 1 URLs in the text


 74%|███████▍  | 8116/10990 [03:56<02:43, 17.58it/s]

Found 1 URLs in the text


 74%|███████▍  | 8136/10990 [03:57<02:17, 20.80it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/liyaguang/DCRNN.
Found DOI URL: https://github.com/zhiyongc/Graph-Markov-Network.


 74%|███████▍  | 8167/10990 [03:58<01:36, 29.40it/s]

Found 2 URLs in the text


 75%|███████▍  | 8192/10990 [03:59<01:33, 30.04it/s]

Found 22 URLs in the text
Found DOI URL: https://github.com/liyaguang/DCRNN.
Found DOI URL: https://github.com/liyaguang/DCRNN.
Found DOI URL: https://github.com/zhiyongc/Seattle-Loop-Data.
Found DOI URL: https://github.com/lehaifeng/T-GCN/tree/master/data.
Found DOI URL: https://github.com/lucktroy/DeepST/tree/master/data/TaxiBJ.
Found DOI URL: https://github.com/lehaifeng/T-GCN.
Found DOI URL: https://github.com/fivethirtyeight/uber-tlc-foil-response.
Found DOI URL: https://github.com/ivechan/PVCGNZ.
Found DOI URL: https://github.com/ivechan/PVCGN.
Found 1 URLs in the text
Found DOI URL: https://github.com/Yifanny/Generative_CF_Model_Conditioned_On_DS.git.


 75%|███████▍  | 8202/10990 [03:59<01:26, 32.24it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 75%|███████▍  | 8239/10990 [04:01<01:43, 26.71it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/liyaguang/DCRNN.


 75%|███████▌  | 8264/10990 [04:02<01:52, 24.14it/s]

Found 2 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.7426506


 76%|███████▌  | 8299/10990 [04:03<01:46, 25.21it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/liangchunyaobing/RCM-AIRL.


 76%|███████▌  | 8325/10990 [04:04<01:48, 24.47it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/MYL23/MASO-MSF


 76%|███████▌  | 8351/10990 [04:05<01:47, 24.66it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/danlegend5/FitFun.


 76%|███████▌  | 8354/10990 [04:05<01:53, 23.26it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/nytimes/covid-19-data.
Found 2 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.3939793.
Found 1 URLs in the text


 76%|███████▌  | 8369/10990 [04:06<02:23, 18.28it/s]

Found 1 URLs in the text


 76%|███████▋  | 8383/10990 [04:07<02:01, 21.47it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/RadetzkyLi/3P-MSPointNet


 77%|███████▋  | 8408/10990 [04:08<01:34, 27.21it/s]

Found 4 URLs in the text
Found DOI URL: https://github.com/zephyr-data-specs/GMNS;
Found DOI URL: https://asu-trans-ai-lab.github.io/web/index.html.
Found DOI URL: https://github.com/asu-trans-ai-lab/Integrated_modeling_GMNS/tree/main/examples/United_States_network


 77%|███████▋  | 8425/10990 [04:09<01:35, 26.97it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/mcgill-smart-transport/circDMDsp.


 77%|███████▋  | 8429/10990 [04:09<02:12, 19.38it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/zouguojian/STGIN/tree/main/data.


 77%|███████▋  | 8437/10990 [04:10<02:35, 16.39it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 77%|███████▋  | 8466/10990 [04:11<02:42, 15.56it/s]

Found 4 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/Jimmy-Ke404/ORHP.


 77%|███████▋  | 8505/10990 [04:14<01:44, 23.75it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/JoseAngelMartinB/prediction-behavioural-analysis-ml-travel-mode-choice.
Found 1 URLs in the text


 78%|███████▊  | 8521/10990 [04:14<01:41, 24.21it/s]

Found 2 URLs in the text


 78%|███████▊  | 8530/10990 [04:15<02:07, 19.24it/s]

Found 1 URLs in the text
Found DOI URL: https://data.mendeley.com/datasets/j36zgxnc5p/2.
Found 1 URLs in the text
Found DOI URL: https://github.com/sancarlim/Explainable-MP/tree/v1.1.


 78%|███████▊  | 8536/10990 [04:15<02:02, 20.09it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/tongnie/GNN4Flow.


 78%|███████▊  | 8539/10990 [04:15<02:04, 19.64it/s]

Found 3 URLs in the text


 78%|███████▊  | 8548/10990 [04:16<02:04, 19.63it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://github.com/aberke/drones-consumer-privacy.
Found 1 URLs in the text


 78%|███████▊  | 8552/10990 [04:16<01:58, 20.56it/s]

Found 1 URLs in the text
Found DOI URL: https://figshare.com/articles/dataset/EPOS-based_Plans_for_Drones/21432804.


 78%|███████▊  | 8558/10990 [04:16<02:12, 18.40it/s]

Found 3 URLs in the text


 78%|███████▊  | 8565/10990 [04:17<02:16, 17.83it/s]

Found 4 URLs in the text
Found 1 URLs in the text


 78%|███████▊  | 8590/10990 [04:18<01:46, 22.45it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/pabloguarda/isuelogit.
Found DOI URL: https://github.com/pabloguarda/pesuelogit.


 78%|███████▊  | 8599/10990 [04:18<01:30, 26.53it/s]

Found 1 URLs in the text


 78%|███████▊  | 8617/10990 [04:20<02:11, 18.06it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/ethz-coss/voting_traffic.


 78%|███████▊  | 8624/10990 [04:20<01:55, 20.57it/s]

Found 1 URLs in the text


 79%|███████▊  | 8630/10990 [04:20<01:45, 22.31it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/LehmannJonas/2E-MT-VRP-PTW-Instances.


 79%|███████▊  | 8645/10990 [04:21<01:49, 21.46it/s]

Found 2 URLs in the text
Found DOI URL: https://zenodo.org/record/1205229.


 79%|███████▉  | 8668/10990 [04:22<01:28, 26.12it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 79%|███████▉  | 8675/10990 [04:22<01:22, 28.07it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/cliu01/Dataset_LSSAVDARP_ClermontFd.
Found 1 URLs in the text
Found DOI URL: https://github.com/ekinugurel/GPSImpute.


 79%|███████▉  | 8684/10990 [04:23<01:28, 25.92it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/xiaoluban/Modeling-Interpretable-Social-Interactions-for-Pedestrian-Trajectory.


 79%|███████▉  | 8690/10990 [04:23<01:30, 25.35it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/jcpinap/DTSP-DR.


 79%|███████▉  | 8696/10990 [04:23<01:35, 24.10it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/zeonchen/SFMGTL.
Found 1 URLs in the text
Found DOI URL: https://github.com/DimVlachogiannis/HumanLight.git.


 79%|███████▉  | 8712/10990 [04:24<01:18, 29.17it/s]

Found 1 URLs in the text


 80%|███████▉  | 8741/10990 [04:25<01:50, 20.31it/s]

Found 1 URLs in the text


 80%|███████▉  | 8747/10990 [04:25<01:50, 20.24it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/Yiru-Jiao/Explaining-headway-reduction-of-HVs-following-AVs.


 80%|███████▉  | 8758/10990 [04:26<01:51, 20.03it/s]

Found 4 URLs in the text
Found DOI URL: https://github.com/zhiyongc/Seattle-Loop-Data.


 80%|███████▉  | 8765/10990 [04:26<01:45, 21.15it/s]

Found 2 URLs in the text


 80%|███████▉  | 8782/10990 [04:27<02:30, 14.63it/s]

Found 8 URLs in the text
Found DOI URL: https://github.com/CSSEGISandData/COVID-19.


 80%|████████  | 8792/10990 [04:27<01:44, 20.94it/s]

Found 1 URLs in the text


 80%|████████  | 8798/10990 [04:28<01:38, 22.23it/s]

Found 1 URLs in the text


 80%|████████  | 8820/10990 [04:29<01:33, 23.16it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.4121/3ad2db22-ea82-4436-8df5-ebbbdb4aeec6
Found 1 URLs in the text
Found DOI URL: https://github.com/Milad-Haghani/Physical-movement-of-individuals.
Found 1 URLs in the text
Found DOI URL: https://github.com/Milad-Haghani/Decision-making-of-individuals.


 80%|████████  | 8826/10990 [04:29<01:35, 22.72it/s]

Found 1 URLs in the text


 80%|████████  | 8839/10990 [04:30<01:50, 19.42it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/yuki-oyama/fluid-particle-csd.


 81%|████████  | 8857/10990 [04:31<01:33, 22.78it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/RomainLITUD/conflict_resolution_dataset.


 81%|████████  | 8880/10990 [04:31<01:15, 27.82it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/Xuan-1998/LPSim


 81%|████████  | 8894/10990 [04:32<01:24, 24.67it/s]

Found 3 URLs in the text
Found 2 URLs in the text


 81%|████████  | 8907/10990 [04:32<01:02, 33.07it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/bugracnr/TRC-INSTANCES.


 81%|████████  | 8925/10990 [04:33<00:55, 36.99it/s]

Found 2 URLs in the text


 81%|████████▏ | 8948/10990 [04:33<00:43, 46.90it/s]

Found 3 URLs in the text


 82%|████████▏ | 8974/10990 [04:34<00:31, 63.97it/s]

Found 3 URLs in the text


 82%|████████▏ | 8988/10990 [04:34<00:36, 54.69it/s]

Found 1 URLs in the text


 82%|████████▏ | 9025/10990 [04:34<00:25, 76.57it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: http://doi.org/10.4121/uuid:994f0ab2-0fa2-493f-88ad-fbf4eaaf470e


 83%|████████▎ | 9077/10990 [04:35<00:27, 69.01it/s]

Found 1 URLs in the text


 83%|████████▎ | 9131/10990 [04:36<00:24, 77.44it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 83%|████████▎ | 9148/10990 [04:36<00:26, 70.61it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 84%|████████▎ | 9191/10990 [04:37<00:25, 69.24it/s]

Found 1 URLs in the text


 84%|████████▍ | 9218/10990 [04:37<00:35, 49.94it/s]

Found 5 URLs in the text
Found 1 URLs in the text


 84%|████████▍ | 9250/10990 [04:38<00:33, 51.64it/s]

Found 2 URLs in the text


 84%|████████▍ | 9282/10990 [04:39<00:28, 60.88it/s]

Found 1 URLs in the text
Found 3 URLs in the text


 85%|████████▍ | 9289/10990 [04:39<00:30, 55.31it/s]

Found 1 URLs in the text
Found 2 URLs in the text
Found 1 URLs in the text


 85%|████████▍ | 9337/10990 [04:40<00:34, 47.37it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 85%|████████▌ | 9380/10990 [04:40<00:23, 68.97it/s]

Found 5 URLs in the text
Found 1 URLs in the text


 86%|████████▌ | 9427/10990 [04:41<00:24, 63.05it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 86%|████████▌ | 9441/10990 [04:41<00:23, 65.52it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 86%|████████▌ | 9456/10990 [04:42<00:26, 57.05it/s]

Found 1 URLs in the text


 86%|████████▋ | 9499/10990 [04:42<00:21, 70.02it/s]

Found 1 URLs in the text


 87%|████████▋ | 9521/10990 [04:43<00:23, 62.06it/s]

Found 1 URLs in the text


 87%|████████▋ | 9542/10990 [04:43<00:23, 61.47it/s]

Found 2 URLs in the text
Found 3 URLs in the text


 87%|████████▋ | 9590/10990 [04:44<00:22, 61.61it/s]

Found 4 URLs in the text


 87%|████████▋ | 9606/10990 [04:44<00:21, 65.15it/s]

Found 1 URLs in the text


 88%|████████▊ | 9622/10990 [04:44<00:19, 70.72it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 88%|████████▊ | 9649/10990 [04:45<00:18, 71.46it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found 2 URLs in the text
Found 1 URLs in the text


 88%|████████▊ | 9675/10990 [04:45<00:18, 72.54it/s]

Found 2 URLs in the text


 88%|████████▊ | 9704/10990 [04:46<00:20, 62.85it/s]

Found 2 URLs in the text
Found DOI URL: https://github.com/giacfalk/EV_charging_network_accessibility_analysis.
Found 1 URLs in the text
Found 5 URLs in the text
Found 1 URLs in the text


 89%|████████▊ | 9741/10990 [04:46<00:18, 66.58it/s]

Found 4 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.7910/DVN/HNHJHU.
Found 2 URLs in the text
Found 2 URLs in the text
Found DOI URL: https://doi.org/10.7927/H4ZK5DQS.


 89%|████████▉ | 9787/10990 [04:47<00:23, 51.83it/s]

Found 1 URLs in the text
Found 3 URLs in the text


 89%|████████▉ | 9800/10990 [04:47<00:21, 54.81it/s]

Found 1 URLs in the text


 89%|████████▉ | 9811/10990 [04:48<00:31, 37.57it/s]

Found 1 URLs in the text


 89%|████████▉ | 9824/10990 [04:48<00:24, 46.87it/s]

Found 2 URLs in the text


 90%|████████▉ | 9841/10990 [04:49<00:32, 35.49it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.17605/osf.io/t6gjh.
Found 1 URLs in the text


 90%|████████▉ | 9856/10990 [04:49<00:29, 37.87it/s]

Found 7 URLs in the text
Found 1 URLs in the text


 90%|████████▉ | 9874/10990 [04:49<00:24, 45.90it/s]

Found 3 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/openmobilityfoundation/mobility-data-specification


 90%|█████████ | 9899/10990 [04:50<00:18, 60.21it/s]

Found 1 URLs in the text


 90%|█████████ | 9928/10990 [04:50<00:14, 70.88it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 91%|█████████ | 9951/10990 [04:51<00:17, 58.52it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 3 URLs in the text


 91%|█████████ | 9981/10990 [04:51<00:15, 63.76it/s]

Found 1 URLs in the text


 91%|█████████ | 9996/10990 [04:51<00:16, 60.83it/s]

Found 1 URLs in the text
Found DOI URL: https://zenodo.org/record/5102564#.Youu56jMLIU
Found 2 URLs in the text


 91%|█████████▏| 10036/10990 [04:52<00:16, 57.83it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 91%|█████████▏| 10043/10990 [04:52<00:16, 56.50it/s]

Found 2 URLs in the text


 92%|█████████▏| 10081/10990 [04:53<00:14, 61.94it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 92%|█████████▏| 10096/10990 [04:53<00:14, 60.37it/s]

Found 2 URLs in the text
Found DOI URL: https://doi.org/10.5281/zenodo.6587266.
Found DOI URL: https://github.com/keesvanginkel/europe_flood_road_disruption


 92%|█████████▏| 10111/10990 [04:53<00:15, 56.36it/s]

Found 3 URLs in the text
Found 1 URLs in the text


 92%|█████████▏| 10161/10990 [04:54<00:16, 50.56it/s]

Found 2 URLs in the text
Found DOI URL: https://osf.io/95qj6/.
Found DOI URL: https://osf.io/95qj6/
Found 1 URLs in the text


 93%|█████████▎| 10177/10990 [04:55<00:18, 43.06it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 93%|█████████▎| 10196/10990 [04:55<00:18, 42.86it/s]

Found 1 URLs in the text


 93%|█████████▎| 10211/10990 [04:55<00:14, 53.16it/s]

Found 1 URLs in the text


 93%|█████████▎| 10229/10990 [04:56<00:11, 66.34it/s]

Found 1 URLs in the text
Found 2 URLs in the text


 93%|█████████▎| 10257/10990 [04:56<00:12, 58.32it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 93%|█████████▎| 10273/10990 [04:56<00:11, 64.93it/s]

Found 3 URLs in the text
Found 1 URLs in the text
Found 2 URLs in the text
Found 2 URLs in the text


 94%|█████████▎| 10302/10990 [04:57<00:11, 62.12it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://zenodo.org/record/5776533#.YbdNH73MJaQ
Found 1 URLs in the text


 94%|█████████▍| 10309/10990 [04:57<00:11, 57.89it/s]

Found 2 URLs in the text


 94%|█████████▍| 10329/10990 [04:57<00:11, 55.32it/s]

Found 2 URLs in the text
Found DOI URL: https://osf.io/xbhqf/?view_only=9ed604cac4614be8ac318ef71edb5254.


 94%|█████████▍| 10351/10990 [04:58<00:11, 55.02it/s]

Found 2 URLs in the text


 94%|█████████▍| 10358/10990 [04:58<00:11, 53.77it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 94%|█████████▍| 10377/10990 [04:58<00:13, 44.57it/s]

Found 2 URLs in the text


 94%|█████████▍| 10382/10990 [04:59<00:16, 36.32it/s]

Found 1 URLs in the text


 95%|█████████▍| 10393/10990 [04:59<00:15, 39.04it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/ChullEPG/Bumpy-Ride,


 95%|█████████▍| 10414/10990 [04:59<00:11, 49.15it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/futuremobilitylabariel/EnvironmentModel


 95%|█████████▍| 10428/10990 [04:59<00:10, 53.15it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 95%|█████████▌| 10441/10990 [05:00<00:10, 51.31it/s]

Found 1 URLs in the text


 95%|█████████▌| 10469/10990 [05:00<00:09, 54.73it/s]

Found 4 URLs in the text


 95%|█████████▌| 10482/10990 [05:00<00:09, 52.31it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 96%|█████████▌| 10496/10990 [05:01<00:09, 53.18it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://zenodo.org/record/7866504#.ZEo6B3ZByUn


 96%|█████████▌| 10523/10990 [05:01<00:09, 51.27it/s]

Found 1 URLs in the text
Found DOI URL: https://github.com/graphhopper/graphhopper
Found 1 URLs in the text


 96%|█████████▌| 10535/10990 [05:01<00:09, 48.40it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 96%|█████████▌| 10558/10990 [05:02<00:07, 57.63it/s]

Found 1 URLs in the text


 96%|█████████▋| 10579/10990 [05:02<00:06, 61.19it/s]

Found 2 URLs in the text


 96%|█████████▋| 10594/10990 [05:02<00:06, 61.06it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 97%|█████████▋| 10616/10990 [05:03<00:06, 58.76it/s]

Found 2 URLs in the text
Found DOI URL: https://data.mendeley.com/datasets/wfn2c3437n/2.
Found 1 URLs in the text
Found 3 URLs in the text


 97%|█████████▋| 10634/10990 [05:03<00:05, 68.07it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 97%|█████████▋| 10656/10990 [05:04<00:06, 54.83it/s]

Found 1 URLs in the text


 97%|█████████▋| 10668/10990 [05:04<00:07, 43.58it/s]

Found 2 URLs in the text
Found 1 URLs in the text


 97%|█████████▋| 10684/10990 [05:04<00:05, 54.67it/s]

Found 7 URLs in the text
Found 1 URLs in the text


 97%|█████████▋| 10696/10990 [05:04<00:05, 52.84it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/peterberr/sufficcs_mobility/tree/main/outputs.


 98%|█████████▊| 10730/10990 [05:05<00:05, 51.27it/s]

Found 1 URLs in the text
Found 2 URLs in the text


 98%|█████████▊| 10743/10990 [05:05<00:04, 54.17it/s]

Found 3 URLs in the text


 98%|█████████▊| 10761/10990 [05:06<00:05, 44.51it/s]

Found 19 URLs in the text


 98%|█████████▊| 10773/10990 [05:06<00:04, 49.18it/s]

Found 1 URLs in the text
Found 1 URLs in the text


 98%|█████████▊| 10785/10990 [05:06<00:04, 47.13it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 2 URLs in the text


 98%|█████████▊| 10802/10990 [05:07<00:04, 45.50it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 3 URLs in the text


 98%|█████████▊| 10819/10990 [05:07<00:03, 43.50it/s]

Found 3 URLs in the text
Found 1 URLs in the text


 99%|█████████▊| 10829/10990 [05:07<00:03, 43.86it/s]

Found 1 URLs in the text
Found 3 URLs in the text


 99%|█████████▉| 10863/10990 [05:08<00:02, 45.37it/s]

Found 1 URLs in the text


 99%|█████████▉| 10882/10990 [05:08<00:02, 52.45it/s]

Found 2 URLs in the text
Found 2 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text


 99%|█████████▉| 10901/10990 [05:09<00:01, 52.04it/s]

Found 1 URLs in the text


 99%|█████████▉| 10916/10990 [05:09<00:01, 59.48it/s]

Found 1 URLs in the text
Found 1 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://doi.org/10.17632/r49tcwnyb6.1


 99%|█████████▉| 10929/10990 [05:09<00:01, 46.76it/s]

Found 5 URLs in the text


100%|█████████▉| 10953/10990 [05:10<00:00, 50.22it/s]

Found 2 URLs in the text
Found 2 URLs in the text


100%|█████████▉| 10972/10990 [05:10<00:00, 50.59it/s]

Found 2 URLs in the text
Found 1 URLs in the text
Found DOI URL: https://github.com/ibatur/atus_heat_paper.
Found 1 URLs in the text


100%|█████████▉| 10988/10990 [05:10<00:00, 45.69it/s]

Found 1 URLs in the text
Found DOI URL: https://doi.org/10.4121/777220e4-9a78-4653-8780-89c2572f6c81.
Found 1 URLs in the text
Found 2 URLs in the text
Found 1 URLs in the text


100%|██████████| 10990/10990 [05:10<00:00, 35.36it/s]


In [19]:
with open(os.path.join(result_data_folder, 'data-links', f"{data['unique_id'][i]}.txt"), 'w') as file:
	file.write(data_context)

In [None]:
# save the full url list
with open(os.path.join(result_data_folder, 'data-links', 'full-url-list.txt'), 'w') as f:
    for url in full_url_list:
        f.write(url + '\n')

In [64]:
# transform str to json
import json
result = json.loads(result)

result['is_dataset_link']

True

In [17]:
import re

# Pattern to match URLs
url_pattern = r'(https?://[A-Za-z0-9.\-_/]+)(?=$|\s|[.,\)])'
text = "Found DOI URL: https://github.com/RRinTransportation/rr-measure-basic/. The Check https://example.com/path) for more information."

# Find URLs
urls = re.findall(url_pattern, text)

# Clean up trailing parentheses and periods
clean_urls = [re.sub(r'[)\.]+$', '', url) for url in urls]
clean_urls

['https://github.com/RRinTransportation/rr-measure-basic/',
 'https://example.com/path']

In [54]:
import re
from collections import Counter
from urllib.parse import urlparse
import requests

# Load the URL list from the file
file_path = os.path.join(result_data_folder, 'data-links', 'full-url-list.txt')
with open(file_path, 'r') as file:
    urls = file.readlines()

# Remove whitespace characters
urls = [url.strip() for url in urls]

# Extract domain, protocol, and resource type
domains = []
protocols = []
resource_types = []

for url in urls:
    url = correct_url(url)
    if 'doi.org' in url:
        url = fetch_redirected_url_with_selenium(url)
        print(f"Redirected URL: {url}")
    parsed_url = urlparse(url)
    domains.append(parsed_url.netloc)
    protocols.append(parsed_url.scheme)
    
    # Identify resource type (e.g., API, PDF, general website)
    if parsed_url.path.endswith('.pdf'):
        resource_types.append('PDF')
    elif 'api' in parsed_url.path or 'api' in parsed_url.netloc:
        resource_types.append('API')
    else:
        resource_types.append('Website')

# Count occurrences
domain_counts = Counter(domains)
protocol_counts = Counter(protocols)
resource_type_counts = Counter(resource_types)

# Print insights
print("Domain Distribution:")
for domain, count in domain_counts.most_common(10):
    print(f"{domain}: {count}")

print("\nProtocol Analysis:")
for protocol, count in protocol_counts.items():
    print(f"{protocol}: {count}")

print("\nResource Type Analysis:")
for resource_type, count in resource_type_counts.items():
    print(f"{resource_type}: {count}")


Redirected URL: https://osf.io/de8km/
Redirected URL: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VGMDPC
Redirected URL: https://data.mendeley.com/datasets/tfkgpmgf9t/1
Redirected URL: https://figshare.com/articles/software/Data_and_code_for_FDA_analysis/13171862
Redirected URL: https://data.mendeley.com/datasets/5gy6s8trbm/2
Redirected URL: https://figshare.com/articles/dataset/Pedestrian_moral_disengagement_scale_for_sustainable_urban_planning/20311812/2
Redirected URL: https://figshare.com/articles/software/Snapshot_of_GitHub_Repository_to_accompany_paper_b_Evaluation_of_bicycle_sharing_scheme_data_as_a_proxy_for_cycling_mobility_-_how_COVID-19_measures_influenced_cycling_in_Paris_b_/24231706?file=42570667
Redirected URL: https://datadryad.org/stash/dataset/doi:10.25338/B8035D
Redirected URL: https://data.4tu.nl/datasets/e4324d0c-2a82-4f03-8cbb-64dd207cd522
Redirected URL: https://accounts.osf.io/login?service=https%3A%2F%2Fosf.io%2Fx7aer%2F
Redirected U

In [56]:
# Print insights
print("Domain Distribution:")
for domain, count in domain_counts.items():
    print(f"{domain}: {count}")

print("\nProtocol Analysis:")
for protocol, count in protocol_counts.items():
    print(f"{protocol}: {count}")

print("\nResource Type Analysis:")
for resource_type, count in resource_type_counts.items():
    print(f"{resource_type}: {count}")

Domain Distribution:
bitre.gov.au: 1
www.nco.ncep.noaa.gov: 1
cdn.wartsila.com: 1
github.com: 111
cran.r-project.org: 2
www.afrra.org: 1
www.who.int: 4
www.worldometers.info: 1
www.google.com: 9
covid19.ncdc.gov.ng: 1
osf.io: 29
dataverse.harvard.edu: 4
netlipse.eu: 1
www.wko.at: 1
www.aviationreg.ie: 2
www.youtube.com: 5
factfinder.census.gov: 1
www.census.gov: 11
pedbikeinfo.org: 3
www.covidmobilityworks.org: 1
datasketch.github.io: 1
www.sfcta.org: 1
cycleatlanta.org: 1
www.thempo.org: 1
maps.nyc.gov: 1
covid19-dashboard.ages.at: 1
www.norgeshelsa.no: 1
www.innocom.gov.cn: 1
www.qilib.com: 1
www.udparty.com: 1
www.stats.gov.cn: 4
covid19.apple.com: 1
covid19.saglik.gov.tr: 1
drive.google.com: 3
map.baidu.com: 4
dianshi.baidu.com: 1
data.worldbank.org: 3
asrs.arc.nasa.gov: 1
data.covid.umd.edu: 3
openaq.org: 1
www.icao.int: 4
jeodpp.jrc.ec.europa.eu: 1
www.mrlc.gov: 1
www.kaggle.com: 3
www.statistics.gr: 1
data.mendeley.com: 13
figshare.com: 4
www.ncei.noaa.gov: 3
c2smart.engineering

In [31]:
response.url

'https://data.4tu.nl/articles/_/12681650/1'

In [55]:
print("Domain Distribution:")
for domain, count in domain_counts.most_common(20):
    print(f"{domain}: {count}")

Domain Distribution:
github.com: 111
osf.io: 29
www.openstreetmap.org: 20
zenodo.org: 17
data.4tu.nl: 16
lbs.amap.com: 16
www.wjx.cn: 14
data.cityofchicago.org: 14
www1.nyc.gov: 14
data.mendeley.com: 13
www.census.gov: 11
www.google.com: 9
pems.dot.ca.gov: 8
www.12306.cn: 7
www.ssb.no: 7
www.citibikenyc.com: 7
gaia.didichuxing.com: 7
www.wunderground.com: 6
www.transtats.bts.gov: 6
developers.google.com: 6


In [52]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time

def fetch_redirected_url_with_selenium(doi_url):
    try:
        # Set up Selenium WebDriver with headless mode
        options = Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')

        service = Service()  # Ensure ChromeDriver is correctly installed or provide the path
        driver = webdriver.Chrome(service=service, options=options)

        driver.get(doi_url)
        time.sleep(5)  # Allow time for redirection if needed

        redirected_url = driver.current_url
        driver.quit()
        return redirected_url
    except Exception as e:
        return f"Error: {str(e)}"

# Example usage
doi_url = "https://doi.org/10.17605/OSF.IO/DE8KM."

doi_url = correct_url(doi_url)
redirected_url = fetch_redirected_url_with_selenium(doi_url)
print("Redirected URL:", redirected_url)

Redirected URL: https://osf.io/de8km/


In [49]:
import string

def correct_url(url):
    return url.rstrip(string.punctuation)
corrected_url = correct_url(url)
corrected_url

'https://doi.org/10.17632/tfkgpmgf9t.1'