In [2]:
import json
import os
import requests

# input_dir = f"{destination_base}/2013"

In [5]:
def fetchContentAbstract(url):
    headers = {
        'X-ELS-APIKey': API_KEY,
        'Accept': 'application/json'
    }

    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()

        creator = data['abstracts-retrieval-response']['coredata'].get('dc:creator', None)
        if creator is not None:
            creator = data['abstracts-retrieval-response']['coredata']['dc:creator']['author']

        source_id = data['abstracts-retrieval-response']['coredata'].get('source-id', None)
        
        return creator, source_id
    else:
        print(f"Error: {response.status_code}")

In [None]:
input_dir = f"{destination_base}/2013"

files = [f"{input_dir}/{file}" for file in os.listdir(input_dir) if file.endswith(".json")]

for file in files:
    with open(file, 'r') as f:
        data = json.load(f)
        
        for paper in data:
            print("Fetching content abstract for", paper['prism:doi'])

            author, source_id = fetchContentAbstract(paper['prism:url'])
            
            if author is not None:
                paper['author'] = author
            if source_id is not None:
                paper['source-id'] = source_id

        # with open(f"{os.path.splitext(file)[0]}_fetchContentAbstract.json", 'w') as f:
        #     json.dump(data, f, indent=4)
        
        break

In [3]:
import os
import json
import requests
import time
import random
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import tqdm

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s: %(message)s',
    handlers=[
        logging.FileHandler('content_abstract_fetch.log', mode='w'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

In [4]:
class RateLimitHandler:
    def __init__(self, api_key, max_retries=3):
        self.api_key = api_key
        self.max_retries = max_retries
        self.session = requests.Session()

    def fetch_with_retry(self, url, doi):
        headers = {
            'X-ELS-APIKey': self.api_key,
            'Accept': 'application/json'
        }

        for attempt in range(self.max_retries):
            try:
                response = self.session.get(url, headers=headers, timeout=10)
                
                if response.status_code == 429:
                    # Exponential backoff with jitter
                    wait_time = min(60, (2 ** attempt) + random.uniform(0, 1))
                    logger.warning(f"Rate limited. Waiting {wait_time} seconds for DOI {doi}...")
                    time.sleep(wait_time)
                    continue
                
                if response.status_code != 200:
                    logger.error(f"Unexpected status code {response.status_code} for DOI {doi}")
                    return None
                
                return response.json()
            
            except requests.exceptions.RequestException as e:
                logger.error(f"Error fetching {url} for DOI {doi} (Attempt {attempt + 1}): {e}")
                
                # If it's the last attempt, return None
                if attempt == self.max_retries - 1:
                    return None
                
                # Wait a bit before retrying
                time.sleep(1)
        
        return None

In [5]:
def fetchContentAbstract(paper, rate_limiter):
    try:
        doi = paper['prism:doi']
        url = paper['prism:url']
        
        data = rate_limiter.fetch_with_retry(url, doi)
        
        if data is None:
            logger.warning(f"No data retrieved for DOI: {doi}")
            return None
        
        try:
            # More robust data extraction with extensive logging
            creator = data.get('abstracts-retrieval-response', {}) \
                        .get('coredata', {}) \
                        .get('dc:creator', None)
            
            if creator is not None:
                # Handle different possible structures of creator
                if isinstance(creator, dict):
                    creator = creator.get('author')
                elif isinstance(creator, list):
                    creator = [a.get('author', a) for a in creator]
            
            source_id = data.get('abstracts-retrieval-response', {}) \
                           .get('coredata', {}) \
                           .get('source-id', None)
            
            return {
                'doi': doi,
                'author': creator,
                'source_id': source_id
            }
        
        except Exception as parse_error:
            logger.error(f"Error parsing data for DOI {doi}: {parse_error}")
            logger.error(f"Problematic data structure: {data}")
            return None
    
    except Exception as e:
        logger.error(f"Processing error for DOI {doi}: {e}")
        return None

In [6]:
def process_file(file_path, api_key, max_workers=3):
    # Read input file
    with open(file_path, 'r') as f:
        data = json.load(f)
    
    # Initialize rate limiter
    rate_limiter = RateLimitHandler(api_key)
    
    # Track processed papers
    processed_data = data.copy()
    
    logger.info(f"Processing {len(data)} papers from {file_path}")
    
    # Track successful and failed retrievals
    successful_retrievals = 0
    failed_retrievals = 0
    
    # Use ThreadPoolExecutor for concurrent processing
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Progress bar
        progress_bar = tqdm.tqdm(total=len(data), desc="Fetching Abstracts", unit="paper")
        
        # Submit all tasks
        futures = {
            executor.submit(fetchContentAbstract, paper, rate_limiter): paper 
            for paper in data
        }
        
        # Process results as they complete
        for future in as_completed(futures):
            original_paper = futures[future]
            result = future.result()
            
            # Update progress
            progress_bar.update(1)
            
            if result:
                # Find and update the corresponding paper
                for updated_paper in processed_data:
                    if updated_paper['prism:doi'] == result['doi']:
                        if result['author'] is not None:
                            updated_paper['author'] = result['author']
                        if result['source_id'] is not None:
                            updated_paper['source-id'] = result['source_id']
                        successful_retrievals += 1
                        break
            else:
                failed_retrievals += 1
        
        # Close progress bar
        progress_bar.close()
    
    # Write updated data
    output_file = f"{os.path.splitext(file_path)[0]}_fetchContentAbstract.json"
    with open(output_file, 'w') as f:
        json.dump(processed_data, f, indent=4)
    
    # Log detailed retrieval statistics
    logger.info(f"Retrieval Statistics for {file_path}:")
    logger.info(f"Total Papers: {len(data)}")
    logger.info(f"Successful Retrievals: {successful_retrievals}")
    logger.info(f"Failed Retrievals: {failed_retrievals}")
    logger.info(f"Success Rate: {successful_retrievals/len(data)*100:.2f}%")
    
    print(f"Completed processing: {output_file}")
    return output_file

In [7]:
def main(input_dir, api_key):
    start_time = time.time()
    
    files = [f"{input_dir}/{file}" for file in os.listdir(input_dir) if file.endswith(".json")]
    
    for file in files:
        print(f"Processing {file}")
        process_file(file, api_key)
    
    end_time = time.time()
    print(f"Total processing time: {end_time - start_time:.2f} seconds")

In [None]:
destination_base = "JsonFile_Scrape"
YEAR = ["2015","2016","2017","2018","2019","2020","2021","2022","2023"]
MAP_APIKEY = {"2015":"",
              "2016":"",
              "2017":"",
              "2018":"",
              "2019":"",
              "2020":"",
              "2021":"",
              "2022":"",
              "2023":""}

for year in YEAR:
    input_dir = f"{destination_base}/{year}"
    API_KEY = MAP_APIKEY[year]
    main(input_dir, API_KEY)

2024-12-05 07:08:22,476 - INFO: Processing 1250 papers from JsonFile_Scrape/2015/2015_conference_paper_not_proceeding.json


Processing JsonFile_Scrape/2015/2015_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:07<00:00,  6.68paper/s]
2024-12-05 07:11:29,769 - INFO: Retrieval Statistics for JsonFile_Scrape/2015/2015_conference_paper_not_proceeding.json:
2024-12-05 07:11:29,770 - INFO: Total Papers: 1250
2024-12-05 07:11:29,771 - INFO: Successful Retrievals: 1250
2024-12-05 07:11:29,772 - INFO: Failed Retrievals: 0
2024-12-05 07:11:29,774 - INFO: Success Rate: 100.00%
2024-12-05 07:11:29,841 - INFO: Processing 1250 papers from JsonFile_Scrape/2015/2015_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2015/2015_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2015/2015_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:04<00:00,  6.79paper/s]
2024-12-05 07:14:34,243 - INFO: Retrieval Statistics for JsonFile_Scrape/2015/2015_conference_paper_proceeding.json:
2024-12-05 07:14:34,244 - INFO: Total Papers: 1250
2024-12-05 07:14:34,245 - INFO: Successful Retrievals: 1250
2024-12-05 07:14:34,247 - INFO: Failed Retrievals: 0
2024-12-05 07:14:34,248 - INFO: Success Rate: 100.00%
2024-12-05 07:14:34,307 - INFO: Processing 1250 papers from JsonFile_Scrape/2015/2015_not_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2015/2015_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2015/2015_not_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:06<00:00,  6.71paper/s]
2024-12-05 07:17:40,827 - INFO: Retrieval Statistics for JsonFile_Scrape/2015/2015_not_conference_paper_not_proceeding.json:
2024-12-05 07:17:40,828 - INFO: Total Papers: 1250
2024-12-05 07:17:40,829 - INFO: Successful Retrievals: 1250
2024-12-05 07:17:40,830 - INFO: Failed Retrievals: 0
2024-12-05 07:17:40,830 - INFO: Success Rate: 100.00%
2024-12-05 07:17:40,885 - INFO: Processing 1250 papers from JsonFile_Scrape/2015/2015_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2015/2015_not_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2015/2015_not_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [02:57<00:00,  7.05paper/s]
2024-12-05 07:20:38,504 - INFO: Retrieval Statistics for JsonFile_Scrape/2015/2015_not_conference_paper_proceeding.json:
2024-12-05 07:20:38,506 - INFO: Total Papers: 1250
2024-12-05 07:20:38,507 - INFO: Successful Retrievals: 1250
2024-12-05 07:20:38,508 - INFO: Failed Retrievals: 0
2024-12-05 07:20:38,509 - INFO: Success Rate: 100.00%
2024-12-05 07:20:38,570 - INFO: Processing 1250 papers from JsonFile_Scrape/2016/2016_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2015/2015_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 736.08 seconds
Processing JsonFile_Scrape/2016/2016_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:03<00:00,  6.80paper/s]
2024-12-05 07:23:42,793 - INFO: Retrieval Statistics for JsonFile_Scrape/2016/2016_conference_paper_not_proceeding.json:
2024-12-05 07:23:42,794 - INFO: Total Papers: 1250
2024-12-05 07:23:42,795 - INFO: Successful Retrievals: 1250
2024-12-05 07:23:42,796 - INFO: Failed Retrievals: 0
2024-12-05 07:23:42,798 - INFO: Success Rate: 100.00%
2024-12-05 07:23:42,902 - INFO: Processing 1250 papers from JsonFile_Scrape/2016/2016_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2016/2016_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2016/2016_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:03<00:00,  6.81paper/s]
2024-12-05 07:26:46,568 - INFO: Retrieval Statistics for JsonFile_Scrape/2016/2016_conference_paper_proceeding.json:
2024-12-05 07:26:46,570 - INFO: Total Papers: 1250
2024-12-05 07:26:46,571 - INFO: Successful Retrievals: 1250
2024-12-05 07:26:46,572 - INFO: Failed Retrievals: 0
2024-12-05 07:26:46,573 - INFO: Success Rate: 100.00%
2024-12-05 07:26:46,640 - INFO: Processing 1250 papers from JsonFile_Scrape/2016/2016_not_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2016/2016_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2016/2016_not_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:10<00:00,  6.55paper/s]
2024-12-05 07:29:57,860 - INFO: Retrieval Statistics for JsonFile_Scrape/2016/2016_not_conference_paper_not_proceeding.json:
2024-12-05 07:29:57,862 - INFO: Total Papers: 1250
2024-12-05 07:29:57,863 - INFO: Successful Retrievals: 1250
2024-12-05 07:29:57,864 - INFO: Failed Retrievals: 0
2024-12-05 07:29:57,867 - INFO: Success Rate: 100.00%
2024-12-05 07:29:57,930 - INFO: Processing 1250 papers from JsonFile_Scrape/2016/2016_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2016/2016_not_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2016/2016_not_conference_paper_proceeding.json


Fetching Abstracts:  30%|██▉       | 371/1250 [00:54<01:57,  7.46paper/s]2024-12-05 07:30:52,634 - ERROR: Unexpected status code 500 for DOI 10.1109/NANO.2016.7751290
Fetching Abstracts: 100%|██████████| 1250/1250 [02:59<00:00,  6.97paper/s]
2024-12-05 07:32:57,450 - INFO: Retrieval Statistics for JsonFile_Scrape/2016/2016_not_conference_paper_proceeding.json:
2024-12-05 07:32:57,452 - INFO: Total Papers: 1250
2024-12-05 07:32:57,453 - INFO: Successful Retrievals: 1249
2024-12-05 07:32:57,454 - INFO: Failed Retrievals: 1
2024-12-05 07:32:57,456 - INFO: Success Rate: 99.92%
2024-12-05 07:32:57,526 - INFO: Processing 1250 papers from JsonFile_Scrape/2017/2017_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2016/2016_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 738.95 seconds
Processing JsonFile_Scrape/2017/2017_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:02<00:00,  6.84paper/s]
2024-12-05 07:36:00,471 - INFO: Retrieval Statistics for JsonFile_Scrape/2017/2017_conference_paper_not_proceeding.json:
2024-12-05 07:36:00,473 - INFO: Total Papers: 1250
2024-12-05 07:36:00,474 - INFO: Successful Retrievals: 1250
2024-12-05 07:36:00,475 - INFO: Failed Retrievals: 0
2024-12-05 07:36:00,477 - INFO: Success Rate: 100.00%
2024-12-05 07:36:00,547 - INFO: Processing 1250 papers from JsonFile_Scrape/2017/2017_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2017/2017_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2017/2017_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:03<00:00,  6.83paper/s]
2024-12-05 07:39:03,872 - INFO: Retrieval Statistics for JsonFile_Scrape/2017/2017_conference_paper_proceeding.json:
2024-12-05 07:39:03,874 - INFO: Total Papers: 1250
2024-12-05 07:39:03,875 - INFO: Successful Retrievals: 1250
2024-12-05 07:39:03,876 - INFO: Failed Retrievals: 0
2024-12-05 07:39:03,878 - INFO: Success Rate: 100.00%
2024-12-05 07:39:03,948 - INFO: Processing 1250 papers from JsonFile_Scrape/2017/2017_not_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2017/2017_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2017/2017_not_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [03:06<00:00,  6.69paper/s]
2024-12-05 07:42:11,028 - INFO: Retrieval Statistics for JsonFile_Scrape/2017/2017_not_conference_paper_not_proceeding.json:
2024-12-05 07:42:11,029 - INFO: Total Papers: 1250
2024-12-05 07:42:11,030 - INFO: Successful Retrievals: 1250
2024-12-05 07:42:11,034 - INFO: Failed Retrievals: 0
2024-12-05 07:42:11,035 - INFO: Success Rate: 100.00%
2024-12-05 07:42:11,096 - INFO: Processing 1250 papers from JsonFile_Scrape/2017/2017_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2017/2017_not_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2017/2017_not_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1250/1250 [02:58<00:00,  6.99paper/s]
2024-12-05 07:45:10,244 - INFO: Retrieval Statistics for JsonFile_Scrape/2017/2017_not_conference_paper_proceeding.json:
2024-12-05 07:45:10,245 - INFO: Total Papers: 1250
2024-12-05 07:45:10,247 - INFO: Successful Retrievals: 1250
2024-12-05 07:45:10,248 - INFO: Failed Retrievals: 0
2024-12-05 07:45:10,249 - INFO: Success Rate: 100.00%
2024-12-05 07:45:10,318 - INFO: Processing 1155 papers from JsonFile_Scrape/2018/2018_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2017/2017_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 732.79 seconds
Processing JsonFile_Scrape/2018/2018_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1155/1155 [02:54<00:00,  6.60paper/s]
2024-12-05 07:48:05,556 - INFO: Retrieval Statistics for JsonFile_Scrape/2018/2018_conference_paper_not_proceeding.json:
2024-12-05 07:48:05,558 - INFO: Total Papers: 1155
2024-12-05 07:48:05,560 - INFO: Successful Retrievals: 1155
2024-12-05 07:48:05,561 - INFO: Failed Retrievals: 0
2024-12-05 07:48:05,562 - INFO: Success Rate: 100.00%
2024-12-05 07:48:05,621 - INFO: Processing 1032 papers from JsonFile_Scrape/2018/2018_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2018/2018_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2018/2018_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1032/1032 [02:48<00:00,  6.14paper/s]
2024-12-05 07:50:53,855 - INFO: Retrieval Statistics for JsonFile_Scrape/2018/2018_conference_paper_proceeding.json:
2024-12-05 07:50:53,856 - INFO: Total Papers: 1032
2024-12-05 07:50:53,858 - INFO: Successful Retrievals: 1032
2024-12-05 07:50:53,859 - INFO: Failed Retrievals: 0
2024-12-05 07:50:53,860 - INFO: Success Rate: 100.00%
2024-12-05 07:50:53,928 - INFO: Processing 1146 papers from JsonFile_Scrape/2018/2018_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2018/2018_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2018/2018_not_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1146/1146 [02:43<00:00,  7.03paper/s]
2024-12-05 07:53:37,262 - INFO: Retrieval Statistics for JsonFile_Scrape/2018/2018_not_conference_paper_proceeding.json:
2024-12-05 07:53:37,264 - INFO: Total Papers: 1146
2024-12-05 07:53:37,265 - INFO: Successful Retrievals: 1146
2024-12-05 07:53:37,266 - INFO: Failed Retrievals: 0
2024-12-05 07:53:37,267 - INFO: Success Rate: 100.00%
2024-12-05 07:53:37,339 - INFO: Processing 1188 papers from JsonFile_Scrape/2019/2019_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2018/2018_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 507.02 seconds
Processing JsonFile_Scrape/2019/2019_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1188/1188 [02:54<00:00,  6.82paper/s]
2024-12-05 07:56:31,721 - INFO: Retrieval Statistics for JsonFile_Scrape/2019/2019_conference_paper_not_proceeding.json:
2024-12-05 07:56:31,722 - INFO: Total Papers: 1188
2024-12-05 07:56:31,724 - INFO: Successful Retrievals: 1188
2024-12-05 07:56:31,725 - INFO: Failed Retrievals: 0
2024-12-05 07:56:31,726 - INFO: Success Rate: 100.00%
2024-12-05 07:56:31,794 - INFO: Processing 911 papers from JsonFile_Scrape/2019/2019_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2019/2019_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2019/2019_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 911/911 [02:13<00:00,  6.80paper/s]
2024-12-05 07:58:45,941 - INFO: Retrieval Statistics for JsonFile_Scrape/2019/2019_conference_paper_proceeding.json:
2024-12-05 07:58:45,943 - INFO: Total Papers: 911
2024-12-05 07:58:45,945 - INFO: Successful Retrievals: 911
2024-12-05 07:58:45,947 - INFO: Failed Retrievals: 0
2024-12-05 07:58:45,948 - INFO: Success Rate: 100.00%
2024-12-05 07:58:46,012 - INFO: Processing 1147 papers from JsonFile_Scrape/2019/2019_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2019/2019_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2019/2019_not_conference_paper_proceeding.json


Fetching Abstracts:  47%|████▋     | 538/1147 [01:14<02:12,  4.58paper/s]2024-12-05 08:00:10,490 - ERROR: Error fetching https://api.elsevier.com/content/abstract/scopus_id/85085185551 for DOI 10.1109/ICIIP47207.2019.8985752 (Attempt 1): HTTPSConnectionPool(host='api.elsevier.com', port=443): Read timed out. (read timeout=10)
2024-12-05 08:00:10,676 - ERROR: Error fetching https://api.elsevier.com/content/abstract/scopus_id/85085175450 for DOI 10.1109/ICIIP47207.2019.8985744 (Attempt 1): HTTPSConnectionPool(host='api.elsevier.com', port=443): Read timed out. (read timeout=10)
2024-12-05 08:00:10,770 - ERROR: Error fetching https://api.elsevier.com/content/abstract/scopus_id/85085164872 for DOI 10.1109/ICIIP47207.2019.8985752 (Attempt 1): HTTPSConnectionPool(host='api.elsevier.com', port=443): Read timed out. (read timeout=10)
Fetching Abstracts: 100%|██████████| 1147/1147 [02:58<00:00,  6.41paper/s]
2024-12-05 08:01:45,055 - INFO: Retrieval Statistics for JsonFile_Scrape/2019/2019_not_

Completed processing: JsonFile_Scrape/2019/2019_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 487.79 seconds
Processing JsonFile_Scrape/2020/2020_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1168/1168 [02:59<00:00,  6.51paper/s]
2024-12-05 08:04:44,886 - INFO: Retrieval Statistics for JsonFile_Scrape/2020/2020_conference_paper_not_proceeding.json:
2024-12-05 08:04:44,888 - INFO: Total Papers: 1168
2024-12-05 08:04:44,889 - INFO: Successful Retrievals: 1168
2024-12-05 08:04:44,890 - INFO: Failed Retrievals: 0
2024-12-05 08:04:44,891 - INFO: Success Rate: 100.00%
2024-12-05 08:04:44,957 - INFO: Processing 1032 papers from JsonFile_Scrape/2020/2020_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2020/2020_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2020/2020_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1032/1032 [02:35<00:00,  6.64paper/s]
2024-12-05 08:07:20,640 - INFO: Retrieval Statistics for JsonFile_Scrape/2020/2020_conference_paper_proceeding.json:
2024-12-05 08:07:20,642 - INFO: Total Papers: 1032
2024-12-05 08:07:20,643 - INFO: Successful Retrievals: 1032
2024-12-05 08:07:20,644 - INFO: Failed Retrievals: 0
2024-12-05 08:07:20,645 - INFO: Success Rate: 100.00%
2024-12-05 08:07:20,715 - INFO: Processing 1145 papers from JsonFile_Scrape/2020/2020_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2020/2020_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2020/2020_not_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1145/1145 [02:45<00:00,  6.92paper/s]
2024-12-05 08:10:06,431 - INFO: Retrieval Statistics for JsonFile_Scrape/2020/2020_not_conference_paper_proceeding.json:
2024-12-05 08:10:06,433 - INFO: Total Papers: 1145
2024-12-05 08:10:06,434 - INFO: Successful Retrievals: 1145
2024-12-05 08:10:06,435 - INFO: Failed Retrievals: 0
2024-12-05 08:10:06,437 - INFO: Success Rate: 100.00%
2024-12-05 08:10:06,518 - INFO: Processing 1224 papers from JsonFile_Scrape/2021/2021_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2020/2020_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 501.38 seconds
Processing JsonFile_Scrape/2021/2021_conference_paper_not_proceeding.json


Fetching Abstracts:   3%|▎         | 41/1224 [00:09<04:22,  4.50paper/s]2024-12-05 08:10:16,610 - ERROR: Error fetching https://api.elsevier.com/content/abstract/scopus_id/85127439488 for DOI 10.1075/pc.20027.hei (Attempt 1): HTTPSConnectionPool(host='api.elsevier.com', port=443): Read timed out. (read timeout=10)
Fetching Abstracts: 100%|██████████| 1224/1224 [03:08<00:00,  6.48paper/s]
2024-12-05 08:13:15,740 - INFO: Retrieval Statistics for JsonFile_Scrape/2021/2021_conference_paper_not_proceeding.json:
2024-12-05 08:13:15,742 - INFO: Total Papers: 1224
2024-12-05 08:13:15,743 - INFO: Successful Retrievals: 1224
2024-12-05 08:13:15,745 - INFO: Failed Retrievals: 0
2024-12-05 08:13:15,746 - INFO: Success Rate: 100.00%
2024-12-05 08:13:15,814 - INFO: Processing 1085 papers from JsonFile_Scrape/2021/2021_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2021/2021_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2021/2021_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1085/1085 [02:39<00:00,  6.80paper/s]
2024-12-05 08:15:55,546 - INFO: Retrieval Statistics for JsonFile_Scrape/2021/2021_conference_paper_proceeding.json:
2024-12-05 08:15:55,550 - INFO: Total Papers: 1085
2024-12-05 08:15:55,552 - INFO: Successful Retrievals: 1085
2024-12-05 08:15:55,554 - INFO: Failed Retrievals: 0
2024-12-05 08:15:55,556 - INFO: Success Rate: 100.00%
2024-12-05 08:15:55,624 - INFO: Processing 1248 papers from JsonFile_Scrape/2021/2021_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2021/2021_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2021/2021_not_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1248/1248 [02:59<00:00,  6.94paper/s]
2024-12-05 08:18:55,626 - INFO: Retrieval Statistics for JsonFile_Scrape/2021/2021_not_conference_paper_proceeding.json:
2024-12-05 08:18:55,627 - INFO: Total Papers: 1248
2024-12-05 08:18:55,629 - INFO: Successful Retrievals: 1248
2024-12-05 08:18:55,630 - INFO: Failed Retrievals: 0
2024-12-05 08:18:55,631 - INFO: Success Rate: 100.00%
2024-12-05 08:18:55,708 - INFO: Processing 1227 papers from JsonFile_Scrape/2022/2022_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2021/2021_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 529.19 seconds
Processing JsonFile_Scrape/2022/2022_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1227/1227 [03:06<00:00,  6.59paper/s]
2024-12-05 08:22:02,309 - INFO: Retrieval Statistics for JsonFile_Scrape/2022/2022_conference_paper_not_proceeding.json:
2024-12-05 08:22:02,310 - INFO: Total Papers: 1227
2024-12-05 08:22:02,312 - INFO: Successful Retrievals: 1227
2024-12-05 08:22:02,313 - INFO: Failed Retrievals: 0
2024-12-05 08:22:02,314 - INFO: Success Rate: 100.00%
2024-12-05 08:22:02,382 - INFO: Processing 1073 papers from JsonFile_Scrape/2022/2022_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2022/2022_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2022/2022_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1073/1073 [02:38<00:00,  6.79paper/s]
2024-12-05 08:24:40,611 - INFO: Retrieval Statistics for JsonFile_Scrape/2022/2022_conference_paper_proceeding.json:
2024-12-05 08:24:40,613 - INFO: Total Papers: 1073
2024-12-05 08:24:40,614 - INFO: Successful Retrievals: 1073
2024-12-05 08:24:40,615 - INFO: Failed Retrievals: 0
2024-12-05 08:24:40,616 - INFO: Success Rate: 100.00%
2024-12-05 08:24:40,679 - INFO: Processing 1239 papers from JsonFile_Scrape/2022/2022_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2022/2022_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2022/2022_not_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1239/1239 [02:55<00:00,  7.04paper/s]
2024-12-05 08:27:36,866 - INFO: Retrieval Statistics for JsonFile_Scrape/2022/2022_not_conference_paper_proceeding.json:
2024-12-05 08:27:36,868 - INFO: Total Papers: 1239
2024-12-05 08:27:36,869 - INFO: Successful Retrievals: 1239
2024-12-05 08:27:36,870 - INFO: Failed Retrievals: 0
2024-12-05 08:27:36,871 - INFO: Success Rate: 100.00%
2024-12-05 08:27:36,947 - INFO: Processing 1219 papers from JsonFile_Scrape/2023/2023_conference_paper_not_proceeding.json


Completed processing: JsonFile_Scrape/2022/2022_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 521.24 seconds
Processing JsonFile_Scrape/2023/2023_conference_paper_not_proceeding.json


Fetching Abstracts: 100%|██████████| 1219/1219 [03:12<00:00,  6.34paper/s]
2024-12-05 08:30:49,411 - INFO: Retrieval Statistics for JsonFile_Scrape/2023/2023_conference_paper_not_proceeding.json:
2024-12-05 08:30:49,412 - INFO: Total Papers: 1219
2024-12-05 08:30:49,414 - INFO: Successful Retrievals: 1219
2024-12-05 08:30:49,416 - INFO: Failed Retrievals: 0
2024-12-05 08:30:49,418 - INFO: Success Rate: 100.00%
2024-12-05 08:30:49,478 - INFO: Processing 1143 papers from JsonFile_Scrape/2023/2023_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2023/2023_conference_paper_not_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2023/2023_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1143/1143 [02:56<00:00,  6.47paper/s]
2024-12-05 08:33:46,428 - INFO: Retrieval Statistics for JsonFile_Scrape/2023/2023_conference_paper_proceeding.json:
2024-12-05 08:33:46,430 - INFO: Total Papers: 1143
2024-12-05 08:33:46,431 - INFO: Successful Retrievals: 1143
2024-12-05 08:33:46,432 - INFO: Failed Retrievals: 0
2024-12-05 08:33:46,433 - INFO: Success Rate: 100.00%
2024-12-05 08:33:46,502 - INFO: Processing 1246 papers from JsonFile_Scrape/2023/2023_not_conference_paper_proceeding.json


Completed processing: JsonFile_Scrape/2023/2023_conference_paper_proceeding_fetchContentAbstract.json
Processing JsonFile_Scrape/2023/2023_not_conference_paper_proceeding.json


Fetching Abstracts: 100%|██████████| 1246/1246 [03:00<00:00,  6.89paper/s]
2024-12-05 08:36:47,519 - INFO: Retrieval Statistics for JsonFile_Scrape/2023/2023_not_conference_paper_proceeding.json:
2024-12-05 08:36:47,521 - INFO: Total Papers: 1246
2024-12-05 08:36:47,522 - INFO: Successful Retrievals: 1246
2024-12-05 08:36:47,524 - INFO: Failed Retrievals: 0
2024-12-05 08:36:47,525 - INFO: Success Rate: 100.00%


Completed processing: JsonFile_Scrape/2023/2023_not_conference_paper_proceeding_fetchContentAbstract.json
Total processing time: 550.65 seconds
